Python 我如何知道哪些预测是针对哪些数据的?那么,如何评估预测呢?

Python 我如何知道哪些预测是针对哪些数据的?那么,如何评估预测呢?,python,tensorflow,machine-learning,keras,deep-learning,Python,Tensorflow,Machine Learning,Keras,Deep Learning,下面的代码使用人工神经网络(ANN)预测CSV文件中的类 如果我想找到测试数据上的预测,我是否执行以下操作 predictions = model.predict(X_test) # round predictions rounded = [round(x[0]) for x in predictions] prediction = pd.DataFrame(rounded,columns=['predictions']).to_csv('prediction.c

下面的代码使用人工神经网络(ANN)预测CSV文件中的类

如果我想找到测试数据上的预测,我是否执行以下操作

    predictions = model.predict(X_test)
    # round predictions
    rounded = [round(x[0]) for x in predictions]
    prediction = pd.DataFrame(rounded,columns=['predictions']).to_csv('prediction.csv')
在本例中,我将有一个带有预测列表(0和1)的CSV文件。我的问题是:

  • 我如何知道预测所引用的数据(行)

  • 我如何才能找到结果预测的准确性

    import numpy as np 
    import pandas as pd 
    from keras.layers import Dense, Dropout, BatchNormalization, Activation
    import keras.models as md
    import keras.layers.core as core
    import keras.utils.np_utils as kutils
    import keras.layers.convolutional as conv
    
    from keras.layers import MaxPool2D
    
    from subprocess import check_output
    dataset = pd.read_csv('mutation-train.csv')
    
    dataset = dataset[['CDS_Mutation',
                       'Primary_Tissue',
                        'Genomic',
                        'Gene_ID',
                        'Official_Symbol',
                        'Histology']]
    
    X = dataset.iloc[:,0:5].values
    y = dataset.iloc[:,5].values
    
    # Encoding categorical data
    from sklearn.preprocessing import LabelEncoder, OneHotEncoder
    labelencoder_X_0 = LabelEncoder()
    X[:, 0] = labelencoder_X_0.fit_transform(X[:, 0])
    labelencoder_X_1 = LabelEncoder()
    X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
    labelencoder_X_2= LabelEncoder()
    X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
    labelencoder_X_4= LabelEncoder()
    X[:, 4] = labelencoder_X_4.fit_transform(X[:, 4])
    
    X = X.astype(float)
    labelencoder_y= LabelEncoder()
    y = labelencoder_y.fit_transform(y)
    
    onehotencoder0 = OneHotEncoder(categorical_features = [0])
    X = onehotencoder0.fit_transform(X).toarray()
    X = X[:,0:]
    onehotencoder1 = OneHotEncoder(categorical_features = [1])
    X = onehotencoder1.fit_transform(X).toarray()
    X = X[:,0:]
    onehotencoder2 = OneHotEncoder(categorical_features = [2])
    X = onehotencoder2.fit_transform(X).toarray()
    X = X[:,0:]
    onehotencoder4 = OneHotEncoder(categorical_features = [4])
    X = onehotencoder4.fit_transform(X).toarray()
    X = X[:,0:]
    
    # Splitting the dataset training and test sets
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)
    
    # Feature scaling
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    
    # Evaluating the ANN
    from sklearn.model_selection import cross_val_score
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.layers import Dropout
    
    model=Sequential()
    model.add(Dense(32, activation = 'relu', input_shape=(X.shape[1],)))
    model.add(Dense(16, activation = 'relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ["accuracy"])
    
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # Fit the model
    model.fit(X,y, epochs=3, batch_size=1)
    
    # Evaluate the model
    scores = model.evaluate(X,y)
    print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    
    # Calculate predictions
    predictions = model.predict(X)
    prediction = pd.DataFrame(predictions,columns=['predictions']).to_csv('prediction.csv')
    
谢谢

我如何知道预测所引用的数据(行)

预测的长度和顺序与输入相同

我如何才能找到结果预测的准确性

import numpy as np 
import pandas as pd 
from keras.layers import Dense, Dropout, BatchNormalization, Activation
import keras.models as md
import keras.layers.core as core
import keras.utils.np_utils as kutils
import keras.layers.convolutional as conv

from keras.layers import MaxPool2D

from subprocess import check_output
dataset = pd.read_csv('mutation-train.csv')

dataset = dataset[['CDS_Mutation',
                   'Primary_Tissue',
                    'Genomic',
                    'Gene_ID',
                    'Official_Symbol',
                    'Histology']]

X = dataset.iloc[:,0:5].values
y = dataset.iloc[:,5].values

# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_0 = LabelEncoder()
X[:, 0] = labelencoder_X_0.fit_transform(X[:, 0])
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2= LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
labelencoder_X_4= LabelEncoder()
X[:, 4] = labelencoder_X_4.fit_transform(X[:, 4])

X = X.astype(float)
labelencoder_y= LabelEncoder()
y = labelencoder_y.fit_transform(y)

onehotencoder0 = OneHotEncoder(categorical_features = [0])
X = onehotencoder0.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder1 = OneHotEncoder(categorical_features = [1])
X = onehotencoder1.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder2 = OneHotEncoder(categorical_features = [2])
X = onehotencoder2.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder4 = OneHotEncoder(categorical_features = [4])
X = onehotencoder4.fit_transform(X).toarray()
X = X[:,0:]

# Splitting the dataset training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Evaluating the ANN
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

model=Sequential()
model.add(Dense(32, activation = 'relu', input_shape=(X.shape[1],)))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ["accuracy"])

# Compile model
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Fit the model
model.fit(X,y, epochs=3, batch_size=1)

# Evaluate the model
scores = model.evaluate(X,y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Calculate predictions
predictions = model.predict(X)
prediction = pd.DataFrame(predictions,columns=['predictions']).to_csv('prediction.csv')
将预测与您输入的基本事实进行比较。将正确的预测除以输入集的大小

如果您没有输入集的基本事实,则无法找到准确性。您所能做的最好的事情是在模型训练结束时将精度估计为最终测试精度

我如何知道预测所引用的数据(行)

预测的长度和顺序与输入相同

我如何才能找到结果预测的准确性

import numpy as np 
import pandas as pd 
from keras.layers import Dense, Dropout, BatchNormalization, Activation
import keras.models as md
import keras.layers.core as core
import keras.utils.np_utils as kutils
import keras.layers.convolutional as conv

from keras.layers import MaxPool2D

from subprocess import check_output
dataset = pd.read_csv('mutation-train.csv')

dataset = dataset[['CDS_Mutation',
                   'Primary_Tissue',
                    'Genomic',
                    'Gene_ID',
                    'Official_Symbol',
                    'Histology']]

X = dataset.iloc[:,0:5].values
y = dataset.iloc[:,5].values

# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_0 = LabelEncoder()
X[:, 0] = labelencoder_X_0.fit_transform(X[:, 0])
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2= LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
labelencoder_X_4= LabelEncoder()
X[:, 4] = labelencoder_X_4.fit_transform(X[:, 4])

X = X.astype(float)
labelencoder_y= LabelEncoder()
y = labelencoder_y.fit_transform(y)

onehotencoder0 = OneHotEncoder(categorical_features = [0])
X = onehotencoder0.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder1 = OneHotEncoder(categorical_features = [1])
X = onehotencoder1.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder2 = OneHotEncoder(categorical_features = [2])
X = onehotencoder2.fit_transform(X).toarray()
X = X[:,0:]
onehotencoder4 = OneHotEncoder(categorical_features = [4])
X = onehotencoder4.fit_transform(X).toarray()
X = X[:,0:]

# Splitting the dataset training and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Evaluating the ANN
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

model=Sequential()
model.add(Dense(32, activation = 'relu', input_shape=(X.shape[1],)))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ["accuracy"])

# Compile model
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Fit the model
model.fit(X,y, epochs=3, batch_size=1)

# Evaluate the model
scores = model.evaluate(X,y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Calculate predictions
predictions = model.predict(X)
prediction = pd.DataFrame(predictions,columns=['predictions']).to_csv('prediction.csv')
将预测与您输入的基本事实进行比较。将正确的预测除以输入集的大小


如果您没有输入集的基本事实,则无法找到准确性。您所能做的最好的事情是在模型训练结束时将精度估计为最终测试精度。

您可以轻松地向
数据集添加索引列。然后在
train\u test\u split
之后恢复索引的新排列。

您可以轻松地将索引列添加到
数据集
。然后在
列车测试\u分割后
恢复新的索引排列。

感谢您的回复。问题是,当我“打印X_测试”时,我得到的是一个值列表,而不是数据本身。为什么?谢谢你的友好回复。问题是,当我“打印X_测试”时,我得到的是一个值列表,而不是数据本身。为什么呢?