Python 如何使用TensorFlow向CSV文件中的空列添加预测值?

Python 如何使用TensorFlow向CSV文件中的空列添加预测值?,python,csv,tensorflow,Python,Csv,Tensorflow,所以我有一段很棒的代码,它的预测准确率约为93%。我现在想知道的是,如何使用经过培训的程序,让它查看实际的测试数据,而不考虑答案的准确性,并让它填写答案。这是我的代码,预测准确率约为93% import tensorflow as tf import numpy as np from numpy import genfromtxt import sklearn # Convert to one hot def convertOneHot(data): y=np.array([int(i

我有一段代码,它的预测准确率约为93%。我现在想知道的是,如何使用训练好的模型去处理实际的测试数据(不再评估答案的准确率),并让它填入预测结果。下面是我的代码,预测准确率约为93%

import tensorflow as tf
import numpy as np
from numpy import genfromtxt
import sklearn

# Convert to one hot
def convertOneHot(data):
    """Split the label column off *data* and one-hot encode it.

    Args:
        data: Iterable of rows whose first element is the class label
            (anything ``int()`` accepts).

    Returns:
        A tuple ``(y, y_onehot)``. ``y`` is a NumPy array of the integer
        labels. ``y_onehot`` is a list with one list per row, each of
        length ``y.max() + 1``, holding 1 at the label's index and 0
        elsewhere. Empty input yields an empty array and an empty list.

    Raises:
        ValueError: If a label cannot be converted by ``int()``
            (for example a NaN value).
    """
    y = np.array([int(row[0]) for row in data])
    if y.size == 0:
        return (y, [])
    # Compute the row width once instead of calling y.max() for every row.
    width = int(y.max()) + 1
    y_onehot = []
    for label in y:
        encoded = [0] * width
        encoded[label] = 1
        y_onehot.append(encoded)
    return (y, y_onehot)

# Load both CSVs. The code below treats column 0 as the class label and the
# remaining columns as features.
data = genfromtxt('cs-training.csv',delimiter=',')  # Training data
test_data = genfromtxt('cs-test.csv',delimiter=',')  # Test data

x_train=np.array([ i[1::] for i in data])
y_train,y_train_onehot = convertOneHot(data)

x_test=np.array([ i[1::] for i in test_data])
y_test,y_test_onehot = convertOneHot(test_data)

A=data.shape[1]-1 # Number of features, Note first is y
B=len(y_train_onehot[0])  # Number of classes (width of a one-hot row)
tf_in = tf.placeholder("float", [None, A]) # Features
tf_weight = tf.Variable(tf.zeros([A,B]))
tf_bias = tf.Variable(tf.zeros([B]))
# Single-layer softmax classifier: softmax(x . W + b)
tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)

# Training via backpropagation
tf_softmax_correct = tf.placeholder("float", [None,B])
# NOTE(review): tf.log(tf_softmax) is -inf wherever a softmax output is
# exactly 0, which can turn the loss into NaN -- consider clipping; confirm
# on the real data.
tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))

# Train using tf.train.GradientDescentOptimizer
tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)

# Add accuracy checking nodes
tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

# The saver only tracks the two trainable variables.
saver = tf.train.Saver([tf_weight,tf_bias])

# Initialize and run
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

print("...")
# Run the training
for i in range(100):
    sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})

# Print accuracy
# Only the final accuracy is printed, so evaluate it once after training
# rather than on every iteration.
result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
print(result)

现在我有了实际的测试集
cs test actual.csv
,其中第一列完全为空,我需要用预测的1或0填充它。我该怎么做呢?

上面的程序似乎没有保存训练好的会话(session)。我认为你应该分两步来做:

  • 训练并保存会话
  • 恢复保存的会话,并通过它运行测试数据

第1步:

     #!/usr/bin/env python
    
     import tensorflow as tf
     import numpy as np
     from numpy import genfromtxt
     import sklearn
    
     # Convert to one hot
     def convertOneHot(data):
         """Split the label column off *data* and one-hot encode it.

         Args:
             data: Iterable of rows whose first element is the class label
                 (anything ``int()`` accepts).

         Returns:
             A tuple ``(y, y_onehot)``. ``y`` is a NumPy array of the integer
             labels. ``y_onehot`` is a list with one list per row, each of
             length ``y.max() + 1``, holding 1 at the label's index and 0
             elsewhere. Empty input yields an empty array and an empty list.

         Raises:
             ValueError: If a label cannot be converted by ``int()``
                 (for example a NaN value).
         """
         y = np.array([int(row[0]) for row in data])
         if y.size == 0:
             return (y, [])
         # Compute the row width once instead of calling y.max() for every row.
         width = int(y.max()) + 1
         y_onehot = []
         for label in y:
             encoded = [0] * width
             encoded[label] = 1
             y_onehot.append(encoded)
         return (y, y_onehot)
    
     # Build Example Data is CSV format, but use Iris data
     from sklearn import datasets
     from sklearn.model_selection import train_test_split
     def buildDataFromIris():
         """Write the Iris dataset to 'cs-training.csv' and 'cs-test.csv'.

         The data is split with ``train_test_split`` (``test_size=0.33``,
         ``random_state=42``). Each CSV line holds the class label followed
         by that sample's feature values, comma-separated.
         """
         def write_rows(path, labels, samples):
             # ``with`` closes the file even if a write fails part-way.
             with open(path, 'w') as out:
                 for label, features in zip(labels, samples):
                     # Prepend the label so it becomes column 0 of the row.
                     row = np.append(np.array(label), features)
                     out.write(",".join([str(s) for s in row]) + '\n')

         iris = datasets.load_iris()
         X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)
         write_rows('cs-training.csv', y_train, X_train)
         write_rows('cs-test.csv', y_test, X_test)
    
     # Recreate logging and save dir
     # Seems the tensorflow won't always overwrite
     import shutil, os, sys
     TMPDir='./tensorTMP'
     # Start every run from an empty checkpoint directory.
     try:
         shutil.rmtree(TMPDir)
     except OSError:
         # rmtree raises OSError when the directory cannot be removed,
         # e.g. on the first run when it does not exist yet.
         print("Tmp Dir did not exist...that's okay")
     # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3.
     os.mkdir(TMPDir, 0o755)
    
    
    
     # Populate the data: writes cs-training.csv and cs-test.csv
     buildDataFromIris()

     # The code below treats column 0 of each CSV as the class label and the
     # remaining columns as features.
     data = genfromtxt('cs-training.csv',delimiter=',')  # Training data
     test_data = genfromtxt('cs-test.csv',delimiter=',')  # Test data

     x_train=np.array([ i[1::] for i in data])
     y_train,y_train_onehot = convertOneHot(data)

     x_test=np.array([ i[1::] for i in test_data])
     y_test,y_test_onehot = convertOneHot(test_data)

     A=data.shape[1]-1 # Number of features, Note first is y
     B=len(y_train_onehot[0])  # Number of classes (width of a one-hot row)
     tf_in = tf.placeholder("float", [None, A]) # Features
     tf_weight = tf.Variable(tf.zeros([A,B]))
     tf_bias = tf.Variable(tf.zeros([B]))
     # Single-layer softmax classifier: softmax(x . W + b)
     tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)

     # Training via backpropagation
     tf_softmax_correct = tf.placeholder("float", [None,B])
     # NOTE(review): tf.log(tf_softmax) is -inf wherever a softmax output is
     # exactly 0, which can turn the loss into NaN -- consider clipping;
     # confirm on the real data.
     tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))

     # Train using tf.train.GradientDescentOptimizer
     tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)

     # Add accuracy checking nodes
     tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
     tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

     # The saver only tracks the two trainable variables.
     saver = tf.train.Saver([tf_weight,tf_bias])

     # Initialize and run
     init = tf.initialize_all_variables()
     sess = tf.Session()
     sess.run(init)

     THRESHOLD = 0.98
     saved = False
     print("...")
     # Run the training
     for i in range(100):
         sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})
         result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
         # Checkpoint the first iteration whose test accuracy exceeds the
         # threshold. We just need one save.
         if result > THRESHOLD and not saved:
             saved = True
             print("saving result {}".format(result))
             saver.save(sess,TMPDir +"/savedSess")
    
    唯一的修改是使用Iris生成样本数据,为会话建立阈值或置信区间。如果超过该阈值,则保存会话。运行第一步后,应对模型进行训练并保存

    第二步:

     #!/usr/bin/env python
    
     import tensorflow as tf
     import numpy as np
     from numpy import genfromtxt
     import sklearn
    
     # Convert to one hot
     def convertOneHot(data):
         """Split the label column off *data* and one-hot encode it.

         Args:
             data: Iterable of rows whose first element is the class label
                 (anything ``int()`` accepts).

         Returns:
             A tuple ``(y, y_onehot)``. ``y`` is a NumPy array of the integer
             labels. ``y_onehot`` is a list with one list per row, each of
             length ``y.max() + 1``, holding 1 at the label's index and 0
             elsewhere. Empty input yields an empty array and an empty list.

         Raises:
             ValueError: If a label cannot be converted by ``int()``
                 (for example a NaN value).
         """
         y = np.array([int(row[0]) for row in data])
         if y.size == 0:
             return (y, [])
         # Compute the row width once instead of calling y.max() for every row.
         width = int(y.max()) + 1
         y_onehot = []
         for label in y:
             encoded = [0] * width
             encoded[label] = 1
             y_onehot.append(encoded)
         return (y, y_onehot)
    
     # Build Example Data is CSV format, but use Iris data
     from sklearn import datasets
     from sklearn.model_selection import train_test_split
     def buildDataFromIris():
         """Write the Iris dataset to 'cs-training.csv' and 'cs-test.csv'.

         The data is split with ``train_test_split`` (``test_size=0.33``,
         ``random_state=42``). Each CSV line holds the class label followed
         by that sample's feature values, comma-separated.
         """
         def write_rows(path, labels, samples):
             # ``with`` closes the file even if a write fails part-way.
             with open(path, 'w') as out:
                 for label, features in zip(labels, samples):
                     # Prepend the label so it becomes column 0 of the row.
                     row = np.append(np.array(label), features)
                     out.write(",".join([str(s) for s in row]) + '\n')

         iris = datasets.load_iris()
         X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)
         write_rows('cs-training.csv', y_train, X_train)
         write_rows('cs-test.csv', y_test, X_test)
    
     # Recreate logging and save dir
     # Seems the tensorflow won't always overwrite
     import shutil, os, sys
     TMPDir='./tensorTMP'
     # Start every run from an empty checkpoint directory.
     try:
         shutil.rmtree(TMPDir)
     except OSError:
         # rmtree raises OSError when the directory cannot be removed,
         # e.g. on the first run when it does not exist yet.
         print("Tmp Dir did not exist...that's okay")
     # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3.
     os.mkdir(TMPDir, 0o755)
    
    
    
     # Populate the data: writes cs-training.csv and cs-test.csv
     buildDataFromIris()

     # The code below treats column 0 of each CSV as the class label and the
     # remaining columns as features.
     data = genfromtxt('cs-training.csv',delimiter=',')  # Training data
     test_data = genfromtxt('cs-test.csv',delimiter=',')  # Test data

     x_train=np.array([ i[1::] for i in data])
     y_train,y_train_onehot = convertOneHot(data)

     x_test=np.array([ i[1::] for i in test_data])
     y_test,y_test_onehot = convertOneHot(test_data)

     A=data.shape[1]-1 # Number of features, Note first is y
     B=len(y_train_onehot[0])  # Number of classes (width of a one-hot row)
     tf_in = tf.placeholder("float", [None, A]) # Features
     tf_weight = tf.Variable(tf.zeros([A,B]))
     tf_bias = tf.Variable(tf.zeros([B]))
     # Single-layer softmax classifier: softmax(x . W + b)
     tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)

     # Training via backpropagation
     tf_softmax_correct = tf.placeholder("float", [None,B])
     # NOTE(review): tf.log(tf_softmax) is -inf wherever a softmax output is
     # exactly 0, which can turn the loss into NaN -- consider clipping;
     # confirm on the real data.
     tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))

     # Train using tf.train.GradientDescentOptimizer
     tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)

     # Add accuracy checking nodes
     tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
     tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

     # The saver only tracks the two trainable variables.
     saver = tf.train.Saver([tf_weight,tf_bias])

     # Initialize and run
     init = tf.initialize_all_variables()
     sess = tf.Session()
     sess.run(init)

     THRESHOLD = 0.98
     saved = False
     print("...")
     # Run the training
     for i in range(100):
         sess.run(tf_train_step, feed_dict={tf_in: x_train, tf_softmax_correct: y_train_onehot})
         result = sess.run(tf_accuracy, feed_dict={tf_in: x_test, tf_softmax_correct: y_test_onehot})
         # Checkpoint the first iteration whose test accuracy exceeds the
         # threshold. We just need one save.
         if result > THRESHOLD and not saved:
             saved = True
             print("saving result {}".format(result))
             saver.save(sess,TMPDir +"/savedSess")
    
    还原保存的会话,并通过它运行测试数据

     #!/usr/bin/env python
    
     import tensorflow as tf
     import numpy as np
     from numpy import genfromtxt
     import sklearn
    
     # Convert to one hot
     def convertOneHot(data):
         """Split the label column off *data* and one-hot encode it.

         Args:
             data: Iterable of rows whose first element is the class label
                 (anything ``int()`` accepts).

         Returns:
             A tuple ``(y, y_onehot)``. ``y`` is a NumPy array of the integer
             labels. ``y_onehot`` is a list with one list per row, each of
             length ``y.max() + 1``, holding 1 at the label's index and 0
             elsewhere. Empty input yields an empty array and an empty list.

         Raises:
             ValueError: If a label cannot be converted by ``int()``
                 (for example a NaN value).
         """
         y = np.array([int(row[0]) for row in data])
         if y.size == 0:
             return (y, [])
         # Compute the row width once instead of calling y.max() for every row.
         width = int(y.max()) + 1
         y_onehot = []
         for label in y:
             encoded = [0] * width
             encoded[label] = 1
             y_onehot.append(encoded)
         return (y, y_onehot)
    
    
     # The code below treats column 0 of each CSV as the class label and the
     # remaining columns as features.
     data = genfromtxt('cs-training.csv',delimiter=',')  # Training data
     test_data = genfromtxt('cs-test.csv',delimiter=',')  # Test data

     x_train=np.array([ i[1::] for i in data])
     y_train,y_train_onehot = convertOneHot(data)

     x_test=np.array([ i[1::] for i in test_data])
     y_test,y_test_onehot = convertOneHot(test_data)

     # Rebuild the same graph as the training script so the saver can map
     # the checkpointed variables onto tf_weight and tf_bias.
     A=data.shape[1]-1 # Number of features, Note first is y
     B=len(y_train_onehot[0])  # Number of classes (width of a one-hot row)
     tf_in = tf.placeholder("float", [None, A]) # Features
     tf_weight = tf.Variable(tf.zeros([A,B]))
     tf_bias = tf.Variable(tf.zeros([B]))
     tf_softmax = tf.nn.softmax(tf.matmul(tf_in,tf_weight) + tf_bias)

     # Training via backpropagation
     tf_softmax_correct = tf.placeholder("float", [None,B])
     tf_cross_entropy = -tf.reduce_sum(tf_softmax_correct*tf.log(tf_softmax))

     # Train using tf.train.GradientDescentOptimizer
     tf_train_step = tf.train.GradientDescentOptimizer(0.01).minimize(tf_cross_entropy)

     # Add accuracy checking nodes
     tf_correct_prediction = tf.equal(tf.argmax(tf_softmax,1), tf.argmax(tf_softmax_correct,1))
     tf_accuracy = tf.reduce_mean(tf.cast(tf_correct_prediction, "float"))

     saver = tf.train.Saver([tf_weight,tf_bias])

     # Initialize and run
     init = tf.initialize_all_variables()
     sess = tf.Session()
     sess.run(init)

     # Replace the freshly initialised weight/bias with the saved values.
     TMPDir='./tensorTMP'
     saver.restore(sess, TMPDir + '/savedSess')
     # tf_softmax is built only from tf_in, tf_weight and tf_bias, so the
     # label placeholder does not need to be fed for prediction.
     ans = sess.run(tf_softmax, feed_dict={tf_in: x_test})

     # One row of softmax outputs per row of x_test.
     print(ans)
    
    注意,您的输出将如下所示

    [[  6.17585704e-02   8.63590300e-01   7.46511072e-02]
    [  9.98804331e-01   1.19561062e-03   3.25832108e-13]
    [  1.52018686e-07   4.49650863e-04   9.99550164e-01]
    

    这对sklearn提供的Iris数据有效,但对我的项目无效。对于每个应该有1、0甚至概率的地方,它只返回
    nan
    (我希望它返回的比任何东西都多)。