Python 3.x TensorFlow：训练结束后模型的损失，与随机梯度下降（SGD）最后一个训练轮次（epoch）中打印的损失不同。_Python 3.x_Machine Learning_Tensorflow

Python 3.x TensorFlow：训练结束后模型的损失，与随机梯度下降（SGD）最后一个训练轮次（epoch）中打印的损失不同。

python-3.x machine-learning tensorflow

Python 3.x TensorFlow，训练模型后的损失与随机梯度下降最后一个历元期间打印的损失不同。,python-3.x,machine-learning,tensorflow,Python 3.x,Machine Learning,Tensorflow,我试着在两个螺旋上进行二元分类。为了进行测试，我给我的神经网络提供了无噪声的精确螺旋数据，并且该模型在SGD期间的损失接近0时似乎可以工作。然而，在使用我的模型推断SGD完成后完全相同的数据点后，我得到的损失与SGD最后一个纪元期间打印的完全不同 import tensorflow as tf import numpy as np import matplotlib.pyplot as plt np.set_printoptions(threshold=np.nan) # get the s

我尝试对两条螺旋线做二元分类。为了测试，我向神经网络输入了无噪声的精确螺旋数据；在 SGD 训练期间损失接近 0，模型看起来工作正常。然而，SGD 结束后，用同一个模型对完全相同的数据点进行推断时，得到的损失与 SGD 最后一个训练轮次（epoch）中打印的损失完全不同。

import sys

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Always print arrays in full. NaN is rejected as a threshold by current
# NumPy (ValueError); sys.maxsize is the documented "never summarize" value.
np.set_printoptions(threshold=sys.maxsize)

# Build the two spirals.
# 1000 samples of the curve parameter in [0, 4]; it is used both as the
# radius and (times 2*pi) as the angle.
t_p = np.linspace(0, 4, num=1000)
_phase = t_p * 2 * np.pi

# First spiral, then the same curve with the angle shifted by pi.
x1_p, y1_p = t_p * np.cos(_phase), t_p * np.sin(_phase)
x2_p, y2_p = t_p * np.cos(_phase + np.pi), t_p * np.sin(_phase + np.pi)

# Draw both spirals as curves (two x/y pairs in a single plot call).
plt.plot(x1_p, y1_p, x2_p, y2_p)

# Training data: the noise-free spiral points themselves (same array
# objects as the plotted curves, no copy is made).
x1_dat, y1_dat = x1_p, y1_p
x2_dat, y2_dat = x2_p, y2_p

def model_variable(shape, name, initializer):
    """Create a float32 variable and register it as a model variable.

    Args:
        shape: Shape passed to ``tf.get_variable``.
        name: Name passed to ``tf.get_variable``.
        initializer: Initializer passed to ``tf.get_variable``.

    Returns:
        The value returned by ``tf.get_variable``; it is also added to the
        'model_variables' collection.
    """
    var = tf.get_variable(
        name=name,
        shape=shape,
        dtype=tf.float32,
        initializer=initializer,
    )
    tf.add_to_collection('model_variables', var)
    return var

class Model:
    """Fully connected binary classifier for 2-D points, trained one sample per step.

    The network takes a ``[2, 1]`` column vector and produces a single logit.
    Every layer except the last applies an ELU activation; the loss is the
    sigmoid cross-entropy between that logit and a scalar label.
    """

    # NOTE(review): the original comment here said the layer specifications
    # "include bias nodes"; biases are created as separate variables in
    # build_model, so confirm what was meant.
    def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
        """Store the configuration and build the graph.

        Args:
            sess: Session object whose ``run`` method executes graph ops.
            data: Zero-argument callable returning an iterable of
                ``(x, y, label)`` triples; called once per epoch.
            nEpochs: Number of passes over ``data()`` made by ``train``.
            learning_rate: Learning rate for the gradient descent optimizer.
            layer_specifications: Layer sizes; must start with 2 and end with 1.

        Raises:
            ValueError: If the first size is not 2 or the last size is not 1.
        """
        if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
            raise ValueError('First layer only two nodes, last layer only 1 node')
        self.sess = sess
        self.data = data
        self.nEpochs = nEpochs
        self.learning_rate = learning_rate
        self.layer_specifications = layer_specifications
        self.build_model()

    def build_model(self):
        """Create placeholders, weights, the forward pass, the loss and the probability op."""
        # x is the two nodes that will be layer one, will input an x, y coordinate
        # and need to classify which spiral is it on, the non phase shifted or the phase
        # shifted one.
        # y is the label the output is compared against in the loss.
        self.x = tf.placeholder(tf.float32, shape=[2, 1])
        self.y = tf.placeholder(tf.float32, shape=[])
        self.thetas = []
        self.biases = []
        sizes = self.layer_specifications
        for i in range(1, len(sizes)):
            self.thetas.append(model_variable(
                [sizes[i], sizes[i - 1]],
                'theta' + str(i),
                tf.random_normal_initializer(stddev=0.1)))
            self.biases.append(model_variable(
                [sizes[i], 1],
                'bias' + str(i),
                tf.constant_initializer()))

        # Forward propagation: ELU on every layer except the last, which
        # stays linear so that its output can be used as a logit.
        intermediate = self.x
        last = len(self.thetas) - 1
        for i, (theta, bias) in enumerate(zip(self.thetas, self.biases)):
            pre_activation = tf.add(tf.matmul(theta, intermediate), bias)
            if i == last:
                intermediate = pre_activation
            else:
                intermediate = tf.nn.elu(pre_activation)

        self.yhat = tf.squeeze(intermediate)
        # NOTE(review): positional (logits, targets) order matches pre-1.0
        # TensorFlow; TF >= 1.0 requires keyword arguments
        # (labels=..., logits=...) -- confirm the targeted version.
        self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y)
        # Built once here so that infer() does not add a new sigmoid op to
        # the graph on every call.
        self.prob = tf.sigmoid(self.yhat)

    def train_init(self):
        """Create the optimizer and numeric-check ops, then initialize all variables."""
        model_variables = tf.get_collection('model_variables')
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
        self.optim = optimizer.minimize(self.loss, var_list=model_variables)
        self.check = tf.add_check_numerics_ops()
        self.sess.run(tf.initialize_all_variables())

    # here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
    def train_iter(self, x, y):
        """Run one optimizer step on a single sample and print its loss.

        Args:
            x: Value fed to the ``[2, 1]`` input placeholder.
            y: Scalar label fed to the label placeholder.
        """
        # Use the session given to the constructor, not a module-level global.
        loss, _, _ = self.sess.run(
            [self.loss, self.optim, self.check],
            feed_dict={self.x: x, self.y: y})
        print('loss: {0} on:{1}'.format(loss, x))

    # here x and y are still x and y coordinates, label is separate
    def train(self):
        """Run ``nEpochs`` passes over ``self.data()``, one sample per step."""
        for _ in range(self.nEpochs):
            for x, y, label in self.data():
                print(label)
                self.train_iter([[x], [y]], label)
            print("NEW ONE:\n")

    # here x and y are still x and y coordinates, label is separate
    def infer(self, x, y, label):
        """Evaluate one point without updating the weights.

        Args:
            x: x coordinate of the point.
            y: y coordinate of the point.
            label: Scalar label used to compute the loss.

        Returns:
            The result of running ``(sigmoid(yhat), loss)`` for the point.
        """
        return self.sess.run(
            (self.prob, self.loss),
            feed_dict={self.x: [[x], [y]], self.y: label})

def data():
    """Yield ``(x, y, label)`` training samples from the module-level spiral arrays.

    Indices run from the last point down to the first. For each index the
    first-spiral point (label 0) is yielded, then the second-spiral point
    (label 1). The index range is taken from the length of ``x1_dat``.
    """
    for idx in reversed(range(len(x1_dat))):
        yield x1_dat[idx], y1_dat[idx], 0
        yield x2_dat[idx], y2_dat[idx], 1

# Build the network (2 inputs, three hidden layers of 100 units, 1 output)
# and train it for 10 epochs.
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(
    sess,
    data,
    nEpochs=10,
    learning_rate=1.1e-2,
    layer_specifications=layer_specifications,
)
model.train_init()
model.train()

# Per-point colours for the final scatter plot: 'r' when the predicted
# probability is >= 0.5, 'g' otherwise.
inferrences_1 = []
inferrences_2 = []

# Number of misclassified points over both spirals.
losses = 0

# First spiral (label 0): a probability >= 0.5 is a misclassification.
for i in reversed(range(len(t_p))):
    infer, loss = model.infer(x1_p[i], y1_p[i], 0)
    predicted_one = infer >= 0.5
    if predicted_one:
        print('loss: {0} on point {1}, {2}'.format(loss, x1_p[i], y1_p[i]))
        losses += 1
    inferrences_1.append('r' if predicted_one else 'g')

# Second spiral (label 1): a probability below 0.5 is a misclassification.
for i in reversed(range(len(t_p))):
    infer, loss = model.infer(x2_p[i], y2_p[i], 1)
    predicted_one = infer >= 0.5
    if not predicted_one:
        print('loss: {0} on point {1}, {2}'.format(loss, x2_p[i], y2_p[i]))
        losses += 1
    inferrences_2.append('r' if predicted_one else 'g')

print('total losses: {}'.format(losses))

plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

TensorFlow 是否需要某种集合操作（比如取平均值）才能得到最终结果？我认为不需要，在这种情况下这样做没有意义。如果训练没有收敛，训练和测试的损失值看起来就会不一样！我不确定损失到底是怎么回事（训练似乎有点不稳定），但如果不在每次分类时都重新定义 op（即 `infer` 中的 `tf.sigmoid` 调用），推断会快得多。你能试着跟踪某个特定点的分类结果是如何变化的吗？