Tensorflow 当培训损失可能';t下拉列表
首先,我可以确认列车批次是否正确提供 批量大小为8,输出和张量形状在上面,这是经过预处理的,可以在Tensorflow 当培训损失可能';t下拉列表,tensorflow,Tensorflow,首先,我可以确认列车批次是否正确提供 批量大小为8,输出和张量形状在上面,这是经过预处理的,可以在opencv中看到 问题是我的训练损失无法减少 请参见下图和代码: 这是tensorboard 代码如下: #import package import tensorflow as tf import os #using for provide train batch, batch_size=32 import cifar10_input_pipeline # images shape ca
opencv
中看到
问题是我的训练损失无法减少
请参见下图和代码:
这是tensorboard
代码如下:
#import package
import tensorflow as tf
import os
#using for provide train batch, batch_size=32
import cifar10_input_pipeline
# images shape can be [batch_size, height, width, 3]
def inference(images):
with tf.variable_scope('conv1'):
kernel = tf.get_variable('weights',
[3,3,3,32],
tf.float32,
initializer=tf.random_normal_initializer(stddev=1e-3))
bias = tf.get_variable('bias',
[32],
tf.float32,
initializer=tf.zeros_initializer())
conv1 = tf.nn.conv2d(images, kernel, [1,1,1,1], padding='SAME', name='conv')
conv1 = tf.nn.relu(tf.nn.bias_add(conv1, bias), name='relu')
pool1 = tf.nn.max_pool(conv1, [1,2,2,1], [1,2,2,1], padding='VALID', name='pool1')
with tf.variable_scope('conv2'):
kernel = tf.get_variable('weights',
[3,3,32,64],
tf.float32,
initializer=tf.random_normal_initializer(stddev=1e-3))
bias = tf.get_variable('bias',
[64],
tf.float32,
initializer=tf.zeros_initializer())
conv2 = tf.nn.conv2d(pool1, kernel, [1,1,1,1], padding='SAME', name='conv')
conv2 = tf.nn.relu(tf.nn.bias_add(conv2, bias), name='relu')
pool2 = tf.nn.max_pool(conv2, [1,2,2,1], [1,2,2,1], padding='VALID', name='pool2')
batch_size = images.get_shape()[0].value
flatten = tf.reshape(pool2, shape=[batch_size, -1])
dim = flatten.get_shape()[1].value
with tf.variable_scope('fc1'):
weights = tf.get_variable('weights',
[dim, 384],
tf.float32,
initializer=tf.random_normal_initializer(stddev=1e-3))
bias = tf.get_variable('bias',
[384],
tf.float32,
initializer=tf.zeros_initializer())
fc1 = tf.matmul(flatten, weights, name='fc')
fc1 = tf.nn.sigmoid(tf.nn.bias_add(fc1, bias), name='sigmoid')
with tf.variable_scope('softmax_linear'):
weights = tf.get_variable('weights',
[384, 10],
tf.float32,
initializer=tf.random_normal_initializer(stddev=1e-3))
bias = tf.get_variable('bias',
[10],
tf.float32,
initializer=tf.zeros_initializer())
fc2 = tf.matmul(fc1, weights, name='fc')
# final connected layer, return without softmax function
logits = tf.nn.bias_add(fc2, bias, name='logits')
print('inference sucess')
return logits
#caculate loss
def loss(logits, labels):
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels,
name='cross_entropy')
loss = tf.reduce_mean(cross_entropy, name='loss')
print('loss sucess')
return loss
#return training op
def train(loss):
opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_op = opt.minimize(loss)
print('train sucess')
return train_op
#calculate accuracy, use train data batch
def accuracy(logits, labels):
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, axis=1, output_type=tf.int32), labels),
tf.int32),
name='accuracy')
return accuracy
if __name__ == '__main__':
data_dir = '/home/mao/Notebooks/cifar10/cifar-10-batches-bin/'
filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
example_batch = cifar10_input_pipeline.input_pipeline(filenames, batch_size=32, num_epochs=None)
images = example_batch[0]
labels = example_batch[1]
_logits = inference(images)
_loss = loss(_logits, labels)
_train_op = train(_loss)
sess = tf.Session()
#global_step = tf.train.get_or_create_global_step()
init_op = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
sess.run(init_op)
tf.summary.FileWriterCache.clear()
writer = tf.summary.FileWriter('./test_model', sess.graph)
Loss = tf.summary.scalar('Loss', _loss)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
try:
for i in range(1000):
if not coord.should_stop():
_, sum1 = sess.run([_train_op, Loss])
writer.add_summary(sum1, i)
if i % 100 == 0:
print(sess.run(labels))
except tf.errors.OutOfRangeError:
print('catch OutOfRangeError')
finally:
coord.request_stop()
coord.join(threads)
writer.flush()
writer.close()
sess.close()
更多详情:
下面是sess.run(train_op)时的损失摘要和标签批处理,同样,我确信标签和图像批处理在训练时会被洗牌
那么,函数调用中是否有错误?即使删除输出层(logits)中的tf.nn.relu,损失仍然无法下降。我很困惑
有人能帮忙吗?谢谢 不要在输出层(logits)中使用
tf.nn.relu
。@xdurch0嘿,伙计,我也面临着同样的问题……你的解决方案就像一个符咒……它背后的逻辑是什么?@xdurch0我曾经在输出层(logits)中删除过tf.nn.relu,但它帮不上忙,是不是有逻辑错误,就像函数调用错误一样,问题也得到了初步解决,通过增加权重、学习率,损失变得更大dropdown@DuttaA一般来说,我认为对logit应用任何非线性都不是一个好主意,因为基本上在之后应用softmax(TF交叉熵函数在内部这样做),可能会弄乱梯度。ReLU尤其是一个坏主意,因为对于小于0的输入,它的梯度为0。因此,在许多情况下,输出层的许多部分将没有梯度,完全防止网络从错误中学习(因为没有任何东西可以反向传播)。