Python 2.7 TensorFlow Estimator 模型发散,损失 = NaN
我使用的是一个设置为CNN的tensorflow估计器,每次运行代码时都会出现以下错误:Python 2.7 Tensorflow估计模型发散,损失=NaN,python-2.7,tensorflow,nan,convolutional-neural-network,tensorflow-estimator,Python 2.7,Tensorflow,Nan,Convolutional Neural Network,Tensorflow Estimator,我使用的是一个设置为CNN的tensorflow估计器,每次运行代码时都会出现以下错误: ERROR:tensorflow:Model diverged with loss = NaN. Traceback (most recent call last): File "cnn_training_v3.py", line 108, in <module> classifier.train(input_fn=train_input_fn, steps=200, hooks=[
ERROR:tensorflow:Model diverged with loss = NaN.
Traceback (most recent call last):
File "cnn_training_v3.py", line 108, in <module>
classifier.train(input_fn=train_input_fn, steps=200, hooks=[logging_hook])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 363, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 843, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 859, in _train_model_default
saving_listeners)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.py", line 1059, in _train_with_estimator_spec
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 567, in run
run_metadata=run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1043, in run
run_metadata=run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1134, in run
raise six.reraise(*original_exc_info)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1119, in run
return self._sess.run(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/monitored_session.py", line 1199, in run
run_metadata=run_metadata))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/basic_session_run_hooks.py", line 623, in after_run
raise NanLossDuringTrainingError
tensorflow.python.training.basic_session_run_hooks.NanLossDuringTrainingError: NaN loss during training.
这是我的主要代码。我的目标是训练一个 CNN,让它查看一张积木塔的图像,并预测图像中有多少块积木:
# Load and process dataset
#
# Scans the "images" folder for picture files (names containing 'before')
# and description files (names containing 'text'), then fills `images` with
# the decoded pictures and `labels` with the class index parsed from the
# matching description file.
image_files = []
text_files = []
images = []
labels = []

# note to self: label 0 means 2 blocks, label 1 means 3 blocks,
# label 2 means 4 blocks, label 3 means 5 blocks
_LABEL_BY_BLOCK_COUNT = {2: 0, 3: 1, 4: 2, 5: 3}

# load files from folder
for root, dirs, files in os.walk("images"):
    for filename in files:
        if 'before' in filename:
            image_files.append(filename)
        elif 'text' in filename:
            text_files.append(filename)

# for each pair of files, append relevant data to image and label lists
for imagename in image_files:
    # Load the image of *this* iteration. The previous code passed the stale
    # `filename` variable left over from the directory walk, so every entry
    # was the same file (possibly not even an image).
    image = cv2.imread('images/' + imagename)
    if image is None:
        # cv2.imread reports failure by returning None; a None entry would
        # silently become NaN once the list is converted to float32.
        raise IOError("could not read image file: images/%s" % imagename)
    images.append(image)

    # Identifier shared by the image and its description file: the name
    # without its first 7 and last 4 characters.
    # NOTE(review): presumably strips a 'before_'-style prefix and the
    # extension -- confirm against the actual file naming scheme.
    num = imagename[7:len(imagename) - 4]

    for textname in text_files:
        if ('_' + num + '.') not in textname:
            continue
        # `with` guarantees the description file is closed again.
        with open('images/' + textname, 'r') as textfile:
            for line in textfile:
                if 'Number of blocks' not in line:
                    continue
                # The count is read from column 18 onwards of the line.
                nblocks = int(line[18:].strip('\n'))
                if nblocks not in _LABEL_BY_BLOCK_COUNT:
                    # Fail loudly instead of re-using the previous label.
                    raise ValueError(
                        "unsupported number of blocks %d in images/%s"
                        % (nblocks, textname))
                labels.append(_LABEL_BY_BLOCK_COUNT[nblocks])
# separate images and labels into train and test sets - 50% train, 50% evaluate
# Floor division (`//`) keeps the split index an integer regardless of the
# interpreter's division mode; plain `/` only works as an index under
# Python 2's classic integer division.
image_split = len(images) // 2
label_split = len(labels) // 2
train_images = images[:image_split]
train_labels = labels[:label_split]
test_images = images[image_split:]
test_labels = labels[label_split:]

# convert dataset into numpy arrays (pixel data as float32, labels as int32)
train_data_numpy = np.array(train_images, np.float32)
train_labels_numpy = np.array(train_labels, np.int32)
test_data_numpy = np.array(test_images, np.float32)
test_labels_numpy = np.array(test_labels, np.int32)
# Put images through CNN

# Create the Estimator around the CNN model function, using "models/cnn"
# as its model directory.
# NOTE(review): the author reports that stale checkpoints left in this
# directory caused the NaN-loss error -- clear it when the model changes.
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir="models/cnn",
)

# Set up logging for predictions: print the tensor named "softmax_tensor"
# under the key "probabilities" on every iteration.
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log,
    every_n_iter=1,
)

# Train the model: shuffled single-example batches, repeated indefinitely
# (num_epochs=None), stopped after 200 steps.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": train_data_numpy},
    y=train_labels_numpy,
    batch_size=1,
    num_epochs=None,
    shuffle=True,
)
classifier.train(
    input_fn=train_input_fn,
    steps=200,
    hooks=[logging_hook],
)

# Evaluate the model on one unshuffled pass over the test set and print
# the results.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": test_data_numpy},
    y=test_labels_numpy,
    num_epochs=1,
    shuffle=False,
)
eval_results = classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)
我正在 Ubuntu 16.04 上使用 Python 2.7.12。如果有人能帮忙分析损失变为 NaN 的原因,我将不胜感激。找到了解决方案!原来模型以前的检查点与当前训练会话冲突,因此我删除了模型保存检查点所在文件夹中的所有内容,现在它可以正常训练,不再出现 NaN 损失错误。找到了解决方案!原来模型以前的检查点与当前训练会话冲突,所以我删除了模型保存检查点的文件夹中的所有内容,现在它可以正常训练,没有任何错误
# Load and process dataset
#
# Scans the "images" folder for picture files (names containing 'before')
# and description files (names containing 'text'), then fills `images` with
# the decoded pictures and `labels` with the class index parsed from the
# matching description file.
image_files = []
text_files = []
images = []
labels = []

# note to self: label 0 means 2 blocks, label 1 means 3 blocks,
# label 2 means 4 blocks, label 3 means 5 blocks
_LABEL_BY_BLOCK_COUNT = {2: 0, 3: 1, 4: 2, 5: 3}

# load files from folder
for root, dirs, files in os.walk("images"):
    for filename in files:
        if 'before' in filename:
            image_files.append(filename)
        elif 'text' in filename:
            text_files.append(filename)

# for each pair of files, append relevant data to image and label lists
for imagename in image_files:
    # Load the image of *this* iteration. The previous code passed the stale
    # `filename` variable left over from the directory walk, so every entry
    # was the same file (possibly not even an image).
    image = cv2.imread('images/' + imagename)
    if image is None:
        # cv2.imread reports failure by returning None; a None entry would
        # silently become NaN once the list is converted to float32.
        raise IOError("could not read image file: images/%s" % imagename)
    images.append(image)

    # Identifier shared by the image and its description file: the name
    # without its first 7 and last 4 characters.
    # NOTE(review): presumably strips a 'before_'-style prefix and the
    # extension -- confirm against the actual file naming scheme.
    num = imagename[7:len(imagename) - 4]

    for textname in text_files:
        if ('_' + num + '.') not in textname:
            continue
        # `with` guarantees the description file is closed again.
        with open('images/' + textname, 'r') as textfile:
            for line in textfile:
                if 'Number of blocks' not in line:
                    continue
                # The count is read from column 18 onwards of the line.
                nblocks = int(line[18:].strip('\n'))
                if nblocks not in _LABEL_BY_BLOCK_COUNT:
                    # Fail loudly instead of re-using the previous label.
                    raise ValueError(
                        "unsupported number of blocks %d in images/%s"
                        % (nblocks, textname))
                labels.append(_LABEL_BY_BLOCK_COUNT[nblocks])
# separate images and labels into train and test sets - 50% train, 50% evaluate
# Floor division (`//`) keeps the split index an integer regardless of the
# interpreter's division mode; plain `/` only works as an index under
# Python 2's classic integer division.
image_split = len(images) // 2
label_split = len(labels) // 2
train_images = images[:image_split]
train_labels = labels[:label_split]
test_images = images[image_split:]
test_labels = labels[label_split:]

# convert dataset into numpy arrays (pixel data as float32, labels as int32)
train_data_numpy = np.array(train_images, np.float32)
train_labels_numpy = np.array(train_labels, np.int32)
test_data_numpy = np.array(test_images, np.float32)
test_labels_numpy = np.array(test_labels, np.int32)
# Put images through CNN

# Create the Estimator around the CNN model function, using "models/cnn"
# as its model directory.
# NOTE(review): the author reports that stale checkpoints left in this
# directory caused the NaN-loss error -- clear it when the model changes.
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn,
    model_dir="models/cnn",
)

# Set up logging for predictions: print the tensor named "softmax_tensor"
# under the key "probabilities" on every iteration.
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log,
    every_n_iter=1,
)

# Train the model: shuffled single-example batches, repeated indefinitely
# (num_epochs=None), stopped after 200 steps.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": train_data_numpy},
    y=train_labels_numpy,
    batch_size=1,
    num_epochs=None,
    shuffle=True,
)
classifier.train(
    input_fn=train_input_fn,
    steps=200,
    hooks=[logging_hook],
)

# Evaluate the model on one unshuffled pass over the test set and print
# the results.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": test_data_numpy},
    y=test_labels_numpy,
    num_epochs=1,
    shuffle=False,
)
eval_results = classifier.evaluate(input_fn=eval_input_fn)
print(eval_results)