Python 保存模型时的Tensorflow DNN累加器错误?
我使用DNNRegressor作为遗传算法的适应度函数,因此我需要执行数千次,然后在每次迭代中保存模型。代码的轻版本显示在这里。模拟在某个点之后进行得很好,然后出现错误,模拟停止。我正在使用Tensorflow 1.0.1。如果您对解决此问题有任何建议,我将非常感谢您的帮助 错误: tensorflow.python.framework.errors\u impl.InvalidArgumentError: TensorSliceReader构造函数失败:无法获取匹配 存档 /主页/xxxx/workspace2/NNEvolution/4_15_136/6/model.ckpt-1500: 资源耗尽:/home/xxxx/workspace2/nRevolution/4_15_136/6Python 保存模型时的Tensorflow DNN累加器错误?,python,python-2.7,machine-learning,tensorflow,deep-learning,Python,Python 2.7,Machine Learning,Tensorflow,Deep Learning,我使用DNNRegressor作为遗传算法的适应度函数,因此我需要执行数千次,然后在每次迭代中保存模型。代码的轻版本显示在这里。模拟在某个点之后进行得很好,然后出现错误,模拟停止。我正在使用Tensorflow 1.0.1。如果您对解决此问题有任何建议,我将非常感谢您的帮助 错误: tensorflow.python.framework.errors\u impl.InvalidArgumentError: TensorSliceReader构造函数失败:无法获取匹配 存档 /主页/xxxx/w
[[Node:save/RestoreV2_5=RestoreV2[dtypes=[DT_FLOAT], _device=“/job:localhost/replica:0/task:0/cpu:0”](_recv_save/Const_0,save/RestoreV2_5/tensor_names,save/RestoreV2_5/shape_and_slices)]
[[Node: save/RestoreV2_7/_7 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_6_save/RestoreV2_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]] InvalidArgumentError (see above for traceback): Unsuccessful TensorSliceReader constructor: Failed to get matching files on /home/xxxx/workspace2/NNEvolution/4_15_136/6/model.ckpt-1500: Resource exhausted: /home/xxxx/workspace2/NNEvolution/4_15_136/6
[[Node:save/RestoreV2_5=RestoreV2[dtypes=[DT_FLOAT], _device=“/job:localhost/replica:0/task:0/cpu:0”](_recv_save/Const_0,save/RestoreV2_5/tensor_names,save/RestoreV2_5/shape_and_slices)]
[[Node: save/RestoreV2_7/_7 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_6_save/RestoreV2_7", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]] Process finished with exit code 139 (interrupted by signal 11: SIGSEGV) 代码详情如下:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import tensorflow as tf
import shutil
tf.logging.set_verbosity(tf.logging.INFO)  # show per-step training/validation logs on stdout
def FitnessFunction(hhLayer, epoch, pathModel, train_set, val_set, test_set,
                    every_n_steps, early_stopping_rounds, nnExperiment,
                    gaExperiment, gaGeneration, globalIndCounter):
    """Fitness of one GA individual: mean test-set MSE over repeated trainings.

    Trains ``nnExperiment`` independent DNNRegressor models (hidden layers
    given by ``hhLayer``, ``epoch`` training steps each, with early stopping
    on validation loss) and returns the average of their test-set losses.

    Args:
        hhLayer: list of ints, hidden units per layer.
        epoch: maximum number of training steps per model.
        pathModel: root directory for per-model checkpoint directories.
        train_set, val_set, test_set: datasets exposing ``.data`` and
            ``.target`` (tf.contrib.learn Dataset namedtuples).
        every_n_steps: validation frequency for the ValidationMonitor.
        early_stopping_rounds: steps without improvement before stopping.
        nnExperiment: number of independent trainings to average over.
        gaExperiment, gaGeneration, globalIndCounter: GA bookkeeping ids,
            combined into a unique checkpoint path per individual.

    Returns:
        float: average test-set "loss" (MSE) over the ``nnExperiment`` runs.

    NOTE(review): each call creates a fresh ``model_dir`` full of checkpoints
    and never removes it; over thousands of GA evaluations this exhausts
    disk/file resources, which matches the reported TensorSliceReader
    "Resource exhausted" failure. Consider deleting ``indPath`` after the
    model has been evaluated — confirm saved models are not needed later.
    """
    avgMSE = 0.0
    # One catch-all real-valued feature column for all numeric inputs.
    feature_columns = [tf.contrib.layers.real_valued_column("")]
    for nnExp in range(nnExperiment):
        # Early-stopping monitor driven by validation loss.
        # NOTE(review): early_stopping_metric_minimize=False stops training
        # when the loss stops *increasing*; for a loss metric True is usually
        # intended. Kept as-is to preserve original behavior — TODO confirm.
        validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
            val_set.data,
            val_set.target,
            every_n_steps=every_n_steps,
            early_stopping_metric="loss",
            early_stopping_metric_minimize=False,
            early_stopping_rounds=early_stopping_rounds)
        # Unique checkpoint directory per (experiment, generation, individual,
        # repetition) so concurrent/successive runs never clobber each other.
        strName = str(gaExperiment) + "_" + str(gaGeneration) + "_" + str(globalIndCounter)
        indPath = os.path.join(os.path.sep, pathModel, strName, str(nnExp))
        mlp = tf.contrib.learn.DNNRegressor(
            feature_columns=feature_columns,
            hidden_units=hhLayer,
            model_dir=indPath,
            activation_fn=tf.nn.relu,
            enable_centered_bias=True,
            dropout=0.01,
            # Large save_checkpoints_secs keeps per-run checkpoint churn low;
            # cap GPU memory so many short-lived models can coexist.
            config=tf.contrib.learn.RunConfig(gpu_memory_fraction=0.30,
                                              save_checkpoints_secs=43200))
        # Train with early stopping, then score on the held-out test set.
        mlp.fit(x=train_set.data, y=train_set.target, steps=epoch,
                monitors=[validation_monitor], batch_size=50)
        MSE = mlp.evaluate(x=test_set.data, y=test_set.target)["loss"]
        avgMSE += MSE
        # Drop the estimator (graph/session references) before the next
        # repetition to limit memory growth across thousands of calls.
        del mlp
    return avgMSE / nnExperiment
if __name__ == "__main__":
    # Boston-housing style CSV datasets located next to this script.
    IRIS_TRAINING = os.path.join(os.path.dirname(__file__), "boston_train.csv")
    IRIS_VAL = os.path.join(os.path.dirname(__file__), "boston_val.csv")
    IRIS_TEST = os.path.join(os.path.dirname(__file__), "boston_test.csv")
    # ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24; it was
    # an exact alias of the builtin ``float``, so behavior is unchanged.
    train_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TRAINING, target_dtype=float, features_dtype=float)
    val_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_VAL, target_dtype=float, features_dtype=float)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TEST, target_dtype=float, features_dtype=float)
    initialPath = os.path.join(os.path.sep, 'home', 'xxxx', 'workspace2', 'NNEvolution')
    # Start the whole GA run from a clean checkpoint tree.
    if os.path.isdir(initialPath):
        shutil.rmtree(initialPath)
    idxCount = 1
    # 50 GA experiments x 30 generations; each fitness call itself trains
    # 10 networks, so checkpoint dirs accumulate quickly under initialPath.
    for exp in range(0, 50):
        for ger in range(0, 30):
            mse = FitnessFunction([30, 30, 30, 30], 1500, initialPath,
                                  train_set, val_set, test_set, 1, 200,
                                  nnExperiment=10, gaExperiment=exp,
                                  gaGeneration=ger, globalIndCounter=idxCount)
            idxCount += 1
            print("MSE: {0:f}".format(mse))