Why doesn't TensorFlow update the network's weights?
I am experimenting with TensorFlow, and when I start training my network I find that the weights are not updated when I run the optimization step. I don't understand why TensorFlow is not updating the network's weights. This is the code I am using for the task:
import tensorflow as tf
import numpy as np

def importDataset(path, nsample):
    #--------------------------------------------------------------IMPORT DATASET--------------------------------------------------------------
    filename_queue = tf.train.string_input_producer([path], shuffle=True)
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    record_defaults = [[1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0], [1.0]]
    col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.stack([col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14])

    with tf.Session() as sess:
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        training_data = np.matrix([])
        training_y = np.matrix([])
        for i in range(nsample):
            # Retrieve a single instance:
            example, label = sess.run([features, col1])
            # Build a vector of three zeros representing the three hypothetical classes
            temp = np.zeros(3)
            # Put a 1 at the position corresponding to the example's class,
            # e.g. position 2 holds a 1 if the example belongs to class 2
            temp[int(label)-1] = 1.0
            if i == 0:
                training_data = np.vstack([example])
                training_y = np.vstack([temp])
            else:
                training_data = np.vstack([training_data, example])
                training_y = np.vstack([training_y, temp])
        coord.request_stop()
        coord.join(threads)
        #print(len(training_data[:,1]))
        #print(len(training_y[:,1]))
        #print(training_y)
        return training_data, training_y

def splitDataset(nsample, testPerc, path):
    example, example_y = importDataset(path, nsample)
    # Convert training_data and training_y into lists
    example_list = example.tolist()
    example_y_list = example_y.tolist()
    training_data = list()
    training_y = list()
    percent = int((nsample*testPerc)/100)
    # Begin uniform extraction from the data.
    for i in range(percent):
        index = np.random.randint(0, len(example_list))
        training_data.append(example_list[index])
        training_y.append(example_y_list[index])
        example_list.remove(example_list[index])
        example_y_list.remove(example_y_list[index])
    training_data = np.matrix(training_data)
    training_y = np.matrix(training_y)
    test_data = np.matrix(example_list)
    test_y = np.matrix(example_y_list)
    #print(len(training_data[:,1]))
    #print(len(training_y[:,1]))
    #print(len(test_data[:,1]))
    #print(len(test_y[:,1]))
    return training_data, training_y, test_data, test_y

#---------------------------------------------MODEL DEFINITION: START--------------------------------------
x = tf.placeholder(tf.float32, [None, 13])
y = tf.placeholder(tf.float32, [None, 3])

hidden_Layer1 = {'weights': tf.Variable(tf.truncated_normal([13, 3], stddev=0.001)), 'biases': tf.Variable(tf.truncated_normal([3], stddev=0.001))}
hidden_Layer2 = {'weights': tf.Variable(tf.truncated_normal([3, 3], stddev=0.001)), 'biases': tf.Variable(tf.truncated_normal([3], stddev=0.001))}
hidden_Layer3 = {'weights': tf.Variable(tf.truncated_normal([3, 3], stddev=0.001)), 'biases': tf.Variable(tf.truncated_normal([3], stddev=0.001))}
output_Layer  = {'weights': tf.Variable(tf.truncated_normal([3, 3], stddev=0.001)), 'biases': tf.Variable(tf.truncated_normal([3], stddev=0.001))}

# hidden layer #1
output_Layer1 = tf.add(tf.matmul(x, hidden_Layer1['weights']), hidden_Layer1['biases'])
output_Layer1 = tf.nn.sigmoid(output_Layer1)
# hidden layer #2
output_Layer2 = tf.add(tf.matmul(output_Layer1, hidden_Layer2['weights']), hidden_Layer2['biases'])
output_Layer2 = tf.nn.sigmoid(output_Layer2)
# hidden layer #3
output_Layer3 = tf.add(tf.matmul(output_Layer2, hidden_Layer3['weights']), hidden_Layer3['biases'])
output_Layer3 = tf.nn.sigmoid(output_Layer3)
# output layer
output_Layer_Output = tf.nn.sigmoid(tf.add(tf.matmul(output_Layer3, output_Layer['weights']), output_Layer['biases']))
#--------------------------------------------MODEL DEFINITION: END-------------------------------------------

#--------------------------------------------MODEL TRAINING-----------------------------------------------
error = tf.nn.l2_loss(output_Layer_Output - y, name="squared_error_cost")
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(error)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

training_data, training_label, test_data, test_label = splitDataset(178, 70, "datasetvino.csv")

for _ in range(100):
    print(sess.run([error, train_step, hidden_Layer1['weights']], feed_dict={x: training_data, y: training_label}))

correct_class = tf.equal(tf.argmax(output_Layer_Output, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_class, tf.float32))
#print(sess.run([accuracy], feed_dict={x: test_data, y: test_label}))
To reach your target performance you should play around with your parameters: layers, filter sizes, number of filters, and batch size. In particular, pay attention to the following:

1 - You initialize the weights with a very small standard deviation. Try initializing them with a stddev of 0.1:

'weights': tf.Variable(tf.truncated_normal([13, 3], stddev=0.1))

Also, your biases are far too small and do not need to be initialized that way. Try:

'biases': tf.Variable(tf.constant(0.1, shape=[3]))

2 - Your learning rate of 0.5 is too high; lower it to 0.001.
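As a minimal sketch of the two suggestions combined (using the question's 13-feature / 3-class layer shape; only the first layer and the optimizer are shown, the other layers would be changed the same way):

# Suggested initialization: stddev=0.1 for weights, a constant 0.1 for biases
hidden_Layer1 = {'weights': tf.Variable(tf.truncated_normal([13, 3], stddev=0.1)),
                 'biases':  tf.Variable(tf.constant(0.1, shape=[3]))}
# Suggested learning rate: 0.001 instead of 0.5
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(error)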
Play with those parameters, and feel free to post your results here. I hope it helps.

I modified the code a little because I switched to the iris dataset. I changed my code following your hints and I print the error, but the error does not decrease over the iterations and I don't know why. Maybe I should try adding more hidden layers.
import tensorflow as tf
import numpy as np

def importDataset(path, nsample):
    #--------------------------------------------------------------IMPORT DATASET--------------------------------------------------------------
    filename_queue = tf.train.string_input_producer([path], shuffle=True)
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    record_defaults = [[1.0], [1.0], [1.0], [1.0], [1.0]]
    col1, col2, col3, col4, col5 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.stack([col1, col2, col3, col4])

    with tf.Session() as sess:
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        training_data = np.matrix([])
        training_y = np.matrix([])
        for i in range(nsample):
            # Retrieve a single instance:
            example, label = sess.run([features, col5])
            # Build a vector of three zeros representing the three hypothetical classes
            temp = np.zeros(3)
            # Put a 1 at the position corresponding to the example's class,
            # e.g. position 2 holds a 1 if the example belongs to class 2
            temp[int(label)] = 1.0
            if i == 0:
                training_data = np.vstack([example])
                training_y = np.vstack([temp])
            else:
                training_data = np.vstack([training_data, example])
                training_y = np.vstack([training_y, temp])
        coord.request_stop()
        coord.join(threads)
        #print(len(training_data[:,1]))
        #print(len(training_y[:,1]))
        return training_data, training_y

def splitDataset(nsample, testPerc, path):
    example, example_y = importDataset(path, nsample)
    # Convert training_data and training_y into lists
    example_list = example.tolist()
    example_y_list = example_y.tolist()
    training_data = list()
    training_y = list()
    percent = int((nsample*testPerc)/100)
    # Begin uniform extraction from the data.
    for i in range(percent):
        index = np.random.randint(0, len(example_list))
        training_data.append(example_list[index])
        training_y.append(example_y_list[index])
        example_list.remove(example_list[index])
        example_y_list.remove(example_y_list[index])
    training_data = np.matrix(training_data)
    training_y = np.matrix(training_y)
    test_data = np.matrix(example_list)
    test_y = np.matrix(example_y_list)
    #print(len(training_data[:,1]))
    #print(len(training_y[:,1]))
    #print(len(test_data[:,1]))
    #print(len(test_y[:,1]))
    #print(training_y[0])
    #print(type(training_data))
    return training_data, training_y, test_data, test_y

#---------------------------------------------MODEL DEFINITION: START--------------------------------------
x = tf.placeholder(tf.float32, [None, 4])
y = tf.placeholder(tf.float32, [None, 3])

hidden_Layer1 = {'weights': tf.Variable(tf.truncated_normal([4, 10], stddev=0.1)), 'biases': tf.Variable(tf.constant(0.1, shape=[10]))}
hidden_Layer2 = {'weights': tf.Variable(tf.truncated_normal([10, 10], stddev=0.1)), 'biases': tf.Variable(tf.constant(0.1, shape=[10]))}
hidden_Layer3 = {'weights': tf.Variable(tf.truncated_normal([10, 20], stddev=0.1)), 'biases': tf.Variable(tf.constant(0.1, shape=[20]))}
output_Layer  = {'weights': tf.Variable(tf.truncated_normal([20, 3], stddev=0.1)), 'biases': tf.Variable(tf.constant(0.1, shape=[3]))}

# hidden layer #1
output_Layer1 = tf.add(tf.matmul(x, hidden_Layer1['weights']), hidden_Layer1['biases'])
output_Layer1 = tf.nn.sigmoid(output_Layer1)
# hidden layer #2
output_Layer2 = tf.add(tf.matmul(output_Layer1, hidden_Layer2['weights']), hidden_Layer2['biases'])
output_Layer2 = tf.nn.sigmoid(output_Layer2)
# hidden layer #3
output_Layer3 = tf.add(tf.matmul(output_Layer2, hidden_Layer3['weights']), hidden_Layer3['biases'])
output_Layer3 = tf.nn.sigmoid(output_Layer3)
# output layer
output_Layer_nosig = tf.add(tf.matmul(output_Layer3, output_Layer['weights']), output_Layer['biases'])
output_Layer_Output = tf.nn.sigmoid(output_Layer_nosig)
#--------------------------------------------MODEL DEFINITION: END-------------------------------------------

#--------------------------------------------MODEL TRAINING-----------------------------------------------
error = tf.nn.l2_loss(output_Layer_Output - y, name="squared_error_cost")
#error = tf.nn.softmax_cross_entropy_with_logits(logits=output_Layer_nosig, labels=y)
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(error)

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

training_data, training_label, test_data, test_label = splitDataset(100, 90, "iris1.csv")

for _ in range(2000):
    print(sess.run(error, feed_dict={x: training_data, y: training_label}))

correct_class = tf.equal(tf.argmax(output_Layer_Output, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_class, tf.float32))
print(sess.run([accuracy], feed_dict={x: test_data, y: test_label}))
Sorry, I forgot to include train_step in this part of the code:
print(sess.run(error,feed_dict={x:training_data, y:training_label}))
Now I am trying to tune the hyperparameters to get a good classification. Thanks for your answer.
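For reference, a minimal sketch of what that correction amounts to: the training loop runs train_step in the same sess.run call so the gradient update is actually applied, and error is fetched only for monitoring (same tensor names as in the code above):

for _ in range(2000):
    # train_step applies the gradient descent update; error is only printed to watch the loss
    _, err = sess.run([train_step, error], feed_dict={x: training_data, y: training_label})
    print(err)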