TensorFlow 神经网络的误差不收敛
标签:tensorflow, neural-network, julia。我用 Julia 通过 TensorFlow 创建了一个神经网络。网络可以运行,但误差不收敛,以下是 TensorBoard 的结果: 为了检查我的误差函数,我使用了 Malmaud 的教程,并用我的函数替换了其中的“准确率(accuracy)”,结果运行正常。因此,我认为问题出在我的网络上。你能帮我吗?这是我的代码:
# Expose only CUDA device "0" to this process; set before TensorFlow is loaded.
ENV["CUDA_VISIBLE_DEVICES"] = "0" # It is to use the gpu
using TensorFlow
using Distributions # presumably the source of `Normal`, used below as the variable initializer
# Session created on a new Graph(); every later `run(sess, ...)` call goes through it.
sess = Session(Graph())
batch_size = 30
num_pixels = 64
###########
# Data base: 1000 constant images. Every pixel of image i holds the value i, and the
# label of image i is i as well.
arrays_labels = collect(Float32, 1:1000)
arrays_data = zeros(Float32, length(arrays_labels), num_pixels, num_pixels)
# Broadcast the label vector along both pixel axes: arrays_data[i, j, k] == i.
arrays_data .= arrays_labels
###########
# inputs
# `x` is the network input: one scalar per sample, laid out as [batch, 1, 1, 1] so that
# it can be fed to the first transposed convolution.
x = placeholder(Float32, shape=[batch_size, 1, 1, 1])
# `y` is the training target: one flattened num_pixels x num_pixels image per sample.
# Its shape is declared (it was previously left unspecified) so the expected feed is
# explicit and matches what the training loop supplies.
y = placeholder(Float32, shape=[batch_size, num_pixels*num_pixels])
###########
"""
    create_batch(batch_size; data=arrays_data, labels=arrays_labels)

Draw `batch_size` distinct samples at random and return the tuple `(labels, images)`.

# Arguments
- `batch_size`: number of samples to draw.
- `data`: 3-D array indexed as `[sample, row, column]`; defaults to the global `arrays_data`.
- `labels`: vector holding one label per sample; defaults to the global `arrays_labels`.

# Returns
`(batch_labels, batch_images)`: a `Float32` vector of length `batch_size` and a
`Float32` array of size `(batch_size, size(data, 2), size(data, 3))`. The labels come
first; the training loop relies on that order.

# Throws
- `ArgumentError` if `batch_size` exceeds the number of available samples.
"""
function create_batch(batch_size; data=arrays_data, labels=arrays_labels)
    # Derive the dataset size from the data instead of hard-coding it.
    num_samples = size(data, 1)
    if batch_size > num_samples
        throw(ArgumentError(
            "batch_size ($batch_size) exceeds the number of samples ($num_samples)"))
    end
    # A random permutation guarantees that no sample is drawn twice within a batch.
    picked = shuffle(1:num_samples)[1:batch_size]
    batch_images = convert(Array{Float32}, data[picked, :, :])
    batch_labels = convert(Array{Float32}, labels[picked])
    return batch_labels, batch_images
end
###########
# Summary to use TensorBoard
# (short alias for TensorFlow.summary; used further down for histogram / merge_all)
summary = TensorFlow.summary
# Create the different layers ; poids = weight
# Every trainable variable is created inside one variable scope whose name is "mymodel"
# plus a random suffix — presumably so the script can be re-run in the same Julia
# process without variable-name collisions (TODO confirm). The scope is given
# Normal(0, .001) as its initializer.
# `global` is needed because the do-block is a closure: without it the names would be
# local to the block and invisible to the layer definitions that follow.
variable_scope("mymodel" * randstring(), initializer=Normal(0, .001)) do
# Filters: the third entry equals the channel count of the matching layer's output and
# the fourth that of its input (e.g. p2 = [4,4,3,2] takes 2 channels to 3).
# Presumably the first two entries are kernel height and width — confirm against the
# conv2d_transpose documentation.
global poids_1 = get_variable("p1", [2,2,2,1], Float32)
global poids_2 = get_variable("p2",[4,4,3,2],Float32)
global poids_3 = get_variable("p3",[2,2,4,3],Float32)
# NOTE(review): p4-p6 start with [1,4,...] while p1-p3 start with equal values —
# confirm the non-square shape is intentional.
global poids_4 = get_variable("p4",[1,4,4,4],Float32)
global poids_5 = get_variable("p5",[1,4,4,4],Float32)
global poids_6 = get_variable("p6",[1,4,4,4],Float32)
# Biases: one value per output channel of the matching layer.
global biases_1 = get_variable("b1",[2],Float32)
global biases_2 = get_variable("b2",[3],Float32)
global biases_3 = get_variable("b3",[4],Float32)
global biases_4 = get_variable("b4",[4],Float32)
global biases_5 = get_variable("b5",[4],Float32)
global biases_6 = get_variable("b6",[4],Float32)
end
# One up-sampling stage: stride-2 transposed convolution ("SAME" padding), plus bias,
# followed by ReLU. `side` is the spatial size requested for the stage output and
# `channels` its channel count.
function upsample_relu(input, weights, bias, side, channels)
    deconv = nn.conv2d_transpose(input, weights, [batch_size, side, side, channels],
                                 [1, 2, 2, 1], padding="SAME")
    return nn.relu(deconv + bias)
end

# Decoder: [batch,1,1,1] -> 2x2 -> 4x4 -> 8x8 -> 16x16 -> 32x32 -> 64x64.
logits_1 = upsample_relu(x, poids_1, biases_1, 2, 2)
logits_2 = upsample_relu(logits_1, poids_2, biases_2, 4, 3)
logits_3 = upsample_relu(logits_2, poids_3, biases_3, 8, 4)
logits_4 = upsample_relu(logits_3, poids_4, biases_4, 16, 4)
logits_5 = upsample_relu(logits_4, poids_5, biases_5, 32, 4)
logits_6 = upsample_relu(logits_5, poids_6, biases_6, 64, 4)
# Sum over the fourth (channel) axis, then flatten each image into one row.
logits_6 = reduce_sum(logits_6, axis=[4])
logits = reshape(logits_6, [batch_size, num_pixels*num_pixels]) # Output of network
# NOTE(review): softmax is applied to an image-valued output here; for a regression
# target this may be unintended — confirm.
smax = nn.softmax(logits)
# Loss: negated sum of y .* log(smax), with reduce_sum called without an axis.
# NOTE(review): the outer reduce_mean presumably receives a single value and is then a
# no-op — confirm.
# NOTE(review): `y` is fed with raw image values by the training loop, not with a
# probability distribution, so this is not a conventional cross-entropy — confirm.
cross_entropy = reduce_mean(-reduce_sum(y.*log(smax))) # loss function
# Adam optimiser constructed with 0.0001 (presumably the learning rate — confirm);
# train_op is the op that minimises `cross_entropy`.
optimizer = train.AdamOptimizer(0.0001)
train_op = train.minimize(optimizer,cross_entropy)
# Monitoring metric: square root of the summed squared difference between smax and y,
# scaled by 1/(num_pixels^2 * batch_size).
# NOTE(review): `^2` (not `.^2`) is applied to a tensor — confirm it is element-wise.
error = (1/(num_pixels*num_pixels*batch_size)).*sqrt(sum((smax - y)^2))
# Register `error` under the tag "Error" and gather the registered summaries in `merged`
# (only consumed by the commented-out TensorBoard lines in the training loop).
# NOTE(review): a histogram summary is used for what looks like a single value — confirm.
summary.histogram("Error",error)
merged = summary.merge_all()
# Initialise the variables, train for 500 steps, and always release the session.
# summary_writer = summary.FileWriter("Folder Path") # If you want use TensorBoard
try
    run(sess, global_variables_initializer())
    # Train loop
    for i in 1:500
        # create_batch returns (labels, images): the labels are the network input `x`,
        # the flattened images are the targets `y`.
        labels, images = create_batch(batch_size)
        # Build the feed once per step and reuse it for every run of that step.
        feed = Dict(x => reshape(labels, (batch_size, 1, 1, 1)),
                    y => reshape(images, (batch_size, num_pixels*num_pixels)))
        run(sess, train_op, feed)
        if i % 100 == 1
            # Evaluated after the update, on the batch that was just trained on.
            err = run(sess, error, feed)
            info("train $i , error = $err")
        end
        # If you use TensorBoard, please use the following commands
        # new = run(sess, merged, feed)
        # write(summary_writer, new, i)
    end
finally
    # Close the session even when a training step throws.
    close(sess)
end
编辑:
以下代码可以正常工作:
using TensorFlow
using Distributions
# Session created on a new Graph(); every later `run(sess, ...)` call goes through it.
sess = Session(Graph())
batch_size = 30
num_pixels = 256
###########
# Data base: 10000 constant images. Every pixel of image i holds the value i, and the
# label of image i is i as well.
# NOTE: arrays_data is 10000 x 256 x 256 Float32, i.e. roughly 2.6 GB of memory.
arrays_labels = collect(Float32, 1:10000)
arrays_data = zeros(Float32, length(arrays_labels), num_pixels, num_pixels)
# A single broadcast replaces the element-by-element loop at global scope:
# arrays_data[i, j, k] == i for every pixel (j, k).
arrays_data .= arrays_labels
###########
# inputs
# `x` is the network input: one scalar per sample, laid out as [batch, 1, 1, 1].
x = placeholder(Float32, shape=[batch_size, 1, 1, 1])
# `y` is the training target: one flattened num_pixels x num_pixels image per sample.
# Its shape is declared (it was previously left unspecified) so the expected feed is
# explicit and matches what the training loop supplies.
y = placeholder(Float32, shape=[batch_size, num_pixels*num_pixels])
###########
"""
    create_batch(batch_size; data=arrays_data, labels=arrays_labels)

Draw `batch_size` distinct samples at random and return the tuple `(labels, images)`.

# Arguments
- `batch_size`: number of samples to draw.
- `data`: 3-D array indexed as `[sample, row, column]`; defaults to the global `arrays_data`.
- `labels`: vector holding one label per sample; defaults to the global `arrays_labels`.

# Returns
`(batch_labels, batch_images)`: a `Float32` vector of length `batch_size` and a
`Float32` array of size `(batch_size, size(data, 2), size(data, 3))`. The labels come
first; the training loop relies on that order.

# Throws
- `ArgumentError` if `batch_size` exceeds the number of available samples.
"""
function create_batch(batch_size; data=arrays_data, labels=arrays_labels)
    # Derive the dataset size from the data instead of hard-coding it.
    num_samples = size(data, 1)
    if batch_size > num_samples
        throw(ArgumentError(
            "batch_size ($batch_size) exceeds the number of samples ($num_samples)"))
    end
    # A random permutation guarantees that no sample is drawn twice within a batch.
    picked = shuffle(1:num_samples)[1:batch_size]
    batch_images = convert(Array{Float32}, data[picked, :, :])
    batch_labels = convert(Array{Float32}, labels[picked])
    return batch_labels, batch_images
end
###########
# Summary to use TensorBoard
# (short alias for TensorFlow.summary; used further down for histogram / merge_all)
summary = TensorFlow.summary
# Create the different layers ; poids = weight
# Every trainable variable is created inside one variable scope whose name is "mymodel"
# plus a random suffix — presumably so the script can be re-run in the same Julia
# process without variable-name collisions (TODO confirm). The scope is given
# Normal(0, .001) as its initializer.
# `global` is needed because the do-block is a closure: without it the names would be
# local to the block and invisible to the layer definitions that follow.
variable_scope("mymodel" * randstring(), initializer=Normal(0, .001)) do
# Filters: the third entry equals the channel count of the matching layer's output and
# the fourth that of its input (e.g. p7 = [3,3,8,4] takes 4 channels to 8).
# Presumably the first two entries are kernel height and width (3x3 throughout) —
# confirm against the conv2d_transpose documentation.
global poids_1 = get_variable("p1", [3,3,2,1], Float32)
global poids_2 = get_variable("p2",[3,3,3,2],Float32)
global poids_3 = get_variable("p3",[3,3,4,3],Float32)
global poids_4 = get_variable("p4",[3,3,4,4],Float32)
global poids_5 = get_variable("p5",[3,3,4,4],Float32)
global poids_6 = get_variable("p6",[3,3,4,4],Float32)
global poids_7 = get_variable("p7",[3,3,8,4],Float32)
global poids_8 = get_variable("p8",[3,3,8,8],Float32)
# Biases: one value per output channel of the matching layer.
global biases_1 = get_variable("b1",[2],Float32)
global biases_2 = get_variable("b2",[3],Float32)
global biases_3 = get_variable("b3",[4],Float32)
global biases_4 = get_variable("b4",[4],Float32)
global biases_5 = get_variable("b5",[4],Float32)
global biases_6 = get_variable("b6",[4],Float32)
global biases_7 = get_variable("b7",[8],Float32)
global biases_8 = get_variable("b8",[8],Float32)
end
# One up-sampling stage: stride-2 transposed convolution ("SAME" padding), plus bias,
# followed by ReLU. `side` is the spatial size requested for the stage output and
# `channels` its channel count.
function upsample_relu(input, weights, bias, side, channels)
    deconv = nn.conv2d_transpose(input, weights, [batch_size, side, side, channels],
                                 [1, 2, 2, 1], padding="SAME")
    return nn.relu(deconv + bias)
end

# Decoder: [batch,1,1,1] -> 2 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256 pixels per side.
logits_1 = upsample_relu(x, poids_1, biases_1, 2, 2)
logits_2 = upsample_relu(logits_1, poids_2, biases_2, 4, 3)
logits_3 = upsample_relu(logits_2, poids_3, biases_3, 8, 4)
logits_4 = upsample_relu(logits_3, poids_4, biases_4, 16, 4)
logits_5 = upsample_relu(logits_4, poids_5, biases_5, 32, 4)
logits_6 = upsample_relu(logits_5, poids_6, biases_6, 64, 4)
logits_7 = upsample_relu(logits_6, poids_7, biases_7, 128, 8)
logits_8 = upsample_relu(logits_7, poids_8, biases_8, 256, 8)
# Sum over the fourth (channel) axis, then flatten each image into one row.
logits_8 = reduce_sum(logits_8, axis=[4])
logits = reshape(logits_8, [batch_size, num_pixels*num_pixels]) # Output of network
# Don't use a softmax here...
# Loss: square root of the summed squared difference between the target and the network
# output, with `sum` called without an axis.
# NOTE(review): the outer reduce_mean presumably receives a single value and is then a
# no-op — confirm.
least_square = reduce_mean(sqrt(sum((y - logits).^2))) # Loss function
# Adam optimiser constructed with 0.0001 (presumably the learning rate — confirm);
# train_op is the op that minimises `least_square`.
optimizer = train.AdamOptimizer(0.0001)
train_op = train.minimize(optimizer,least_square)
# Monitoring metric: sqrt(sum of squared differences / (num_pixels^2 * batch_size)),
# i.e. the root-mean-square difference over the batch given the shapes fed in the loop.
error = sqrt(sum((y - logits).^2)./(num_pixels.*num_pixels.*batch_size))
# Register `error` under the tag "Error" and gather the registered summaries in `merged`
# (only consumed by the commented-out TensorBoard lines in the training loop).
# NOTE(review): a histogram summary is used for what looks like a single value — confirm.
summary.histogram("Error",error)
merged = summary.merge_all()
# Initialise the variables, train for 1500 steps, and always release the session.
# summary_writer = summary.FileWriter("Folder Path") # If you want use TensorBoard
try
    run(sess, global_variables_initializer())
    # Train loop
    for i in 1:1500
        # create_batch returns (labels, images): the labels are the network input `x`,
        # the flattened images are the targets `y`.
        labels, images = create_batch(batch_size)
        # Build the feed once per step and reuse it for every run of that step.
        feed = Dict(x => reshape(labels, (batch_size, 1, 1, 1)),
                    y => reshape(images, (batch_size, num_pixels*num_pixels)))
        run(sess, train_op, feed)
        if i % 100 == 1
            # Evaluated after the update, on the batch that was just trained on.
            err = run(sess, error, feed)
            info("train $i , error = $err")
        end
        # If you use TensorBoard, please use the following commands
        # newer = run(sess, merged, feed)
        # write(summary_writer, newer, i)
    end
finally
    # Close the session even when a training step throws.
    close(sess)
end
`error` 是否需要定义为函数?
比如:
error(smax, y) = (1/(num_pixels*num_pixels*batch_size)) * sqrt(sum((smax - y)^2))
我终于找到了这个问题的解决方案
三个要点:
- Malmaud的教程将softmax应用于网络的输出,因为有不同的可能结果,必须选择最佳结果(概率较高)。在这种情况下,输出是一张图片,我们不必应用softmax;只需将输出与输入进行比较
- 对于损失函数,不需要交叉熵,选择最小二乘法
- 只有64像素的数据不够,所以256像素的数据库更好
我在问题中添加了我的新代码。
评论:
- 谢谢你的建议,但这并不能解决问题,我仍然得到完全相同的错误。
- 如果使用内置的误差函数时网络能够正常训练,而自定义误差函数是唯一的改动,那么我会从这里入手排查。变量 y 和 smax 分别是什么?我习惯于从网络中得到 y_pred 和 y_true 来进行比较。
- 在你的例子中,y 相当于 y_true,smax 相当于 y_pred(之所以叫 smax,是因为它是 softmax 函数的输出……)。
- 也许可以先把你的脚本改成与 Malmaud 的脚本完全相同(目前有一些差别,例如没有 @tf begin/end 等),然后一步一步地调整,直到问题出现。
- 什么是“@tf begin/end”?
- 我只试了一层,但没用……