TensorFlow neural network error does not converge


I created a neural network with TensorFlow in Julia.

My network runs, but the error does not converge. Here is the TensorBoard result:

To check my error function, I followed the tutorial and replaced the "accuracy" metric with my error function.
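
Roughly, the substitution looks like this (a sketch, not the tutorial's exact code; y_pred and y_true are stand-in names for the tutorial's prediction and label tensors):

# Hedged sketch: keep the tutorial's graph and training loop, but evaluate a
# custom error node instead of its accuracy node. `y_pred` and `y_true` are
# assumed names for the tutorial's prediction and label tensors.
my_error = (1/(num_pixels*num_pixels*batch_size)) .* sqrt(sum((y_pred - y_true).^2))

# err = run(sess, my_error, Dict(x => batch_images, y_true => batch_labels))
# info("step $i, error = $err")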

It works:

So I think there is something wrong with my network itself.

Can you help me?

Here is my code:

ENV["CUDA_VISIBLE_DEVICES"] = "0" # It is to use the gpu
using TensorFlow
using Distributions

sess = Session(Graph())

batch_size = 30  
num_pixels = 64

###########

# Dataset: 1000 arrays; the first array is filled with 1, the second with 2, etc.

arrays_data = zeros(Float32,1000,num_pixels,num_pixels)

arrays_labels = zeros(Float32,1000)

for k in 1:num_pixels, j in 1:num_pixels, i in 1:1000
        arrays_data[i,j,k] = i
end

for i in 1:1000
    arrays_labels[i] = i
end

###########

# inputs

x = placeholder(Float32, shape= [batch_size, 1, 1, 1])

y = placeholder(Float32)

###########

 # Function to create a batch

function create_batch(batch_size)
    x = zeros(Float32, batch_size,num_pixels, num_pixels)
    y = zeros(Float32, batch_size)

index = shuffle(1:1000) # To choose a random batch

    for i in 1:batch_size
        x[i, : ,:] = arrays_data[index[i],:,:]

        y[i] = arrays_labels[index[i]]
    end
    y, x
end


###########


# Summary to use TensorBoard

 summary = TensorFlow.summary

# Create the different layers ; poids = weight

variable_scope("mymodel" * randstring(), initializer=Normal(0, .001)) do
    global poids_1 = get_variable("p1", [2,2,2,1], Float32)
    global poids_2 = get_variable("p2",[4,4,3,2],Float32)
    global poids_3 = get_variable("p3",[2,2,4,3],Float32)
    global poids_4 = get_variable("p4",[1,4,4,4],Float32)
    global poids_5 = get_variable("p5",[1,4,4,4],Float32)
    global poids_6 = get_variable("p6",[1,4,4,4],Float32)
    global biases_1 = get_variable("b1",[2],Float32)
    global biases_2 = get_variable("b2",[3],Float32)
    global biases_3 = get_variable("b3",[4],Float32)
    global biases_4 = get_variable("b4",[4],Float32)
    global biases_5 = get_variable("b5",[4],Float32)
    global biases_6 = get_variable("b6",[4],Float32)
end

logits_1 = nn.relu(nn.conv2d_transpose(x, poids_1, [batch_size,2,2,2], [1,2,2,1],padding = "SAME") + biases_1)

logits_2 = nn.relu(nn.conv2d_transpose(logits_1,poids_2, [batch_size,4,4,3], [1,2,2,1],padding = "SAME") + biases_2)

logits_3 = nn.relu(nn.conv2d_transpose(logits_2,poids_3, [batch_size,8,8,4], [1,2,2,1],padding = "SAME") + biases_3)

logits_4 = nn.relu(nn.conv2d_transpose(logits_3,poids_4, [batch_size,16,16,4], [1,2,2,1],padding = "SAME") + biases_4)

logits_5 = nn.relu(nn.conv2d_transpose(logits_4,poids_5, [batch_size,32,32,4], [1,2,2,1],padding = "SAME") + biases_5)

 logits_6 = nn.relu(nn.conv2d_transpose(logits_5,poids_6, [batch_size,64,64,4], [1,2,2,1],padding = "SAME") + biases_6)

logits_6 = reduce_sum(logits_6, axis=[4])



logits = reshape(logits_6, [batch_size,num_pixels*num_pixels])  # Output of network




smax = nn.softmax(logits)



cross_entropy = reduce_mean(-reduce_sum(y.*log(smax))) # loss function

optimizer = train.AdamOptimizer(0.0001)

train_op = train.minimize(optimizer,cross_entropy)

error = (1/(num_pixels*num_pixels*batch_size)).*sqrt(sum((smax - y)^2))

 summary.histogram("Error",error)

 merged = summary.merge_all()

run(sess, global_variables_initializer())

# summary_writer = summary.FileWriter("Folder Path") # If you want to use TensorBoard

# Train loop

for i in 1:500

batch = create_batch(batch_size)


x_ = run(sess, train_op, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,64*64))))


    if i % 100 == 1
        err = run(sess, error, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,64*64))))
        info("train $i , error = $err")
    end

# If you use TensorBoard, please use the following commands

      # new = run(sess,merged, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,64*64))))

      # write(summary_writer, new, i)

end

close(sess)
EDIT

The following code works:

using TensorFlow
using Distributions

sess = Session(Graph())

batch_size = 30
num_pixels = 256

###########

# Dataset: 10000 arrays; the first array is filled with 1, the second with 2, etc.

arrays_data = zeros(Float32,10000,num_pixels,num_pixels)

arrays_labels = zeros(Float32,10000)

for k in 1:num_pixels, j in 1:num_pixels, i in 1:10000
            arrays_data[i,j,k] = i
end


for i in 1:10000
    arrays_labels[i] = i
end

###########

# inputs

x = placeholder(Float32, shape= [batch_size, 1, 1, 1])

y = placeholder(Float32)

###########

 # Function to create a batch

function create_batch(batch_size)
    x = zeros(Float32, batch_size,num_pixels, num_pixels)
    y = zeros(Float32, batch_size)

index = shuffle(1:10000) # To choose a random batch

    for i in 1:batch_size
        x[i, : ,:] = arrays_data[index[i],:,:]

        y[i] = arrays_labels[index[i]]
    end
    y, x
end


###########


# Summary to use TensorBoard

 summary = TensorFlow.summary

# Create the different layers ; poids = weight

variable_scope("mymodel" * randstring(), initializer=Normal(0, .001)) do
    global poids_1 = get_variable("p1", [3,3,2,1], Float32)
    global poids_2 = get_variable("p2",[3,3,3,2],Float32)
    global poids_3 = get_variable("p3",[3,3,4,3],Float32)
    global poids_4 = get_variable("p4",[3,3,4,4],Float32)
    global poids_5 = get_variable("p5",[3,3,4,4],Float32)
    global poids_6 = get_variable("p6",[3,3,4,4],Float32)
    global poids_7 = get_variable("p7",[3,3,8,4],Float32)
    global poids_8 = get_variable("p8",[3,3,8,8],Float32)
    global biases_1 = get_variable("b1",[2],Float32)
    global biases_2 = get_variable("b2",[3],Float32)
    global biases_3 = get_variable("b3",[4],Float32)
    global biases_4 = get_variable("b4",[4],Float32)
    global biases_5 = get_variable("b5",[4],Float32)
    global biases_6 = get_variable("b6",[4],Float32)
    global biases_7 = get_variable("b7",[8],Float32)
    global biases_8 = get_variable("b8",[8],Float32)
end

logits_1 = nn.relu(nn.conv2d_transpose(x, poids_1, [batch_size,2,2,2], [1,2,2,1],padding = "SAME") + biases_1)

logits_2 = nn.relu(nn.conv2d_transpose(logits_1,poids_2, [batch_size,4,4,3], [1,2,2,1],padding = "SAME") + biases_2)

logits_3 = nn.relu(nn.conv2d_transpose(logits_2,poids_3, [batch_size,8,8,4], [1,2,2,1],padding = "SAME") + biases_3)

logits_4 = nn.relu(nn.conv2d_transpose(logits_3,poids_4, [batch_size,16,16,4], [1,2,2,1],padding = "SAME") + biases_4)

logits_5 = nn.relu(nn.conv2d_transpose(logits_4,poids_5, [batch_size,32,32,4], [1,2,2,1],padding = "SAME") + biases_5)

 logits_6 = nn.relu(nn.conv2d_transpose(logits_5,poids_6, [batch_size,64,64,4], [1,2,2,1],padding = "SAME") + biases_6)

logits_7 = nn.relu(nn.conv2d_transpose(logits_6,poids_7, [batch_size,128,128,8], [1,2,2,1],padding = "SAME") + biases_7)

logits_8 = nn.relu(nn.conv2d_transpose(logits_7,poids_8, [batch_size,256,256,8], [1,2,2,1],padding = "SAME") + biases_8)

logits_8 = reduce_sum(logits_8, axis=[4])


logits = reshape(logits_8, [batch_size,num_pixels*num_pixels])  # Output of network


# Don't use a softmax here...


least_square = reduce_mean(sqrt(sum((y - logits).^2))) # Loss function

optimizer = train.AdamOptimizer(0.0001)

train_op = train.minimize(optimizer,least_square)


error = sqrt(sum((y - logits).^2)./(num_pixels.*num_pixels.*batch_size))

 summary.histogram("Error",error)

 merged = summary.merge_all()

run(sess, global_variables_initializer())

# summary_writer = summary.FileWriter("Folder Path") # If you want to use TensorBoard

# Train loop

for i in 1:1500

batch = create_batch(batch_size)


x_ = run(sess, train_op, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,num_pixels*num_pixels))))


    if i % 100 == 1
        err = run(sess, error, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,num_pixels*num_pixels))))
        info("train $i , error = $err")
    end

# If you use TensorBoard, please use the following commands

       # newer = run(sess,merged, Dict(x => reshape(batch[1], (batch_size,1,1,1)), y => reshape(batch[2], (batch_size,num_pixels*num_pixels))))

       # write(summary_writer, newer, i)

end

close(sess)

Does error need to be defined as a function?

For example:

error(smax, y) = (1/(num_pixels*num_pixels*batch_size)).*sqrt(sum((smax - y)^2))
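
A minimal sketch of what that suggestion would look like against the question's graph (smax, y, num_pixels and batch_size are the tensors and constants defined above; whether this helps convergence is a separate matter):

# Hypothetical helper: build the error metric through a function instead of inline.
error_metric(smax, y) = (1/(num_pixels*num_pixels*batch_size)).*sqrt(sum((smax - y)^2))

error = error_metric(smax, y)   # builds the graph node once
# err = run(sess, error, Dict(x => ..., y => ...))  # evaluated during training as before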

I finally found the solution to this problem.

Three key points:

  • Malmaud's tutorial applies a softmax to the output of the network because there are several possible outcomes and the best one (the highest probability) has to be picked. Here the output is a picture, so there is no need to apply a softmax; just compare the output with the input.

  • For the loss function, cross-entropy is not needed; use least squares instead (a minimal sketch follows this list).

  • Data with only 64 pixels is not enough, so a 256-pixel database is better.
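
A minimal sketch of points 1 and 2, reusing the tensors already defined in the edited code above (logits, y):

# First version (classification-style head):
# smax = nn.softmax(logits)
# cross_entropy = reduce_mean(-reduce_sum(y.*log(smax)))

# Edited version (regression-style): compare the raw output with the target
# directly and minimise a least-squares loss.
least_square = reduce_mean(sqrt(sum((y - logits).^2)))
train_op = train.minimize(train.AdamOptimizer(0.0001), least_square)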


I added my new code to the question.

Thanks for your advice, but it does not solve the problem; I get exactly the same error.

If it trains well with the built-in error function, and that is the only addition, then I would start from there. What are the variables y and smax? I am used to getting a y_pred and a y_true out of the network to compare.

In your example, y is y_true and smax is y_pred (it is called smax because it is the output of the softmax function…).

Maybe change your script to be the same as Malmaud's (it is a bit different - for example, there is no @tf begin/end, etc.) and then adapt it step by step until you see the problem.

What is "@tf begin/end"? I tried with only one layer, but it did not work…
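
"@tf begin/end" refers to TensorFlow.jl's @tf macro, which names graph nodes after the Julia variables they are assigned to, so they show up with readable names in TensorBoard. A minimal, self-contained sketch (not taken from the question's code):

using TensorFlow

sess = Session(Graph())

# The @tf macro names each node after the variable it is assigned to, so the
# placeholders below appear as "a" and "b" in the graph and in TensorBoard.
@tf begin
    a = placeholder(Float32)
    b = placeholder(Float32)
end

run(sess, a + b, Dict(a => 1.0f0, b => 2.0f0))
close(sess)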