Keras loss is nan, but accuracy is well defined

I am training a U-Net in Python with Keras and the TensorFlow backend. After one or two training steps (with a batch size of 1), my loss turns into nan. I checked the data and confirmed that there are no nan values in my training data. I also set a clipnorm to prevent exploding gradients, which had no effect. Does anyone know where this nan loss might come from?

I am using the following code:

import keras
import os
import random
import numpy as np


path = 'db/clouds_total/new/'

epochs = 280
classes = 2
files_labels = os.listdir(path + 'accepted_np')
files_raws = os.listdir(path + 'raw_np')


def get_one_hot(targets, nb_classes):
    # turn an integer class mask of shape (...,) into a one-hot array of shape (..., nb_classes)
    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
    return res.reshape(list(targets.shape) + [nb_classes])


def generator():
    while True:
        files_labels = os.listdir(path + 'accepted_np')
        files_raws = os.listdir(path + 'raw_np')

        # shuffle the sample order once per pass over the data
        samp = np.random.choice(np.arange(len(files_labels)), replace=False, size=len(files_labels))

        for i in samp:
            label = np.load(path + 'accepted_np/' + files_labels[i])
            r = np.load(path + 'raw_np/' + files_raws[i])
            yield (r, label)

# build the network

input_im = keras.layers.Input(shape=[512, 512, 14], dtype='float32')

# contracting path
l0 = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation='relu')(input_im)
l0 = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation='relu')(l0)

l1 = keras.layers.AvgPool2D(pool_size=(2, 2))(l0)
l1 = keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation='relu')(l1)
l1 = keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation='relu')(l1)

l2 = keras.layers.AvgPool2D(pool_size=(2, 2))(l1)
l2 = keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation='relu')(l2)
l2 = keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation='relu')(l2)

l3 = keras.layers.AvgPool2D(pool_size=(2, 2))(l2)
l3 = keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation='relu')(l3)
l3 = keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation='relu')(l3)

l4 = keras.layers.AvgPool2D(pool_size=(2, 2))(l3)
l4 = keras.layers.Conv2D(filters=1024, kernel_size=(3, 3), padding="same", activation='relu')(l4)
l4 = keras.layers.Conv2D(filters=1024, kernel_size=(3, 3), padding="same", activation='relu')(l4)

# expanding path with skip connections
l3_up = keras.layers.Conv2DTranspose(filters=512, kernel_size=(3, 3), strides=(2, 2), padding="same")(l4)
l3_up = keras.layers.concatenate([l3, l3_up])
l3_up = keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation='relu')(l3_up)
l3_up = keras.layers.Conv2D(filters=512, kernel_size=(3, 3), padding="same", activation='relu')(l3_up)

l2_up = keras.layers.Conv2DTranspose(filters=256, kernel_size=(3, 3), strides=(2, 2), padding="same")(l3_up)
l2_up = keras.layers.concatenate([l2, l2_up])
l2_up = keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation='relu')(l2_up)
l2_up = keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding="same", activation='relu')(l2_up)

l1_up = keras.layers.Conv2DTranspose(filters=128, kernel_size=(3, 3), strides=(2, 2), padding="same")(l2_up)
l1_up = keras.layers.concatenate([l1, l1_up])
l1_up = keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation='relu')(l1_up)
l1_up = keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding="same", activation='relu')(l1_up)

l0_up = keras.layers.Conv2DTranspose(filters=64, kernel_size=(3, 3), strides=(2, 2), padding="same")(l1_up)
l0_up = keras.layers.concatenate([l0, l0_up])
l0_up = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation='relu')(l0_up)
l0_up = keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation='relu')(l0_up)

output = keras.layers.Conv2D(filters=classes, kernel_size=(3, 3), padding="same", activation='relu')(l0_up)

model = keras.models.Model(inputs=input_im, outputs=output)

opt = keras.optimizers.Adam(lr=0.0001, decay=0, clipnorm=0.5)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])



# train
for epoch in range(epochs):
    print(epoch)
    model.fit_generator(generator=generator(), steps_per_epoch=len(files_labels), epochs=1)
    if epoch % 20 == 0:
        name = path + 'model/model_' + str(epoch)
        model.save(name)

In the end I put a sigmoid at the output instead of a ReLU. That seems to have helped. I am not quite sure why, since I thought the clipnorm would take care of exploding gradients. Does the cross-entropy turn into nan when its input values get large?
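A minimal sketch of that change, assuming everything else in the posted network stays the same; only the activation of the final layer is swapped:

# final layer with a sigmoid instead of a relu, so every output lands in (0, 1)
output = keras.layers.Conv2D(filters=classes, kernel_size=(3, 3), padding="same", activation='sigmoid')(l0_up)

For categorical_crossentropy over one-hot targets, a softmax activation would be the more conventional choice, since it makes the per-pixel class probabilities sum to 1; a sigmoid only bounds each channel independently.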

I think the nan values arise from a 0.0 * np.log(0.0) computation inside the cross-entropy function.

When the ReLU receives a value below zero, it emits 0.0. When the cross-entropy then computes -p * log(p), this produces nan.

The sigmoid makes sure the output probabilities stay between 0 and 1.
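You can reproduce that arithmetic directly in numpy; a quick sketch of what happens when a prediction of exactly 0.0 reaches the log:

import numpy as np

p = np.float32(0.0)  # a ReLU output that came out as exactly zero
with np.errstate(divide='ignore', invalid='ignore'):
    print(np.log(p))       # -inf
    print(-p * np.log(p))  # 0.0 * -inf evaluates to nan

(As far as I can tell, Keras' backend clips predictions by a small epsilon before taking the log, but it first normalizes the output to sum to 1, so an all-zero relu output can still hit a 0/0 on that path.)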
