Python 用激活函数Selu训练CNN模型_Python_Tensorflow_Deep Learning_Conv Neural Network

Python 用激活函数Selu训练CNN模型

python tensorflow deep-learning

Python 用激活函数Selu训练CNN模型,python,tensorflow,deep-learning,conv-neural-network,Python,Tensorflow,Deep Learning,Conv Neural Network,我正在使用Tensorflow训练自己的模型。然而，当我将激活功能从Relu更改为Selu时，我遇到了一些麻烦事情就是这样。学习曲线意外下降，我不知道发生了什么我的学习曲线像这样据我所知，Selu可以防止过度拟合，所以我尝试在我的模型中实现它。当我想使用Selu时，有什么提示或条件吗这是我的代码：这是我更改激活功能的地方 ----- ----- 图表训练 -----------2007年9月18日------------ 我总能重现同样的结果这是我的代码，我用Jupyter写的

我正在使用Tensorflow训练自己的模型。然而，当我将激活函数从Relu更改为Selu时，我遇到了一些麻烦。

情况是这样的：学习曲线意外下降，我不知道发生了什么。

我的学习曲线

像这样

据我所知，Selu可以防止过拟合，所以我尝试在我的模型中使用它。当我想使用Selu时，有什么需要注意的提示或条件吗？

这是我的代码：

这是我更改激活函数的地方 ----- ----- 图表训练 -----------2017年9月18日------------

我总能重现同样的结果

这是我的代码，我用Jupyter写的。但很抱歉，我无法上传训练数据：

您能否在多次运行中一致地重现此结果？如果是这样的话，我建议前往TensorFlow GitHub提交一个issue。对我来说，代码似乎没有问题（特别是，因为它在一开始训练得很好）……一个可能的问题是您使用的学习率和衰减值。我怀疑在某个点上存在数值不稳定性。是的，我总是在训练结束时得到相同的结果。学习率为1.7e-3（0.0017），衰减率是0.9。另外，我把整个代码放在我的问题里。

def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name):
    """Apply a SELU-activated 2-D convolution followed by max pooling.

    Args:
        x_tensor: Input tensor handed to ``tf.layers.conv2d``.
        conv_num_outputs: Number of convolution filters.
        conv_ksize: Convolution kernel size.
        conv_strides: Convolution strides.
        pool_ksize: Max-pooling window size.
        pool_strides: Max-pooling strides.
        layer_name: Name given to the convolution layer.

    Returns:
        The output tensor of the max-pooling layer.
    """
    conv_layer = tf.layers.conv2d(
        x_tensor,
        conv_num_outputs,
        kernel_size=conv_ksize,
        strides=conv_strides,
        activation=tf.nn.selu,
        # SELU's self-normalizing behaviour assumes LeCun-normal weights
        # (variance scaling with scale=1.0 over fan_in). The layer default
        # (Glorot uniform) does not satisfy that assumption.
        kernel_initializer=tf.variance_scaling_initializer(scale=1.0, mode='fan_in'),
        name=layer_name,
    )
    conv_layer = tf.layers.max_pooling2d(conv_layer, pool_size=pool_ksize, strides=pool_strides)

    return conv_layer

# Clear the default graph before the model is (re)built.
tf.reset_default_graph()

#### placeholder ####
# Placeholder creation order is kept as-is so auto-generated op names do not change.
input_img = tf.placeholder(tf.float32, shape=(None, img_size, img_size, 3))
y_true = tf.placeholder(tf.float32, shape=(None, num_class))
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
lr_in = tf.placeholder(tf.float32, name='learning_rate')

# Kernel/stride settings shared by every conv + pool stage.
conv_ksize, conv_strides = (3, 3), (1, 1)
pool_ksize, pool_strides = (2, 2), (2, 2)
onebyone_ksize = (1, 1)

# Filter counts for the four convolution stages.
n_filters_1, n_filters_2, n_filters_3, n_filters_4 = 32, 64, 128, 256

# CNN: four conv + max-pool stages built with conv2d_maxpool.
conv_1 = conv2d_maxpool(input_img, n_filters_1, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv1")

# conv_1 = tf.layers.conv2d(conv_1, conv_num_outputs, kernel_size=conv_ksize, strides=conv_strides, activation=tf.nn.relu)

# conv_1_norm = tf.layers.batch_normalization(conv_1, name = "batch_norm1")
# conv_1_dropout = tf.layers.dropout(conv_1_norm, rate = keep_prob, training = True, name = "dropout1")

conv_2 = conv2d_maxpool(conv_1, n_filters_2, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv2")
# conv_2_norm = tf.layers.batch_normalization(conv_2)

conv_3 = conv2d_maxpool(conv_2, n_filters_3, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv3")
# conv_3_norm = tf.layers.batch_normalization(conv_3, name = "batch_norm3")
# conv_3_dropout = tf.layers.dropout(conv_3_norm, rate = keep_prob, training = True, name = "dropout3")

conv_4 = conv2d_maxpool(conv_3, n_filters_4, conv_ksize, conv_strides, pool_ksize, pool_strides, layer_name="conv4")


flatten = tf.layers.flatten(conv_4)

# NOTE(review): this dense layer still uses ReLU while the conv stages use SELU.
fc1 = tf.layers.dense(flatten, 256, activation=tf.nn.relu)

# The logits width must match the label placeholder, which is num_class wide.
out = tf.layers.dense(fc1, num_class, activation=None, name="logits")  # logit

predict = tf.nn.softmax(out)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_true))
# Use the lr_in placeholder rather than the Python float lr: a float is frozen
# into the graph at build time, so later changes to lr would never reach Adam.
# lr_in is fed on every sess.run call of the training loop.
optimizer = tf.train.AdamOptimizer(lr_in).minimize(cost)

## accuracy: fraction of samples whose arg-max logit matches the arg-max label
correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_true, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

# history/record: one entry is appended per epoch
train_loss, train_acc = [], []
valid_loss, valid_acc = [], []
# Number of full batches per epoch; a trailing partial batch is not counted.
update_per_epoch = int(np.floor(X_train.shape[0] / batch_size))

## early stopping and learning rate config
es_patience = 10  # epochs without improvement before training stops
es_n = 0

lr_patience = 3  # epochs without improvement before the learning rate is decayed
lr_n = 0

save_model_path = './save'

saver = tf.train.Saver()
sess = tf.Session()
# Initializing the variables
init = tf.global_variables_initializer()
sess.run(init)


# Batch generators. The training generator uses the same batch_size that
# update_per_epoch was computed from, so update_per_epoch steps cover one pass
# over X_train. The validation generator yields the whole validation set at once.
batch_gen = img_gen.flow(generator_input(X_train), y_train, batch_size=batch_size)
val_batch_gen = img_gen.flow(generator_input(X_valid), y_valid, batch_size=len(X_valid))

# Training loop: one pass of update_per_epoch optimizer steps per epoch, then a
# single validation pass, early stopping and learning-rate decay.
for i in range(epoch):

    epoch_loss = 0
    epoch_acc = 0

    for _ in range(update_per_epoch):

        image, label = next(batch_gen)

        _, this_loss, this_acc = sess.run([optimizer, cost, accuracy], feed_dict={
            input_img: image,
            y_true: label,
            lr_in: lr,
            keep_prob: keep_probability
        })

        epoch_loss += this_loss
        epoch_acc += this_acc

    ## end of epoch: average the per-batch metrics

    epoch_loss /= update_per_epoch
    epoch_acc /= update_per_epoch

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    print('Epoch {:>2}   Loss: {:>4.4f} Training Accuracy: {:.6f}'.format(i + 1, epoch_loss, epoch_acc))

    valid_image, valid_label = next(val_batch_gen)

    valid_this_loss, valid_this_acc = sess.run([cost, accuracy], feed_dict={
        input_img: valid_image,
        y_true: valid_label,
        lr_in: lr,
        keep_prob: 1.
    })

    valid_loss.append(valid_this_loss)
    valid_acc.append(valid_this_acc)

    print('Epoch {:>2}   Loss: {:>4.4f} Validation Accuracy: {:.6f}'.format(i + 1, valid_this_loss, valid_this_acc))

    # An epoch counts as an improvement only when its validation loss is finite
    # and no worse than the best finite loss so far. A plain
    # `loss > np.min(history)` test is False for NaN, which would treat a
    # diverged epoch as the best one and overwrite the good checkpoint.
    improved = np.isfinite(valid_this_loss) and valid_this_loss <= np.nanmin(valid_loss)

    if improved:
        es_n = 0
        lr_n = 0
        saver.save(sess, os.path.join(os.getcwd(), 'bestsession.ckpt'))
    else:
        es_n += 1
        lr_n += 1

    # early stop
    if es_n >= es_patience:
        print("-----------early stopping-------------")
        break

    # adaptive learning rate: the decayed value is fed through lr_in next epoch

    if lr_n >= lr_patience:
        lr *= lr_decay_rate
        lr_n = 0
        print("-----------adjust learning rate------------")


# Save Model (state at the end of training, not necessarily the best epoch)
save_path = saver.save(sess, save_model_path)
print('-----model save ------')