TensorFlow:为什么 RNN 的训练损失(train loss)如此奇怪?
(标签:tensorflow、machine-learning、recurrent-neural-network、librosa)我是机器学习和 TensorFlow 方面的新手,学习过一些相关材料,也做过教程里的项目,比如 MNIST 图像识别。现在我想在自己的数据集上训练:一组 15 秒的音乐片段。我用 librosa 的 MFCC 提取它们的特征,把 MFCC 矩阵作为输入来训练模型。但训练过程中损失(loss)的变化非常奇怪:
Step 0,train loss = 11.72
Step 50,train loss = 0.00
Step 100,train loss =0.72
Step 150,train loss =0.08
Step 200,train loss =0.64
Step 250,train loss =0.64
Step 300,train loss =0.00
Step 350,train loss =0.62
Step 400,train loss =0.61
Step 450,train loss = 115.77
我不知道发生了什么。这是输入数据预处理的问题,还是模型本身不适合这个任务?
形状为 (128, 1293) 的输入数据如下:
[[-2.40214356e+02 -2.54111029e+02 -2.81576989e+02 ... -3.26748334e+02
-3.13127357e+02 -3.10083835e+02]
[ 1.55226378e+02 1.88829858e+02 2.22116743e+02 ... 2.02720581e+02
1.88478421e+02 1.71466354e+02]
[-7.25124769e+01 -7.66927520e+01 -7.35990460e+01 ... -6.97141304e+01
-8.91782486e+01 -1.01798663e+02]
...
[-2.13188683e+00 -1.47389498e+00 4.32850268e-01 ... -8.17353566e-01
1.74879699e-01 1.55565475e+00]
[-1.18913985e+00 -1.75976975e+00 -5.36811511e-01 ... -1.70165869e+00
1.08840259e+00 3.49373224e+00]
[-1.80539142e-01 -4.37886115e-01 -5.02952858e-01 ... -1.91972103e+00
-1.48080339e-01 9.51365549e-01]]
def inference(input_mfcc, train):
    """Build the convolutional classifier graph and return class logits.

    Args:
        input_mfcc: 4-D feature tensor; the layer comments assume a
            (1, 128, 1293, 1) MFCC input -- TODO confirm against the
            input pipeline, which is not visible here.
        train: Python bool; enables dropout when True.

    Returns:
        logits: (1, 2) tensor of unnormalized class scores.
    """
    # NOTE(review): the raw MFCC values span hundreds (see the sample dump
    # above); normalizing the input before it enters this graph is what
    # stabilized the loss for the question's author.
    with tf.device('/gpu:0'):
        with tf.variable_scope('conv1'):
            # 128*1293 --conv 29x294,32--> 100*1000*32 --pool 4x4,s4--> 25*250*32
            conv1 = tf.layers.conv2d(inputs=input_mfcc,
                                     filters=32,
                                     kernel_size=[29, 294],
                                     padding='valid',
                                     activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[4, 4], strides=4)
            print("conv1:", conv1.get_shape().as_list())
            print("pool1:", pool1.get_shape().as_list())
        with tf.variable_scope('conv2'):
            # 25*250 --conv 6x51,64--> 20*200*64 --pool 4x4,s4--> 5*50*64
            conv2 = tf.layers.conv2d(inputs=pool1,
                                     filters=64,
                                     kernel_size=[6, 51],
                                     padding='valid',
                                     activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[4, 4], strides=4)
            print("conv2:", conv2.get_shape().as_list())
            print("pool2:", pool2.get_shape().as_list())
        with tf.variable_scope('conv3'):
            # 5*50*64 --conv 1x46,64--> 5*5*64
            conv3 = tf.layers.conv2d(inputs=pool2,
                                     filters=64,
                                     kernel_size=[1, 46],
                                     padding='valid',
                                     activation=tf.nn.relu)
            print("conv3", conv3.get_shape().as_list())
        with tf.variable_scope('fc1'):
            # BUG FIX: the original flattened pool2, leaving conv3 computed
            # but never used (dead code). Flatten conv3 so the third conv
            # layer actually feeds the classifier head.
            conv3_flat = tf.reshape(conv3, [1, -1])
            print("conv3_flat", conv3_flat.get_shape().as_list())
            fc1 = tf.layers.dense(inputs=conv3_flat, units=1024, activation=tf.nn.relu)
            # Dropout only active during training (train=True).
            dropout1 = tf.layers.dropout(inputs=fc1, rate=0.4, training=train)
            print("dropout1", dropout1.get_shape().as_list())
        with tf.variable_scope('logits'):
            # Raw scores; softmax is applied inside the loss, not here.
            logits = tf.layers.dense(inputs=dropout1, units=2)
            print("logits", logits.get_shape().as_list())
    return logits
我的模型(代码里实际上是一个 CNN,而不是 RNN)是这样定义的:
[[-2.40214356e+02 -2.54111029e+02 -2.81576989e+02 ... -3.26748334e+02
-3.13127357e+02 -3.10083835e+02]
[ 1.55226378e+02 1.88829858e+02 2.22116743e+02 ... 2.02720581e+02
1.88478421e+02 1.71466354e+02]
[-7.25124769e+01 -7.66927520e+01 -7.35990460e+01 ... -6.97141304e+01
-8.91782486e+01 -1.01798663e+02]
...
[-2.13188683e+00 -1.47389498e+00 4.32850268e-01 ... -8.17353566e-01
1.74879699e-01 1.55565475e+00]
[-1.18913985e+00 -1.75976975e+00 -5.36811511e-01 ... -1.70165869e+00
1.08840259e+00 3.49373224e+00]
[-1.80539142e-01 -4.37886115e-01 -5.02952858e-01 ... -1.91972103e+00
-1.48080339e-01 9.51365549e-01]]
def inference(input_mfcc, train):
    """Build the convolutional classifier graph and return class logits.

    Args:
        input_mfcc: 4-D feature tensor; the layer comments assume a
            (1, 128, 1293, 1) MFCC input -- TODO confirm against the
            input pipeline, which is not visible here.
        train: Python bool; enables dropout when True.

    Returns:
        logits: (1, 2) tensor of unnormalized class scores.
    """
    # NOTE(review): the raw MFCC values span hundreds (see the sample dump
    # above); normalizing the input before it enters this graph is what
    # stabilized the loss for the question's author.
    with tf.device('/gpu:0'):
        with tf.variable_scope('conv1'):
            # 128*1293 --conv 29x294,32--> 100*1000*32 --pool 4x4,s4--> 25*250*32
            conv1 = tf.layers.conv2d(inputs=input_mfcc,
                                     filters=32,
                                     kernel_size=[29, 294],
                                     padding='valid',
                                     activation=tf.nn.relu)
            pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[4, 4], strides=4)
            print("conv1:", conv1.get_shape().as_list())
            print("pool1:", pool1.get_shape().as_list())
        with tf.variable_scope('conv2'):
            # 25*250 --conv 6x51,64--> 20*200*64 --pool 4x4,s4--> 5*50*64
            conv2 = tf.layers.conv2d(inputs=pool1,
                                     filters=64,
                                     kernel_size=[6, 51],
                                     padding='valid',
                                     activation=tf.nn.relu)
            pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[4, 4], strides=4)
            print("conv2:", conv2.get_shape().as_list())
            print("pool2:", pool2.get_shape().as_list())
        with tf.variable_scope('conv3'):
            # 5*50*64 --conv 1x46,64--> 5*5*64
            conv3 = tf.layers.conv2d(inputs=pool2,
                                     filters=64,
                                     kernel_size=[1, 46],
                                     padding='valid',
                                     activation=tf.nn.relu)
            print("conv3", conv3.get_shape().as_list())
        with tf.variable_scope('fc1'):
            # BUG FIX: the original flattened pool2, leaving conv3 computed
            # but never used (dead code). Flatten conv3 so the third conv
            # layer actually feeds the classifier head.
            conv3_flat = tf.reshape(conv3, [1, -1])
            print("conv3_flat", conv3_flat.get_shape().as_list())
            fc1 = tf.layers.dense(inputs=conv3_flat, units=1024, activation=tf.nn.relu)
            # Dropout only active during training (train=True).
            dropout1 = tf.layers.dropout(inputs=fc1, rate=0.4, training=train)
            print("dropout1", dropout1.get_shape().as_list())
        with tf.variable_scope('logits'):
            # Raw scores; softmax is applied inside the loss, not here.
            logits = tf.layers.dense(inputs=dropout1, units=2)
            print("logits", logits.get_shape().as_list())
    return logits
其余代码为:
def losses(logits, labels):
    """Return the mean softmax cross-entropy between `logits` and `labels`.

    `labels` is expected in whatever format softmax_cross_entropy_with_logits
    accepts (one-hot class distributions) -- TODO confirm against the
    TFRecord reader, which is not visible here.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy')
    return tf.reduce_mean(per_example)
def training(loss, learning_rate):
    """Create and return an Adam train op that minimizes `loss`.

    A non-trainable `global_step` counter is created alongside the
    optimizer and incremented on every minimize() call.
    """
    with tf.name_scope("optimizer"):
        step_counter = tf.Variable(0, name="global_step", trainable=False)
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = adam.minimize(loss, global_step=step_counter)
    return train_op
# Checkpoint path prefix used by tf.train.Saver in run_train().
ckpt="./model/music/model.ckpt"
# Number of output classes (binary music classification).
N_CLASSES = 2
# MFCC feature-matrix dimensions: 128 coefficients x 1293 frames,
# matching the (128, 1293) shape of the sample dump above.
MFCC_ROW = 128
MFCC_COL = 1293
# Flattened input size; not referenced in the visible code.
INPUT_NODE = MFCC_ROW * MFCC_COL
# One example per step; small queue capacity for tf.train.shuffle_batch.
BATCH_SIZE = 1
CAPACITY = 10
# Total optimization steps and Adam learning rate.
MAX_STEP = 500
learning_rate = 0.0001
def run_train():
    """Wire up the input queue, model, loss and optimizer, then train for MAX_STEP steps."""
    # Unused placeholders kept from the original script.
    train_dir = ""
    logs_train_dir = ""
    # One (mfcc, label) example from the TFRecord reader, fed through a
    # shuffling queue to form the training batch.
    mfcc, label = read_TFRecord()
    train_batch, train_labels_batch = tf.train.shuffle_batch(
        [mfcc, label],
        batch_size=BATCH_SIZE,
        num_threads=1,
        capacity=CAPACITY,
        min_after_dequeue=5)
    print("train_batch", train_batch.get_shape().as_list())
    print("labels_batch", train_labels_batch.get_shape().as_list())
    train_logits = inference(train_batch, True)
    print("train_logits", train_logits.get_shape().as_list())
    train_loss = losses(train_logits, train_labels_batch)
    train_op = training(train_loss, learning_rate)
    with tf.Session() as sess:
        saver = tf.train.Saver()
        sess.run(tf.group(tf.local_variables_initializer(),
                          tf.global_variables_initializer()))
        # Start the queue-runner threads that feed shuffle_batch.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for step in range(MAX_STEP):
                if coord.should_stop():
                    break
                _, loss_value = sess.run([train_op, train_loss])
                # Report every 50 steps.
                if step % 50 == 0:
                    print('Step %d,train loss = %.4f' % (step, loss_value))
                # Checkpoint every 100 steps and at the final step.
                if step % 100 == 0 or (step + 1) == MAX_STEP:
                    saver.save(sess, ckpt, global_step=step)
        except tf.errors.OutOfRangeError:
            print('Done training epoch limit reached')
        finally:
            coord.request_stop()
        coord.join(threads)
主程序入口调用 run_train()。
有谁能给我一些建议吗?非常感谢!

回答:根据我的经验,在把数据送入模型之前应该先对输入做归一化(normalization)。

提问者反馈:太好了!归一化之后损失变正常了,非常感谢 Tasosk!新的结果:Step 0, loss = 0.6997;Step 50, loss = 0.2867;Step 100, loss = 0.6949;Step 150, loss = 0.6844;Step 200, loss = 0.6044;Step 250, loss = 0.6722;Step 300, loss = 0.0187;Step 350, loss = 0.3694;Step 400, loss = 0.0051;Step 450, loss = 0.0196。嗯……好像还是不太稳定?

回答者补充:经过一定数量的步骤之后,模型就不再继续学习了——这是过拟合(overfitting)。训练步数(epoch)总是存在一个阈值,超过它之后就不会再有改善。这个阈值取决于你的数据规模、学习率、损失函数等,需要你自己去调。很高兴帮到你!别忘了给答案点个赞 ;)