Python 为什么我的基于AlexNet的有线电视新闻网(CNN)在开始训练的准确率为98%时失败了?
对于培训的初始尝试,培训的准确度结果为Python 为什么我的基于AlexNet的有线电视新闻网(CNN)在开始训练的准确率为98%时失败了?,python,tensorflow,conv-neural-network,Python,Tensorflow,Conv Neural Network,对于培训的初始尝试,培训的准确度结果为98%!这是不可能的高 X列车--[50000,32,32,3]y列车--[50000] 我使用了一个基于AlexNet Tensorflow的CNN,在某些部分进行了微调。我的代码如下: import tensorflow as tf import matplotlib.pyplot as plt from getdata import get_data import numpy as np #build a model tf.reset_default
98%
!这是不可能的高
X列车--[50000,32,32,3]y列车--[50000]
我使用了一个基于AlexNet Tensorflow的CNN,在某些部分进行了微调。我的代码如下:
import tensorflow as tf
import matplotlib.pyplot as plt
from getdata import get_data
import numpy as np
#build a model
tf.reset_default_graph()
sess = tf.InteractiveSession()
#get cifar-10 from getdata.py
X_train,y_train,Xte,yte = get_data()
X = tf.placeholder(tf.float32,[None,32,32,3])
y = tf.placeholder(tf.int64,[None])
def model(X,y):
'''AlexNet has an architecture of
conv1+pool1----conv2+pool2----conv3----conv4----conv5+pool5----FC1----FC2----softmaxoutput,
while in this section i'll fine turning the architecture of the network :1.add BN after each
conv to normalization the data,through BN we'll remake the data obey normal distribution.
2.take out the conv4.then my net is
conv1+bn1----pool1----conv2+pool2----bn2----conv3----conv4+pool4----FC1----FC2----softmaxoutput'''
Wconv1 = tf.get_variable("Wconv1",[5,5,3,64],
initializer=tf.contrib.layers.xavier_initializer())
bconv1 = tf.get_variable("bconv1",[64])
hconv1 = tf.nn.relu(tf.nn.conv2d(X,Wconv1,[1,1,1,1],padding='SAME') + bconv1)
h_bn1 = tf.contrib.layers.batch_norm(hconv1)
h_pool1 = tf.nn.max_pool(h_bn1,[1,2,2,1],[1,2,2,1],padding='VALID')
# after above you get [N,16,16,64]
Wconv2 = tf.get_variable("Wconv2",[3,3,64,96],
initializer=tf.contrib.layers.xavier_initializer())
bconv2 = tf.get_variable("bconv2",[96])
hconv2 = tf.nn.relu(tf.nn.conv2d(h_pool1,Wconv2,[1,1,1,1],padding='SAME') + bconv2)
h_pool2 = tf.nn.max_pool(hconv2,[1,2,2,1],[1,2,2,1],padding='VALID')
h_bn2 = tf.contrib.layers.batch_norm(h_pool2)
# after above you get [N,8,8,96]
Wconv3 = tf.get_variable("Wconv3",[3,3,96,128],
initializer=tf.contrib.layers.xavier_initializer())
bconv3 = tf.get_variable("bconv3",[128])
hconv3 = tf.nn.relu(tf.nn.conv2d(h_bn2,Wconv3,[1,1,1,1],padding='SAME') + bconv3)
h_bn3 = tf.contrib.layers.batch_norm(hconv3)
#after above you get [N,8,8,128]
Wconv4 = tf.get_variable("Wconv4",[3,3,128,256],
initializer=tf.contrib.layers.xavier_initializer())
bconv4 = tf.get_variable("bconv4",[256])
hconv4 = tf.nn.relu(tf.nn.conv2d(h_bn3,Wconv4,[1,1,1,1],padding='SAME') + bconv4)
h_bn4 = tf.contrib.layers.batch_norm(hconv4)
h_pool4 = tf.nn.max_pool(h_bn4,[1,1,1,1],[1,1,1,1],padding='SAME')
# after above we get [N,8,8,256]
h4flat = tf.reshape(h_pool4,[-1,8*8*256])
W1 = tf.get_variable("W1",[8 * 8 * 256, 1024],
initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.get_variable("b1",[1024])
FC1 = tf.nn.relu(tf.matmul(h4flat,W1)+b1)
hdrop = tf.nn.dropout(FC1,0.4)
W2 = tf.get_variable("W2",[1024, 10])
b2 = tf.get_variable("b2",[10])
FC2 = tf.matmul(hdrop,W2)+b2
return FC2
y_pred = model(X,y)
total_loss = tf.losses.softmax_cross_entropy(tf.one_hot(y,10),logits=y_pred)
mean_loss = tf.reduce_mean(total_loss)
train_step = tf.train.AdamOptimizer(1e-4).minimize(mean_loss)
correct_pred = tf.equal(tf.argmax(y_pred,axis=1),y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
sess.run(tf.global_variables_initializer())
#sess = tf.InteractiveSession()
losses = []
batch_size = 64
epoch = 200
keep_prob = tf.placeholder(tf.float32)
xs = []
ys = []
with tf.device("/gpu:0"):
for e in range(epoch):
batch_start_idx = (e*batch_size)%(X_train.shape[0]-batch_size)
batch_end_idx = batch_start_idx + batch_size
batch_xs = X_train[batch_start_idx:batch_end_idx]
batch_ys = y_train[batch_start_idx:batch_end_idx]
xs = np.array(batch_xs)
ys = np.array(batch_ys)
feed = {X:xs,y:ys,keep_prob: 1.0}
train_step.run(feed_dict={X:xs,y:ys,keep_prob:0.5})
if e%1000 == 0:
train_accuracy = accuracy.eval(feed_dict=feed)
print("step %d, training accuracy %g"%(e, train_accuracy))
# y_pred = y_pred.eval(feed_dict=feed)
# print (y_pred)
# correct = correct_pred.eval(feed_dict=feed)
# print (correct)
loss = mean_loss.eval(feed_dict=feed)
# print (loss)
losses.append(loss)
plt.figure(1)
plt.plot(losses)
plt.grid(True)
plt.xlabel('epoch')
plt.ylabel('loss')
sess.close()
我看你的模型没有什么明显的问题。当你说“98%准确率”时,你是如何衡量的?如果您的训练集的准确率为98%,则可能是您过度适应了训练集。您应该在单独的验证或测试集上测量精度。我忘了添加。但当我这么做的时候,我的代码似乎没有学到任何东西,因为它的训练acc几乎等于10%@Peter Hawkins