Python 为什么我的基于AlexNet的有线电视新闻网（CNN）在开始训练的准确率为98%时失败了？_Python_Tensorflow_Conv Neural Network

Python 为什么我的基于AlexNet的有线电视新闻网（CNN）在开始训练的准确率为98%时失败了？

python tensorflow

Python 为什么我的基于AlexNet的有线电视新闻网（CNN）在开始训练的准确率为98%时失败了？,python,tensorflow,conv-neural-network,Python,Tensorflow,Conv Neural Network,对于培训的初始尝试，培训的准确度结果为98%！这是不可能的高 X列车--[50000,32,32,3]y列车--[50000] 我使用了一个基于AlexNet Tensorflow的CNN，在某些部分进行了微调。我的代码如下： import tensorflow as tf import matplotlib.pyplot as plt from getdata import get_data import numpy as np #build a model tf.reset_default

对于培训的初始尝试，培训的准确度结果为

98%

！这是不可能的高

X列车--[50000,32,32,3]y列车--[50000]

我使用了一个基于AlexNet Tensorflow的CNN，在某些部分进行了微调。我的代码如下：

import tensorflow as tf
import matplotlib.pyplot as plt
from getdata import get_data
import numpy as np

#build a model
tf.reset_default_graph()
sess = tf.InteractiveSession()

#get cifar-10 from getdata.py
X_train,y_train,Xte,yte = get_data()
X = tf.placeholder(tf.float32,[None,32,32,3])
y = tf.placeholder(tf.int64,[None])

def model(X,y):
    '''AlexNet has an architecture of 
    conv1+pool1----conv2+pool2----conv3----conv4----conv5+pool5----FC1----FC2----softmaxoutput,
    while in this section i'll fine turning the architecture of the network :1.add BN after each 
    conv to normalization the data,through BN we'll remake the data obey normal distribution.
    2.take out the conv4.then my net is
     conv1+bn1----pool1----conv2+pool2----bn2----conv3----conv4+pool4----FC1----FC2----softmaxoutput'''
    Wconv1 = tf.get_variable("Wconv1",[5,5,3,64],
                             initializer=tf.contrib.layers.xavier_initializer())
    bconv1 = tf.get_variable("bconv1",[64]) 

    hconv1 = tf.nn.relu(tf.nn.conv2d(X,Wconv1,[1,1,1,1],padding='SAME') + bconv1)

    h_bn1 = tf.contrib.layers.batch_norm(hconv1)

    h_pool1 = tf.nn.max_pool(h_bn1,[1,2,2,1],[1,2,2,1],padding='VALID')

    # after above you get [N,16,16,64]

    Wconv2 = tf.get_variable("Wconv2",[3,3,64,96],
                             initializer=tf.contrib.layers.xavier_initializer())
    bconv2 = tf.get_variable("bconv2",[96])

    hconv2 = tf.nn.relu(tf.nn.conv2d(h_pool1,Wconv2,[1,1,1,1],padding='SAME') + bconv2)

    h_pool2 = tf.nn.max_pool(hconv2,[1,2,2,1],[1,2,2,1],padding='VALID')

    h_bn2 = tf.contrib.layers.batch_norm(h_pool2)

    # after above you get [N,8,8,96]

    Wconv3 = tf.get_variable("Wconv3",[3,3,96,128],
                             initializer=tf.contrib.layers.xavier_initializer())
    bconv3 = tf.get_variable("bconv3",[128])

    hconv3 = tf.nn.relu(tf.nn.conv2d(h_bn2,Wconv3,[1,1,1,1],padding='SAME') + bconv3)

    h_bn3 = tf.contrib.layers.batch_norm(hconv3)

    #after above you get [N,8,8,128]

    Wconv4 = tf.get_variable("Wconv4",[3,3,128,256],
                             initializer=tf.contrib.layers.xavier_initializer())
    bconv4 = tf.get_variable("bconv4",[256])

    hconv4 = tf.nn.relu(tf.nn.conv2d(h_bn3,Wconv4,[1,1,1,1],padding='SAME') + bconv4)

    h_bn4 = tf.contrib.layers.batch_norm(hconv4)

    h_pool4 = tf.nn.max_pool(h_bn4,[1,1,1,1],[1,1,1,1],padding='SAME')

    # after above we get [N,8,8,256]

    h4flat = tf.reshape(h_pool4,[-1,8*8*256])

    W1 = tf.get_variable("W1",[8 * 8 * 256, 1024],
                             initializer=tf.contrib.layers.xavier_initializer())
    b1 = tf.get_variable("b1",[1024])

    FC1 = tf.nn.relu(tf.matmul(h4flat,W1)+b1)

    hdrop = tf.nn.dropout(FC1,0.4)

    W2 = tf.get_variable("W2",[1024, 10])
    b2 = tf.get_variable("b2",[10])

    FC2 = tf.matmul(hdrop,W2)+b2

    return FC2


y_pred = model(X,y)

total_loss = tf.losses.softmax_cross_entropy(tf.one_hot(y,10),logits=y_pred)  
mean_loss = tf.reduce_mean(total_loss)

train_step = tf.train.AdamOptimizer(1e-4).minimize(mean_loss) 

correct_pred = tf.equal(tf.argmax(y_pred,axis=1),y)

accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

sess.run(tf.global_variables_initializer())

#sess = tf.InteractiveSession()


losses = []
batch_size = 64
epoch = 200
keep_prob = tf.placeholder(tf.float32)
xs = []
ys = []


with tf.device("/gpu:0"): 
    for e in range(epoch):
        batch_start_idx = (e*batch_size)%(X_train.shape[0]-batch_size)
        batch_end_idx = batch_start_idx + batch_size
        batch_xs = X_train[batch_start_idx:batch_end_idx]
        batch_ys = y_train[batch_start_idx:batch_end_idx]
        xs = np.array(batch_xs)
        ys = np.array(batch_ys)
        feed = {X:xs,y:ys,keep_prob: 1.0} 

        train_step.run(feed_dict={X:xs,y:ys,keep_prob:0.5})
        if e%1000 == 0:
            train_accuracy = accuracy.eval(feed_dict=feed)
            print("step %d, training accuracy %g"%(e, train_accuracy))
#            y_pred = y_pred.eval(feed_dict=feed)
#            print (y_pred)
#            correct = correct_pred.eval(feed_dict=feed)
#            print (correct)
            loss = mean_loss.eval(feed_dict=feed)
#            print (loss)
            losses.append(loss)

plt.figure(1)
plt.plot(losses)
plt.grid(True)
plt.xlabel('epoch')
plt.ylabel('loss')            

sess.close()

我看你的模型没有什么明显的问题。当你说“98%准确率”时，你是如何衡量的？如果您的训练集的准确率为98%，则可能是您过度适应了训练集。您应该在单独的验证或测试集上测量精度。我忘了添加。但当我这么做的时候，我的代码似乎没有学到任何东西，因为它的训练acc几乎等于10%@Peter Hawkins