Python 在处理CNN中的维度错误时遇到问题
我是tensorflow的新手,正在尝试脱离mnist数据集,尝试一些不同的东西。我正在使用情感数据集,似乎无法修改代码以成功运行在此数据集上。对于那些想复制我作品的人,我找到了经过处理的图像和标签。您将在ck+scaled文件夹中找到图像,在processed文件夹中找到标签 我们正在处理265幅大小为[256 x 256]的图像 这是我的代码:Python 在处理CNN中的维度错误时遇到问题,python,tensorflow,neural-network,conv-neural-network,Python,Tensorflow,Neural Network,Conv Neural Network,我是tensorflow的新手,正在尝试脱离mnist数据集,尝试一些不同的东西。我正在使用情感数据集,似乎无法修改代码以成功运行在此数据集上。对于那些想复制我作品的人,我找到了经过处理的图像和标签。您将在ck+scaled文件夹中找到图像,在processed文件夹中找到标签 我们正在处理265幅大小为[256 x 256]的图像 这是我的代码: import os import tensorflow as tf import sys import urllib import numpy as
import os
import tensorflow as tf
import sys
import urllib
import numpy as np
from PIL import Image
import glob
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif
img=np.asarray(Image.open(filename))
img_flat = img.reshape(img.size)
train.append(img_flat)
### MNIST EMBEDDINGS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
if sys.version_info[0] >= 3:
from urllib.request import urlretrieve
else:
from urllib import urlretrieve
LOGDIR = 'log3/'
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'
### MNIST EMBEDDINGS ###
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(train_dir=LOGDIR + 'data', one_hot=True)
### Get a sprite and labels file for the embedding projector ###
urlretrieve(GITHUB_URL + 'labels_1024.tsv', LOGDIR + 'labels_1024.tsv')
urlretrieve(GITHUB_URL + 'sprite_1024.png', LOGDIR + 'sprite_1024.png')
# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
#w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
#b = tf.Variable(tf.zeros([size_out]), name="B")
w = tf.Variable(tf.truncated_normal([4, 4, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
act = tf.nn.relu(conv + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
act = tf.nn.relu(tf.matmul(input, w) + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return act
def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
tf.reset_default_graph()
tf.set_random_seed(1)
sess = tf.Session()
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
tf.summary.image('input', x_image, 3)
y = tf.placeholder(tf.float32, shape=[None, ], name="labels")
if use_two_conv:
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
else:
conv1 = conv_layer(x_image, 1, 64, "conv")
conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
flattened = tf.reshape(conv_out, [-1, 16 * 16 * 16])
if use_two_fc:
fc1 = fc_layer(flattened, 16 * 16 * 16, 40, "fc1")
embedding_input = fc1
embedding_size = 40
logits = fc_layer(fc1, 40, 1, "fc2")
else:
embedding_input = flattened
embedding_size = 7*7*64
logits = fc_layer(flattened, 7*7*64, 10, "fc")
with tf.name_scope("xent"):
xent = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=y), name="xent")
tf.summary.scalar("xent", xent)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)
summ = tf.summary.merge_all()
embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
assignment = embedding.assign(embedding_input)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR + hparam)
writer.add_graph(sess.graph)
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([256, 256])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
for i in range(300):
if i % 5 == 0:
[train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train, y: ckp_labels})
writer.add_summary(s, i)
print ("train accuracy:", train_accuracy)
sess.run(train_step, feed_dict={x: train, y: ckp_labels})
def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
conv_param = "conv2" if use_two_conv else "conv1"
fc_param = "fc2" if use_two_fc else "fc1"
return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param)
def main():
# You can try adding some more learning rates
#for learning_rate in [1E-3, 1E-4, 1E-5]:
for learning_rate in [1E-4]:
# Include "False" as a value to try different model architectures
#for use_two_fc in [True, False]:
for use_two_fc in [True]:
#for use_two_conv in [True, False]:
for use_two_conv in [True]:
# Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
print('Starting run for %s' % hparam)
sys.stdout.flush() # this forces print-ed lines to show up.
# Actually run with the new settings
mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)
if __name__ == '__main__':
main()
下面是我收到的错误:
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[16960,1] labels_size=[1,265]
[[Node: xent/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](xent/Reshape, xent/Reshape_1)]]
真正让我困惑的是为什么我的logits形状是[16960,1]。任何帮助都将不胜感激。首先,y应该是形状(批次大小):(在您的情况下是(265);也许它也可以与(265,1)一起工作) 由于y不在一个热编码中,您必须使用
tf.nn.sparse\u softmax\u cross\u entropy\u和\u logits
,而不是softmax\u cross\u entropy\u和\u logits
然后:在第二个conv层(和最大池数)的末尾,映像大小为(256x256)/2/2=(64,64)。使用64个深度,每个样本可以获得64*64*64个值。但是你可以做展平=tf.重塑(conv_out,[-1,16*16*16])
,这给了你一个形状张量[265*2^6,16*16*16]
(265*2^6=16960,这就是它的来源)。将其替换为展平=tf.重塑(conv_out,[-1,64*64*64])
此外,logits=fc_层(fc1,40,1,“fc2”)
也是一个错误,您应该有logits=fc_层(fc1,40,num_类,“fc2”)
,并且您似乎有num_类=8
这些更改应该为您提供形状的logit(265,num\u类),这是您想要的tf.nn.sparse\u softmax\u cross\u entropy\u with\u logits
对于use_two_fc或use_two_conv为false的情况,您还需要进行其他更改,我会让您解决这些问题。你真的应该在每一步对所有张量的形状更加小心,如果有必要,打印它们以检查它们是否真的是你想要的。也许可以使用更多的变量,如num_classes_uu\u batch_size等,以确保事情的连贯性,并使其更具可读性。嘿,你的评论帮助我解决了最初的问题,我专注于寻找有关如何为CNN设置维度的更多信息。现在,我将conv中的过滤器更改为(17,17),并将展平改为
展平=tf。重塑(conv_out,[-1,55*55*64])
。我的数学检查出来了,但我得到了以下错误:InvalidArgumentError(回溯见上文):重塑的输入是一个值为6553600的张量,但请求的形状需要193600的倍数。但它最终会使用[-1,64*64*64]
。很抱歉,我不明白你到底改变了什么。如果你吃了你的问题或者用你当前的代码开始一个新的问题,也许会更清楚
y = tf.placeholder(tf.float32, shape=[None], name="labels")