Python CNN的降维似乎违背了我对理论的理解
我有一个两层CNN,结构如下: 以下是tensorflow中表示的架构:Python CNN的降维似乎违背了我对理论的理解,python,tensorflow,conv-neural-network,dimensionality-reduction,Python,Tensorflow,Conv Neural Network,Dimensionality Reduction,我有一个两层CNN,结构如下: 以下是tensorflow中表示的架构: import os import tensorflow as tf import sys import urllib import numpy as np import random from sklearn.preprocessing import OneHotEncoder from PIL import Image import glob train = [] for filename in glob.glob('
import os
import tensorflow as tf
import sys
import urllib
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import glob
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif
img=np.asarray(Image.open(filename))
img_flat = img.reshape(img.size)
train.append(img_flat)
if sys.version_info[0] >= 3:
from urllib.request import urlretrieve
else:
from urllib import urlretrieve
LOGDIR = 'log3/'
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'
### MNIST EMBEDDINGS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
labels_test = np.array(ckp_labels).reshape(-1,1)
enc = OneHotEncoder()
enc.fit(labels_test)
labels_final = enc.transform(labels_test).toarray()
train = np.asarray(train)
# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
#w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
#b = tf.Variable(tf.zeros([size_out]), name="B")
w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
act = tf.nn.relu(conv + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
act = tf.nn.relu(tf.matmul(input, w) + b)
tf.summary.histogram("weights", w)
tf.summary.histogram("biases", b)
tf.summary.histogram("activations", act)
return act
def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
tf.reset_default_graph()
tf.set_random_seed(1)
sess = tf.Session()
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
tf.summary.image('input', x_image, 3)
y = tf.placeholder(tf.float32, shape=[None, 7], name="labels")
if use_two_conv:
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
else:
conv1 = conv_layer(x_image, 1, 64, "conv")
conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") #adding padding "VALID" means no padding
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
if use_two_fc:
fc1 = fc_layer(flattened, 55 * 55 * 64, 40, "fc1")
embedding_input = fc1
embedding_size = 40
logits = fc_layer(fc1, 40, 7, "fc2")
else:
embedding_input = flattened
embedding_size = 7*7*64
logits = fc_layer(flattened, 7*7*64, 10, "fc")
with tf.name_scope("xent"):
xent = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=y), name="xent")
tf.summary.scalar("xent", xent)
with tf.name_scope("train"):
train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)
summ = tf.summary.merge_all()
embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
assignment = embedding.assign(embedding_input)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
writer = tf.summary.FileWriter(LOGDIR + hparam)
writer.add_graph(sess.graph)
config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
embedding_config = config.embeddings.add()
embedding_config.tensor_name = embedding.name
embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
# Specify the width and height of a single thumbnail.
embedding_config.sprite.single_image_dim.extend([256, 256])
tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)
for i in range(300):
batch_index = random.sample(range(0,100),25)
if i % 5 == 0:
[train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train[batch_index], y: labels_final[batch_index]})
writer.add_summary(s, i)
print ("train accuracy:", train_accuracy)
sess.run(train_step, feed_dict={x: train[batch_index], y: labels_final[batch_index]})
def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
conv_param = "conv2" if use_two_conv else "conv1"
fc_param = "fc2" if use_two_fc else "fc1"
return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param)
def main():
# You can try adding some more learning rates
#for learning_rate in [1E-3, 1E-4, 1E-5]:
for learning_rate in [1E-4]:
# Include "False" as a value to try different model architectures
#for use_two_fc in [True, False]:
for use_two_fc in [True]:
#for use_two_conv in [True, False]:
for use_two_conv in [True]:
# Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
print('Starting run for %s' % hparam)
sys.stdout.flush() # this forces print-ed lines to show up.
# Actually run with the new settings
mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)
if __name__ == '__main__':
main()
根据我的数学计算,一切正常,但当我运行代码时,我得到以下错误:
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 6553600 values, but the requested shape requires a multiple of 193600
我在这行代码中得到错误:
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
我真的不知道为什么会这样。我的数学有问题,因为他们假设conv_out的展平大小应该是[-1,64*64*64]
任何帮助都将不胜感激。如果你需要更多的信息,请告诉我 在我看来,您似乎计算错了每个卷积/池层的输出大小。下面是你如何找到答案的。我将您的代码提炼为:
import tensorflow as tf
import numpy as np
def conv_layer(input, size_in, size_out, name="conv"):
with tf.name_scope(name):
w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="VALID")
act = tf.nn.relu(conv + b)
return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])
conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])
sess.run(tf.global_variables_initializer())
print(sess.run(tf.shape(conv1), {x: np.zeros([1, 256*256])}))
此代码提供正确形状的零输入,并使用tf.shape()
计算conv1
输出的形状。我回来了:
[1 128 32]
这和你计算的数字不符
我怀疑你计算错了填充,但如果不知道你是如何得出顶部表格中的数字,很难说。如果没有其他内容,则第一次卷积的填充相同
和跨步1
,因此输入和输出将具有相同的空间维度
希望这有帮助 你能不能说清楚这个错误到底发生在哪里,以及你期望的尺寸是什么?考虑到大量的代码,我很难跟踪这个问题。当然,我已经添加了引发此错误的行。预期尺寸是该代码行中的输入。我希望[55,55,64]是尺寸,但[64,64,64]有效。不知道为什么