Python Theano - generating CNN feature maps after training


After training a CNN, I am trying to run a single image through the network and plot the feature maps after the first convolutional layer. Here is the code for my CNN layer class:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d, sigmoid
from theano.tensor.signal import pool

class ConvPoolLayer(object):
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        self.conv_out = conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            input_shape=self.image_shape)

        self.feature_maps = theano.function([self.inpt], self.conv_out)

        pooled_out = pool.pool_2d(
            input=self.conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output # no dropout in the convolutional layers
I created a Theano function feature_maps that, when given an image, outputs the result of the convolution (is there a cleaner way to do this?).
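
For comparison, a cleaner alternative might be to compile the function once from the network's symbolic graph after training, rather than compiling inside set_inpt (a sketch; it assumes the Network object stores its symbolic input as self.x and that layers keep their conv_out expression, as the traceback below suggests):

# A sketch: compile the feature-map function once, after training.
# Assumes net.x is the network's root input variable (fed with the same
# (mini_batch_size, 784) arrays the training graph uses) and that the
# first layer keeps its symbolic conv_out, as in network.py below.
def make_feature_map_fn(net):
    return theano.function([net.x], net.layers[0].conv_out)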

Then in my main code (after training), I take an image and pass it into the first layer object's feature_maps:

image_4d = image_2d.reshape(1, 1, 28, 28)
activations = conv_net[0].layers[0].feature_maps(image_4d)
I am training with a mini-batch size of 10, so when I pass in a single image to get the feature maps, I get the following error:

ValueError: The hardcoded shape for the batch size (10) isn't the run time shape (1).
Apply node that caused the error: ConvOp{('imshp', (1, 28, 28)),('kshp', (5, 5)),('nkern', 20),('bsize', 10),('dx', 1),('dy', 1),('out_mode', 'valid'),('unroll_batch', 5),('unroll_kern', 2),('unroll_patch', False),('imshp_logical', (1, 28, 28)),('kshp_logical', (5, 5)),('kshp_logical_top_aligned', True)}(<TensorType(float32, (False, True, False, False))>, <TensorType(float32, 4D)>)
Toposort index: 0
Inputs types: [TensorType(float32, (False, True, False, False)), TensorType(float32, 4D)]
Inputs shapes: [(1L, 1L, 28L, 28L), (20L, 1L, 5L, 5L)]
Inputs strides: [(3136L, 3136L, 112L, 4L), (100L, 100L, 20L, 4L)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [['output']]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "C:\Users\Simon\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-8e78399125dc>", line 1, in <module>
    runfile('F:/python-machine-learning/CNN/mnist.py', wdir='F:/python-machine-learning/CNN')
  File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
    execfile(filename, namespace)
  File "C:\Users\Simon\Anaconda2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 74, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)
  File "F:/python-machine-learning/CNN/mnist.py", line 53, in <module>
    conv_net = basic_conv(n=1, epochs=1)
  File "F:/python-machine-learning/CNN/mnist.py", line 47, in basic_conv
    SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
  File "network.py", line 95, in __init__
    init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
  File "network.py", line 273, in set_inpt
    input_shape=self.image_shape)
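
One workaround I can see, since the compiled graph has the batch size of 10 baked in, is to pad the single image up to a full mini-batch and keep only the first set of maps (a sketch, assuming float32 inputs and the 28x28 MNIST shapes above), though it feels wasteful:

# Pad the single 28x28 image up to the mini-batch size (10) the graph
# was compiled with, then keep only the first sample's feature maps.
image_batch = np.tile(image_2d.reshape(1, 1, 28, 28), (10, 1, 1, 1)).astype(np.float32)
activations = conv_net[0].layers[0].feature_maps(image_batch)[0]  # shape (20, 24, 24)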
The SoftmaxLayer:

class SoftmaxLayer(object):

    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)  # Predicted class
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))
Finally, here is the code for stochastic gradient descent:

def SGD(self, training_data, epochs, mini_batch_size, alpha,
        validation_data, test_data, lmbda=0.0):
    """Train the network using mini-batch stochastic gradient descent."""
    training_x, training_y = training_data
    validation_x, validation_y = validation_data
    test_x, test_y = test_data

    # compute number of minibatches for training, validation and testing
    num_training_batches = size(training_data)/mini_batch_size
    num_validation_batches = size(validation_data)/mini_batch_size
    num_test_batches = size(test_data)/mini_batch_size

    # define (regularized) cost function, symbolic gradients, and updates
    l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
    cost = self.layers[-1].cost(self)+\
           0.5*lmbda*l2_norm_squared/num_training_batches
    grads = T.grad(cost, self.params)
    updates = [(param, param - alpha*grad)
               for param, grad in zip(self.params, grads)]

    # define functions to train a mini-batch, and to compute the
    # accuracy in validation and test mini-batches.
    i = T.lscalar() # mini-batch index

    train_mb = theano.function(
        [i], cost, updates=updates,
        givens={
            self.x:
            training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })

    validate_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })

    test_mb_accuracy = theano.function(
        [i], self.layers[-1].accuracy(self.y),
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
            self.y:
            test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })

    self.test_mb_predictions = theano.function(
        [i], self.layers[-1].y_out,
        givens={
            self.x:
            test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
        })

    # Do the actual training
    best_validation_accuracy = 0.0                            
    start_time = time.time()

    for epoch in xrange(epochs):
        for minibatch_index in xrange(num_training_batches):
            iteration = num_training_batches * epoch + minibatch_index

            cost_ij = train_mb(minibatch_index)

            if (iteration+1) % num_training_batches == 0:
                validation_accuracy = np.mean(
                    [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])

                if validation_accuracy >= best_validation_accuracy:
                    best_validation_accuracy = validation_accuracy
                    best_iteration = iteration

                if test_data:
                    test_accuracy = np.mean(
                        [test_mb_accuracy(j) for j in xrange(num_test_batches)])

How can I run a single image through the network and get its feature maps, given the hard-coded batch size?

EDIT: How is mini_batch_size used? It is used to reshape the input going into the fully connected and softmax layers, for the prediction computations, and for training with mini-batch gradient descent. I have added the extra code (the SGD method above) to my question.
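
An alternative I am considering is to build a second copy of the network compiled for a batch size of 1 and copy the trained parameters into it (a sketch; the Network and FullyConnectedLayer constructors are assumptions mirroring the basic_conv call in the traceback, and it relies on the layers exposing their trained shared variables through params, as in SGD above):

# A sketch: rebuild the network with mini_batch_size=1 for inference and
# copy the trained weights over. The layer list below is hypothetical --
# mirror whatever basic_conv actually builds.
inference_net = Network([
    ConvPoolLayer(filter_shape=(20, 1, 5, 5),
                  image_shape=(1, 1, 28, 28)),      # batch size 1 instead of 10
    FullyConnectedLayer(n_in=20*12*12, n_out=100),  # hypothetical, as in basic_conv
    SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size=1)

# Each param is a Theano shared variable (w or b), so get_value/set_value
# copies the trained values into the freshly compiled graph.
for trained, fresh in zip(trained_net.params, inference_net.params):
    fresh.set_value(trained.get_value())

image_4d = image_2d.reshape(1, 1, 28, 28).astype(np.float32)
activations = inference_net.layers[0].feature_maps(image_4d)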