Building a CNN with Theano in batch mode (Python)
I am trying to build a CNN with Theano in batch mode: at the end of the convolutional-pooling layers I want to run a full MLP training (e.g. 100 iterations), obtain the cost from the latter, and use it to estimate the gradients and perform the updates. I am following this. To that end I created a conv-pool layer class and an MLP class (one hidden layer plus a logistic regression layer). The only difference with respect to the tutorial is that the conv-pool class has a method "buildLayer" that builds the layer once it is given an input:
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d
from theano.tensor.signal import pool


class CPLayer(object):
    def __init__(self, rng, filter_shape, image_shape, pool_size=(2, 2)):
        self.rng = rng
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.pool_size = pool_size

    def buildLayer(self, input):
        # each unit receives inputs from all input feature maps over the filter area
        fan_in = np.prod(self.filter_shape[1:])
        # each unit in a lower layer receives a gradient from
        # "num output feature maps * filter height * filter width" / pooling size
        fan_out = (self.filter_shape[0] * np.prod(self.filter_shape[2:]) //
                   np.prod(self.pool_size))
        # initialize weights uniformly in [-W_bound, W_bound]
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                self.rng.uniform(low=-W_bound, high=W_bound, size=self.filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        # the bias is a 1D tensor -- one bias per output feature map
        b_values = np.zeros((self.filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
        # convolve input feature maps with filters
        conv_out = conv2d(
            input=input,
            filters=self.W,
            filter_shape=self.filter_shape,
            input_shape=self.image_shape
        )
        # pool each feature map individually, using maxpooling
        pooled_out = pool.pool_2d(
            input=conv_out,
            ds=self.pool_size,
            ignore_border=True
        )
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        # store parameters of this layer
        self.params = [self.W, self.b]
        # keep track of model input
        self.input = input
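For reference, with a valid convolution followed by non-overlapping pooling (ignore_border=True), each conv-pool layer shrinks the spatial dimensions as (image - filter + 1) // pool; the CNN class below chains layers with the same formula. A quick sanity check with hypothetical sizes:

# spatial output size of one conv-pool layer:
# valid convolution, then non-overlapping pooling
def conv_pool_out_size(image, filt, pool):
    return (image - filt + 1) // pool

# hypothetical example: 28x28 MNIST input, 5x5 filters, 2x2 pooling
assert conv_pool_out_size(28, 5, 2) == 12  # after the first layer
assert conv_pool_out_size(12, 5, 2) == 4   # after the second layer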
The CNN class:
from logistic_sgd import load_data  # helper from the Theano deep learning tutorials


class CNN(dict):
    def __init__(self, n_layers, nkerns, batch_size, n_out, image_size):
        self.nkerns = nkerns
        self.batch_size = batch_size
        self.n_layers = n_layers
        self.n_out = n_out
        self.image_size = image_size
        # RNG shared by all layers (any fixed seed works)
        self.rng = np.random.RandomState(23455)

    def setConvPoolParameters(self, id_layer, filter_size=(5, 5), pool_size=(2, 2), mode='max', type='tanh', alpha=0):
        if id_layer == 0:
            image_size = self.image_size
        else:
            # output size of the previous layer: (image - filter + 1) // pool
            image_size = [(self[id_layer-1].image_shape[2+i] - self[id_layer-1].filter_shape[2+i] + 1)
                          // self[id_layer-1].pool_size[i] for i in range(2)]
        image_shape = (self.batch_size, (1 if id_layer == 0 else self.nkerns[id_layer-1]), image_size[0], image_size[1])
        filter_shape = (self.nkerns[id_layer], (1 if id_layer == 0 else self.nkerns[id_layer-1]), filter_size[0], filter_size[1])
        self[id_layer] = CPLayer(rng=self.rng, filter_shape=filter_shape, image_shape=image_shape, pool_size=pool_size)

    def setModelParameters(self, model_name='MLP', **kwargs):
        # number of MLP inputs = flattened output of the last conv-pool layer
        last = self[self.n_layers - 1]
        n_in = (self.nkerns[-1]
                * ((last.image_shape[2] - last.filter_shape[2] + 1) // last.pool_size[0])
                * ((last.image_shape[3] - last.filter_shape[3] + 1) // last.pool_size[1]))
        self[self.n_layers] = MLP(rng=self.rng, n_in=n_in, n_out=self.n_out, **kwargs)  # kwargs must supply n_hidden

    def evaluate(self, learning_rate=0.1, n_epochs=1, printOut=0):
        dataset = 'mnist.pkl.gz'
        datasets = load_data(dataset)
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        test_set_x, test_set_y = datasets[2]
        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_train_batches //= self.batch_size
        n_valid_batches //= self.batch_size
        n_test_batches //= self.batch_size
        # allocate symbolic variables for the data
        index = T.lscalar()  # index to a [mini]batch
        x = T.matrix('x')    # the data is presented as rasterized images
        y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print('... building the model')
        # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
        # to a 4D tensor, compatible with our LeNetConvPoolLayer
        # (28, 28) is the size of MNIST images.
        layer0_input = x.reshape((self.batch_size, 1, self.image_size[0], self.image_size[1]))
        self[0].buildLayer(layer0_input)
        for id_layer in range(1, self.n_layers):
            self[id_layer].buildLayer(self[id_layer-1].output)
        layerN_input = self[self.n_layers-1].output.flatten(2)
        self[self.n_layers].buildModel(layerN_input)
        params = []
        for id_layer in range(self.n_layers-1, -1, -1):
            params += self[id_layer].params
        self.params = params
        cost = self[self.n_layers].train(y)
        grads = T.grad(cost, params)
        updates = [(param_i, param_i - learning_rate * grad_i)
                   for param_i, grad_i in zip(params, grads)]
        train = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={
                x: train_set_x[index * self.batch_size: (index + 1) * self.batch_size],
                y: train_set_y[index * self.batch_size: (index + 1) * self.batch_size]
            }
        )
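As far as I understand, a compiled Theano function with `updates` only modifies the shared variables when it is actually called, once per call. A minimal sketch of the driver loop (hypothetical: it assumes `train` and `n_train_batches` are exposed by `evaluate`, whereas they are local variables in the excerpt above):

for epoch in range(n_epochs):
    for minibatch_index in range(n_train_batches):
        # each call applies `updates` once to the shared parameters
        minibatch_avg_cost = train(minibatch_index)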
So when I call "self[self.n_layers].train(y)", I am calling an MLP method:
import copy
import timeit

from logistic_sgd import LogisticRegression  # helpers from the Theano
from mlp import HiddenLayer                  # deep learning tutorials


class MLP(object):
    def __init__(self, rng, n_in, n_hidden, n_out, learning_rate=None, L1_reg=None, L2_reg=None, n_epochs=None):
        learning_rate = learning_rate if learning_rate is not None else 0.01
        L1_reg = L1_reg if L1_reg is not None else 0.00
        L2_reg = L2_reg if L2_reg is not None else 0.0001
        n_epochs = n_epochs if n_epochs is not None else 100
        self.rng = rng
        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.learning_rate = learning_rate
        self.L1_reg = L1_reg
        self.L2_reg = L2_reg
        self.n_epochs = n_epochs

    def buildModel(self, input):
        self.input = input
        self.hiddenLayer = HiddenLayer(
            rng=self.rng,
            input=self.input,
            n_in=self.n_in,
            n_out=self.n_hidden,
            activation=T.tanh
        )
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=self.n_hidden,
            n_out=self.n_out
        )
        # L1 norm regularization term
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )
        # squared L2 norm regularization term
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )
        self.params = self.logRegressionLayer.params + self.hiddenLayer.params
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        self.errors = self.logRegressionLayer.errors

    def train(self, y):
        start_time = timeit.default_timer()
        epoch = -1
        done_looping = False
        self.updates_list = []
        while (epoch < self.n_epochs) and (not done_looping):
            epoch = epoch + 1
            self.cost = (self.negative_log_likelihood(y)
                         + self.L1_reg * self.L1
                         + self.L2_reg * self.L2_sqr)
            self.gparams = [T.grad(self.cost, param) for param in self.params]
            self.updates = [(param, param - self.learning_rate * gparam)
                            for param, gparam in zip(self.params, self.gparams)]
            batch_avg_cost = self.cost
            self.updates_list.append(copy.deepcopy(self.updates))
        end_time = timeit.default_timer()
        return batch_avg_cost
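For context, my understanding is that update pairs like self.updates stay purely symbolic until they are passed to theano.function and that function is executed; only then are the shared variables modified in place. A minimal, self-contained sketch (hypothetical names) of a parameter actually being updated:

import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.asarray(0.0, dtype=theano.config.floatX))  # a model parameter
x = T.scalar('x')
cost = (w - x) ** 2
g = T.grad(cost, w)
# the update pair (w, w - 0.1 * g) is applied each time step() is called
step = theano.function([x], cost, updates=[(w, w - 0.1 * g)])
step(1.0)  # w is modified here, not when the update list was built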
Now I do not understand why none of the parameters ever get updated. Does anyone know the reason?
Thanks