Python 为什么Chainer Batchnormalization不能很好地与mnist_nn配合使用
Chainer batchnormalization不能很好地处理我的代码,尽管tensorflow的批处理规范化可以工作。我使用数据集mnist,代码如下所示 使用chainerversion=6.1.0,在不使用batchnormalization的情况下,100个历元后的验证精度在0.97和0.98之间,而使用batchnormalization的情况下,100个历元后的验证精度小于0.80 当我对tensorflowversion=1.14.0使用相同的方法时,无论是否使用batchnormalization,双向验证精度都在0.98左右 这是我代码的一部分。 历元数为100,其批量大小为1000。我使用Adam作为优化器,学习率为0.01 数据集、列车数据、验证数据 列车图像、列车标签、测试图像、测试标签=mnist.load\u数据 列车图像=列车图像。整形60000,28*28 测试图像=测试图像。重塑10000,28*28 x_train=train_图像。A键入'float32'/255 y\u列车=列车标签。A键入'int32' x_val=test_images.astype'float32'/255 y_val=测试标签。aType'int32' 模型与条件链表 定义模型 类别MyModelChain: 定义初始自我,n_输入=784,n_隐藏=100,n_输出=10: 初始值设定项=chainer.initializers.HeNormal 超级__ 使用self.init_作用域: self.l1=L.Linearn\u in,n\u hidden,initialW=initializer self.l2=L.Linearn\u hidden,n\u hidden,initialW=initializer self.l3=L.Linearn\u hidden,n\u out,initialW=initializer self.bn=L.BatchNormalizationn_隐藏,衰减=0.99,eps=0.001 def forwardself,x: h=F.reluself.bnself.l1x h=F.reluself.bnself.l2h 返回self.l3h model=MyModel 优化器=优化器 optimizer.setupmodel n_=100 n_batchsize=1000 张量流模型与条件Python 为什么Chainer Batchnormalization不能很好地与mnist_nn配合使用,python,tensorflow,batch-normalization,chainer,Python,Tensorflow,Batch Normalization,Chainer,Chainer batchnormalization不能很好地处理我的代码,尽管tensorflow的批处理规范化可以工作。我使用数据集mnist,代码如下所示 使用chainerversion=6.1.0,在不使用batchnormalization的情况下,100个历元后的验证精度在0.97和0.98之间,而使用batchnormalization的情况下,100个历元后的验证精度小于0.80 当我对tensorflowversion=1.14.0使用相同的方法时,无论是否使用batchno
python
import tensorflow as tf
from keras.datasets import mnist
from functools import partial
import numpy as np
def shuffle_batch(X, y, batch_size):
    """Yield (X_batch, y_batch) minibatches covering the data in random order.

    Parameters
    ----------
    X, y : indexable arrays of equal length (samples and labels).
    batch_size : int
        Target samples per batch; actual sizes may differ by one because
        ``np.array_split`` balances the splits.

    Yields
    ------
    tuple of (X_batch, y_batch), each sample appearing exactly once.
    """
    rnd_idx = np.random.permutation(len(X))
    # Guard: if batch_size > len(X), len(X) // batch_size is 0 and
    # np.array_split(idx, 0) raises ValueError — emit one smaller batch.
    n_batches = max(1, len(X) // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
# --- Data: MNIST flattened to 784-dim float vectors scaled into [0, 1] ---
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
X_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
X_valid = test_images.astype('float32')/255
y_valid = test_labels.astype('int32')

# --- Network hyper-parameters ---
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
# `training` switches batch norm between per-batch statistics (True)
# and the tracked moving averages (False — the default at evaluation).
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)
    # Fix: he_init was created but never passed in, so the dense layers
    # silently fell back to the default Glorot initializer.
    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init)
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    # NOTE(review): batch-normalizing the logits is unusual; most models
    # feed logits_before_bn straight into the loss — confirm intent.
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # Fraction of samples whose top-1 logit matches the label.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 100
batch_size = 1000
# Batch norm's moving-average updates live in UPDATE_OPS and must be run
# explicitly alongside the training op in TF1 graph mode.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

print("epoch train/loss val/loss train/acc val/acc")
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        loss_list = []
        accuracy_list = []
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops],
                     feed_dict={training: True, X: X_batch, y: y_batch})
            # Metrics are evaluated with training=False (the placeholder
            # default), i.e. using the moving-average BN statistics.
            loss_batch = loss.eval(feed_dict={X: X_batch, y: y_batch})
            accuracy_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            loss_list.append(loss_batch)
            accuracy_list.append(accuracy_batch)
        loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print('{0:>4d} {1:>10.4f} {2:>10.4f} {3:>10.4f} {4:>10.4f}'
              .format(epoch, np.mean(loss_list), loss_val, np.mean(accuracy_list), accuracy_val))
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
batch_norm_momentum = 0.9
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
training = tf.placeholder_with_default(False, shape=(), name='training')
with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)
    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init)
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    logits = my_batch_norm_layer(logits_before_bn)
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
我在chainer中使用的所有代码
import numpy as np
import chainer
from chainer import cuda, Function, gradient_check, report, training, utils, Variable
from chainer import datasets, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
import chainer.functions as F
import chainer.links as L
from chainer.training import extensions
from keras.datasets import mnist
import cupy as cp

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
x_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
x_val = test_images.astype('float32')/255
y_val = test_labels.astype('int32')
# define model
class MyModel(Chain):
    def __init__(self, n_in=784, n_hidden=100, n_out=10):
        initializer = chainer.initializers.HeNormal()
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden, initialW=initializer)
            self.l2 = L.Linear(n_hidden, n_hidden, initialW=initializer)
            self.l3 = L.Linear(n_hidden, n_out, initialW=initializer)
            self.bn = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)

    def forward(self, x):
        h = F.relu(self.bn(self.l1(x)))
        h = F.relu(self.bn(self.l2(h)))
        return self.l3(h)

# define optimizer
model = MyModel()
optimizer = optimizers.Adam(alpha=0.01)
optimizer.setup(model)

# train the network
n_epoch = 100
n_batchsize = 1000
iteration = 0
gpu_id = 0
cuda.get_device(gpu_id).use()
# send the network to gpu memory
model.to_gpu(gpu_id)
print('epoch  train/loss  val/loss  train/acc  val/acc')
for epoch in range(n_epoch):
    # randomly shuffle the dataset every epoch
    order = np.random.permutation(range(len(x_train)))
    loss_list = []
    accuracy_list = []
    for i in range(0, len(order), n_batchsize):
        index = order[i:i+n_batchsize]
        x_train_batch = x_train[index, :]
        y_train_batch = y_train[index]
        x_train_batch = cp.asarray(x_train_batch)
        y_train_batch = cp.asarray(y_train_batch)
        output_train = model(x_train_batch)
        loss_train_batch = F.softmax_cross_entropy(output_train, y_train_batch)
        accuracy_train_batch = F.accuracy(output_train, y_train_batch)
        loss_list.append(cuda.to_cpu(loss_train_batch.array))
        accuracy_list.append(cuda.to_cpu(accuracy_train_batch.array))
        model.cleargrads()
        loss_train_batch.backward()
        optimizer.update()
        iteration += 1
    loss_train = np.mean(loss_list)
    accuracy_train = np.mean(accuracy_list)
    # after one epoch, evaluate with validation data
    x_val = cp.asarray(x_val)
    y_val = cp.asarray(y_val)
    with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
        output_val = model(x_val)
    loss_val = F.softmax_cross_entropy(output_val, y_val)
    loss_val = cuda.to_cpu(loss_val.array)
    accuracy_val = F.accuracy(output_val, y_val)
    accuracy_val = cuda.to_cpu(accuracy_val.array)
    print('{0:>4d} {1:>10.4f} {2:>10.4f} {3:>10.4f} {4:>10.4f}'.format(epoch, loss_train, loss_val, accuracy_train, accuracy_val))
我在tensorflow中使用的所有代码
python
import tensorflow as tf
from keras.datasets import mnist
from functools import partial
import numpy as np
def shuffle_batch(X, y, batch_size):
    """Yield (X_batch, y_batch) minibatches covering the data in random order.

    Parameters
    ----------
    X, y : indexable arrays of equal length (samples and labels).
    batch_size : int
        Target samples per batch; actual sizes may differ by one because
        ``np.array_split`` balances the splits.

    Yields
    ------
    tuple of (X_batch, y_batch), each sample appearing exactly once.
    """
    rnd_idx = np.random.permutation(len(X))
    # Guard: if batch_size > len(X), len(X) // batch_size is 0 and
    # np.array_split(idx, 0) raises ValueError — emit one smaller batch.
    n_batches = max(1, len(X) // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch
# --- Data: MNIST flattened to 784-dim float vectors scaled into [0, 1] ---
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape(60000, 28*28)
test_images = test_images.reshape(10000, 28*28)
X_train = train_images.astype('float32')/255
y_train = train_labels.astype('int32')
X_valid = test_images.astype('float32')/255
y_valid = test_labels.astype('int32')

# --- Network hyper-parameters ---
n_inputs = 28 * 28
n_hidden1 = 100
n_hidden2 = 100
n_outputs = 10
batch_norm_momentum = 0.9

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")
# `training` switches batch norm between per-batch statistics (True)
# and the tracked moving averages (False — the default at evaluation).
training = tf.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()
    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)
    # Fix: he_init was created but never passed in, so the dense layers
    # silently fell back to the default Glorot initializer.
    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init)
    hidden1 = my_dense_layer(X, n_hidden1, name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name="outputs")
    # NOTE(review): batch-normalizing the logits is unusual; most models
    # feed logits_before_bn straight into the loss — confirm intent.
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # Fraction of samples whose top-1 logit matches the label.
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 100
batch_size = 1000
# Batch norm's moving-average updates live in UPDATE_OPS and must be run
# explicitly alongside the training op in TF1 graph mode.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

print("epoch train/loss val/loss train/acc val/acc")
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        loss_list = []
        accuracy_list = []
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run([training_op, extra_update_ops],
                     feed_dict={training: True, X: X_batch, y: y_batch})
            # Metrics are evaluated with training=False (the placeholder
            # default), i.e. using the moving-average BN statistics.
            loss_batch = loss.eval(feed_dict={X: X_batch, y: y_batch})
            accuracy_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
            loss_list.append(loss_batch)
            accuracy_list.append(accuracy_batch)
        loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print('{0:>4d} {1:>10.4f} {2:>10.4f} {3:>10.4f} {4:>10.4f}'
              .format(epoch, np.mean(loss_list), loss_val, np.mean(accuracy_list), accuracy_val))
我原本预计使用 batchnormalization 时 Chainer 的验证精度也能达到 0.98 左右,但实际不到 0.80。
我是否以错误的方式使用了 Chainer 的 batchnormalization?还是 Chainer 和 TensorFlow 的 batchnormalization 结构有很大不同?——原因在于两个隐藏层共用了同一个 BatchNormalization 链接,混用了批统计信息。为了让每一层使用各自的批统计信息,模型定义必须改成类似下面的代码(每个隐藏层各自持有一个 BatchNormalization 层)。在我的环境中训练 100 个 epoch 后,其验证准确率达到了 98%:
class MyModel(Chain):
    """3-layer MLP for MNIST with one BatchNormalization link per hidden layer.

    The question's version shared a single BN link between both hidden
    layers, which mixes the batch statistics (and moving averages) of two
    different activations; giving each layer its own bn1/bn2 link keeps
    per-layer statistics separate.
    """

    def __init__(self, n_in=784, n_hidden=100, n_out=10):
        initializer = chainer.initializers.HeNormal()
        super().__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden, initialW=initializer)
            self.l2 = L.Linear(n_hidden, n_hidden, initialW=initializer)
            self.l3 = L.Linear(n_hidden, n_out, initialW=initializer)
            # One BN link per hidden layer so each tracks its own
            # mean/variance moving averages.
            self.bn1 = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)
            self.bn2 = L.BatchNormalization(n_hidden, decay=0.9, eps=0.001)

    def forward(self, x):
        h = F.relu(self.bn1(self.l1(x)))
        h = F.relu(self.bn2(self.l2(h)))
        return self.l3(h)