Python TensorFlow:separable_conv2d 比普通 conv2d 慢
标签:python, tensorflow, deep-learning, gpu
我曾试图使用 separable_conv2d 构建一个小型模型,用于对 CIFAR-10 图像进行分类。根据论文,它应该比传统的 ConvNet 更快,但事实证明,当过滤器数量较少时,它实际上更慢。
我测试了一个只有两个 conv2d 层的模型,以及另一个先用一个 conv2d、再接一个 separable_conv2d 的模型,每层有 32 个过滤器。下面是我在一块 GTX 1080 Ti 上的结果:
training time for normal_conv after 2000 step: 8.18395892999979 sec
time for normal_conv after one forward step: 0.003980965999289765 sec
training time for sep_conv after 2000 step: 9.158266903999902 sec
time for sep_conv after one forward step: 0.0036441169995669043 sec
但当我将过滤器数量增加到128时,结果是:
training time for normal_conv after 2000 step: 14.281644162001612 sec
time for normal_conv after one forward step: 0.007151683001211495 sec
training time for sep_conv after 2000 step: 13.0701530740007 sec
time for sep_conv after one forward step: 0.005937158999586245 sec
我正在使用 TF 1.3、CUDA 8 和 cuDNN 6,这个结果正常吗?
请帮我确认这是正常现象,还是我哪里做错了。
这是我的测试代码,直接复制粘贴即可运行:
import tensorflow as tf
import timeit
import numpy as np
from tensorflow.contrib.keras.python.keras.datasets.cifar10 import load_data
# Load the CIFAR-10 arrays: images/labels for training and for validation.
(x_train, y_train), (x_val, y_val) = load_data()
# Step size handed to the optimizers of both benchmarked models.
learning_rate = 0.001
# Number of timed training steps per model.
# NOTE(review): the results quoted in the question mention 2000 steps,
# but this value is 1000 — confirm which one was actually used.
num_steps = 1000
# Number of target classes (width of the one-hot label vectors).
n_classes = 10
# Number of examples sampled for every training step.
batch_size = 32
def reformat(labels, num_classes=None):
    """Convert integer class labels to one-hot float32 rows.

    Maps 0 to [1.0, 0.0, 0.0, ...], 1 to [0.0, 1.0, 0.0, ...] and so on.

    Args:
        labels: array-like of integer class ids, shaped (N,) or (N, 1).
        num_classes: width of the one-hot encoding. Defaults to the
            module-level ``n_classes`` when omitted.

    Returns:
        A float32 NumPy array of shape (N, num_classes).
    """
    if num_classes is None:
        num_classes = n_classes
    # Flatten first so (N,) and (N, 1) inputs both broadcast to (N, num_classes)
    # without a reshape that hard-codes the class count.
    flat = np.asarray(labels).reshape(-1)
    return (np.arange(num_classes) == flat[:, None]).astype(np.float32)
# One-hot encode the training labels once, before any timing starts.
train_labels = reformat(y_train)

# Discard any previously built default graph before defining this one.
tf.reset_default_graph()

# Graph inputs: batches of 32x32 3-channel images and 10-wide one-hot labels.
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(tf.float32, shape=[None, 10])

# Parameter containers: weights1 -> standard ConvNet, weights2 -> separable ConvNet.
weights1, weights2 = {}, {}

dtype = tf.float32
with tf.name_scope('INIT_OP'):
    conv_initializer = tf.contrib.layers.xavier_initializer_conv2d(dtype=dtype)
    fc_initializer = tf.contrib.layers.xavier_initializer(dtype=dtype)

# k: spatial size of the conv filters; kernel: output channels of each conv layer.
k, kernel = 3, 16
# Parameters of the standard ConvNet: two k x k convolutions and two dense layers.
with tf.name_scope('VARIABLES_1'):
    weights1.update({
        'conv1': tf.get_variable(
            'conv1', [k, k, 3, kernel],
            initializer=conv_initializer, dtype=dtype, trainable=True),
        'b1': tf.get_variable('b1', initializer=tf.zeros([kernel])),
        'conv2': tf.get_variable(
            'conv2', [k, k, kernel, kernel],
            initializer=conv_initializer, dtype=dtype, trainable=True),
        'b2': tf.get_variable('b2', initializer=tf.zeros([kernel])),
        # 8*8*kernel: the 32x32 input is halved twice by the 2x2 max-pools.
        'wd1': tf.get_variable(
            'wd1', [8*8*kernel, 512],
            initializer=fc_initializer, dtype=dtype, trainable=True),
        'bd1': tf.get_variable('bd1', initializer=tf.zeros([512])),
        'wd2': tf.get_variable(
            'wd2', [512, 10],
            initializer=fc_initializer, dtype=dtype, trainable=True),
        'bd2': tf.get_variable('bd2', initializer=tf.zeros([10])),
    })
# Parameters of the separable ConvNet: the second convolution is split into a
# depthwise k x k filter and a pointwise 1 x 1 filter.
# NOTE(review): tf.get_variable presumably ignores tf.name_scope, which would
# explain why these variables need names distinct from the first set — confirm.
with tf.name_scope('VARIABLES_sep'):
    weights2.update({
        'conv1': tf.get_variable(
            '2_conv1', [k, k, 3, kernel],
            initializer=conv_initializer, dtype=dtype, trainable=True),
        # Depthwise filter; the trailing 1 is the per-input-channel multiplier.
        'conv_dw2': tf.get_variable(
            'conv_dw2', [k, k, kernel, 1],
            initializer=conv_initializer, dtype=dtype, trainable=True),
        # Pointwise filter mixing the depthwise outputs into `kernel` channels.
        'conv_pw2': tf.get_variable(
            'conv_pw2', [1, 1, kernel, kernel],
            initializer=conv_initializer, dtype=dtype, trainable=True),
        'b1': tf.get_variable('2_b1', initializer=tf.zeros([kernel])),
        'b2': tf.get_variable('2_b2', initializer=tf.zeros([kernel])),
        'wd1': tf.get_variable(
            '2_wd1', [8*8*kernel, 512],
            initializer=fc_initializer, dtype=dtype, trainable=True),
        'bd1': tf.get_variable('2_bd1', initializer=tf.zeros([512])),
        'wd2': tf.get_variable(
            '2_wd2', [512, 10],
            initializer=fc_initializer, dtype=dtype, trainable=True),
        'bd2': tf.get_variable('2_bd2', initializer=tf.zeros([10])),
    })
def forward_conv_sep(inp, weights):
    """Build the logits of the separable-convolution network.

    Layout: conv -> max-pool -> separable conv -> max-pool -> dense + ReLU -> dense.

    Args:
        inp: input image tensor.
        weights: dict providing the entries 'conv1', 'b1', 'conv_dw2',
            'conv_pw2', 'b2', 'wd1', 'bd1', 'wd2' and 'bd2'.

    Returns:
        The output tensor of the last dense layer (unnormalized class scores).
    """
    # Read parameters from the `weights` argument rather than a module-level
    # dict, so the function works with whichever dict the caller passes in.
    hidden = conv_block(inp, weights['conv1'], weights['b1'])
    hidden = maxpool2d(hidden)
    hidden = conv_block_dw(hidden, weights['conv_dw2'], weights['conv_pw2'], weights['b2'])
    hidden = maxpool2d(hidden)
    # Flatten every non-batch dimension so the dense layers can consume it.
    hidden = tf.reshape(hidden, [-1, np.prod([int(dim) for dim in hidden.get_shape()[1:]])])
    fc1 = tf.matmul(hidden, weights['wd1']) + weights['bd1']
    fc1 = tf.nn.relu(fc1)
    return tf.matmul(fc1, weights['wd2']) + weights['bd2']
def forward_conv(inp, weights):
    """Build the logits of the standard two-convolution network.

    Layout: conv -> max-pool -> conv -> max-pool -> dense + ReLU -> dense.

    Args:
        inp: input image tensor.
        weights: dict providing the entries 'conv1', 'b1', 'conv2', 'b2',
            'wd1', 'bd1', 'wd2' and 'bd2'.

    Returns:
        The output tensor of the last dense layer (unnormalized class scores).
    """
    # Read parameters from the `weights` argument rather than a module-level
    # dict, so the function works with whichever dict the caller passes in.
    hidden = conv_block(inp, weights['conv1'], weights['b1'])
    hidden = maxpool2d(hidden)
    hidden = conv_block(hidden, weights['conv2'], weights['b2'])
    hidden = maxpool2d(hidden)
    # Flatten every non-batch dimension so the dense layers can consume it.
    hidden = tf.reshape(hidden, [-1, np.prod([int(dim) for dim in hidden.get_shape()[1:]])])
    fc1 = tf.matmul(hidden, weights['wd1']) + weights['bd1']
    fc1 = tf.nn.relu(fc1)
    return tf.matmul(fc1, weights['wd2']) + weights['bd2']
def conv_block_dw(inp, cweight_w, cweight_p, bweight):
    """Apply a stride-1, SAME-padded separable convolution, add a bias, then ReLU.

    Args:
        inp: input tensor.
        cweight_w: depthwise filter passed to tf.nn.separable_conv2d.
        cweight_p: pointwise filter passed to tf.nn.separable_conv2d.
        bweight: bias added to the convolution output.

    Returns:
        The ReLU-activated result.
    """
    separable = tf.nn.separable_conv2d(
        inp,
        depthwise_filter=cweight_w,
        pointwise_filter=cweight_p,
        strides=[1, 1, 1, 1],
        padding='SAME',
    )
    return tf.nn.relu(separable + bweight)
def conv_block(inp, cweight, bweight, activation=tf.nn.relu):
    """Apply a stride-1, SAME-padded convolution, add a bias, then an activation.

    Args:
        inp: input tensor.
        cweight: convolution filter passed to tf.nn.conv2d.
        bweight: bias added to the convolution output.
        activation: callable applied to the biased output. Defaults to
            tf.nn.relu; pass None to get the linear output.

    Returns:
        The activated (or, with ``activation=None``, linear) output tensor.
    """
    no_stride = [1, 1, 1, 1]
    conv_output = tf.nn.conv2d(inp, cweight, no_stride, 'SAME') + bweight
    if activation is None:
        return conv_output
    # Honour the caller-supplied activation instead of always applying ReLU.
    return activation(conv_output)
def maxpool2d(inp, k=2):
    """Max-pool `inp` with a k x k window and stride k, using SAME padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(inp, ksize=window, strides=window, padding='SAME')
# --- Standard ConvNet: logits, mean cross-entropy cost, RMSProp update ---
with tf.name_scope("forward_conv"):
    pred1 = forward_conv(x, weights1)

with tf.name_scope("cost1"):
    loss1 = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred1)
    cost1 = tf.reduce_mean(loss1)

with tf.name_scope('train_op1'):
    train_op1 = tf.train.RMSPropOptimizer(
        learning_rate, decay=0.9
    ).minimize(cost1)

# --- Separable ConvNet: same pipeline built on the second weight set ---
with tf.name_scope("forward_conv_sep"):
    pred2 = forward_conv_sep(x, weights2)

with tf.name_scope("cost2"):
    loss2 = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred2)
    cost2 = tf.reduce_mean(loss2)

with tf.name_scope('train_op2'):
    train_op2 = tf.train.RMSPropOptimizer(
        learning_rate, decay=0.9
    ).minimize(cost2)

# Initializer op for every variable created so far.
with tf.name_scope('INIT'):
    init = tf.global_variables_initializer()
def _sample_feed():
    """Return a feed dict holding one random mini-batch drawn without replacement."""
    picked = np.random.choice(y_train.shape[0], batch_size, replace=False)
    return {x: x_train[picked], y: train_labels[picked]}


def _benchmark(sess, label, train_op, cost, pred):
    """Time `num_steps` training steps and one forward pass of a model.

    Prints both timings using `label` as the model name and returns the
    predictions produced by the forward pass.

    Args:
        sess: active tf.Session with initialized variables.
        label: model name inserted into the printed messages.
        train_op: op that performs one optimization step.
        cost: cost tensor fetched together with `train_op`.
        pred: logits tensor evaluated for the forward-pass timing.
    """
    feed_dict = None
    start = timeit.default_timer()
    for _ in range(num_steps):
        feed_dict = _sample_feed()
        sess.run([train_op, cost], feed_dict=feed_dict)
    elapsed = timeit.default_timer() - start
    print('training time for {} after {} step:'.format(label, num_steps), elapsed)

    # The forward pass reuses the last training batch; draw one if no
    # training step ran (num_steps == 0).
    if feed_dict is None:
        feed_dict = _sample_feed()
    start = timeit.default_timer()
    predictions = sess.run(pred, feed_dict=feed_dict)
    elapsed = timeit.default_timer() - start
    print('time for {} after one forward step: '.format(label), elapsed)
    return predictions


with tf.Session() as sess:
    sess.run(init)
    # Train and time the standard ConvNet for num_steps steps.
    predictions1 = _benchmark(sess, 'normal_conv', train_op1, cost1, pred1)
    # Train and time the separable ConvNet for num_steps steps.
    predictions = _benchmark(sess, 'sep_conv', train_op2, cost2, pred2)