TensorFlow:基于 VGG16 的低层/高层特征融合特征提取
我试图实现一个反卷积特征金字塔网络(DFPN)。DFPN 的目标是将低级特征图与高级特征图集成,请阅读文章中"反卷积特征金字塔网络"一节。DFPN 的输出将在分类网络和回归网络中的卷积层之后使用。但我得到的损失值不好,特别是对于 bbox 回归。我的代码如下:
def generate_dfpn(self):
    """Build the Deconvolutional Feature Pyramid Network (DFPN) head.

    Upsamples the high-level VGG16 map (conv5_3) and fuses it with the
    lower-level maps (conv3_3, then conv2_2) via resize -> 3x3 conv ->
    BN -> ReLU -> concat, then combines max- and average-pooled versions
    of the fused map into the feature map consumed by the downstream
    classification/regression networks.

    Side effects:
        - populates self.training_parameters1 with the new conv weights
          and biases (for the optimizer's var_list),
        - assigns self.base_net.output_map / shape_output_map /
          shape_output_map1 to the DFPN output.
    """
    self.training_parameters1 = []

    def _batch_norm(x, scale=True):
        # BUGFIX: the original built a brand-new BatchNormalization layer
        # inside EACH branch of tf.cond, so training and inference went
        # through two independent layers whose gamma/beta and moving
        # statistics were never shared -- inference normalized with
        # untrained statistics, which degrades the loss.  Create ONE
        # layer and pass the training flag to its call instead.
        # NOTE(review): assumes self.is_training is a boolean (tensor);
        # TF1's Keras BN accepts a tensor `training` argument -- confirm
        # against the TF version in use.
        layer = tf.keras.layers.BatchNormalization(scale=scale)
        return layer(x, training=self.is_training)

    # --- step 1: upsample conv5_3 to conv3_3 resolution, reduce 512->256.
    self.shape1 = tf.shape(self.base_net.conv5_3)
    self.shape2 = tf.shape(self.base_net.conv3_3)
    self.deconv1 = tf.image.resize(images=self.base_net.conv5_3,
                                   size=tf.shape(self.base_net.conv3_3)[1:3])
    self.shape3 = tf.shape(self.deconv1)
    kernel1 = tf.Variable(
        tf.truncated_normal([3, 3, 512, 256], dtype=tf.float32, stddev=1e-2),
        name='weights')
    self.conv1 = tf.nn.conv2d(self.deconv1, filters=kernel1,
                              strides=[1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    self.output1 = tf.nn.bias_add(self.conv1, biases)
    # BUGFIX: the original overwrote self.output1 with BN(self.conv1),
    # silently discarding the bias_add above while still training the
    # bias variable.  Normalize the biased output instead.  (BN's beta
    # makes the bias redundant, but keeping it preserves the variable
    # list / checkpoint layout.)
    self.output1 = _batch_norm(self.output1, scale=False)
    self.output1 = tf.nn.relu(self.output1)
    self.shape4 = tf.shape(self.output1)
    self.training_parameters1 += [kernel1, biases]

    # --- step 2: normalize conv3_3, concat with the upsampled map,
    #             then upsample the fusion to conv2_2 resolution.
    self.output2 = _batch_norm(self.base_net.conv3_3)
    self.shape5 = tf.shape(self.output2)
    self.output2 = tf.keras.layers.Concatenate()([self.output1, self.output2])
    self.deconv2 = tf.image.resize_nearest_neighbor(
        images=self.output2, size=tf.shape(self.base_net.conv2_2)[1:3])
    # BUGFIX: deconv2 has 256 (output1) + 256 (conv3_3) = 512 input
    # channels, but the original kernel was [3,3,256,128], which cannot
    # match the concatenated input.
    kernel2 = tf.Variable(
        tf.truncated_normal([3, 3, 512, 128], dtype=tf.float32, stddev=1e-2),
        name='weights')
    self.conv2 = tf.nn.conv2d(self.deconv2, filters=kernel2,
                              strides=[1, 1, 1, 1], padding='SAME')
    biases2 = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
                          trainable=True, name='biases')
    # BUGFIX: biases2 was created and registered for training but never
    # applied to any tensor in the original code.
    self.output3 = tf.nn.bias_add(self.conv2, biases2)
    self.output3 = _batch_norm(self.output3, scale=False)
    self.output3 = tf.nn.relu(self.output3)
    self.training_parameters1 += [kernel2, biases2]

    # --- step 3: fuse with normalized conv2_2.
    self.output4 = _batch_norm(self.base_net.conv2_2)
    self.output4 = tf.keras.layers.Concatenate()([self.output3, self.output4])
    self.shape7 = tf.shape(self.output4)

    # --- step 4: parallel max/avg pooling, concatenated channel-wise.
    # NOTE(review): a 2x2 window with stride 4 skips half the activations;
    # confirm this downsampling factor matches the anchor stride the RPN
    # expects -- a stride-2 pool may be what was intended.
    self.max_pool = tf.nn.max_pool(self.output4, ksize=[1, 2, 2, 1],
                                   strides=[1, 4, 4, 1], padding='SAME',
                                   name='max_pool')
    self.avg_pool = tf.nn.avg_pool(self.output4, ksize=[1, 2, 2, 1],
                                   strides=[1, 4, 4, 1], padding='SAME',
                                   name='avg_pool')
    self.dfpn_layer = tf.keras.layers.Concatenate()([self.max_pool,
                                                     self.avg_pool])
    self.shape_dfpn_layer = tf.shape(self.dfpn_layer)[1:3]

    # Publish the fused map as the base net's output feature map so the
    # classification/regression heads pick it up instead of conv5_3.
    self.base_net.output_map = self.dfpn_layer
    self.base_net.shape_output_map = self.shape_dfpn_layer
    self.base_net.shape_output_map1 = tf.shape(self.dfpn_layer)
VGG16 骨干网络部分的代码如下:
# VGG16 convolutional backbone (conv1_1 .. pool5), standard layout:
# five conv groups of 64/128/256/512/512 channels, each followed by a
# 2x2/stride-2 max pool (overall stride 16 at conv5_3 / 32 at pool5).
# NOTE(review): conv_layer's definition is not visible here -- it appears
# to build a conv op and return (output, variables); confirm.  All
# returned variables are accumulated into self.parameters, presumably so
# they can be restored from a pre-trained VGG16 checkpoint.
# conv1_1: 3 -> 64 channels, full input resolution
self.conv1_1, param = self.conv_layer(images,name_of_scope="conv1_1",dimensions=[3,3,3,64])
self.parameters += param
# conv1_2: 64 -> 64
self.conv1_2, param = self.conv_layer(self.conv1_1, name_of_scope="conv1_2", dimensions=[3,3,64,64])
self.parameters += param
# pool1: spatial stride 2 (resolution 1/2)
self.pool1 = tf.nn.max_pool(self.conv1_2, ksize=[1,2,2,1], strides=[1,2,2,1],
padding = 'SAME', name='pool1')
# conv2_1: 64 -> 128
self.conv2_1, param = self.conv_layer(self.pool1, name_of_scope="conv2_1", dimensions=[3,3,64,128])
self.parameters += param
# conv2_2: 128 -> 128 (low-level map consumed by the DFPN)
self.conv2_2, param = self.conv_layer(self.conv2_1, name_of_scope="conv2_2", dimensions=[3,3,128,128])
self.parameters += param
# pool2: resolution 1/4
self.pool2 = tf.nn.max_pool(self.conv2_2, ksize=[1,2,2,1],strides=[1,2,2,1], padding ='SAME', name='pool2')
# conv3_1: 128 -> 256
self.conv3_1, param = self.conv_layer(self.pool2, name_of_scope="conv3_1", dimensions=[3,3,128,256])
self.parameters += param
# conv3_2: 256 -> 256
self.conv3_2, param = self.conv_layer(self.conv3_1, name_of_scope="conv3_2", dimensions=[3,3,256,256])
self.parameters += param
# conv3_3: 256 -> 256 (mid-level map consumed by the DFPN)
self.conv3_3, param = self.conv_layer(self.conv3_2, name_of_scope="conv3_3", dimensions=[3,3,256,256])
self.parameters += param
# pool3: resolution 1/8
self.pool3 = tf.nn.max_pool(self.conv3_3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool3')
# conv4_1: 256 -> 512
self.conv4_1, param = self.conv_layer(self.pool3, name_of_scope="conv4_1", dimensions=[3,3,256,512])
self.parameters += param
# conv4_2: 512 -> 512
self.conv4_2, param = self.conv_layer(self.conv4_1, name_of_scope="conv4_2", dimensions=[3,3,512,512])
self.parameters += param
# conv4_3: 512 -> 512
self.conv4_3, param = self.conv_layer(self.conv4_2, name_of_scope="conv4_3", dimensions=[3,3,512,512])
self.parameters += param
# pool4: resolution 1/16
self.pool4 = tf.nn.max_pool(self.conv4_3, ksize= [1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool4')
# conv5_1: 512 -> 512
self.conv5_1, param = self.conv_layer(self.pool4, name_of_scope="conv5_1", dimensions=[3,3,512,512])
self.parameters += param
# conv5_2: 512 -> 512
self.conv5_2, param = self.conv_layer(self.conv5_1, name_of_scope="conv5_2", dimensions=[3,3,512,512])
self.parameters += param
# conv5_3: 512 -> 512 (high-level map upsampled by the DFPN)
self.conv5_3, param = self.conv_layer(self.conv5_2, name_of_scope="conv5_3", dimensions=[3,3,512,512])
self.parameters += param
# pool5: resolution 1/32 (unused by the DFPN head)
self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1,2,2,1], strides=[1,2,2,1], padding ='SAME', name='pool5')
## shape (dynamic H,W of conv5_3, used for anchor generation downstream)
self.shape_conv5_3 = tf.shape(self.conv5_3)[1:3]
此前我使用 VGG16 的 conv5_3 作为输出特征映射来创建分类网络和回归网络,现在我改用 DFPN 的输出。文章中给出的伪代码如下:
[步骤一]VGG-16网络由预先训练的模型初始化
对于i=1到maxiter Do
[步骤二]将调整大小的图像发送到VGG-16网络进行前向传播计算
[步骤三]DFPN用于集成底层特征映射和顶层特征映射
[步骤四]根据输出特征生成新的锚盒
[步骤五]基于顶层特征计算分类和回归损失函数
[步骤六]执行ERPN的反向传播
结束
那么,DFPN 的输出是否应被视为"顶层特征"?或者这正是我的问题所在,我需要修改某些东西?
如果你能帮助我,我将非常感激。
提前致谢。我使用了 tf.image.resize,然后用卷积层充当反卷积层,以增大较高层特征图的尺寸,而不是使用 tf.nn.conv2d_transpose;因为我用的是 TF1,对于 output_shape 参数,我需要的是形状的具体数值而不是张量,所以不得不运行一个有点复杂的会话来获取它。