TensorFlow: fusing low-level and high-level features extracted from VGG-16


I am trying to implement a Deconvolution Feature Pyramid Network (DFPN). The goal of the DFPN is to integrate low-level feature maps with high-level feature maps; please see the paper, under the section "Deconvolution Feature Pyramid Network". The output of the DFPN is used after the convolutional layers in the classification and regression networks. However, I am getting bad loss values, especially for the bbox regression.

My code is:

def generate_dfpn(self):
    self.training_parameters1 = []

    # First step: upsample conv5_3 (high-level, 512 channels) to the spatial
    # size of conv3_3, then reduce its depth to 256 with a 3x3 convolution.
    self.shape1 = tf.shape(self.base_net.conv5_3)
    self.shape2 = tf.shape(self.base_net.conv3_3)
    self.deconv1 = tf.image.resize(images=self.base_net.conv5_3,
                                   size=tf.shape(self.base_net.conv3_3)[1:3])
    self.shape3 = tf.shape(self.deconv1)

    dimensions = [3, 3, 512, 256]
    kernel1 = tf.Variable(tf.truncated_normal(dimensions, dtype=tf.float32,
                                              stddev=1e-2), name='weights')
    biases = tf.Variable(tf.constant(0.0, shape=dimensions[-1:], dtype=tf.float32),
                         trainable=True, name='biases')
    self.conv1 = tf.nn.conv2d(self.deconv1, kernel1, strides=[1, 1, 1, 1],
                              padding='SAME')
    self.output1 = tf.nn.bias_add(self.conv1, biases)
    # Batch norm is applied to the bias-added output (the original applied it
    # to self.conv1, silently discarding the bias). A single
    # tf.layers.batch_normalization op switched by the `training` flag
    # replaces the tf.cond over two separate BatchNormalization instances,
    # which created two disjoint sets of BN variables.
    self.output1 = tf.layers.batch_normalization(self.output1, scale=False,
                                                 training=self.is_training)
    self.output1 = tf.nn.relu(self.output1)
    self.shape4 = tf.shape(self.output1)
    # Note: if the optimizer is given a var_list, the batch-norm beta/gamma
    # variables must be included in it as well.
    self.training_parameters1 += [kernel1, biases]

    # Second step: normalize conv3_3, concatenate it with the upsampled
    # high-level features, then upsample to the size of conv2_2 and convolve.
    self.output2 = tf.layers.batch_normalization(self.base_net.conv3_3,
                                                 training=self.is_training)
    self.shape5 = tf.shape(self.output2)
    self.output2 = tf.keras.layers.Concatenate()([self.output1, self.output2])

    self.deconv2 = tf.image.resize_nearest_neighbor(
        images=self.output2, size=tf.shape(self.base_net.conv2_2)[1:3])
    # The concatenation above yields 256 + 256 = 512 input channels, so the
    # kernel must be [3, 3, 512, 128]; the original [3, 3, 256, 128] raises a
    # shape error.
    dimensions2 = [3, 3, 512, 128]
    kernel2 = tf.Variable(tf.truncated_normal(dimensions2, dtype=tf.float32,
                                              stddev=1e-2), name='weights')
    biases2 = tf.Variable(tf.constant(0.0, shape=dimensions2[-1:], dtype=tf.float32),
                          trainable=True, name='biases')
    self.conv2 = tf.nn.conv2d(self.deconv2, kernel2, strides=[1, 1, 1, 1],
                              padding='SAME')
    # The original trained biases2 but never added it to conv2.
    self.output3 = tf.nn.bias_add(self.conv2, biases2)
    self.output3 = tf.layers.batch_normalization(self.output3, scale=False,
                                                 training=self.is_training)
    self.output3 = tf.nn.relu(self.output3)
    self.training_parameters1 += [kernel2, biases2]

    # Third step: normalize conv2_2 and concatenate it with the previous output.
    self.output4 = tf.layers.batch_normalization(self.base_net.conv2_2,
                                                 training=self.is_training)
    self.output4 = tf.keras.layers.Concatenate()([self.output3, self.output4])
    self.shape7 = tf.shape(self.output4)

    # Fourth step (pooling): max- and average-pool the fused map with a
    # 2x2 window and stride 4, then concatenate the two results into the
    # final DFPN output map.
    self.max_pool = tf.nn.max_pool(self.output4, ksize=[1, 2, 2, 1],
                                   strides=[1, 4, 4, 1], padding='SAME',
                                   name='max_pool')
    self.avg_pool = tf.nn.avg_pool(self.output4, ksize=[1, 2, 2, 1],
                                   strides=[1, 4, 4, 1], padding='SAME',
                                   name='avg_pool')
    self.dfpn_layer = tf.keras.layers.Concatenate()([self.max_pool, self.avg_pool])
    self.shape_dfpn_layer = tf.shape(self.dfpn_layer)[1:3]
    self.base_net.output_map = self.dfpn_layer
    self.base_net.shape_output_map = self.shape_dfpn_layer
    self.base_net.shape_output_map1 = tf.shape(self.dfpn_layer)
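
One detail the snippet does not show is how the optimizer is built. Since tf.layers.batch_normalization is used with a training flag, its moving-average update ops must run with every training step. A minimal sketch of the optimizer wiring, assuming a hypothetical total_loss defined elsewhere:

    # Hypothetical optimizer wiring; `total_loss` (classification + bbox
    # regression) is assumed to be defined elsewhere.
    # tf.layers.batch_normalization puts its moving-average updates into
    # tf.GraphKeys.UPDATE_OPS, and those ops must run with the train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)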


For the VGG-16 code:

        # conv1_1
        self.conv1_1, param = self.conv_layer(images,name_of_scope="conv1_1",dimensions=[3,3,3,64])
        self.parameters += param
        
            
        # conv1_2
        self.conv1_2, param = self.conv_layer(self.conv1_1, name_of_scope="conv1_2", dimensions=[3,3,64,64])
        self.parameters += param
            
        # pool1
        self.pool1 = tf.nn.max_pool(self.conv1_2, ksize=[1,2,2,1], strides=[1,2,2,1],
                                   padding = 'SAME', name='pool1')
        
        # conv2_1
        self.conv2_1, param = self.conv_layer(self.pool1, name_of_scope="conv2_1", dimensions=[3,3,64,128])
        self.parameters += param           
        
        # conv2_2 
        self.conv2_2, param = self.conv_layer(self.conv2_1, name_of_scope="conv2_2", dimensions=[3,3,128,128])
        self.parameters += param    
            
        # pool2
        self.pool2 = tf.nn.max_pool(self.conv2_2, ksize=[1,2,2,1],strides=[1,2,2,1], padding ='SAME', name='pool2')
        
        # conv3_1
        self.conv3_1, param = self.conv_layer(self.pool2, name_of_scope="conv3_1", dimensions=[3,3,128,256])
        self.parameters += param 
                
        # conv3_2
        self.conv3_2, param = self.conv_layer(self.conv3_1, name_of_scope="conv3_2", dimensions=[3,3,256,256])
        self.parameters += param 
                
        # conv3_3
        self.conv3_3, param = self.conv_layer(self.conv3_2, name_of_scope="conv3_3", dimensions=[3,3,256,256])
        self.parameters += param 
                    
        # pool3
        self.pool3 = tf.nn.max_pool(self.conv3_3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool3')
        
        # conv4_1
        self.conv4_1, param = self.conv_layer(self.pool3, name_of_scope="conv4_1", dimensions=[3,3,256,512])
        self.parameters += param 
                    
        # conv4_2
        self.conv4_2, param = self.conv_layer(self.conv4_1, name_of_scope="conv4_2", dimensions=[3,3,512,512])
        self.parameters += param 
            
        # conv4_3
        self.conv4_3, param = self.conv_layer(self.conv4_2, name_of_scope="conv4_3", dimensions=[3,3,512,512])
        self.parameters += param 

        # pool4
        self.pool4 = tf.nn.max_pool(self.conv4_3, ksize= [1,2,2,1], strides=[1,2,2,1], padding='SAME', name='pool4')
        
        # conv5_1
        self.conv5_1, param = self.conv_layer(self.pool4, name_of_scope="conv5_1", dimensions=[3,3,512,512])
        self.parameters += param 
                   
        # conv5_2 
        self.conv5_2, param = self.conv_layer(self.conv5_1, name_of_scope="conv5_2", dimensions=[3,3,512,512])
        self.parameters += param 
        
        # conv5_3
        self.conv5_3, param = self.conv_layer(self.conv5_2, name_of_scope="conv5_3", dimensions=[3,3,512,512])
        self.parameters += param 
            
        # pool5
        self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1,2,2,1], strides=[1,2,2,1], padding ='SAME', name='pool5')

        ## shape 
        self.shape_conv5_3 = tf.shape(self.conv5_3)[1:3]
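
The conv_layer helper is not shown above; a minimal sketch of what it presumably looks like, reconstructed from the call sites (so this is an assumption, not the actual implementation):

    def conv_layer(self, inputs, name_of_scope, dimensions):
        # Assumed reconstruction: 3x3 conv + bias + ReLU, returning the
        # activation and its trainable parameters, matching the call sites
        # above (`out, param = self.conv_layer(...)`).
        with tf.variable_scope(name_of_scope):
            kernel = tf.Variable(tf.truncated_normal(dimensions, dtype=tf.float32,
                                                     stddev=1e-2), name='weights')
            biases = tf.Variable(tf.constant(0.0, shape=dimensions[-1:],
                                             dtype=tf.float32),
                                 trainable=True, name='biases')
            conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1],
                                padding='SAME')
            out = tf.nn.relu(tf.nn.bias_add(conv, biases))
        return out, [kernel, biases]
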
I was using conv5_3 of VGG-16 as the output feature map to build the classification and regression networks; now I use the output of the DFPN instead. Also, here is the pseudocode they give:

[Step 1] The VGG-16 network is initialized with a pre-trained model

For i = 1 to maxiter Do

[Step 2] Send the resized image to the VGG-16 network for the forward-propagation computation

[Step 3] The DFPN is used to integrate the bottom-level feature maps with the top-level feature maps

[Step 4] New anchor boxes are generated based on the output features

[Step 5] The classification and regression loss functions are computed based on the top-level features

[Step 6] Backpropagation is performed for the ERPN

End
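
For reference, that pseudocode maps onto a TF1 training loop roughly as follows. This is only a sketch; `maxiter`, `next_resized_image`, `images`, `is_training`, `saver`, `vgg16_ckpt`, `train_op`, `cls_loss` and `reg_loss` are all hypothetical names, not taken from the paper:

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, vgg16_ckpt)          # [Step 1] pre-trained VGG-16 weights
        for i in range(maxiter):                 # For i = 1 to maxiter Do
            img = next_resized_image()           # [Step 2] resized input image
            # [Steps 2-5] one sess.run drives the forward pass through VGG-16,
            # the DFPN fusion, anchor generation, and both losses;
            # [Step 6] train_op performs the ERPN backward pass.
            _, c, r = sess.run([train_op, cls_loss, reg_loss],
                               feed_dict={images: img, is_training: True})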

So, is the output of the DFPN what counts as the top-level features here, or is this where my problem lies and I need to change something?

I would be very grateful if you could help me.
Thanks in advance.

I used tf.image.resize followed by a convolutional layer as the deconvolution layer to increase the size of the higher feature maps, rather than tf.nn.conv2d_transpose, because I am using TF1 and, for the output_shape argument, I thought I needed the actual shape values rather than a tensor, so I would have had to run a somewhat convoluted session.
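
For what it's worth, tf.nn.conv2d_transpose in TF1 also accepts output_shape as a 1-D tensor, so the target size can be built with tf.shape without evaluating anything in a session. A minimal sketch, assuming the x4 spatial factor between conv5_3 and conv3_3 (two pooling layers apart) and the variable names from the code above:

    # Transposed convolution from conv5_3 (512 ch) up to conv3_3's spatial size.
    # Kernel layout for conv2d_transpose is [height, width, out_ch, in_ch].
    kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512], dtype=tf.float32,
                                             stddev=1e-2), name='deconv_weights')
    out_shape = tf.stack([tf.shape(self.base_net.conv5_3)[0],
                          tf.shape(self.base_net.conv3_3)[1],
                          tf.shape(self.base_net.conv3_3)[2],
                          256])
    up = tf.nn.conv2d_transpose(self.base_net.conv5_3, kernel,
                                output_shape=out_shape,
                                strides=[1, 4, 4, 1], padding='SAME')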