Deep learning: Caffe reshape / upsample a fully connected layer


Suppose we have a layer like this:

layer {
  name: "fully-connected"
  type: "InnerProduct"
  bottom: "bottom"
  top: "top"
  inner_product_param {
    num_output: 1
  }
}
The output is batch_size x 1. In several papers (for example, the picture at the top of page 3 of the first paper, or the top of page 4 of the second), I have seen such a layer used at the very end to produce a 2D image for pixel-wise prediction. How can this be turned into a 2D image? I was thinking of a reshape or a deconvolution, but I cannot figure out how that would work. A simple example would be very helpful.

Update: My input images are 304x228 and my ground truth (depth images) are 75x55.

################# Main net ##################

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}
layer {
  name: "relufc6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}

layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4070
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 0.1
    }
  }
}

layer {
  type: "Reshape"
  name: "reshape"
  bottom: "fc7"
  top: "fc7_reshaped"
  reshape_param {
    shape { dim:  1  dim: 1  dim:  55 dim: 74 }
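    # 1*1*55*74 = 4070, matching fc7's num_output; dim: 0 instead of the leading dim: 1 would preserve the batch size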
  }
}

layer {
  name: "deconv1"
  type: "Deconvolution"
  bottom: "fc7_reshaped"
  top: "deconv1"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
      #group: 256
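    # note: with kernel 5, pad 2, stride 1 the spatial size stays 55x74; this layer does not upsample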
    weight_filler {
        type: "bilinear"
    }
    bias_term: false
  }
}

#########################

layer {
  name: "conv6"
  type: "Convolution"
  bottom: "data"
  top: "conv6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 63
    kernel_size: 9
    stride: 2
    pad: 1
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "conv6"
}

layer {
  name: "pool6"
  type: "Pooling"
  bottom: "conv6"
  top: "pool6"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}

########################
layer {
  name: "concat"
  type: "Concat"
  bottom: "deconv1"
  bottom: "pool6"
  top: "concat"
  concat_param {
    concat_dim: 1
  }
}

layer {
  name: "conv7"
  type: "Convolution"
  bottom: "concat"
  top: "conv7"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.011
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
    name: "relu7"
    type: "ReLU"
    bottom: "conv7"
    top: "conv7"
    relu_param{
    negative_slope: 0.01
        engine: CUDNN
    }
}

layer {
  name: "conv8"
  type: "Convolution"
  bottom: "conv7"
  top: "conv8"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.011
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
    name: "relu8"
    type: "ReLU"
    bottom: "conv8"
    top: "conv8"
    relu_param{
    negative_slope: 0.01
        engine: CUDNN
    }
}

layer {
  name: "conv9"
  type: "Convolution"
  bottom: "conv8"
  top: "conv9"
  convolution_param {
    num_output: 1
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler {
      type: "gaussian"
      std: 0.011
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}

layer {
    name: "relu9"
    type: "ReLU"
    bottom: "conv9"
    top: "result"
    relu_param{
    negative_slope: 0.01
        engine: CUDNN
    }
}
The log is shared in the comments at the end of this post.


If all you need is a fully connected network like a conventional multi-layer perceptron, use 2D blobs (shape (N, D)) and invoke the InnerProductLayer.

For pixel-wise prediction, however, the num_output of the last fully connected layer will not be 1; it will be equal to w*h of the target (label) image. What made you think that value would be 1?
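As a minimal sketch (the layer and blob names here are made up for illustration, not taken from the paper), the final fc layer for a 10*10 target would look like this:

layer {
  name: "fc_pixel"            # hypothetical name
  type: "InnerProduct"
  bottom: "features"          # hypothetical bottom blob
  top: "fc_pixel"
  inner_product_param {
    num_output: 100           # w*h = 10*10: one output per predicted pixel
  }
}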

EDIT 1:

Here are the dimensions of the layers shown in the figure on page 3 of link1:

LAYER       OUTPUT DIM [c*h*w]
coarse1     96*h1*w1     conv layer
coarse2     256*h2*w2    conv layer
coarse3     384*h3*w3    conv layer
coarse4     384*h4*w4    conv layer
coarse5     256*h5*w5    conv layer
coarse6     4096*1*1     fc layer
coarse7     X*1*1        fc layer, where 'X' can be interpreted as w*h
To understand this further, assume we have a network that predicts the pixels of an image. The images are 10*10, so the final output of the fc layer will have dimension 100*1*1 (as in coarse7), which can be interpreted as 10*10.

The question now is how a 1D array can correctly predict a 2D image. Note that the loss for this output is computed against labels that correspond to the pixel data. During training, the weights thus learn to predict the pixel data.
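Continuing the sketch (names still hypothetical), the fc output can be reshaped to 2D and compared against the ground truth with a Euclidean loss:

layer {
  name: "reshape_pred"
  type: "Reshape"
  bottom: "fc_pixel"
  top: "pred_2d"
  reshape_param {
    shape { dim: 0  dim: 1  dim: 10  dim: 10 }   # dim: 0 copies the batch size
  }
}
layer {
  name: "loss"
  type: "EuclideanLoss"
  bottom: "pred_2d"
  bottom: "label"             # 2D ground truth, e.g. a 10x10 depth map
  top: "loss"
}

The reshape is pure row-major bookkeeping: the first 10 values of the fc output become row 1, the next 10 become row 2, and so on.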

EDIT 2:

Trying to draw the net with draw_net.py in Caffe shows the following: the ReLU layers attached to conv6 and fc6 share the same name, which tangles the connections in the drawn image. I am not sure whether this causes any problem during training, but I suggest renaming one of the ReLU layers to a unique name to avoid unforeseen issues.
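For example, the ReLU on fc6 could be declared under its own name while keeping the usual in-place wiring:

layer {
  name: "relu_fc6"            # unique name, no longer clashing with conv6's ReLU
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}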

Coming back to your question: no upsampling seems to take place after the fully connected layer. As the log shows:

I1108 19:34:57.881680  4277 net.cpp:150] Setting up fc7
I1108 19:34:57.881718  4277 net.cpp:157] Top shape: 1 4070 (4070)

I1108 19:34:57.881826  4277 net.cpp:150] Setting up reshape
I1108 19:34:57.881846  4277 net.cpp:157] Top shape: 1 1 55 74 (4070)

I1108 19:34:57.884768  4277 net.cpp:150] Setting up conv6
I1108 19:34:57.885309  4277 net.cpp:150] Setting up pool6
I1108 19:34:57.885327  4277 net.cpp:157] Top shape: 1 63 55 74 (256410)
The output of fc7 has dimension 4070*1*1. It is reshaped to 1*55*74 and passed as input to deconv1.

The output of the whole network is produced by conv9, whose output dimension is 1*55*74, which matches the dimension of the labels (the depth data) exactly.
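If you did want genuine upsampling after the fully connected part (rather than a pure reshape), the usual Caffe idiom is a Deconvolution layer with stride > 1 and a fixed bilinear kernel. Here is a minimal sketch for 2x upsampling; the parameters are my own choice, not taken from the paper:

layer {
  name: "upsample2x"          # hypothetical name
  type: "Deconvolution"
  bottom: "fc7_reshaped"
  top: "upsample2x"
  param { lr_mult: 0 }        # freeze the bilinear weights
  convolution_param {
    num_output: 1
    kernel_size: 4
    stride: 2
    pad: 1                    # (h-1)*2 - 2*1 + 4 = 2h: exactly doubles height and width
    bias_term: false
    weight_filler { type: "bilinear" }
  }
}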


If my answer is still unclear, please point out exactly where you expect the upsampling to happen.

Comments:

- Please cite the relevant papers. Better still, quote the passage that is really troubling you.
- Sorry, they are added now :)
- Could you give an example? I cannot follow your idea. If you tell me exactly what you want, I will try to help.
- I just added some references. Could you take a look at page 3 of the first one? There is a figure, and I want to reproduce that network in Caffe. However, they use fully connected layers at the end, which gives a 1x1 resolution, and after that they get something like a 74x55 resolution (coarse7). I understand everything except that layer...
- In link1 I get this error: no paper 'arXiv:1406.2283v1.pdff'.
- The paper mentioned above! Please look at the network image on page 3.
- But if the output is w*h, how would you retrieve a 2D image? The figure only gives the image a depth of 1.
- Still, you can see in the figure that the output is a 2D image. You can retrieve the pixel values of a grayscale 2D image by assuming that the first 'w' pixels correspond to the first row, and so on.
- OK, I have tried to reproduce the whole network, but I am not sure it is correct. I have updated my question and added my net. Could you check whether this is what you meant and whether the network is correct?
- For a good visualization, paste the net into Netscope.
- After training the above script I checked whether the output is correct, but the output seems to be the input image rather than the depth image... Are you sure you are right, or rather, is my implementation correct...?
- Please share a log that prints the size of each blob during training initialization.
I1108 19:34:57.881680  4277 net.cpp:150] Setting up fc7
I1108 19:34:57.881718  4277 net.cpp:157] Top shape: 1 4070 (4070)

I1108 19:34:57.881826  4277 net.cpp:150] Setting up reshape
I1108 19:34:57.881846  4277 net.cpp:157] Top shape: 1 1 55 74 (4070)

I1108 19:34:57.884768  4277 net.cpp:150] Setting up conv6
I1108 19:34:57.885309  4277 net.cpp:150] Setting up pool6
I1108 19:34:57.885327  4277 net.cpp:157] Top shape: 1 63 55 74 (256410)