Deep learning caffe重塑/上采样完全连接层
假设我们有这样一个层:Deep learning caffe重塑/上采样完全连接层,deep-learning,reshape,caffe,Deep Learning,Reshape,Caffe,假设我们有这样一个层: layer { name: "fully-connected" type: "InnerProduct" bottom: "bottom" top: "top" inner_product_param { num_output: 1 } } 输出为批量大小x 1。在几篇论文中(对于顶部的exmaple第3页图片,或顶部的第4页),我看到他们最终使用了这样一个层,以产生用于像素预测的2D图像。如何将其转换为2D图像?我在考虑重塑或反褶积,但
# Fully connected layer with a single output value; reads blob "bottom", writes blob "top".
layer {
  name: "fully-connected"
  type: "InnerProduct"
  bottom: "bottom"
  top: "top"
  inner_product_param { num_output: 1 }
}
输出形状为 批量大小 x 1。在几篇论文中(例如 link1 第3页顶部的图,或另一篇论文第4页顶部的图),我看到他们在网络末端使用了这样一个层,以生成用于逐像素预测的2D图像。如何将这个输出转换为2D图像?我考虑过重塑(Reshape)或反卷积(Deconvolution),但不清楚具体该怎么做。如果能给出一个简单的例子,会很有帮助。
更新:我的输入图像是304x228,我的真值(ground truth,即深度图像)是75x55
################# Main net ##################
# conv1: 11x11 convolution with stride 4 over the "data" blob, producing 96 feature maps.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu1: ReLU applied in place on "conv1" (bottom and top name the same blob).
layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" }
# norm1: local response normalization of "conv1" over a window of 5.
layer {
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# pool1: 3x3 max pooling with stride 2 on "norm1".
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# conv2: 5x5 convolution (pad 2, 2 groups) on "pool1", producing 256 feature maps.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
# relu2: ReLU applied in place on "conv2".
layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" }
# norm2: local response normalization of "conv2" over a window of 5.
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
# pool2: 3x3 max pooling with stride 2 on "norm2".
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# conv3: 3x3 convolution (pad 1) on "pool2", producing 384 feature maps.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu3: ReLU applied in place on "conv3".
layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" }
# conv4: 3x3 convolution (pad 1, 2 groups) on "conv3", producing 384 feature maps.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
# relu4: ReLU applied in place on "conv4".
layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" }
# conv5: 3x3 convolution (pad 1, 2 groups) on "conv4", producing 256 feature maps.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
# relu5: ReLU applied in place on "conv5".
layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" }
# pool5: 3x3 max pooling with stride 2 on "conv5"; feeds the fully connected layers.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
# fc6: fully connected layer on "pool5" with 4096 outputs.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
# relufc6: ReLU applied in place on "fc6" (named distinctly from "relu6", which follows conv6).
layer { name: "relufc6" type: "ReLU" bottom: "fc6" top: "fc6" }
# drop6: dropout applied in place on "fc6" with ratio 0.5.
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
# fc7: fully connected layer on "fc6" with 4070 outputs (4070 = 55 * 74, the size of
# the 2D map that the following "reshape" layer produces from this blob).
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4070
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 0.1 }
  }
}
# reshape: turns the flat fc7 vector (4070 values per sample) into a
# single-channel 55 x 74 map so that spatial layers can consume it.
layer {
  name: "reshape"
  type: "Reshape"
  bottom: "fc7"
  top: "fc7_reshaped"
  reshape_param {
    # dim: 0 copies the batch dimension from the bottom blob instead of
    # hard-coding it to 1, so the net also works with batch sizes > 1
    # (a fixed 1 makes the element counts mismatch for any larger batch).
    # Remaining dims: 1 channel, height 55, width 74.
    shape { dim: 0 dim: 1 dim: 55 dim: 74 }
  }
}
# deconv1: 5x5 deconvolution (pad 2, stride 1, no bias) on "fc7_reshaped",
# producing 64 feature maps.
# NOTE(review): with stride 1 and pad 2 this layer does not appear to change the
# spatial size, and a "bilinear" filler is normally paired with a stride > 1 for
# upsampling -- confirm this is the intended configuration.
layer {
  name: "deconv1"
  type: "Deconvolution"
  bottom: "fc7_reshaped"
  top: "deconv1"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
    #group: 256
    bias_term: false
    weight_filler { type: "bilinear" }
  }
}
#########################
# conv6: second branch, reading the input "data" blob directly.
# 9x9 convolution (stride 2, pad 1) producing 63 feature maps.
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "data"
  top: "conv6"
  # Learning-rate / weight-decay multipliers, one block per learnable blob
  # (Caffe convention: weights first, then bias).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 63
    kernel_size: 9
    stride: 2
    pad: 1
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu6: ReLU applied in place on "conv6".
layer { name: "relu6" type: "ReLU" bottom: "conv6" top: "conv6" }
# pool6: 3x3 max pooling with stride 2 on "conv6".
# The setup log in this file reports its top shape as 1 x 63 x 55 x 74.
layer {
  name: "pool6"
  type: "Pooling"
  bottom: "conv6"
  top: "pool6"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
########################
# concat: stacks "deconv1" and "pool6" along the channel axis.
layer {
  name: "concat"
  type: "Concat"
  bottom: "deconv1"
  bottom: "pool6"
  top: "concat"
  # "axis" replaces the deprecated "concat_dim" alias; axis 1 is the
  # channel dimension of an N x C x H x W blob, so the result is unchanged.
  concat_param { axis: 1 }
}
# conv7: 5x5 convolution (pad 2, stride 1) on the concatenated blob, 64 feature maps.
layer {
  name: "conv7"
  type: "Convolution"
  bottom: "concat"
  top: "conv7"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler { type: "gaussian" std: 0.011 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu7: leaky ReLU (negative_slope 0.01) applied in place on "conv7".
# The engine is left at its default rather than pinned to CUDNN: a pinned
# CUDNN engine aborts net construction on Caffe builds without cuDNN, while
# the default still selects cuDNN automatically when it is available.
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "conv7"
  relu_param { negative_slope: 0.01 }
}
# conv8: 5x5 convolution (pad 2, stride 1) on "conv7", 64 feature maps.
layer {
  name: "conv8"
  type: "Convolution"
  bottom: "conv7"
  top: "conv8"
  convolution_param {
    num_output: 64
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler { type: "gaussian" std: 0.011 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu8: leaky ReLU (negative_slope 0.01) applied in place on "conv8".
# The engine is left at its default rather than pinned to CUDNN: a pinned
# CUDNN engine aborts net construction on Caffe builds without cuDNN, while
# the default still selects cuDNN automatically when it is available.
layer {
  name: "relu8"
  type: "ReLU"
  bottom: "conv8"
  top: "conv8"
  relu_param { negative_slope: 0.01 }
}
# conv9: 5x5 convolution (pad 2, stride 1) on "conv8" that collapses the
# features to a single output channel.
layer {
  name: "conv9"
  type: "Convolution"
  bottom: "conv8"
  top: "conv9"
  convolution_param {
    num_output: 1
    kernel_size: 5
    pad: 2
    stride: 1
    weight_filler { type: "gaussian" std: 0.011 }
    bias_filler { type: "constant" value: 0 }
  }
}
# relu9: leaky ReLU (negative_slope 0.01) on "conv9"; not in place -- it writes
# the final blob "result".
# The engine is left at its default rather than pinned to CUDNN: a pinned
# CUDNN engine aborts net construction on Caffe builds without cuDNN, while
# the default still selects cuDNN automatically when it is available.
layer {
  name: "relu9"
  type: "ReLU"
  bottom: "conv9"
  top: "result"
  relu_param { negative_slope: 0.01 }
}
日志:
如果您只需要像传统多层感知器那样的全连接网络,请使用2D blob(形状为 (N, D))并调用 InnerProductLayer。对于像素级预测,最后一个全连接层的 num_output 值不会是1,而是等于输入图像的 w*h。
是什么让您认为这个值是1?
编辑1:
以下是link1第3页图中提到的各层尺寸:
LAYER OUTPUT DIM [c*h*w]
coarse1 96*h1*w1 conv layer
coarse2 256*h2*w2 conv layer
coarse3 384*h3*w3 conv layer
coarse4 384*h4*w4 conv layer
coarse5 256*h5*w5 conv layer
coarse6 4096*1*1 fc layer
coarse7 X*1*1 fc layer where 'X' could be interpreted as w*h
为了进一步理解这一点,假设我们有一个预测图像像素的网络,图像大小为10*10。那么,fc层的最终输出的尺寸将是100*1*1(如上面列表中最后一行的第7层所示),它可以被解释为10*10。
现在的问题是,一维数组如何能正确预测二维图像。为此需要注意,损失(loss)是使用(可以与像素数据相对应的)标签针对这个输出计算的。因此,在训练期间,权重会学会预测像素数据。
编辑2:
尝试使用caffe中的 draw_net.py 绘制网络时,可以发现:与 conv6 和 fc6 相连的 relu 层具有相同的名称,这使得绘制出的图像中的连接关系变得混乱。我不确定这是否会在训练期间引发问题,但我建议您将其中一个 relu 层重命名为唯一的名称,以避免不可预见的问题。
回到您的问题,在完全连接层之后,似乎没有发生上采样。如日志所示:
I1108 19:34:57.881680 4277 net.cpp:150] Setting up fc7
I1108 19:34:57.881718 4277 net.cpp:157] Top shape: 1 4070 (4070)
I1108 19:34:57.881826 4277 net.cpp:150] Setting up reshape
I1108 19:34:57.881846 4277 net.cpp:157] Top shape: 1 1 55 74 (4070)
I1108 19:34:57.884768 4277 net.cpp:150] Setting up conv6
I1108 19:34:57.885309 4277 net.cpp:150] Setting up pool6
I1108 19:34:57.885327 4277 net.cpp:157] Top shape: 1 63 55 74 (256410)
fc7 的输出维度为4070*1*1。它被重塑为1*55*74,并作为输入传递到 conv6 层。
整个网络的输出在 conv9 中生成,其输出维度为1*55*74,与标签(深度数据)的维度完全一致。
如果我的答案仍然不清楚,请准确指出您认为样本增加的地方。请引用相关论文。更好的是,引用描述真正困扰你的事情的段落对不起,他们补充道:)你能举个例子吗,我无法理解你的想法。如果你告诉我你到底想要什么,我会尽力帮助你。我只是添加了一些参考资料。你能看一下上面的第三页吗。有一张图片,我想在caffe中复制该网络。然而,他们最终使用了完全连接的层,这导致了1x1分辨率,之后他们得到了类似74x55(粗7)层的分辨率。除了那一层,我什么都懂……在link1中,我得到了这个错误:
没有论文'arXiv:1406.2283v1.pdff
上面提到的论文!请看第3页的网络图像。但是如果输出是w*h
您将如何检索2d图像?图中只提到图像的深度为1。但是,您可以看到输出是如图所示的2d图像。您可以假设前“w”个像素对应第一行,依此类推,从而检索出灰度2d图像的像素值。好的,我已经尝试复现整个网络,但我不确定这是否正确。我已经更新了我的问题并添加了我的网络。您能检查一下这是否是您的意思、网络是否正确吗?为了获得良好的可视化效果,请访问并复制NetPase。用上述脚本训练完成后,我一直在检查输出是否正确。但看起来输出的是输入图像,而不是深度图像……您确定您是对的吗?或者更确切地说,我的实现是否正确……?请分享训练初始化期间打印的各个blob大小的日志。
I1108 19:34:57.881680 4277 net.cpp:150] Setting up fc7
I1108 19:34:57.881718 4277 net.cpp:157] Top shape: 1 4070 (4070)
I1108 19:34:57.881826 4277 net.cpp:150] Setting up reshape
I1108 19:34:57.881846 4277 net.cpp:157] Top shape: 1 1 55 74 (4070)
I1108 19:34:57.884768 4277 net.cpp:150] Setting up conv6
I1108 19:34:57.885309 4277 net.cpp:150] Setting up pool6
I1108 19:34:57.885327 4277 net.cpp:157] Top shape: 1 63 55 74 (256410)