Caffe: reshape / upsample a fully connected layer

Suppose we have a layer like this:

layer { 
    name: "fully-connected" 
    type: "InnerProduct" 
    bottom: "bottom" 
    top: "top" 
    inner_product_param { 
    num_output: 1 
    } 
} 

The output of this layer has shape batch_size x 1. In several papers (for example link1, top of the figure on page 3, or link2, page 4) I have seen that such a layer is eventually used to come up with a 2D image for pixel-wise prediction. How can this output be transformed into a 2D image? I was thinking of a reshape or a deconvolution, but I cannot figure out how that would work. A simple example would be helpful.

UPDATE: My input image is 304x228 and my ground truth (the depth image) is 74x55.

################# Main net ################## 

layer { 
    name: "conv1" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 96 
    kernel_size: 11 
    stride: 4 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 
layer { 
    name: "relu1" 
    type: "ReLU" 
    bottom: "conv1" 
    top: "conv1" 
} 
layer { 
    name: "norm1" 
    type: "LRN" 
    bottom: "conv1" 
    top: "norm1" 
    lrn_param { 
    local_size: 5 
    alpha: 0.0001 
    beta: 0.75 
    } 
} 
layer { 
    name: "pool1" 
    type: "Pooling" 
    bottom: "norm1" 
    top: "pool1" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 
layer { 
    name: "conv2" 
    type: "Convolution" 
    bottom: "pool1" 
    top: "conv2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 2 
    kernel_size: 5 
    group: 2 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.1 
    } 
    } 
} 
layer { 
    name: "relu2" 
    type: "ReLU" 
    bottom: "conv2" 
    top: "conv2" 
} 
layer { 
    name: "norm2" 
    type: "LRN" 
    bottom: "conv2" 
    top: "norm2" 
    lrn_param { 
    local_size: 5 
    alpha: 0.0001 
    beta: 0.75 
    } 
} 
layer { 
    name: "pool2" 
    type: "Pooling" 
    bottom: "norm2" 
    top: "pool2" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 
layer { 
    name: "conv3" 
    type: "Convolution" 
    bottom: "pool2" 
    top: "conv3" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 384 
    pad: 1 
    kernel_size: 3 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 
layer { 
    name: "relu3" 
    type: "ReLU" 
    bottom: "conv3" 
    top: "conv3" 
} 
layer { 
    name: "conv4" 
    type: "Convolution" 
    bottom: "conv3" 
    top: "conv4" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 384 
    pad: 1 
    kernel_size: 3 
    group: 2 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.1 
    } 
    } 
} 
layer { 
    name: "relu4" 
    type: "ReLU" 
    bottom: "conv4" 
    top: "conv4" 
} 
layer { 
    name: "conv5" 
    type: "Convolution" 
    bottom: "conv4" 
    top: "conv5" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 1 
    kernel_size: 3 
    group: 2 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.1 
    } 
    } 
} 
layer { 
    name: "relu5" 
    type: "ReLU" 
    bottom: "conv5" 
    top: "conv5" 
} 
layer { 
    name: "pool5" 
    type: "Pooling" 
    bottom: "conv5" 
    top: "pool5" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 
layer { 
    name: "fc6" 
    type: "InnerProduct" 
    bottom: "pool5" 
    top: "fc6" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    inner_product_param { 
    num_output: 4096 
    weight_filler { 
     type: "gaussian" 
     std: 0.005 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.1 
    } 
    } 
} 
layer { 
    name: "relufc6" 
    type: "ReLU" 
    bottom: "fc6" 
    top: "fc6" 
} 
layer { 
    name: "drop6" 
    type: "Dropout" 
    bottom: "fc6" 
    top: "fc6" 
    dropout_param { 
    dropout_ratio: 0.5 
    } 
} 

layer { 
    name: "fc7" 
    type: "InnerProduct" 
    bottom: "fc6" 
    top: "fc7" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    inner_product_param { 
    num_output: 4070   # 74*55 = 4070, one value per pixel of the 55x74 output map 
    weight_filler { 
     type: "gaussian" 
     std: 0.005 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.1 
    } 
    } 
} 

layer { 
    type: "Reshape" 
    name: "reshape" 
    bottom: "fc7" 
    top: "fc7_reshaped" 
    reshape_param { 
    # interpret the 4070-dim fc7 vector as a 1x1x55x74 blob (55*74 = 4070) 
    shape { dim: 1 dim: 1 dim: 55 dim: 74 } 
    } 
} 

layer { 
    name: "deconv1" 
    type: "Deconvolution" 
    bottom: "fc7_reshaped" 
    top: "deconv1" 
    convolution_param { 
    num_output: 64 
    kernel_size: 5 
    pad: 2 
    stride: 1 
     #group: 256 
    weight_filler { 
     type: "bilinear" 
    } 
    bias_term: false 
    } 
} 

######################### 

layer { 
    name: "conv6" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv6" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 63 
    kernel_size: 9 
    stride: 2 
    pad: 1 
    weight_filler { 
     type: "gaussian" 
     std: 0.01 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 
layer { 
    name: "relu6" 
    type: "ReLU" 
    bottom: "conv6" 
    top: "conv6" 
} 

layer { 
    name: "pool6" 
    type: "Pooling" 
    bottom: "conv6" 
    top: "pool6" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 

######################## 
layer { 
    name: "concat" 
    type: "Concat" 
    bottom: "deconv1" 
    bottom: "pool6" 
    top: "concat" 
    concat_param { 
    concat_dim: 1 
    } 
} 

layer { 
    name: "conv7" 
    type: "Convolution" 
    bottom: "concat" 
    top: "conv7" 
    convolution_param { 
    num_output: 64 
    kernel_size: 5 
    pad: 2 
    stride: 1 
    weight_filler { 
     type: "gaussian" 
     std: 0.011 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 

layer { 
    name: "relu7" 
    type: "ReLU" 
    bottom: "conv7" 
    top: "conv7" 
    relu_param{ 
    negative_slope: 0.01 
     engine: CUDNN 
    } 
} 

layer { 
    name: "conv8" 
    type: "Convolution" 
    bottom: "conv7" 
    top: "conv8" 
    convolution_param { 
    num_output: 64 
    kernel_size: 5 
    pad: 2 
    stride: 1 
    weight_filler { 
     type: "gaussian" 
     std: 0.011 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 

layer { 
    name: "relu8" 
    type: "ReLU" 
    bottom: "conv8" 
    top: "conv8" 
    relu_param{ 
    negative_slope: 0.01 
     engine: CUDNN 
    } 
} 

layer { 
    name: "conv9" 
    type: "Convolution" 
    bottom: "conv8" 
    top: "conv9" 
    convolution_param { 
    num_output: 1 
    kernel_size: 5 
    pad: 2 
    stride: 1 
    weight_filler { 
     type: "gaussian" 
     std: 0.011 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 

layer { 
    name: "relu9" 
    type: "ReLU" 
    bottom: "conv9" 
    top: "result" 
    relu_param{ 
    negative_slope: 0.01 
     engine: CUDNN 
    } 
} 

Log:

I1108 19:34:57.239722 4277 data_layer.cpp:41] output data size: 1,1,228,304 
I1108 19:34:57.243340 4277 data_layer.cpp:41] output data size: 1,1,55,74 
I1108 19:34:57.247392 4277 net.cpp:150] Setting up conv1 
I1108 19:34:57.247407 4277 net.cpp:157] Top shape: 1 96 55 74 (390720) 
I1108 19:34:57.248191 4277 net.cpp:150] Setting up pool1 
I1108 19:34:57.248196 4277 net.cpp:157] Top shape: 1 96 27 37 (95904) 
I1108 19:34:57.253263 4277 net.cpp:150] Setting up conv2 
I1108 19:34:57.253276 4277 net.cpp:157] Top shape: 1 256 27 37 (255744) 
I1108 19:34:57.254202 4277 net.cpp:150] Setting up pool2 
I1108 19:34:57.254220 4277 net.cpp:157] Top shape: 1 256 13 18 (59904) 
I1108 19:34:57.269943 4277 net.cpp:150] Setting up conv3 
I1108 19:34:57.269961 4277 net.cpp:157] Top shape: 1 384 13 18 (89856) 
I1108 19:34:57.285303 4277 net.cpp:150] Setting up conv4 
I1108 19:34:57.285338 4277 net.cpp:157] Top shape: 1 384 13 18 (89856) 
I1108 19:34:57.294801 4277 net.cpp:150] Setting up conv5 
I1108 19:34:57.294841 4277 net.cpp:157] Top shape: 1 256 13 18 (59904) 
I1108 19:34:57.295207 4277 net.cpp:150] Setting up pool5 
I1108 19:34:57.295210 4277 net.cpp:157] Top shape: 1 256 6 9 (13824) 
I1108 19:34:57.743222 4277 net.cpp:150] Setting up fc6 
I1108 19:34:57.743259 4277 net.cpp:157] Top shape: 1 4096 (4096) 
I1108 19:34:57.881680 4277 net.cpp:150] Setting up fc7 
I1108 19:34:57.881718 4277 net.cpp:157] Top shape: 1 4070 (4070) 

I1108 19:34:57.881826 4277 net.cpp:150] Setting up reshape 
I1108 19:34:57.881846 4277 net.cpp:157] Top shape: 1 1 55 74 (4070) 

I1108 19:34:57.884768 4277 net.cpp:150] Setting up conv6 
I1108 19:34:57.885309 4277 net.cpp:150] Setting up pool6 
I1108 19:34:57.885327 4277 net.cpp:157] Top shape: 1 63 55 74 (256410) 

I1108 19:34:57.885395 4277 net.cpp:150] Setting up concat 
I1108 19:34:57.885412 4277 net.cpp:157] Top shape: 1 64 55 74 (260480) 

I1108 19:34:57.886759 4277 net.cpp:150] Setting up conv7 
I1108 19:34:57.886786 4277 net.cpp:157] Top shape: 1 64 55 74 (260480) 

I1108 19:34:57.897269 4277 net.cpp:150] Setting up conv8 
I1108 19:34:57.897303 4277 net.cpp:157] Top shape: 1 64 55 74 (260480) 
I1108 19:34:57.899129 4277 net.cpp:150] Setting up conv9 
I1108 19:34:57.899138 4277 net.cpp:157] Top shape: 1 1 55 74 (4070) 

Please cite the relevant papers. Even better, please quote the specific passage that is actually puzzling you. – Shai


Sorry, I have added them :) – thigi

Answers


For pixel-wise prediction, the value of num_output of the last fully connected layer will not be 1. It will be equal to w*h of the input image.

What made you think the value would be 1?

Edit 1:

Below are the dimensions of each layer in the figure on page 3 of link1:

LAYER  OUTPUT DIM [c*h*w] 
coarse1  96*h1*w1  conv layer 
coarse2  256*h2*w2  conv layer 
coarse3  384*h3*w3  conv layer 
coarse4  384*h4*w4  conv layer 
coarse5  256*h5*w5  conv layer 
coarse6  4096*1*1  fc layer 
coarse7  X*1*1   fc layer, where 'X' can be interpreted as w*h 

To understand this further, let us assume we have a network that predicts the pixels of an image. The image size is 10*10, so the final output of the fc layer will have a dimension of 100*1*1 (as in coarse7). This can be interpreted as 10*10.
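
As a minimal sketch (the layer and blob names here are hypothetical; only the shapes come from the example above), the fc output could be given its 2D shape with a Reshape layer:

layer { 
    name: "fc_pixels"            # hypothetical fc layer predicting all pixels 
    type: "InnerProduct" 
    bottom: "features" 
    top: "fc_pixels" 
    inner_product_param { 
    num_output: 100              # w*h = 10*10, one output per pixel 
    } 
} 
layer { 
    name: "reshape_pixels" 
    type: "Reshape" 
    bottom: "fc_pixels" 
    top: "fc_pixels_2d" 
    reshape_param { 
    # N x C x H x W; dim: 0 copies the batch size from the bottom blob. 
    # The reshape is row-major: values 0..9 become row 0, 10..19 row 1, etc. 
    shape { dim: 0 dim: 1 dim: 10 dim: 10 } 
    } 
} 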

The question now is how a 1D array can correctly predict a 2D image. For that, note that the loss for this output is computed against labels that correspond to the pixel data. During training, the weights thus learn to predict the pixel data.
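
For instance, a hedged sketch of that training-time hookup (the blob names fc_pixels_2d and label are assumptions carried over from the sketch above):

layer { 
    name: "loss" 
    type: "EuclideanLoss" 
    bottom: "fc_pixels_2d"   # reshaped prediction 
    bottom: "label"          # ground-truth pixel data of the same shape 
    top: "loss" 
} 

During backpropagation the gradient of this loss flows back through the Reshape layer (which is only a view change, with no parameters) into the fc weights, which is how a 1D output learns per-pixel values.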

Edit 2:

An attempt to draw your net using caffe's draw_net.py gives the following: [network diagram]

The relu layers connected to conv6 and fc6 have the same name, which leads to a tangled connectivity in the drawn image. I am not sure whether this causes problems during training, but I suggest renaming one of the relu layers to a unique name to avoid unresolved issues.
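
For example, giving each in-place ReLU its own unique name is enough (relu_fc6_unique below is just an arbitrary placeholder):

layer { 
    name: "relu_fc6_unique"   # any name not used by another layer 
    type: "ReLU" 
    bottom: "fc6" 
    top: "fc6"                # still applied in place 
} 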

Coming back to your question, no upsampling seems to be happening after the fully connected layer, as can be seen in the log:

I1108 19:34:57.881680 4277 net.cpp:150] Setting up fc7 
I1108 19:34:57.881718 4277 net.cpp:157] Top shape: 1 4070 (4070) 

I1108 19:34:57.881826 4277 net.cpp:150] Setting up reshape 
I1108 19:34:57.881846 4277 net.cpp:157] Top shape: 1 1 55 74 (4070) 

I1108 19:34:57.884768 4277 net.cpp:150] Setting up conv6 
I1108 19:34:57.885309 4277 net.cpp:150] Setting up pool6 
I1108 19:34:57.885327 4277 net.cpp:157] Top shape: 1 63 55 74 (256410) 

fc7 has an output dimension of 4070*1*1, which is reshaped to 1*55*74 (in the prototxt above, the reshaped blob fc7_reshaped is what feeds deconv1).

The output of the entire network is produced by conv9, whose output dimension is 1*55*74, exactly matching the dimensions of the label (the depth data).

If my answer is still unclear, please point out exactly where you feel the upsampling is happening.


The papers are mentioned above! Have a look at the network figure on page 3. But if the output is 'w*h', how would you retrieve the 2D image? – thigi


The figure merely indicates that the depth of the image is 1; as you can see, the output shown there is a 2D image. You can retrieve the pixel values of the grayscale 2D image by assuming that the first 'w' pixels correspond to the first row, and so on (i.e., pixel (r, c) = v[r*w + c] for the row-major output vector v). –


Have a look at Edit 1 in the answer. –


If you only need a fully connected network, such as a traditional multilayer perceptron, use 2D blobs (shape (N, D)) and call the InnerProductLayer.
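
For instance, a minimal sketch of such a setup (the layer names and sizes are assumptions), where a 2D data blob feeds an InnerProduct layer directly:

layer { 
    name: "data" 
    type: "Input" 
    top: "data" 
    input_param { 
    shape { dim: 64 dim: 784 }   # 2D blob: N=64 samples, D=784 features 
    } 
} 
layer { 
    name: "fc1" 
    type: "InnerProduct" 
    bottom: "data" 
    top: "fc1" 
    inner_product_param { 
    num_output: 500              # ordinary fully connected layer 
    } 
} 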


Could you give an example? I cannot follow your idea. – thigi


If you tell me what exactly you want, I will try my best to help you. –


I have just added some references. Could you have a look at [link1](https://arxiv.org/pdf/1406.2283v1.pdff), page 3? There is a figure there, and I want to replicate that network with caffe. However, they end up using fully connected layers, which lead to a 1x1 resolution, and after that they come up with a resolution like 74x55 (coarse 7). I understand everything except this one layer... – thigi