
I have now spent quite a long time (almost two months) trying to get FCN32 to do semantic segmentation of single-channel images. I have played with different learning rates and even added a BatchNormalization layer. However, I have not seen any sensible output at all. I have no option left but to ask for help here. I really do not know what I am doing wrong. Is there anything wrong with how I am doing this semantic segmentation?

I send a single image to the network as the batch. This is the training loss curve with lr = 1e-9 and lr_policy: "fixed": [training loss curve]

I then increased the learning rate to 1e-4 (figure below). The loss appears to be decreasing, but the learning curve does not look normal: [training loss curve]

I reduced the original FCN to the following layers:

(1) Conv64 - ReLU - Conv64 - ReLU - MaxPool

(2) Conv128 - ReLU - Conv128 - ReLU - MaxPool

(3) Conv256 - ReLU - Conv256 - ReLU - MaxPool

(4) Conv4096 - ReLU - Dropout0.5

(5) Conv4096 - ReLU - Dropout0.5

(6) Conv2 (the 1x1 score layer, score_fr)

(7) Deconv32x - Crop

(8) SoftmaxWithLoss

layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    include { 
    phase: TRAIN 
    } 
    transform_param { 
    mean_file: "/jjj/FCN32_mean.binaryproto" 
    } 

    data_param { 
    source: "/jjj/train_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "label" 
    type: "Data" 
    top: "label" 
    include { 
    phase: TRAIN 
    } 
    data_param { 
    source: "/jjj/train_label_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    include { 
    phase: TEST 
    } 
    transform_param { 
    mean_file: "/jjj/FCN32_mean.binaryproto" 
    } 
    data_param { 
    source: "/jjj/val_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 
layer { 
    name: "label" 
    type: "Data" 
    top: "label" 
    include { 
    phase: TEST 
    } 
    data_param { 
    source: "/jjj/val_label_lmdb/" 
    batch_size: 1 
    backend: LMDB 
    } 
} 

layer { 
    name: "conv1_1" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
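    # pad of 100 is the standard FCN trick: it guarantees enough spatial context 
    # for arbitrary input sizes; the surplus is removed by the Crop layer at the end 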
    pad: 100 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu1_1" 
    type: "ReLU" 
    bottom: "conv1_1" 
    top: "conv1_1" 
} 
layer { 
    name: "conv1_2" 
    type: "Convolution" 
    bottom: "conv1_1" 
    top: "conv1_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu1_2" 
    type: "ReLU" 
    bottom: "conv1_2" 
    top: "conv1_2" 
} 
layer { 
    name: "pool1" 
    type: "Pooling" 
    bottom: "conv1_2" 
    top: "pool1" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "conv2_1" 
    type: "Convolution" 
    bottom: "pool1" 
    top: "conv2_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 128 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu2_1" 
    type: "ReLU" 
    bottom: "conv2_1" 
    top: "conv2_1" 
} 
layer { 
    name: "conv2_2" 
    type: "Convolution" 
    bottom: "conv2_1" 
    top: "conv2_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 128 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu2_2" 
    type: "ReLU" 
    bottom: "conv2_2" 
    top: "conv2_2" 
} 
layer { 
    name: "pool2" 
    type: "Pooling" 
    bottom: "conv2_2" 
    top: "pool2" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "conv3_1" 
    type: "Convolution" 
    bottom: "pool2" 
    top: "conv3_1" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu3_1" 
    type: "ReLU" 
    bottom: "conv3_1" 
    top: "conv3_1" 
} 
layer { 
    name: "conv3_2" 
    type: "Convolution" 
    bottom: "conv3_1" 
    top: "conv3_2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 256 
    pad: 1 
    kernel_size: 3 
    stride: 1 
    } 
} 
layer { 
    name: "relu3_2" 
    type: "ReLU" 
    bottom: "conv3_2" 
    top: "conv3_2" 
} 
layer { 
    name: "pool3" 
    type: "Pooling" 
    bottom: "conv3_2" 
    top: "pool3" 
    pooling_param { 
    pool: MAX 
    kernel_size: 2 
    stride: 2 
    } 
} 
layer { 
    name: "fc6" 
    type: "Convolution" 
    bottom: "pool3" 
    top: "fc6" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 4096 
    pad: 0 
    kernel_size: 7 
    stride: 1 
    } 
} 
layer { 
    name: "relu6" 
    type: "ReLU" 
    bottom: "fc6" 
    top: "fc6" 
} 
layer { 
    name: "drop6" 
    type: "Dropout" 
    bottom: "fc6" 
    top: "fc6" 
    dropout_param { 
    dropout_ratio: 0.5 
    } 
} 
layer { 
    name: "fc7" 
    type: "Convolution" 
    bottom: "fc6" 
    top: "fc7" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 4096 
    pad: 0 
    kernel_size: 1 
    stride: 1 
    } 
} 
layer { 
    name: "relu7" 
    type: "ReLU" 
    bottom: "fc7" 
    top: "fc7" 
} 
layer { 
    name: "drop7" 
    type: "Dropout" 
    bottom: "fc7" 
    top: "fc7" 
    dropout_param { 
    dropout_ratio: 0.5 
    } 
} 
layer { 
    name: "score_fr" 
    type: "Convolution" 
    bottom: "fc7" 
    top: "score_fr" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 5 # 5 classes for this task (the original FCN uses 21 for PASCAL VOC) 
    pad: 0 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
    } 
    } 
} 
layer { 
    name: "upscore" 
    type: "Deconvolution" 
    bottom: "score_fr" 
    top: "upscore" 
    param { 
    lr_mult: 0 
    } 
    convolution_param { 
    num_output: 5 # must match the number of classes in score_fr 
    bias_term: false 
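    # for an upsampling factor f, FCN uses kernel_size = 2*f - f%2, i.e. 64 for f = 32 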
    kernel_size: 64 
    stride: 32 
    group: 5 # one group per class, so the bilinear filler upsamples each channel independently 
    weight_filler { 
     type: "bilinear" 
    } 
    } 
} 
layer { 
    name: "score" 
    type: "Crop" 
    bottom: "upscore" 
    bottom: "data" 
    top: "score" 
    crop_param { 
    axis: 2 
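    # offset 19 comes from the original FCN-32s definition (it compensates for the 
    # pad of 100 in conv1_1); a modified architecture may need a different offset 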
    offset: 19 
    } 
} 
layer { 
    name: "accuracy" 
    type: "Accuracy" 
    bottom: "score" 
    bottom: "label" 
    top: "accuracy" 
    include { 
    phase: TRAIN 
    } 
} 

layer { 
    name: "accuracy" 
    type: "Accuracy" 
    bottom: "score" 
    bottom: "label" 
    top: "accuracy" 
    include { 
    phase: TEST 
    } 
} 
layer { 
    name: "loss" 
    type: "SoftmaxWithLoss" 
    bottom: "score" 
    bottom: "label" 
    top: "loss" 
    loss_param { 
    ignore_label: 255 
    normalize: true 
    } 
} 

and this is the solver definition:

net: "train_val.prototxt" 
#test_net: "val.prototxt" 
test_iter: 736 
# make test net, but don't invoke it from the solver itself 
test_interval: 2000 #1000000 
display: 50 
average_loss: 50 
lr_policy: "step" #"fixed" 
stepsize: 2000 #+ 
gamma: 0.1 #+ 
# lr for unnormalized softmax 
base_lr: 0.0001 
# high momentum 
momentum: 0.99 
# no gradient accumulation 
iter_size: 1 
max_iter: 10000 
weight_decay: 0.0005 
snapshot: 2000 
snapshot_prefix: "snapshot/NET1" 
test_initialization: false 
solver_mode: GPU 
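
For reference, with lr_policy: "step" Caffe sets lr = base_lr * gamma^floor(iter/stepsize), so with the values above the learning rate is 1e-4 for iterations 0-1999, 1e-5 for 2000-3999, 1e-6 for 4000-5999, and so on.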

At the beginning the loss starts to decrease, but after some iterations it stops showing good learning behavior: [training loss curve]

I am a beginner in deep learning and Caffe. I really do not understand why this is happening. I would be very grateful if someone with expertise could take a look at the model definition and help me.


Are you starting from pretrained weights, or are you training the network from scratch (random weights)?


I am training from scratch. Thanks for your help.

Answer


The problem is that you are training from scratch.

Reading the FCN paper will tell you that the authors always start from networks pretrained on ImageNet. It will not work if you train it from scratch; it has to be fine-tuned from a pretrained network. If you train from random weights, the optimization problem simply does not converge.
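Here is a minimal sketch of how such fine-tuning could be launched with pycaffe, assuming a downloaded VGG16 caffemodel (the file names below are illustrative) and a prototxt whose early conv layers share VGG's names:

import caffe 

caffe.set_mode_gpu() 
solver = caffe.SGDSolver('solver.prototxt') 
# copy_from copies weights only for layers whose name and shape both match the 
# pretrained net; rename any layer whose shape differs (e.g. this fc6 sits on 
# pool3, so its weights cannot come from VGG's fc6) so it keeps its fresh 
# initialization instead of triggering a shape-mismatch error 
solver.net.copy_from('VGG_ILSVRC_16_layers.caffemodel') 
solver.solve() 

The same thing can be done from the command line with caffe train -solver solver.prototxt -weights VGG_ILSVRC_16_layers.caffemodel.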


Thanks for your comment. Referring to point number 4 in this [link](http://cs231n.github.io/transfer-learning/), it says: "The new dataset is large and very different from the original dataset. Since the dataset is very large, we may expect that we can afford to train a ConvNet from scratch." Since my data is very different from the original datasets of the pretrained models, what happens then? I feel confused. Thanks a lot.


What can I do? What do you suggest? Thanks a lot.


@S.EB The simplest way is to pretrain your network on some dataset, say ImageNet for image classification, then change part of the architecture and fine-tune it. If you cannot do that, don't use your own network architecture; just use a pretrained network like VGG/ResNet.