Very low training accuracy with a simple CNN using Theano

I am trying to implement a CNN using Theano, and I am testing my code on a small sample of my larger dataset. I am trying to classify a set of 8,280 images (250×250 pixels) into 115 classes, and my sample set consists of 32 images from the first two classes (16 images each). The problem I am running into is that the training loss is NaN from the first epoch onward, and it never changes in later epochs.

from __future__ import print_function 

import sys 
import os 
import time 

import numpy as np 
import theano 
import theano.tensor as T 

import lasagne 
import re 
import cv2 
from lasagne.layers import Conv2DLayer, MaxPool2DLayer, DropoutLayer
from lasagne.layers import InputLayer, DenseLayer, batch_norm 

def split_list(a_list):
    half = len(a_list) // 2  # integer division, so the result is usable as a slice index in Python 3 as well
    return a_list[:half], a_list[half:]

def load_dataset(path=''):
    cat_list = []
    filelist = sorted(os.listdir(path))
    tmptrain = []
    tmptest = []
    max_id = 0
    for f in filelist:
        # Filenames look like C<class_id><F|G><sample_id>.PNG (F = forged, G = genuine)
        match = re.match(r'C(\d+)([FG])(\d+)\.PNG', f)
        id = int(match.group(1)) - 1
        max_id = max(max_id, id)
        fg_class = match.group(2)
        if id not in [p[0] for p in cat_list]:
            cat_list.append([id, [], []])
        if fg_class == 'G':
            cat_list[-1][1].append(f)
        else:
            cat_list[-1][2].append(f)
    for f in cat_list:
        id = f[0]
        trainG, testG = split_list(f[1])
        trainF, testF = split_list(f[2])
        tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG]  # (class_id, forgery_flag, filename)
        tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, G) for G in testG]
    # Images are loaded as grayscale and kept as unscaled int32 here
    X_train = np.array([cv2.imread(path + f[2], 0) for f in tmptrain]).astype(np.int32)
    y_train = np.array([f[0] for f in tmptrain]).astype(np.int32)
    X_test = np.array([cv2.imread(path + f[2], 0) for f in tmptest]).astype(np.int32)
    y_test = np.array([f[0] for f in tmptest]).astype(np.int32)
    fg_train = np.array([f[1] for f in tmptrain]).astype(np.int32)
    fg_test = np.array([f[1] for f in tmptest]).astype(np.int32)

    X_train = np.expand_dims(X_train, axis=1).astype(np.int32)
    X_test = np.expand_dims(X_test, axis=1).astype(np.int32)

    return X_train, y_train, X_test, y_test, fg_train, fg_test


def ExplicitNegativeCorrelation(net, layer='fc2', lr=0.00001):
    for param in lasagne.layers.get_all_params(net[layer]):
        if param.name.startswith('W'):
            W = param
            mean = T.mean(W, 0) * lr
            W = W - mean  # note: this only rebinds the local name; the shared variable is never updated

def ImplicitNegativeCorrelation(MSE, Cross, Hinge):
    mean = T.mean((MSE + Cross + Hinge), axis=0)
    return ((MSE - mean)**2 + (Cross - mean)**2 + (Hinge - mean)**2) / 3

def build_cnn(inputvar,input_shape, trained_weights=None): 

    net = {} 

    net['input'] = InputLayer(input_shape,input_var=inputvar) 
    net['drop_input'] = DropoutLayer(net['input'], p=0.2)
    # note: 'drop_input' is defined but never used; conv1 is wired to 'input' directly
    net['conv1'] = batch_norm(Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4, flip_filters=False))  # W=lasagne.init.HeNormal() was also tried
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2) 

    net['conv2'] = batch_norm(Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2, flip_filters=False))
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2)

    net['conv3'] = batch_norm(Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1, flip_filters=False))
    net['conv4'] = batch_norm(Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1, flip_filters=False))
    net['conv5'] = batch_norm(Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1, flip_filters=False))
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2)

    net['fc1'] = batch_norm(DenseLayer(net['pool5'], num_units=2048)) 
    net['drop_fc1'] = DropoutLayer(net['fc1']) 

    net['fc2'] = batch_norm(DenseLayer(net['drop_fc1'], num_units=2048)) 
    net['fc_class'] = batch_norm(DenseLayer(net['fc2'], num_units=115))  # note: no softmax nonlinearity here, so the loss below sees unbounded activations

    return net 



def iterate_minibatches(inputs, targets_class, targets_verif, batchsize, shuffle=False):
    assert len(inputs) == len(targets_class)
    assert len(inputs) == len(targets_verif)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]





def main(num_epochs=500): 

    print("Loading data...") 
    X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/') 
    X_val, y_val, fg_val = X_train, y_train, fg_train 
    print(y_train.shape) 

    input_var = T.tensor4('inputs') 
    target_var_class = T.ivector('targets') 

    network = build_cnn(input_var, (None, 1, 250, 250)) 

    class_prediction = lasagne.layers.get_output(network['fc_class']) # ,inputs={network['input']:input_var}) 
    loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class) 

    loss = loss_class.mean() 
    params = lasagne.layers.get_all_params([network['fc_class']], trainable=True) 


    lr = 0.01 
    updates = lasagne.updates.nesterov_momentum(
     loss, params, learning_rate=lr, momentum=0.9) 

    test_prediction_class = lasagne.layers.get_output(network['fc_class'], deterministic=True) 
    test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class, 
                  target_var_class) 
    test_loss_class = test_loss_class.mean() 
    test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class), 
         dtype=theano.config.floatX) 


    predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1)) 

    train_fn = theano.function([input_var, target_var_class], loss, updates=updates) 

    val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class]) 

    print("Starting training...") 
    BatchSize = 2 
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, fg_train, BatchSize, shuffle=True):
            inputs, targets_class, targets_verif = batch
            train_err += train_fn(inputs, targets_class)
            # ExplicitNegativeCorrelation(network, layer='fc2', lr=lr/10)
            print(targets_class, predict_class(inputs))
            train_batches += 1

        val_err_class = 0
        val_acc_class = 0

        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
            inputs, targets_class, targets_verif = batch
            err_class, acc_class = val_fn_class(inputs, targets_class)
            val_err_class += err_class
            val_acc_class += acc_class
            val_batches += 1

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
        print(" Classification accuracy:\t\t{:.2f} %".format(
            val_acc_class / val_batches * 100))

    test_err_class = 0
    test_acc_class = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
        inputs, targets_class, targets_verif = batch
        err_class, acc_class = val_fn_class(inputs, targets_class)
        test_err_class += err_class
        test_acc_class += acc_class
        test_batches += 1
    print("Final results:")
    print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
    print(" test accuracy (Classification):\t\t{:.2f} %".format(
        test_acc_class / test_batches * 100))

if __name__ == '__main__': 
    main() 

I tried putting lasagne.nonlinearities.softmax on the DenseLayers, and it did solve the NaN problem, but the training accuracy of the model never got any better: it fluctuated between 0 and 25% (even after 50 epochs!).
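
For reference, that change presumably looks something like this on the output layer (a sketch; the post does not show the exact line):

net['fc_class'] = batch_norm(DenseLayer(net['fc2'], num_units=115,
                                        nonlinearity=lasagne.nonlinearities.softmax))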

I have implemented a load_dataset function that I believe works correctly (I have tested it several times), and I use each image's class id as the target in the loss function. So my inputs and targets look like this:

Input Shape: (BatchSize, 1, 250, 250)
Target Shape: (BatchSize,) : a 1-D vector of class ids (target_var_class is a T.ivector)
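
One quick sanity check for a NaN loss is to verify the raw input scale and the label range before touching the model; a minimal sketch, assuming the load_dataset() defined above:

print(X_train.dtype, X_train.min(), X_train.max())  # int32 pixels in 0..255: large, unscaled inputs
assert y_train.min() >= 0 and y_train.max() < 115   # labels must lie in [0, num_classes)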

I have uploaded a sample set at this link.

Answer


It looks like we have 4 classes according to that data (two signer ids, each with genuine and forged samples), so I changed the loading code to reflect that:

y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32) 
y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32) 
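
With two signer ids in the sample set, this maps each (id, forgery flag) pair to one of four labels; a quick sketch of the remapping:

for class_id in (0, 1):
    for forged in (0, 1):
        print(class_id, forged, '->', class_id * 2 + forged)  # prints labels 0, 1, 2, 3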

The number of units in the output layer should equal the number of classes, so I added an output layer with a softmax nonlinearity:

net['fo_class'] = DenseLayer(net['fc_class'],num_units=4, 
        nonlinearity=lasagne.nonlinearities.softmax) 

I would suggest removing the dropout layer right after the input entirely; you can compare the results with and without it to confirm this.
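
In the build_cnn code above, that amounts to deleting the drop_input line so the first convolution reads the input directly; a sketch of the relevant lines:

net['input'] = InputLayer(input_shape, input_var=inputvar)
# no DropoutLayer here: conv1 consumes the raw input directly
net['conv1'] = Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4)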

A batch size of 2 is too small, and the learning rate is too high.
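
Concretely, the full example below switches to the following settings (the exact values are a judgment call, not the only workable ones):

BatchSize = 16   # larger batches give less noisy gradient estimates
lr = 0.0007      # a much smaller step size to keep training stable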

Here is an example of the code with these changes:

from __future__ import print_function 

import sys 
import os 
import time 

import numpy as np 
import theano 
import theano.tensor as T 

import lasagne 
import re 
import cv2 
from lasagne.layers import Conv2DLayer, MaxPool2DLayer, DropoutLayer
from lasagne.layers import InputLayer, DenseLayer 

def split_list(a_list):
    half = len(a_list) // 2  # integer division for slicing
    return a_list[:half], a_list[half:]

def load_dataset(path=''):
    cat_list = []
    filelist = sorted(os.listdir(path))
    tmptrain = []
    tmptest = []
    max_id = 0
    for f in filelist:
        match = re.match(r'C(\d+)([FG])(\d+)\.PNG', f)
        id = int(match.group(1)) - 1
        max_id = max(max_id, id)
        fg_class = match.group(2)
        if id not in [p[0] for p in cat_list]:
            cat_list.append([id, [], []])
        if fg_class == 'G':
            cat_list[-1][1].append(f)
        else:
            cat_list[-1][2].append(f)
    for f in cat_list:
        id = f[0]
        trainG, testG = split_list(f[1])
        trainF, testF = split_list(f[2])
        tmptrain = tmptrain + [(id, 1, F) for F in trainF] + [(id, 0, G) for G in trainG]
        tmptest = tmptest + [(id, 1, F) for F in testF] + [(id, 0, G) for G in testG]
    # Map (class id, forgery flag) to a distinct label: id * 2 + flag
    X_train = np.array([cv2.imread(path + f[2], 0) for f in tmptrain]).astype(np.float32)
    y_train = np.array([f[0] * 2 + f[1] for f in tmptrain]).astype(np.int32)
    X_test = np.array([cv2.imread(path + f[2], 0) for f in tmptest]).astype(np.float32)
    y_test = np.array([f[0] * 2 + f[1] for f in tmptest]).astype(np.int32)
    fg_train = np.array([f[1] for f in tmptrain]).astype(np.float32)
    fg_test = np.array([f[1] for f in tmptest]).astype(np.float32)

    X_train = np.expand_dims(X_train, axis=1).astype(np.float32)
    X_test = np.expand_dims(X_test, axis=1).astype(np.float32)

    return X_train, y_train, X_test, y_test, fg_train, fg_test


def ExplicitNegativeCorrelation(net, layer='fc2', lr=0.00001):
    for param in lasagne.layers.get_all_params(net[layer]):
        if param.name.startswith('W'):
            W = param
            mean = T.mean(W, 0) * lr
            W = W - mean  # rebinds the local name only, as in the question

def ImplicitNegativeCorrelation(MSE, Cross, Hinge):
    mean = T.mean((MSE + Cross + Hinge), axis=0)
    return ((MSE - mean)**2 + (Cross - mean)**2 + (Hinge - mean)**2) / 3

def build_cnn(inputvar,input_shape, trained_weights=None): 

    net = {} 

    net['input'] = InputLayer(input_shape,input_var=inputvar)  
    net['conv1'] = Conv2DLayer(net['input'], num_filters=96, filter_size=11, stride=4) 
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=3, stride=2) 

    net['conv2'] = Conv2DLayer(net['pool1'], num_filters=256, filter_size=5, pad=2) 
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=3, stride=2) 

    net['conv3'] = Conv2DLayer(net['pool2'], num_filters=384, filter_size=3, pad=1) 
    net['conv4'] = Conv2DLayer(net['conv3'], num_filters=384, filter_size=3, pad=1) 
    net['conv5'] = Conv2DLayer(net['conv4'], num_filters=256, filter_size=3, pad=1) 

    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=3, stride=2) 

    net['fc1'] = DenseLayer(net['pool5'], num_units=2048) 
    net['drop_fc1'] = DropoutLayer(net['fc1']) 

    net['fc2'] = DenseLayer(net['drop_fc1'], num_units=2048)  
    net['fc_class'] = DenseLayer(net['fc2'],num_units=115) 

    net['fo_class'] = DenseLayer(net['fc_class'],num_units=4, 
         nonlinearity=lasagne.nonlinearities.softmax) 
    return net 


def iterate_minibatches(inputs, targets_class, targets_verif, batchsize, shuffle=False):
    assert len(inputs) == len(targets_class)
    assert len(inputs) == len(targets_verif)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets_class[excerpt], targets_verif[excerpt]

def main(num_epochs=500): 

    print("Loading data...") 
    X_train, y_train, X_test, y_test, fg_train, fg_test = load_dataset('./signatures/tmp4/') 
    X_train /= 255 
    X_val, y_val, fg_val = X_train, y_train, fg_train 
    print(y_train.shape)  
    check = X_train[0][0] 
    print(check)  

    input_var = T.tensor4('inputs') 
    target_var_class = T.ivector('targets') 

    network = build_cnn(input_var, (None, 1, 250, 250)) 

    class_prediction = lasagne.layers.get_output(network['fo_class']) 

    loss_class = lasagne.objectives.categorical_crossentropy(class_prediction, target_var_class) 

    loss = loss_class.mean() 
    params = lasagne.layers.get_all_params([network['fo_class']], trainable=True) 


    lr = 0.0007 
    updates = lasagne.updates.nesterov_momentum(
     loss, params, learning_rate=lr, momentum=0.9) 

    test_prediction_class = lasagne.layers.get_output(network['fo_class'], deterministic=True) 
    test_loss_class = lasagne.objectives.categorical_crossentropy(test_prediction_class, 
                  target_var_class) 

    test_loss_class = test_loss_class.mean() 
    test_acc_class = T.mean(T.eq(T.argmax(test_prediction_class, axis=1), target_var_class), 
         dtype=theano.config.floatX)      

    predict_class = theano.function([input_var], T.argmax(test_prediction_class,axis=1)) 

    train_fn = theano.function([input_var, target_var_class], loss, updates=updates) 

    val_fn_class = theano.function([input_var, target_var_class], [test_loss_class, test_acc_class]) 

    print("Starting training...") 
    BatchSize = 16 
    for epoch in range(num_epochs):
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, fg_train, BatchSize, shuffle=True):
            inputs, targets_class, targets_verif = batch
            train_err += train_fn(inputs, targets_class)
            print(targets_class, predict_class(inputs))
            train_batches += 1

        val_err_class = 0
        val_acc_class = 0

        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, fg_val, BatchSize, shuffle=False):
            inputs, targets_class, targets_verif = batch
            err_class, acc_class = val_fn_class(inputs, targets_class)
            val_err_class += err_class
            val_acc_class += acc_class
            val_batches += 1

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" Classification loss:\t\t{:.6f}".format(val_err_class / val_batches))
        print(" Classification accuracy:\t\t{:.2f} %".format(
            val_acc_class / val_batches * 100))

    test_err_class = 0
    test_acc_class = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, fg_test, BatchSize, shuffle=False):
        inputs, targets_class, targets_verif = batch
        err_class, acc_class = val_fn_class(inputs, targets_class)
        test_err_class += err_class
        test_acc_class += acc_class
        test_batches += 1
    print("Final results:")
    print(" test loss (Classification):\t\t\t{:.6f}".format(test_err_class / test_batches))
    print(" test accuracy (Classification):\t\t{:.2f} %".format(
        test_acc_class / test_batches * 100))

if __name__ == '__main__': 
    main() 