
I am training a convolutional neural network (CNN), and every time I run my model I get a different training error rate. In my previous question (here) I found that the main cause is the way the weights are generated (randomly): every time I train my CNN, it starts from a different point. So I am looking for a way to control the randomness of the weights and keep the output the same on every run. How can I control the randomness of the generated weights?

Someone on my previous question suggested using a seed. In fact, I tried that seeding code at the beginning of my code, but I am not quite sure whether it is working correctly.
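A minimal way to check whether the seed actually takes effect is to reset it and compare two draws. Here is a quick sketch using rng, MATLAB's newer front end to the global random stream (the RandStream call shown in the function below behaves the same way):

rng(1, 'twister');        % fix the seed of the global random stream
w1 = randn(5);            % first simulated weight initialization
rng(1, 'twister');        % reset to the same seed
w2 = randn(5);            % draw again
isequal(w1, w2)           % returns true: the randomness is controlled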

Here is the function responsible for initializing the random weights:

function init(flag) 
% In this function, a number of fields are added to the config 
% structure to complete the configuration of our CNN model for both the 
% forward and backward pipelines. 
% Note that all functions with a capital B in the middle of their names 
% (e.g. convBpool and convBconv) belong to the backward pipeline. 
% Inputs: 
% flag: 0 for training, 1 for testing 

% Seed the global random stream so that weight initialization is reproducible. 
s = RandStream('mt19937ar','Seed',1); 
RandStream.setGlobalStream(s); 

global config; 

config.GEN_OUTPUT = @gen_output_copy; 
if strcmp(config.compute_device, 'GPU') 
    init_gpu(1); % Enable the GPU device. 
    config.NEW_MEM = @to_gpu; % Function handle that moves data onto the GPU. 
    config.IM2COL = @im2col_gpu; % Perform the im2col operation on the GPU. 
else 
    config.NEW_MEM = @to_cpu; 
    config.IM2COL = @im2col; 
end 
% Select the nonlinearity function. 
if strcmp(config.nonlinearity, 'relu') 
    config.NONLINEARITY = @relu; 
elseif strcmp(config.nonlinearity, 'tanh') 
    config.NONLINEARITY = @tanh; 
elseif strcmp(config.nonlinearity, 'sigmoid') 
    config.NONLINEARITY = @sigmoid; 
else 
    config.NONLINEARITY = @tanh; 
    fprintf('nonlinearity spec error, use tanh by default\n'); 
end 

if strcmp(config.output_activation, 'softmax') 
    config.OUT_ACT = @softmax; 
elseif strcmp(config.output_activation, 'inherit') 
    config.OUT_ACT = config.NONLINEARITY; 
elseif strcmp(config.output_activation, 'nil') 
    config.OUT_ACT = @nonlinearity_nil; 
else 
    config.OUT_ACT = @softmax; 
    fprintf('output_activation spec error, use softmax by default\n'); 
end 

if strcmp(config.cost_function, 'cross entropy') 
    config.COST_FUN = @cross_entropy; 
elseif strcmp(config.cost_function, 'L2 norm') 
    config.COST_FUN = @L2_norm; 
else 
    config.COST_FUN = @cross_entropy; 
    fprintf('cost_function spec error, use cross_entropy by default\n'); 
end 

config.cost = 0; 
config.misc.current_layer = 1; 

% initialize weights and calculate some statistics 
r = config.weight_range;  
conv_layer_c = 0; 
pool_layer_c = 0; 
full_layer_c = 0; 

layer_num = length(config.forward_pass_scheme)-1; % the scheme includes a final 'out' stage, hence the -1. 
config.layer_num = layer_num; 

config.feature_map_sizes = {}; 
config.weights = {}; 
for idx = 1:layer_num 
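    % The first layer consumes the raw input; subsequent layers are dispatched on the scheme string. 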
    if idx == 1 
     conv_layer_c = conv_layer_c + 1; 
             % Determine the size of the feature maps in the first layer, along with the depth of the volume. 
     config.feature_map_sizes{idx} = [config.input_size(1)-config.kernel_size(1,1)+1 config.input_size(2)-config.kernel_size(1,2)+1 ... 
             config.conv_hidden_size(conv_layer_c)]; 
     %config.misc.mask_type = 16;  % hard code here for now 
     %config.misc.mask_type = 4; 
     if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr') 
      config.weights{idx} = {}; 
      for t = 1:config.misc.mask_type 
       config.weights{idx}{t} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ... 
              config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r); 
      end 
      % create mask and generate conv index 
      mask_mem(); 
      %mask = config.NEW_MEM([1 0;0 0]); 
      mask = config.NEW_MEM([1 0 0 0;0 0 0 0;0 0 0 0;0 0 0 0]); 
      mask = repmat(mask, config.input_size(1)/sqrt(config.misc.mask_type), config.input_size(2)/sqrt(config.misc.mask_type), config.chs); 
      mask = repmat(mask, 1,1,1,config.batch_size); 
      mask2conv(mask); 
     elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v') 
      config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ... 
              config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r); 
      if config.normalize_init_weights 
       config.weights{idx} = config.weights{idx}/sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c)); 
      end 
     elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v_mask_norm') 
      config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ... 
              config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.chs)*r) + r; 
      if config.normalize_init_weights 
       config.weights{idx} = config.weights{idx}/sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c)); 
      end 
     end 
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v') 
     conv_layer_c = conv_layer_c + 1; 
     config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)-config.kernel_size(conv_layer_c,1)+1 ... 
             config.feature_map_sizes{idx-1}(2)-config.kernel_size(conv_layer_c,2)+1 ... 
             config.conv_hidden_size(conv_layer_c)]; 
     config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ... 
             config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.feature_map_sizes{idx-1}(3))*r); 
     if config.normalize_init_weights 
      config.weights{idx} = config.weights{idx}/sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * config.conv_hidden_size(conv_layer_c)); 
     end 
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f') 
     conv_layer_c = conv_layer_c + 1; 
     if idx == layer_num 
      config.weights{idx} = config.NEW_MEM(randn(config.kernel_size(conv_layer_c, 1)*config.kernel_size(conv_layer_c, 2)*config.output_size(3), config.conv_hidden_size(conv_layer_c-1))*r); 
      if config.normalize_init_weights 
       config.weights{idx} = config.weights{idx}/sqrt(config.kernel_size(conv_layer_c, 1) * config.kernel_size(conv_layer_c, 2) * size(config.weights{idx}, 1)); 
      end 
      config.GEN_OUTPUT = @gen_output_from_conv_f; 
     else 
      fprintf('in init(): conv_f layer in the hidden layer not supported yet.\n'); 
     end 
    elseif strcmp(config.forward_pass_scheme{idx}, 'pool') 
     pool_layer_c = pool_layer_c + 1; % Determine the size of the feature maps in the pooling layer. 
     config.feature_map_sizes{idx} = [config.feature_map_sizes{idx-1}(1)/2 config.feature_map_sizes{idx-1}(2)/2 ... 
             config.feature_map_sizes{idx-1}(3)];    
     config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx-1}(3), 1) * r)/4;    
    elseif strcmp(config.forward_pass_scheme{idx}, 'full') 
     full_layer_c = full_layer_c + 1;    
     if idx == layer_num 
      config.weights{idx} = config.NEW_MEM(randn(config.output_size(3), config.feature_map_sizes{idx-1}(3)) * r); 
      if config.normalize_init_weights 
       config.weights{idx} = config.weights{idx}/sqrt(config.output_size(3)); 
      end 
     else 
      config.feature_map_sizes{idx} = [1 1 config.full_hidden_size(full_layer_c)]; 
      config.weights{idx} = config.NEW_MEM(randn(config.feature_map_sizes{idx}(3), ... 
       config.feature_map_sizes{idx-1}(1)*config.feature_map_sizes{idx-1}(2)*config.feature_map_sizes{idx-1}(3)) * r); 
      if config.normalize_init_weights 
       config.weights{idx} = config.weights{idx}/sqrt(config.feature_map_sizes{idx}(3)); 
      end 
     end    
    end 
end 

% initialize bias 
for idx = 1:layer_num-1 
    config.weights{idx+layer_num} = config.NEW_MEM(zeros(config.feature_map_sizes{idx}(3), 1)+0.01); 
end 
if strcmp(config.forward_pass_scheme{layer_num}, 'conv_f') 
    config.weights{layer_num*2} = config.NEW_MEM(zeros(size(config.weights{layer_num}, 1), 1)+0.05); 
else 
    config.weights{layer_num*2} = config.NEW_MEM(zeros(config.output_size(3), 1)+0.05); 
end 

% prepare memory 
reset_mem(); 
input_mem(); 
if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm') 
    mask_mem(); 
end 
if strcmp(config.forward_pass_scheme{2}, 'conv_v') 
    conv2conv_mem(1); 
end 
for m = 2:layer_num 
    if strfind(config.forward_pass_scheme{m}, 'conv') 
     conv_mem(m); 
     if strcmp(config.forward_pass_scheme{m+1}, 'out') 
      conv2out_mem(); 
     elseif strcmp(config.forward_pass_scheme{m+1}, 'conv_v') 
      conv2conv_mem(m); 
     end 
    elseif strcmp(config.forward_pass_scheme{m}, 'pool') 
     pool_mem(m); 
     if strcmp(config.forward_pass_scheme{m+1}, 'conv_v') 
      pool2conv_mem(m); 
     end 
    elseif strcmp(config.forward_pass_scheme{m}, 'full') 
     full_mem(m); 
    end 
end 

% building forward pipeline 
config.pipeline_forward = {}; 
config.pipeline_forward{1} = @input2conv; 
if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm') 
    config.pipeline_forward{2} = @mask2conv; 
end 
conv_layer_c = 1; 
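% Walk the scheme from front to back and append the forward ops in execution order. 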
for idx = 1:layer_num 
    if strfind(config.forward_pass_scheme{idx}, 'conv') 
     conv_layer_c = conv_layer_c + 1; 
     if strcmp(config.forward_pass_scheme{idx}, 'conv_v_sr') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @conv_forward_SR; 
     else 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @conv_forward; 
     end 
     if strcmp(config.forward_pass_scheme{idx}, 'conv_v_mask_norm') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @mask_conv_forward; 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @mask_normalize; 
     end 
     if strcmp(config.forward_pass_scheme{idx+1}, 'conv_v') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
      if config.kernel_size(conv_layer_c, 1) == 1 && config.kernel_size(conv_layer_c, 2) == 1 
       config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv1by1; 
      else 
       config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv; 
      end 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'conv_f') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2conv_f; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'pool') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2pool; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'full') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2full; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'out') 
      if strcmp(config.forward_pass_scheme{idx}, 'conv_f') 
       config.pipeline_forward{length(config.pipeline_forward)+1} = @conv2out; 
       config.pipeline_forward{length(config.pipeline_forward)+1} = @out_forward; 
      else 
       fprintf('in init(): currently only support conv_f as the output conv layer.\n'); 
      end 
     end 
    elseif strcmp(config.forward_pass_scheme{idx}, 'pool') 
     config.pipeline_forward{length(config.pipeline_forward)+1} = @pool_forward; 
     config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
     if strcmp(config.forward_pass_scheme{idx+1}, 'conv_v') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @pool2conv; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'pool') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @pool2pool; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'full') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @pool2full; 
     end 
    elseif strcmp(config.forward_pass_scheme{idx}, 'full') 
     config.pipeline_forward{length(config.pipeline_forward)+1} = @full_forward; 
     if strcmp(config.forward_pass_scheme{idx+1}, 'full') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @nonlinearity; 
      if config.dropout_full_layer == 1 
       config.pipeline_forward{length(config.pipeline_forward)+1} = @dropout_forward; 
      end 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @full2full; 
     elseif strcmp(config.forward_pass_scheme{idx+1}, 'out') 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @full2out; 
      config.pipeline_forward{length(config.pipeline_forward)+1} = @out_forward; 
     end 
    end 
end 

config.SCALE_INPUT = @scale_input_nil; 
config.SCALE_OUTPUT = @scale_output_nil; 

if flag ~= 0 
    return; 
end 
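% Everything below configures the backward pass and is needed only for training. 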
config.EXPAND_DELTA_OUT = @expand_delta_out_nil; 
if strcmp(config.nonlinearity, 'relu') 
    config.DERI_NONLINEARITY = @deri_relu; 
elseif strcmp(config.nonlinearity, 'tanh') 
    config.DERI_NONLINEARITY = @deri_tanh; 
elseif strcmp(config.nonlinearity, 'sigmoid') 
    config.DERI_NONLINEARITY = @deri_sigmoid; 
else 
    config.DERI_NONLINEARITY = @deri_tanh;   
end 

if strcmp(config.output_activation, 'softmax') 
    config.DERI_OUT_ACT = @deri_softmax; 
elseif strcmp(config.output_activation, 'inherit') 
    config.DERI_OUT_ACT = @deri_inherit; 
elseif strcmp(config.output_activation, 'nil') 
    config.DERI_OUT_ACT = @deri_nonlinearity_nil; 
else 
    config.DERI_OUT_ACT = @deri_softmax;   
end 

if strcmp(config.cost_function, 'cross entropy') 
    config.DERI_COST_FUN = @deri_cross_entropy; 
elseif strcmp(config.cost_function, 'L2 norm') 
    config.DERI_COST_FUN = @deri_L2_norm; 
else 
    config.DERI_COST_FUN = @deri_cross_entropy;   
end 

for m = 2:layer_num   
    if strcmp(config.forward_pass_scheme{m}, 'conv_v')    
     if strcmp(config.forward_pass_scheme{m-1}, 'pool') 
      convBpool_mem(m); 
     elseif strfind(config.forward_pass_scheme{m}, 'conv') 
      conv_layer_id = get_conv_layer_idx_from_layer_idx(m); 
      if config.kernel_size(conv_layer_id, 1) ~= 1 && config.kernel_size(conv_layer_id, 2) ~= 1 
       convBconv_mem(m); 
      end 
     end   
    end 
end 

% building pipeline for backprop 
config.pipeline_backprop = {}; 
config.pipeline_backprop{1} = @out_backprop; 
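% Walk the scheme from back to front and append the backprop ops in execution order. 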
for idx = layer_num+1:-1:3 
    if strcmp(config.forward_pass_scheme{idx}, 'out') 
     if strcmp(config.forward_pass_scheme{idx-1}, 'conv_f') 
      config.EXPAND_DELTA_OUT = @expand_delta_out_for_conv_f; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @outBconv; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop; 
     elseif strcmp(config.forward_pass_scheme{idx-1}, 'full') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @outBfull; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @full_backprop; 
     else 
      fprintf('in init(): backprop from the output layer to the specified layer is not yet supported.\n'); 
     end    
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_f') 
     if strcmp(config.forward_pass_scheme{idx-1}, 'conv_v')     
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1;     
     else 
      fprintf('in init(): backprop from conv_f to the specified layer is not yet supported.\n'); 
     end 
     config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop; 
    elseif strcmp(config.forward_pass_scheme{idx}, 'conv_v') 
     if strfind(config.forward_pass_scheme{idx-1}, 'conv') 
      conv_layer_id = get_conv_layer_idx_from_layer_idx(idx); 
      if config.kernel_size(conv_layer_id, 1) == 1 && config.kernel_size(conv_layer_id, 2) == 1 
       config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_1by1; 
      else 
       config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv; 
      end 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop; 
     elseif strcmp(config.forward_pass_scheme{idx-1}, 'pool') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBpool; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @pool_backprop; 
     end    
    elseif strcmp(config.forward_pass_scheme{idx}, 'pool') 
     if strcmp(config.forward_pass_scheme{idx-1}, 'conv_v') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @poolBconv; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @conv_backprop; 
     elseif strcmp(config.forward_pass_scheme{idx-1}, 'pool') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @poolBpool; 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @pool_backprop; 
     end    
    elseif strcmp(config.forward_pass_scheme{idx}, 'full') 
     if strcmp(config.forward_pass_scheme{idx-1}, 'full') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @fullBfull; 
     elseif strcmp(config.forward_pass_scheme{idx-1}, 'conv_v') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @fullBconv; 
     elseif strcmp(config.forward_pass_scheme{idx-1}, 'pool') 
      config.pipeline_backprop{length(config.pipeline_backprop)+1} = @fullBpool; 
     end 
     config.pipeline_backprop{length(config.pipeline_backprop)+1} = @full_backprop; 
    end     
end 
if strcmp(config.forward_pass_scheme{2}, 'conv_v') && config.kernel_size(2, 1) ~= 1 && config.kernel_size(2, 2) ~= 1 
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBconv_last; 
end 
if strcmp(config.forward_pass_scheme{1}, 'conv_v_mask_norm') 
    if strcmp(config.mask_for_SR, 'true') 
     config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask_accel; 
    else 
     config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_with_mask; 
    end 
elseif strcmp(config.forward_pass_scheme{1}, 'conv_v_sr') 
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput_SR; 
else 
    config.pipeline_backprop{length(config.pipeline_backprop)+1} = @convBinput; 
end  

if strcmp(config.optimization, 'adagrad')   
    config.his_grad = {}; 
    config.fudge_factor = 1e-6; 
    if strcmp(config.forward_pass_scheme{1}, 'conv_v_sr') 
     config.UPDATE_WEIGHTS = @update_weights_adagrad_SR; 
     config.his_grad{1} = {}; 
     for m = 1:config.misc.mask_type 
      config.his_grad{1}{m} = config.NEW_MEM(zeros(size(config.weights{1}{m}))); 
     end 
     for m = 2:length(config.weights) 
      config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m}))); 
     end 
    else 
     config.UPDATE_WEIGHTS = @update_weights_adagrad; 
     for m = 1:length(config.weights) 
      config.his_grad{m} = config.NEW_MEM(zeros(size(config.weights{m}))); % Allocate gradient-history buffers, one per weight matrix (on the GPU when enabled). 
     end 
    end 
else 
    fprintf('optimization method not supported, use adagrad as default\n'); 
    config.UPDATE_WEIGHTS = @update_weights_adagrad; 
end 
end 

I would appreciate it if you could help me sort this problem out.


Please post an MCVE illustrating your uncertainty. – Prune


What do you mean by MCVE?! –


If you don't post any code - you won't get any code-related answers. – lejlot

Answers


In fact, I found that putting

s = RandStream('mt19937ar','Seed',1); 
RandStream.setGlobalStream(s); 

at the top of my function controls the randomness of the weights generated in my CNN. To verify that it works, I ran the same model 5 times and got roughly the same results each time. Thanks, everyone.
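As a side note, newer MATLAB releases provide rng, which achieves the same thing in one line:

rng(1); % seeds the global generator (Mersenne Twister by default)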


I ran into the same problem of non-deterministic results with my TensorFlow code. I also tried setting the seed of the random generator, but it didn't help.
Later I found this discussion: (https://github.com/tensorflow/tensorflow/issues/2732). It points out that the cause is likely the modern GPU architecture, so there seems to be no good workaround until CUDA addresses it.