
I want to train a convolutional network to output a single number in the range 0-100. But very quickly the model stops updating its weights, and only the biases in the fully connected layers keep changing. I can't figure out why the weights don't train (only the biases change).


I have played around with different numbers of layers and so on, but I always run into the same problem: only the FC biases change.

Here is the code as it is currently being tested. I have stripped out things like dropout; overfitting is not a concern at the moment. In fact, I would like to overfit the data just so I can see that my model learns anything at all.

from __future__ import print_function 

import tensorflow as tf 

from tensorflow.examples.tutorials.mnist import input_data 
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 


################################################################################### 
############################# Read Data ########################################### 

with tf.name_scope("READ_DATA"): 

    def read_my_file_format(filename_queue):
        reader = tf.WholeFileReader()
        key, record_string = reader.read(filename_queue)
        split_res = tf.string_split([key], '_')
        key = split_res.values[5]
        example = tf.image.decode_png(record_string)
        example = tf.image.rgb_to_grayscale(example, name=None)

        processed_example = resize_img(example)
        processed_example = reshape_img(processed_example)
        return processed_example, key


    def resize_img(imgg):
        return tf.image.resize_images(imgg, [102, 525])

    def reshape_img(imgg):
        return tf.reshape(imgg, shape=[102, 525, 1])


    def input_pipeline(bsize=30, num_epochs=None):
        filename_queue = tf.train.string_input_producer(
            tf.train.match_filenames_once("./png_imgs/*.png"),
            num_epochs=num_epochs, shuffle=True)
        example, label = read_my_file_format(filename_queue)

        min_after_dequeue = bsize
        capacity = min_after_dequeue + 3 * 8

        example_batch, label_batch = tf.train.shuffle_batch(
            [example, label], batch_size=bsize, capacity=capacity,
            min_after_dequeue=min_after_dequeue)
        return example_batch, label_batch

    imb_batch1, label_batch1 = input_pipeline()

    single_img, single_lbl = input_pipeline(bsize=1)

############################# Read Data ########################################### 
################################################################################### 



# Parameters 
#learning_rate = 0.0001 
training_iters = 200000 
batch_size = 30 

# Network Parameters 
n_input = 600*300*3  # unused; the actual input images are 102x525x1
n_classes = 1  # single regression output (target is a number 0-100)
dropout = 0.75 # Dropout, probability to keep units

# tf Graph input 
x = tf.placeholder(tf.float32, [None, 102,525,1]) 
y = tf.placeholder(tf.float32, [None, 1]) 
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) 
learning_rate = tf.placeholder(tf.float32) 


# Create some wrappers for simplicity 
def conv2d(x, W, b, strides=1): 
    # Conv2D wrapper, with bias and relu activation 
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 
    x = tf.nn.bias_add(x, b) 
    return tf.nn.relu(x) 


def maxpool2d(x, k=2): 
    # MaxPool2D wrapper 
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 
          padding='SAME') 


# Create model 
def conv_net(x, dropout): 

    # Convolution Layer 
    with tf.variable_scope('conv1') as scope:
        w = tf.get_variable('weights', [5, 5, 1, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv1 = conv2d(x, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.variable_scope('conv2') as scope:
        w = tf.get_variable('weights', [5, 5, 32, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv2 = conv2d(conv1, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.name_scope("Maxpool"):
        conv2 = maxpool2d(conv2, k=2)

    with tf.variable_scope('FC1') as scope:
        w = tf.get_variable('weights', [32*263*51, 64], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
        FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
        FC1 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)


    with tf.variable_scope('FC2') as scope:
        w = tf.get_variable('weights', [64, 1], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [1], initializer=tf.random_normal_initializer())
        FC2 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    return FC2


# Construct model 
pred = conv_net(x, keep_prob) 

def cost():
    with tf.name_scope("Cost"):
        diff = tf.abs(tf.subtract(y, pred))
        cost = tf.reduce_mean(diff)
        print(cost)
        tf.summary.histogram('Label', y)
        tf.summary.histogram('predicted', pred)
        tf.summary.scalar('cost', cost)
    return cost

with tf.name_scope("Optimizer"): 
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost()) 
# optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost()) 


# Initializing the variables 
saver = tf.train.Saver() 
init = tf.global_variables_initializer() 
merged = tf.summary.merge_all() 


# Launch the graph 
with tf.Session() as sess: 

    sess.run(init) 

    coord = tf.train.Coordinator() 
    threads = tf.train.start_queue_runners(coord=coord) 
    writer = tf.summary.FileWriter("/tmp/tensorboard/log01") 
    writer.add_graph(sess.graph) 
    step = 1 
    l_rate= 0.1 

    # Keep training until reach max iterations 
    while step * batch_size < training_iters:
        print("step: ", step)
        batch_x, batch_y = sess.run([imb_batch1, label_batch1])

        batch_y = batch_y.reshape(-1, 1)
        if step % 100 == 0:
            l_rate = l_rate / 5

        if l_rate < 0.000001:
            l_rate = 0.000001

        if step > 20:
            _, sumry = sess.run([optimizer, merged],
                                feed_dict={x: batch_x, y: batch_y,
                                           keep_prob: dropout, learning_rate: l_rate})
            writer.add_summary(sumry, step)
        else:
            sess.run(optimizer,
                     feed_dict={x: batch_x, y: batch_y,
                                keep_prob: dropout, learning_rate: l_rate})

        step += 1

    print("Training Done!") 



    coord.request_stop() 
    coord.join(threads) 

Is there a silly mistake somewhere in this code that is causing this?


My hunch: to me this could indicate that the learning rate is too high. Since you are changing the learning rate during training... maybe you could try a constant LR and see whether the problem persists? – jjmontes
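For reference, a minimal sketch of that suggestion, reusing the existing learning_rate placeholder and training loop from the question (the constant 1e-4 is just an illustrative choice):

l_rate = 1e-4  # fixed learning rate for the whole run (illustrative value)
# inside the training loop, skip the step-based decay and always feed the same value
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                               keep_prob: dropout, learning_rate: l_rate})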


@jjmontes, yes, I have tried constant learning rates, both large and small, but the problem remains. My hope with starting at a larger LR was that the weights would be "shocked" into more aggressive changes, with a finer LR doing the tuning later. But no such luck. – Simmeman


Have you tried switching xavier_initializer() to a normal one? – Dotan
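For illustration, a sketch of what that swap might look like for the first conv layer (the same change would apply to the other weight variables; stddev=0.01 is an assumed value):

with tf.variable_scope('conv1') as scope:
    w = tf.get_variable('weights', [5, 5, 1, 32],
                        initializer=tf.random_normal_initializer(stddev=0.01))  # instead of xavier_initializer()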

Answer


There is no nonlinearity in the first fully connected layer, so it adds no value over having just a single fully connected layer.
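A minimal sketch of that fix, keeping the FC1 block from the question unchanged except for the added tf.nn.relu line:

with tf.variable_scope('FC1') as scope:
    w = tf.get_variable('weights', [32*263*51, 64], initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
    FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
    FC1 = tf.add(tf.matmul(FC1, w), b)
    FC1 = tf.nn.relu(FC1)  # nonlinearity between FC1 and FC2, so the two layers do not collapse into one linear map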
