TensorFlow weights not training (only the biases change)

I want to train a convolutional network to output a single number in the range 0-100. Very quickly, though, the model stops updating its weights and only the biases in the fully connected layers keep changing. I can't figure out why.
I have played around with different numbers of layers and so on, but I always run into the same problem: only the FC biases change.
Here is the code I am currently testing. I have stripped out things like dropout; overfitting is not a concern at the moment. In fact, I would like to overfit the training data just so I can see that my model learns something at all.
from __future__ import print_function
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

###################################################################################
############################# Read Data ###########################################
with tf.name_scope("READ_DATA"):
    def read_my_file_format(filename_queue):
        reader = tf.WholeFileReader()
        key, record_string = reader.read(filename_queue)
        split_res = tf.string_split([key], '_')
        key = split_res.values[5]
        example = tf.image.decode_png(record_string)
        example = tf.image.rgb_to_grayscale(example, name=None)
        processed_example = resize_img(example)
        processed_example = reshape_img(processed_example)
        return processed_example, key

    def resize_img(imgg):
        return tf.image.resize_images(imgg, [102, 525])

    def reshape_img(imgg):
        return tf.reshape(imgg, shape=[102, 525, 1])

    def input_pipeline(bsize=30, num_epochs=None):
        filename_queue = tf.train.string_input_producer(
            tf.train.match_filenames_once("./png_imgs/*.png"), num_epochs=num_epochs, shuffle=True)
        example, label = read_my_file_format(filename_queue)
        min_after_dequeue = bsize
        capacity = min_after_dequeue + 3 * 8
        example_batch, label_batch = tf.train.shuffle_batch(
            [example, label], batch_size=bsize, capacity=capacity,
            min_after_dequeue=min_after_dequeue)
        return example_batch, label_batch

    imb_batch1, label_batch1 = input_pipeline()
    single_img, single_lbl = input_pipeline(bsize=1)
############################# Read Data ###########################################
###################################################################################

# Parameters
#learning_rate = 0.0001
training_iters = 200000
batch_size = 30

# Network Parameters
n_input = 600*300*3
n_classes = 1  # MNIST total classes (0-9 digits)
dropout = 0.75  # Dropout, probability to keep units

# tf Graph input
x = tf.placeholder(tf.float32, [None, 102, 525, 1])
y = tf.placeholder(tf.float32, [None, 1])
keep_prob = tf.placeholder(tf.float32)  # dropout (keep probability)
learning_rate = tf.placeholder(tf.float32)

# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')

# Create model
def conv_net(x, dropout):
    # Convolution Layer
    with tf.variable_scope('conv1') as scope:
        w = tf.get_variable('weights', [5, 5, 1, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv1 = conv2d(x, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.variable_scope('conv2') as scope:
        w = tf.get_variable('weights', [5, 5, 32, 32], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [32], initializer=tf.random_normal_initializer())
        conv2 = conv2d(conv1, w, b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.name_scope("Maxpool"):
        conv2 = maxpool2d(conv2, k=2)

    with tf.variable_scope('FC1') as scope:
        w = tf.get_variable('weights', [32*263*51, 64], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [64], initializer=tf.random_normal_initializer())
        FC1 = tf.reshape(conv2, [-1, w.get_shape().as_list()[0]])
        FC1 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)

    with tf.variable_scope('FC2') as scope:
        w = tf.get_variable('weights', [64, 1], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('biases', [1], initializer=tf.random_normal_initializer())
        FC2 = tf.add(tf.matmul(FC1, w), b)
        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)
    return FC2

# Construct model
pred = conv_net(x, keep_prob)

def cost():
    with tf.name_scope("Cost"):
        diff = tf.abs(tf.subtract(y, pred))
        cost = tf.reduce_mean(diff)
        print(cost)
        tf.summary.histogram('Label', y)
        tf.summary.histogram('predicted', pred)
        tf.summary.scalar('cost', cost)
        return cost

with tf.name_scope("Optimizer"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost())
    # optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost())

# Initializing the variables
saver = tf.train.Saver()
init = tf.global_variables_initializer()
merged = tf.summary.merge_all()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    writer = tf.summary.FileWriter("/tmp/tensorboard/log01")
    writer.add_graph(sess.graph)
    step = 1
    l_rate = 0.1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        print("step: ", step)
        batch_x, batch_y = sess.run([imb_batch1, label_batch1])
        batch_y = batch_y.reshape(-1, 1)
        if step % 100 == 0:
            l_rate = l_rate / 5
            if l_rate < 0.000001:
                l_rate = 0.000001
        if step > 20:
            _, sumry = sess.run([optimizer, merged], feed_dict={x: batch_x, y: batch_y,
                                                                keep_prob: dropout, learning_rate: l_rate})
            writer.add_summary(sumry, step)
        else:
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
                                           keep_prob: dropout, learning_rate: l_rate})
        step += 1
    print("Training Done!")
    coord.request_stop()
    coord.join(threads)
Is there a silly mistake somewhere in this code that causes this?
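For debugging, the variable histograms above only show the weights themselves, not whether they receive any gradient. A rough sketch of how the per-variable gradient norms could be logged to TensorBoard, using the same TF 1.x optimizer API with compute_gradients/apply_gradients in place of the single minimize() call (purely illustrative, not part of the script above):

# Illustrative sketch only: split minimize() into compute_gradients/apply_gradients
# so the gradient magnitude of every variable can be logged to TensorBoard.
with tf.name_scope("Optimizer"):
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grads_and_vars = opt.compute_gradients(cost())  # list of (gradient, variable) pairs
    for g, v in grads_and_vars:
        if g is not None:
            # e.g. 'conv1/weights_gradient_norm', 'FC2/biases_gradient_norm', ...
            tf.summary.scalar(v.op.name + '_gradient_norm', tf.norm(g))
    optimizer = opt.apply_gradients(grads_and_vars)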
My hunch: to me this could indicate that the learning rate is too high. Since you are changing the learning rate during training... maybe you could try a constant LR and see whether the problem persists? – jjmontes
@jjmontes, yes, I have tried constant learning rates, both large and small, but the problem remains. My hope was to start with a large LR so that the weights would be "shocked" into more aggressive changes, and a finer LR later on would then fine-tune them. But no such luck. – Simmeman
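For reference, that kind of "large LR first, finer LR later" schedule can also be written with TensorFlow's built-in exponential decay instead of the manual l_rate/5 logic in the training loop. A rough sketch; the starting rate, decay_steps and decay_rate below are arbitrary placeholder values:

# Sketch: let TensorFlow decay the learning rate instead of dividing l_rate by 5 in the loop.
# The starting rate, decay_steps and decay_rate are arbitrary placeholders.
global_step = tf.Variable(0, trainable=False, name='global_step')
decayed_lr = tf.train.exponential_decay(0.001, global_step,
                                        decay_steps=100, decay_rate=0.5, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate=decayed_lr).minimize(cost(), global_step=global_step)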
Have you tried switching xavier_initializer() to a normal one? – Dotan
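For illustration, that suggestion would roughly amount to swapping the initializer inside the variable scopes above, for example in conv1 (the stddev value here is an arbitrary placeholder):

# Sketch of the suggestion: use a plain (truncated) normal initializer instead of Xavier.
# stddev=0.1 is an arbitrary placeholder value.
w = tf.get_variable('weights', [5, 5, 1, 32],
                    initializer=tf.truncated_normal_initializer(stddev=0.1))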