2016-10-27 249 views
0

有沒有人有使用tf.train.AdadeltaOptimizer獲得良好結果的代碼示例?AdadeltaOptimizer示例代碼

我有一個TF圖,最初是用tf.train.AdamOptimizer設置的,並且運行良好。當我用AdadeltaOptimizer替換它時,使用默認參數,它會給出糟糕的結果。

我用Cuda 7.5。

回答

0

以下是使用'AdadeltaOptimizer'優化器的示例代碼。它與'亞當'一起工作。他們之間唯一的區別是Adam對「學習率」和「Adadelta」不敏感。 我建議您閱讀有關優化算法的更多信息(如here)。 在你自己的例子中,試着將「學習率」改爲更小或更大(它被命名爲「超參數優化」)。根據我的經驗,'Adam'對於RNN來說是一個非常好的優化器,比'AdaDelta'更好(使用示例代碼,'Adam'獲得更好的得分更快)。另一方面,對於CNN而言,SGD + Momentum效果最佳。

代碼,瞭解使用雙LSTM MNIST分類:

# Mnist classification using Bi-LSTM 
import tensorflow as tf 
from tensorflow.examples.tutorials.mnist import input_data 
import numpy as np 

mnist = input_data.read_data_sets("MNIST_data", one_hot=True) 
learning_rate = 0.01 
training_epochs = 100 
batch_size = 64 
seq_length = 28 
heigh_image = 28 
hidden_size = 128 
class_numer = 10 
input = tf.placeholder(tf.float32, [None, None, heigh_image]) 
target = tf.placeholder(tf.float32, [None, class_numer]) 
seq_len = tf.placeholder(tf.int32, [None]) 

def fulconn_layer(input_data, output_dim, activation_func=None): 
    input_dim = int(input_data.get_shape()[1]) 
    W = tf.Variable(tf.random_normal([input_dim, output_dim])) 
    b = tf.Variable(tf.random_normal([output_dim])) 
    if activation_func: 
     return activation_func(tf.matmul(input_data, W) + b) 
    else: 
     return tf.matmul(input_data, W) + b  

with tf.name_scope("BiLSTM"): 
    with tf.variable_scope('forward'): 
    lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True) 
    with tf.variable_scope('backward'): 
    lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True) 
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=input,sequence_length=seq_len, dtype=tf.float32, scope="BiLSTM") 

# As we have Bi-LSTM, we have two output, which are not connected. So merge them 
outputs = tf.concat(2, outputs) 
# As we want do classification, we only need the last output from LSTM. 
last_output = outputs[:,0,:] 
# Create the final classification layer 
yhat = fulconn_layer(last_output, class_numer) 

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(yhat, target)) 
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost) # AdamOptimizer 
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32)) 
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3) 
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session: 
    session.run(tf.initialize_all_variables()) 
    print ("Start Learing") 
    for epoch in range(training_epochs): 
     for i in range(int(mnist.train.num_examples/batch_size)): 
      x_batch, y_batch = mnist.train.next_batch(batch_size) 
      x_batch = x_batch.reshape([batch_size, seq_length, heigh_image]) 
      train_seq_len = np.ones(batch_size) * seq_length 
      session.run([optimizer], feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len}) 

     train_accuracy = session.run(accuracy, feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len}) 
     x_test = mnist.test.images.reshape([-1, seq_length, heigh_image]) 
     y_test = mnist.test.labels 
     test_seq_len = np.ones(x_test.shape[0]) * seq_length 
     test_accuracy = session.run(accuracy, feed_dict={input: x_test, target: y_test, seq_len: test_seq_len}) 
     print("epoch: %d, train_accuracy: %3f, test_accuracy: %3f" % (epoch, train_accuracy, test_accuracy))