AdadeltaOptimizer example code: does anyone have an example of using tf.train.AdadeltaOptimizer with good results?
I have a TF graph that was originally set up with tf.train.AdamOptimizer and works well. When I replace it with AdadeltaOptimizer, using the default parameters, it gives poor results.
I use CUDA 7.5.
Below is example code using the AdadeltaOptimizer optimizer; it works with Adam as well. The practical difference between them here is that Adam is largely insensitive to the learning rate, while Adadelta is not. I recommend reading more about optimization algorithms (for example here). In your own example, try making the learning rate smaller or larger (this is called hyperparameter optimization). In my experience, Adam is a very good optimizer for RNNs, better than AdaDelta (with the example code below, Adam reaches a better score faster). On the other hand, for CNNs, SGD + Momentum works best.
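To make the learning-rate point concrete, here is a minimal, self-contained sketch (the toy objective and the values 0.001 / 1.0 are illustrative assumptions, not tuned settings). TensorFlow's AdadeltaOptimizer defaults to learning_rate=0.001, which makes very small updates compared with a value near 1.0 (closer to the original Adadelta formulation), while Adam's default usually just works. In the Bi-LSTM example below, the same idea applies: the only line that changes is the one constructing the optimizer.

import tensorflow as tf

# Toy quadratic objective so the snippet runs on its own (illustrative only).
w = tf.Variable(5.0)
cost = tf.square(w - 2.0)

# Adam usually behaves well with its default step size.
adam_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

# Adadelta with TF's default learning_rate=0.001 makes tiny updates here;
# a value near 1.0 behaves very differently, hence the need to tune it.
adadelta_default = tf.train.AdadeltaOptimizer().minimize(cost)
adadelta_tuned = tf.train.AdadeltaOptimizer(learning_rate=1.0, rho=0.95, epsilon=1e-6).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(200):
        sess.run(adadelta_tuned)  # swap in adam_step or adadelta_default to compare
    print(sess.run(w))            # compare how far w has moved toward 2.0 under each optimizer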
Code for MNIST classification using a bidirectional LSTM:
# MNIST classification using a bidirectional LSTM (Bi-LSTM)
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

learning_rate = 0.01
training_epochs = 100
batch_size = 64
seq_length = 28    # each image is fed as a sequence of 28 rows
heigh_image = 28   # each row has 28 pixels
hidden_size = 128
class_numer = 10

input = tf.placeholder(tf.float32, [None, None, heigh_image])
target = tf.placeholder(tf.float32, [None, class_numer])
seq_len = tf.placeholder(tf.int32, [None])

def fulconn_layer(input_data, output_dim, activation_func=None):
    # Fully connected layer on top of the recurrent output
    input_dim = int(input_data.get_shape()[1])
    W = tf.Variable(tf.random_normal([input_dim, output_dim]))
    b = tf.Variable(tf.random_normal([output_dim]))
    if activation_func:
        return activation_func(tf.matmul(input_data, W) + b)
    else:
        return tf.matmul(input_data, W) + b

with tf.name_scope("BiLSTM"):
    with tf.variable_scope('forward'):
        lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    with tf.variable_scope('backward'):
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell,
                                                      inputs=input, sequence_length=seq_len,
                                                      dtype=tf.float32, scope="BiLSTM")

# The Bi-LSTM returns two separate output tensors (forward and backward), so merge them
# along the feature dimension (note: in TF >= 1.0 the argument order is tf.concat(outputs, 2)).
outputs = tf.concat(2, outputs)

# For classification we only need the output of a single time step; at step 0 the backward
# cell has already seen the whole sequence, so its half of the concatenation summarizes the image.
last_output = outputs[:, 0, :]

# Create the final classification layer
yhat = fulconn_layer(last_output, class_numer)
# (in TF >= 1.0 this call requires keyword arguments: labels=target, logits=yhat)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(yhat, target))
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)  # or tf.train.AdamOptimizer
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32))

gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session:
    session.run(tf.initialize_all_variables())
    print("Start Learning")
    for epoch in range(training_epochs):
        for i in range(int(mnist.train.num_examples / batch_size)):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            x_batch = x_batch.reshape([batch_size, seq_length, heigh_image])
            train_seq_len = np.ones(batch_size) * seq_length
            session.run([optimizer], feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})

        # Per-epoch evaluation on the last training batch and on the full test set
        train_accuracy = session.run(accuracy, feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})
        x_test = mnist.test.images.reshape([-1, seq_length, heigh_image])
        y_test = mnist.test.labels
        test_seq_len = np.ones(x_test.shape[0]) * seq_length
        test_accuracy = session.run(accuracy, feed_dict={input: x_test, target: y_test, seq_len: test_seq_len})
        print("epoch: %d, train_accuracy: %.3f, test_accuracy: %.3f" % (epoch, train_accuracy, test_accuracy))