用於使用Tensorflow進行預測的多層LSTM

我在跟隨this repo以使用RNN進行時間序列預測。但在這個回購中，測試錯誤達到了24％左右。所以我嘗試使用多層LSTM模型作爲提高預測精度的方法。代碼如下：用於使用Tensorflow進行預測的多層LSTM

import numpy as np 
import tensorflow as tf 
from tensorflow.contrib import rnn 
import data_loader 
import matplotlib.pyplot as plt 


class SeriesPredictor(object): 

    def __init__(self, input_dim, seq_size, lstm_size, num_layers=2): 

     # Hyperparameters 
     self.input_dim = input_dim 
     self.seq_size = seq_size 
     self.lstm_size = lstm_size 
     self.num_layers = num_layers 

     # Weight variables and input placeholders 
     self.W_out = tf.Variable(tf.random_normal([lstm_size, 1]), name='W_out') 
     self.b_out = tf.Variable(tf.random_normal([1]), name='b_out') 
     self.x = tf.placeholder(tf.float32, [None, seq_size, input_dim]) 
     self.y = tf.placeholder(tf.float32, [None, seq_size]) 

     # Cost optimizer 
     self.cost = tf.reduce_mean(tf.square(self.model(2) - self.y)) 
     self.train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(self.cost) 

     # Auxiliary ops 
     self.saver = tf.train.Saver() 

    def model(self, num_layers): 
     """ 
     :param x: inputs of size [T, batch_size, input_size] 
     :param W: matrix of fully-connected output layer weights 
     :param b: vector of fully-connected output layer biases 
     """ 
     cell = rnn.BasicLSTMCell(self.lstm_size) 
     stacked_lstm_cell = tf.contrib.rnn.MultiRNNCell(
      [tf.contrib.rnn.DropoutWrapper(cell, 
              output_keep_prob=0.8) 
      for _ in range(num_layers)] 
     ) 
     outputs, states = tf.nn.dynamic_rnn(stacked_lstm_cell, self.x, dtype=tf.float32) 
     num_examples = tf.shape(self.x)[0] 
     W_repeated = tf.tile(tf.expand_dims(self.W_out, 0), [num_examples, 1, 1]) 
     out = tf.matmul(outputs, W_repeated) + self.b_out 
     out = tf.squeeze(out) 
     return out 

    def train(self, train_x, train_y, test_x, test_y): 
     with tf.Session() as sess: 
      tf.get_variable_scope().reuse_variables() 
      sess.run(tf.global_variables_initializer()) 
      max_patience = 3 
      patience = max_patience 
      min_test_err = float('inf') 
      step = 0 
      while patience > 0: 
       _, train_err = sess.run([self.train_op, self.cost], feed_dict={ 
        self.x: train_x, self.y: train_y}) 
       if step % 100 == 0: 
        test_err = sess.run(self.cost, feed_dict={self.x: test_x, self.y: test_y}) 
        print('step: {}\t\ttrain err: {}\t\ttest err: {}'.format(step, train_err, test_err)) 
        if test_err < min_test_err: 
         min_test_err = test_err 
         patience = max_patience 
        else: 
         patience -= 1 
       step += 1 
      save_path = self.saver.save(
       sess, 'model.ckpt') 
      print('Model saved to {}'.format(save_path)) 

    def test(self, sess, test_x): 
     tf.get_variable_scope().reuse_variables() 
     self.saver.restore(sess, './model.ckpt') 
     output = sess.run(self.model(2), feed_dict={self.x: test_x}) 
     return output 

    def plot_results(train_x, predictions, actual, filename): 
     plt.figure() 
     num_train = len(train_x) 
     plt.plot(list(range(num_train)), train_x, color='b', label='training data') 
     plt.plot(list(range(num_train, num_train + len(predictions))), 
       predictions, color='r', label='predicted') 
     plt.plot(list(range(num_train, num_train + len(actual))), 
       actual, color='g', label='test data') 
     plt.legend() 
     if filename is not None: 
      plt.savefig(filename) 
     else: 
      plt.show() 


if __name__ == '__main__': 
    seq_size = 5 
    predictor = SeriesPredictor(input_dim=1, seq_size=seq_size, lstm_size=100) 
    data = data_loader.load_series('international-airline-passengers.csv') 
    train_data, actual_vals = data_loader.split_data(data) 

    train_x, train_y = [], [] 
    for i in range(len(train_data) - seq_size - 1): 
     train_x.append(np.expand_dims(train_data[i:i + seq_size], axis=1).tolist()) 
     train_y.append(train_data[i + 1:i + seq_size + 1]) 

    test_x, test_y = [], [] 
    for i in range(len(actual_vals) - seq_size - 1): 
     test_x.append(np.expand_dims(actual_vals[i:i + seq_size], axis=1).tolist()) 
     test_y.append(actual_vals[i + 1:i + seq_size + 1]) 

    predictor.train(train_x, train_y, test_x, test_y) 

    with tf.Session() as sess: 
     predicted_vals = predictor.test(sess, test_x)[:, 0] 
     print('predicted_vals', np.shape(predicted_vals)) 
     plot_results(train_data, predicted_vals, actual_vals, 'predictions.png') 

     prev_seq = train_x[-1] 
     predicted_vals = [] 
     for i in range(20): 
      next_seq = predictor.test(sess, [prev_seq]) 
      predicted_vals.append(next_seq[-1]) 
      prev_seq = np.vstack((prev_seq[1:], next_seq[-1])) 
     plot_results(train_data, predicted_vals, actual_vals, 'hallucinations.png')

但我發現了以下錯誤：

ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (200, 400) and found shape (101, 400).

我想長來解決問題。但沒有得到原因。任何人都可以請指導我爲什麼我得到這個錯誤？

謝謝！

來源

2017-10-07 Beta

多層RNN的每一層都應該有一個不同的圖層，因此您必須多次調用BasicLSTMCell構造函數的層數。在DropoutWrapper中調用rnn.BasicLSTMCell（self.lstm_size），而不是'cell'。 –

@AbhishekBansal：謝謝你的回答！它解決了這個問題。如果你能解釋爲什麼「單元格」不工作（因爲「單元格」不過是「rnn.BasicLSTMCell（self.lstm_size）」）。如果你把答案放在答案中，我會將其標記爲答案。再次感謝！ – Beta

對象cell只是BasicLSTMCell類的一個實例。您在MultiRNNCell的所有圖層中使用同一個對象。相反，每個圖層應該有一個不同的類BasicLSTMCell的對象實例。

因此，您應該通過每次調用構造函數爲每個圖層實例化一個單獨的實例。

stacked_lstm_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.DropoutWrapper(rnn.BasicLSTMCell(self.lstm_size),output_keep_prob=0.8) for _ in range(num_layers)])

來源

2017-10-07 16:18:01

謝謝阿布舍克！ – Beta

沒問題，很高興幫助。 –

用於使用Tensorflow進行預測的多層LSTM

回答

相關問題