2017-08-14 17 views
0

我想要構建40級LSTM分類器來分析時間序列數據。我有從13個傳感器收集的13維實時數據。當我運行下面的代碼時,我不斷收到此錯誤消息。當我使用keras庫訓練RNN時,如何糾正我一直得到的尺寸誤差?

ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 1 arrays but instead got the following list of 241458 arrays: [array([[ 0.64817517, 0.12892013, 0.01879949, 0.00946322, 0.00458952, 0.01668651, 0.04776124, 0.03301365, 0.0360659 , 0.15013408, 0.10112171, 0.05494366, 0.02620634],

RNN代碼

from __future__ import print_function 
import keras 
from keras import metrics 
from keras.models import Sequential 
from keras.layers import Dense, LSTM, Dropout, Activation 
from keras.utils import np_utils 
from keras.layers.normalization import BatchNormalization 
from sklearn.cross_validation import train_test_split 
import pandas as pd 
from keras.callbacks import CSVLogger 
from keras.models import load_model 
from keras.layers import LSTM 
import numpy as np 
import tensorflow as tf 
from sklearn.preprocessing import LabelEncoder 
import keras 


def top_k_acc(y_true, y_pred): 
    return metrics.top_k_categorical_accuracy(y_true, y_pred, k=5) 


# train Parameters 
sequence_length = 60 
data_dim = 13 
num_classes = 40 
batch_size = 15000 
epochs = 10 


# tf.set_random_seed(777) # reproducibility 


def MinMaxScaler(data): 
    ''' Min Max Normalization 
    Parameters 
    ---------- 
    data : numpy.ndarray 
     input data to be normalized 
     shape: [Batch size, dimension] 
    Returns 
    ---------- 
    data : numpy.ndarry 
     normalized data 
     shape: [Batch size, dimension] 
    References 
    ---------- 
    .. [1] http://sebastianraschka.com/Articles/2014_about_feature_scaling.html 
    ''' 
    numerator = data - np.min(data, 0) 
    denominator = np.max(data, 0) - np.min(data, 0) 
    # noise term prevents the zero division 
    return numerator/(denominator + 1e-7) 



# Load data 
xy = np.loadtxt('sc_total_for 60s v4.0 test.csv', delimiter=',', skiprows=1) 
x = xy[:, 1:14] 
x = MinMaxScaler(x) 
y = xy[:,0] 


# Build a dataset 
x_data = [] 
y_data = [] 
for i in range(0, len(y) - sequence_length): 
    _x = x[i:i + sequence_length] 
    _y = y[i + sequence_length] 
    # print(_x, "->", _y) 
    x_data.append(_x) 
    y_data.append(_y) 



# One-hot encoding 
encoder = LabelEncoder() 
encoder.fit(y_data) 
encoded_Y = encoder.transform(y_data) 
dummy_y = np_utils.to_categorical(encoded_Y) 



#train/test split 
x_train,x_test,y_train,y_test=train_test_split(x_data,dummy_y,random_state=4,test_size=0.3); 
# print(x_train[0],"->",y_train[0]) 


# Network 
model = Sequential() 
model.add(LSTM(40, batch_input_shape=(batch_size, sequence_length, data_dim),return_sequences=True)) 
model.add(LSTM(40, return_sequences=False)) 
model.add(Dense(40)) 
model.add(Activation("linear")) 

# model.add(Dense(40)) 
# model.add(Dense(25, init='uniform', activation='relu')) 
# model.add(BatchNormalization()) 
# model.add(Dense(30, init='uniform', activation='relu')) 
# model.add(BatchNormalization()) 
# model.add(Dense(40, init='uniform', activation='softmax')) 

model.summary() 


model.compile(loss='mean_squared_error', 
       optimizer='adam', 
       metrics=['accuracy']) 


csv_logger = CSVLogger('LSTM 1111.log') 


history = model.fit(x_train, y_train, 
        batch_size=batch_size, 
        epochs=epochs, 
        verbose=1, 
        validation_data=(x_test, y_test), 
        callbacks=[csv_logger]) 


score = model.evaluate(x_test, y_test, verbose=0) 


predictions=model.predict(x_test) 


# model.save('New Model6 save.h5') 


#plot_model(model, to_file='model1.png') 

# print('Test loss:', score[0]) 
# print('Test accuracy:', score[1]) 

回答

1

的問題是:

# Build a dataset 
x_data = [] 
y_data = [] 
for i in range(0, len(y) - sequence_length): 
    _x = x[i:i + sequence_length] 
    _y = y[i + sequence_length] 
    # print(_x, "->", _y) 
    x_data.append(_x) 
    y_data.append(_y) 

你正在構建2D numpy的陣列的列表爲x_data時Keras預計爲一個單一的,三維數組LSTM。請改爲:

num_samples = len(y) - sequence_length 

x_data = np.zeros((num_samples, sequence_length, data_dim)) 
y_data = np.zeros((num_samples)) 

for i in range(num_samples): 
    x_data[i] = x[i:i + sequence_length] 
    y_data[i] = y[i + sequence_length]