2017-07-24 242 views
0

最初,我有6列的csv文件:日期,電力消耗和對消耗有影響4個其他氣候特性(如溫度,溼度等)LSTM多對一預測使用keras輸入形狀

到目前爲止,我只能在消耗列上運行我的LSTM,它給了我非常準確的結果,但我需要爲我的LSTM提供其他功能。我試圖修改Python代碼根據以前的意見here但仍然有一個重整錯誤。

下面就一些修改後,我的代碼:

import numpy 
import matplotlib.pyplot as plt 
import pandas 
import math 

from keras.models import Sequential 
from keras.layers import Dense, LSTM, Dropout 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 


# convert an array of values into a dataset matrix 

def create_dataset(dataset, look_back=1): 
    dataX, dataY = [], [] 
    for i in range(len(dataset) - look_back - 1): 
    a = dataset[i:(i + look_back), :] 
    dataX.append(a) 
    dataY.append(dataset[i + look_back, 2]) 
    return numpy.array(dataX), numpy.array(dataY) 


    # fix random seed for reproducibility 
numpy.random.seed(7) 


# load the dataset 
dataframe = pandas.read_csv('out_meteo.csv', engine='python') 
dataset = dataframe.values 

# normalize the dataset 
scaler = MinMaxScaler(feature_range=(0, 1)) 
dataset = scaler.fit_transform(dataset) 

# split into train and test sets 
train_size = int(len(dataset) * 0.67) 
test_size = len(dataset) - train_size 
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :] 

# reshape into X=t and Y=t+1 
look_back = 3 
trainX, trainY = create_dataset(train, look_back) 
testX, testY = create_dataset(test, look_back) 

# reshape input to be [samples, time steps, features] 
trainX = numpy.reshape(trainX, (trainX.shape[0], look_back, 3)) 
testX = numpy.reshape(testX, (testX.shape[0],look_back, 3)) 

# create and fit the LSTM network 

model = Sequential() 
model.add(LSTM(4, input_shape=(look_back,3))) 
model.add(Dense(1)) 
model.compile(loss='mean_squared_error', optimizer='adam') 
history= model.fit(trainX, trainY,validation_split=0.33, nb_epoch=5, batch_size=32) 



# make predictions 
trainPredict = model.predict(trainX) 
testPredict = model.predict(testX) 

# Get something which has as many features as dataset 
trainPredict_extended = numpy.zeros((len(trainPredict),3)) 
# Put the predictions there 
trainPredict_extended[:,2] = trainPredict 
# Inverse transform it and select the 3rd column. 
trainPredict = scaler.inverse_transform(trainPredict_extended)[:,2] 

print(trainPredict) 
# Get something which has as many features as dataset 
testPredict_extended = numpy.zeros((len(testPredict),3)) 
# Put the predictions there 
testPredict_extended[:,2] = testPredict[:,0] 
# Inverse transform it and select the 3rd column. 
testPredict = scaler.inverse_transform(testPredict_extended)[:,2] 


trainY_extended = numpy.zeros((len(trainY),3)) 
trainY_extended[:,2]=trainY 
trainY=scaler.inverse_transform(trainY_extended)[:,2] 


testY_extended = numpy.zeros((len(testY),3)) 
testY_extended[:,2]=testY 
testY=scaler.inverse_transform(testY_extended)[:,2] 


# calculate root mean squared error 
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict)) 
print('Train Score: %.2f RMSE' % (trainScore)) 
testScore = math.sqrt(mean_squared_error(testY, testPredict)) 
print('Test Score: %.2f RMSE' % (testScore)) 

# shift train predictions for plotting 
trainPredictPlot = numpy.empty_like(dataset) 
trainPredictPlot[:, :] = numpy.nan 
trainPredictPlot[look_back:len(trainPredict)+look_back, 2] = trainPredict 

# shift test predictions for plotting 
testPredictPlot = numpy.empty_like(dataset) 
testPredictPlot[:, :] = numpy.nan 
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, 2] = testPredict 

# plot baseline and predictions 
plt.plot(scaler.inverse_transform(dataset)) 
plt.plot(trainPredictPlot) 
plt.plot(testPredictPlot) 
plt.show() 

我得到的錯誤是以下

Traceback (most recent call last): 
    File "desp.py", line 48, in <module> 
    trainX = numpy.reshape(trainX, (trainX.shape[0], look_back, 3)) 
    File "/usr/local/lib/python2.7/dist-packages/numpy/core/fromnumeric.py", line 232, in reshape 
    return _wrapfunc(a, 'reshape', newshape, order=order) 
    File "/usr/local/lib/python2.7/dist-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc 
    return getattr(obj, method)(*args, **kwds) 
ValueError: cannot reshape array of size 35226 into shape (1957,3,3) 

請注意,我仍然是一個新手,而且重塑的概念仍然是一個對我來說一點點ambigus。

+0

這就是答案:不能重塑規模35226型數組形狀(1957,3,3) – Paddy

+0

但沒關係,給我請了trainX的形狀和testX。 – Paddy

+0

trainX.shape =(1957,3,6) testX.shape =(963,3,6) –

回答

0

這裏是我認爲需要的所有列

import numpy 
import matplotlib.pyplot as plt 
import pandas 
import math 

from keras.models import Sequential 
from keras.layers import Dense, LSTM, Dropout 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 
# convert an array of values into a dataset matrix 
def create_dataset(dataset, look_back=1): 
    dataX, dataY = [], [] 
    for i in range(len(dataset) - look_back - 1): 
    a = dataset[i:(i + look_back), :] 
    dataX.append(a) 
    dataY.append(dataset[i + look_back, 2]) 
    return numpy.array(dataX), numpy.array(dataY) 


# fix random seed for reproducibility 
numpy.random.seed(7) 

#load the dataset 
dataframe = pandas.read_csv('out_meteo.csv', engine='python') 
dataset = dataframe.values 

# normalize the dataset 
scaler = MinMaxScaler(feature_range=(0, 1)) 
dataset = scaler.fit_transform(dataset) 

# split into train and test sets 
train_size = int(len(dataset) * 0.7) 
test_size = len(dataset) - train_size 
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :] 

# reshape into X=t and Y=t+1 
look_back = 3 
trainX, trainY = create_dataset(train, look_back) 
testX, testY = create_dataset(test, look_back) 


# create and fit the LSTM network 

model = Sequential() 
model.add(LSTM(20, input_shape=(look_back,6))) 
model.add(Dense(1)) 
model.compile(loss='mean_squared_error', optimizer='adam') 
history= model.fit(trainX, trainY,validation_split=0.33, nb_epoch=15, batch_size=15) 

# make predictions 
trainPredict = model.predict(trainX) 
testPredict = model.predict(testX) 

print(trainPredict) 

# calculate root mean squared error 
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict)) 
print('Train Score: %.2f RMSE' % (trainScore)) 
testScore = math.sqrt(mean_squared_error(testY, testPredict)) 
print('Test Score: %.2f RMSE' % (testScore)) 

# shift train predictions for plotting 
trainPredictPlot = numpy.empty_like(dataset) 
trainPredictPlot[:, :] = numpy.nan 
trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict 

# shift test predictions for plotting 
testPredictPlot = numpy.empty_like(dataset) 
testPredictPlot[:, :] = numpy.nan 
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict 

# plot baseline and predictions 
plt.plot((dataset)) 
plt.plot(trainPredictPlot) 
plt.plot(testPredictPlot) 
plt.show() 

的迄今爲止最後的代碼,它在我所有的csv列上運行良好,我也刪除了許多行(重整,MinMAxScaler轉換),但仍然無法正確顯示我的最終數據(具有實際值),它顯示了非常小的值或嚴格的行。
該數據集的回程的火車和測試得分分別爲0.03和0.05