2017-09-04 23 views
0

我正在使用Long Short Term Memory算法處理時間序列項目。我需要將我的預測數據可視化

我需要以其他各列作爲特徵(features)，來預測 out_meteo.csv 的最後一列。在算法結束時，我無法繪製出數據的正確數值：圖上得到的是不切實際的、很小的值，我認爲這與 MinMaxScaler 及其 inverse_transform 方法有關。

這裏是我在不使用其他特徵的情況下，用來預測該變量的 Python 代碼(可以正常工作)：

import numpy 
import matplotlib.pyplot as plt 
from pandas import read_csv 
import math 
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import LSTM 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 

# convert an array of values into a dataset matrix 
def create_dataset(dataset, look_back=1):
    """Slice a 2-D series into look-back windows and next-step targets.

    Window ``k`` holds rows ``k`` to ``k + look_back - 1`` of ``dataset``
    with every column kept; its target is column 0 of row ``k + look_back``.
    ``len(dataset) - look_back - 1`` windows are produced, so the final row
    of ``dataset`` is never used as a target.

    Returns a ``(windows, targets)`` pair of NumPy arrays.
    """
    starts = range(len(dataset) - look_back - 1)
    windows = [dataset[start:start + look_back, :] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return numpy.array(windows), numpy.array(targets)

# Seed NumPy's global random generator so repeated runs start from the same state.
numpy.random.seed(7)

# Read column 5 of the CSV (row 0 is the header) and cast the values to float32.
dataframe = read_csv('out_meteo.csv', usecols=[5], engine='python', header=0)
dataset = dataframe.values.astype('float32')

# Rescale the values into [0, 1]; the scaler is fitted on the whole series.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# Split in row order: the first 70% of the rows train the model, the rest test it.
train_size = int(len(dataset) * 0.7)
test_size = len(dataset) - train_size
train = dataset[:train_size, :]
test = dataset[train_size:, :]

# Build look-back windows (X) and the value that follows each window (Y).
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# Lay the inputs out as [samples, time steps, features] with a single time step.
train_shape = (trainX.shape[0], 1, trainX.shape[1])
test_shape = (testX.shape[0], 1, testX.shape[1])
trainX = numpy.reshape(trainX, train_shape)
testX = numpy.reshape(testX, test_shape)

# An LSTM layer with 4 units feeding one dense output, trained on mean squared error.
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=15, batch_size=15, verbose=2)

# Predict both splits (train first, then test) and undo the [0, 1] scaling.
trainPredict = scaler.inverse_transform(model.predict(trainX))
testPredict = scaler.inverse_transform(model.predict(testX))

# The targets are 1-D, so each is wrapped as a single row before inverting.
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

# Root mean squared error of each split, computed on the unscaled values.
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(testY[0], testPredict))
print('Test Score: %.2f RMSE' % testScore)

# Train predictions start look_back rows into the series; everything else is NaN.
train_stop = look_back + len(trainPredict)
trainPredictPlot = numpy.empty_like(dataset)
trainPredictPlot[:, :] = numpy.nan
trainPredictPlot[look_back:train_stop, :] = trainPredict

# Test predictions are offset past the train rows and the test look-back rows.
test_start = len(trainPredict) + (look_back * 2) + 1
test_stop = len(dataset) - 1
testPredictPlot = numpy.empty_like(dataset)
testPredictPlot[:, :] = numpy.nan
testPredictPlot[test_start:test_stop, :] = testPredict

# Draw the unscaled series with both prediction curves on top of it.
plt.plot(scaler.inverse_transform(dataset))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

我已經把它修改爲讀取所有特徵(即 CSV 中的所有列)，下面就是修改後的版本，它繪製出來的圖是錯誤的：

import numpy 
import matplotlib.pyplot as plt 
from pandas import read_csv 
import math 
from keras.models import Sequential 
from keras.layers import Dense 
from keras.layers import LSTM 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import mean_squared_error 

# convert an array of values into a dataset matrix 
def create_dataset(dataset, look_back=1):
    """Pair each look-back window of ``dataset`` with the value that follows it.

    For every start row ``first`` in ``range(len(dataset) - look_back - 1)``
    the input is ``dataset[first:first + look_back, :]`` (all columns) and
    the label is ``dataset[first + look_back, 0]`` (column 0 only). Because
    of the ``- 1`` in the bound, the last row of ``dataset`` never becomes a
    label.

    Returns two NumPy arrays: the stacked inputs and the labels.
    """
    samples = [
        (dataset[first:first + look_back, :], dataset[first + look_back, 0])
        for first in range(len(dataset) - look_back - 1)
    ]
    inputs = [window for window, _ in samples]
    labels = [label for _, label in samples]
    return numpy.array(inputs), numpy.array(labels)

# fix random seed for reproducibility
numpy.random.seed(7)

# CSV column (0-based) holding the series to predict.
target_column = 5
# Additional CSV columns (0-based) fed to the network as input features.
# Leave empty to reproduce the single-column run. Every selected column must
# be numeric, because the values are cast to float32 below.
extra_feature_columns = []

# load the dataset
used_columns = sorted(set([target_column] + list(extra_feature_columns)))
dataframe = read_csv('out_meteo.csv', usecols=used_columns, engine='python', header=0)
dataset = dataframe.values
dataset = dataset.astype('float32')

# read_csv returns the selected columns in file order, whatever order usecols
# lists them in. create_dataset takes its targets from column 0, so move the
# target column to the front.
target_position = used_columns.index(target_column)
column_order = [target_position] + [
    position for position in range(len(used_columns)) if position != target_position
]
dataset = dataset[:, column_order]
n_features = dataset.shape[1]

# normalize the dataset (each column is scaled to [0, 1] independently)
# NOTE(review): the scaler is fitted on the full series, test rows included;
# fit it on the training rows only if the test score must be leakage-free.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)


def invert_target_scaling(scaled_values):
    """Map scaled target values back to the original units of column 0.

    ``scaler`` was fitted on ``n_features`` columns, so ``inverse_transform``
    needs an array that wide; handing it a single column of predictions is
    what fails once more than one column is loaded. The values are written
    into column 0 of a zero-filled array, inverted, and column 0 is returned
    as a 1-D array.
    """
    padded = numpy.zeros((len(scaled_values), n_features), dtype='float32')
    padded[:, 0] = numpy.reshape(scaled_values, -1)
    return scaler.inverse_transform(padded)[:, 0]


# split into train and test sets
train_size = int(len(dataset) * 0.7)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

# reshape into X=t and Y=t+1
look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
# Each window is flattened into one time step of look_back * n_features
# inputs, which is exactly the old (samples, 1, look_back) layout when only
# one column is loaded.
inputs_per_step = look_back * n_features
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, inputs_per_step))
testX = numpy.reshape(testX, (testX.shape[0], 1, inputs_per_step))

# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, inputs_per_step)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=15, batch_size=15, verbose=2)

# make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# invert predictions (all four results are 1-D arrays in original units)
trainPredict = invert_target_scaling(trainPredict)
trainY = invert_target_scaling(trainY)
testPredict = invert_target_scaling(testPredict)
testY = invert_target_scaling(testY)

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY, trainPredict))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY, testPredict))
print('Test Score: %.2f RMSE' % (testScore))

# shift train predictions for plotting
# The plot arrays are 1-D (one slot per row of the series) so that only the
# target is drawn, not one copy of the prediction per feature column.
trainPredictPlot = numpy.empty_like(dataset[:, 0])
trainPredictPlot[:] = numpy.nan
trainPredictPlot[look_back:len(trainPredict)+look_back] = trainPredict

# shift test predictions for plotting
testPredictPlot = numpy.empty_like(dataset[:, 0])
testPredictPlot[:] = numpy.nan
testPredictPlot[len(trainPredict)+(look_back*2)+1:len(dataset)-1] = testPredict

# plot baseline (the unscaled target column) and predictions
plt.plot(scaler.inverse_transform(dataset)[:, 0])
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

回答

0

對於用來做迴歸的 ANN，你不需要縮放 Y 值。請在調用 look-back 函數(create_dataset)之後，再對 X 進行縮放。

look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# create_dataset returns X as [samples, look_back, columns], but MinMaxScaler
# only accepts 2-D input, so flatten each window into a single row first.
trainX = numpy.reshape(trainX, (trainX.shape[0], -1))
testX = numpy.reshape(testX, (testX.shape[0], -1))

# Fit on the training windows only and reuse those statistics for the test
# windows; trainY and testY are left unscaled on purpose.
scaler = MinMaxScaler(feature_range=(0, 1))
trainX = scaler.fit_transform(trainX)
testX = scaler.transform(testX)

然後繪製實際值與預測值的對比圖。

+0

感謝您的答覆，我已經編輯了我的問題。這個問題更多是關於如何使用多個特徵並繪製最終結果 –

+0

非常混亂。你是想繪製這些特徵，還是只想比較 'testY' 和 'testPredict'？我說的是在送入網絡之前先對數據進行縮放，然後在訓練之後再把縮放還原 – DJK