我試圖預測時間序列數據,但是通過在訓練和預測之前抵消了date_offset
時間點的結果。這樣做的原因是嘗試使用當前數據預測未來的date_offset
時間點。一個例子見http://glowingpython.blogspot.co.za/2015/01/forecasting-beer-consumption-with.html。使用日期偏移不正確的scikit-learn線性模型預測
因此,在總結: data = [1,2,3,4,5]
應該預測result = [2,3,4,5,6]
如果date_offset = 1
下面對劇情結果表明紅線由date_offset
被轉移,而不是預測date_offset
到未來。無論我有多大date_offset
,它都在不斷變化,並且不會預測我的最後結果,即result = 5
(已知)。實際上,紅線根本不應該移位,只是寬鬆的準確度更大的date_offset
成爲。我究竟做錯了什麼?
參見下面的例子代碼和生成的圖像:
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
date_offset = 1
data = np.array([9330.0, 9470.0, 9550.0, 9620.0, 9600.0, 9585.0, 9600.0, 9600.0, 9430.0, 9460.0, 9450.0, 9650.0, 9620.0, 9650.0, 9500.0, 9400.0, 9165.0, 9100.0, 8755.0, 8850.0, 8990.0, 9150.0, 9195.0, 9175.0, 9250.0, 9200.0, 9350.0, 9280.0, 9370.0, 9470.0, 9445.0, 9440.0, 9280.0, 9325.0, 9170.0, 9270.0, 9200.0, 9450.0, 9510.0, 9371.0, 9499.0, 9499.0, 9400.0, 9500.0, 9550.0, 9670.0, 9700.0, 9760.0, 9767.4599999999991, 9652.0, 9520.0, 9600.0, 9610.0, 9700.0, 9825.0, 9900.0, 9950.0, 9801.0, 9770.0, 9545.0, 9630.0, 9710.0, 9700.0, 9700.0, 9600.0, 9615.0, 9575.0, 9500.0, 9600.0, 9480.0, 9565.0, 9510.0, 9475.0, 9600.0, 9400.0, 9400.0, 9400.0, 9300.0, 9430.0, 9410.0, 9380.0, 9320.0, 9000.0, 9100.0, 9000.0, 9200.0, 9210.0, 9251.0, 9460.0, 9400.0, 9600.0, 9621.0, 9440.0, 9490.0, 9675.0, 9850.0, 9680.0, 10100.0, 9900.0, 10100.0, 9949.0, 10040.0, 10050.0, 10200.0, 10400.0, 10350.0, 10200.0, 10175.0, 10001.0, 10110.0, 10400.0, 10401.0, 10300.0, 10548.0, 10515.0, 10475.0, 10200.0, 10481.0, 10500.0, 10540.0, 10559.0, 10300.0, 10400.0, 10202.0, 10330.0, 10450.0, 10540.0, 10540.0, 10650.0, 10450.0, 10550.0, 10501.0, 10206.0, 10250.0, 10345.0, 10225.0, 10330.0, 10506.0, 11401.0, 11245.0, 11360.0, 11549.0, 11415.0, 11450.0, 11460.0, 11600.0, 11530.0, 11450.0, 11402.0, 11299.0])
data = data[np.newaxis].T
results = np.array([9470.0, 9545.0, 9635.0, 9640.0, 9600.0, 9622.0, 9555.0, 9429.0, 9495.0, 9489.0, 9630.0, 9612.0, 9630.0, 9501.0, 9372.0, 9165.0, 9024.0, 8780.0, 8800.0, 8937.0, 9051.0, 9100.0, 9166.0, 9220.0, 9214.0, 9240.0, 9254.0, 9400.0, 9450.0, 9470.0, 9445.0, 9301.0, 9316.0, 9170.0, 9270.0, 9251.0, 9422.0, 9466.0, 9373.0, 9440.0, 9415.0, 9410.0, 9500.0, 9520.0, 9620.0, 9705.0, 9760.0, 9765.0, 9651.0, 9520.0, 9600.0, 9610.0, 9700.0, 9805.0, 9900.0, 9950.0, 9800.0, 9765.0, 9602.0, 9630.0, 9790.0, 9710.0, 9800.0, 9649.0, 9580.0, 9780.0, 9560.0, 9501.0, 9511.0, 9530.0, 9498.0, 9475.0, 9595.0, 9500.0, 9460.0, 9400.0, 9310.0, 9382.0, 9375.0, 9385.0, 9320.0, 9100.0, 8990.0, 9045.0, 9129.0, 9201.0, 9251.0, 9424.0, 9440.0, 9500.0, 9621.0, 9490.0, 9512.0, 9599.0, 9819.0, 9684.0, 10025.0, 9984.0, 10110.0, 9950.0, 10048.0, 10095.0, 10200.0, 10338.0, 10315.0, 10200.0, 10166.0, 10095.0, 10110.0, 10400.0, 10445.0, 10360.0, 10548.0, 10510.0, 10480.0, 10180.0, 10488.0, 10520.0, 10510.0, 10565.0, 10450.0, 10400.0, 10240.0, 10338.0, 10410.0, 10540.0, 10481.0, 10521.0, 10530.0, 10325.0, 10510.0, 10446.0, 10249.0, 10236.0, 10211.0, 10340.0, 10394.0, 11370.0, 11250.0, 11306.0, 11368.0, 11415.0, 11400.0, 11452.0, 11509.0, 11500.0, 11455.0, 11400.0, 11300.0, 11369.0])
# Date offset to predict next i-days results
data = data[:-date_offset]
results = results[date_offset:]
train_data = data[:-50]
train_results = results[:-50]
test_data = data[-50:]
test_results = results[-50:]
regressor = linear_model.BayesianRidge(normalize=True)
regressor.fit(train_data, train_results)
plt.figure(figsize=(8,6))
plt.plot(regressor.predict(test_data), '--', color='#EB3737', linewidth=2, label='Prediction')
plt.plot(test_results, label='True', color='green', linewidth=2)
plt.legend(loc='best')
plt.show()
所有的