0

移動使用標準偏差平均值我想安裝使用RandomForestRegressor因爲我考慮this link大熊貓在Python

import pandas as pd 
import math 
import matplotlib 
import matplotlib.pyplot as plt 
import numpy as np 
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor 
from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import r2_score, mean_squared_error, make_scorer 
from sklearn.model_selection import train_test_split 
from math import sqrt 
from sklearn.cross_validation import train_test_split 


n_features=3000 

df = pd.read_csv('cubic32.csv') 

for i in range(1,n_features): 
    df['X_t'+str(i)] = df['X'].shift(i) 

print(df) 

df.dropna(inplace=True) 


X = df.drop('Y', axis=1) 
y = df['Y'] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40) 

X_train = X_train.drop('time', axis=1) 
X_test = X_test.drop('time', axis=1) 


parameters = {'n_estimators': [10]} 
clf_rf = RandomForestRegressor(random_state=1) 
clf = GridSearchCV(clf_rf, parameters, cv=5, scoring='neg_mean_squared_error', n_jobs=-1) 
model = clf.fit(X_train, y_train) 
model.cv_results_['params'][model.best_index_] 
math.sqrt(model.best_score_*-1) 
model.grid_scores_ 

##### 
print() 
print(model.grid_scores_) 
print("The best score: ",model.best_score_) 

print("RMSE:",math.sqrt(model.best_score_*-1)) 

clf_rf.fit(X_train,y_train) 
modelPrediction = clf_rf.predict(X_test) 
print(modelPrediction) 

print("Number of predictions:",len(modelPrediction)) 

meanSquaredError=mean_squared_error(y_test, modelPrediction) 
print("Mean Square Error (MSE):", meanSquaredError) 
rootMeanSquaredError = sqrt(meanSquaredError) 
print("Root-Mean-Square Error (RMSE):", rootMeanSquaredError) 

fig, ax = plt.subplots() 
index_values=range(0,len(y_test)) 

y_test.sort_index(inplace=True) 
X_test.sort_index(inplace=True) 

modelPred_test = clf_rf.predict(X_test) 
ax.plot(pd.Series(index_values), y_test.values) 

smoothed=pd.rolling_mean(modelPred_test, 90, min_periods=90, freq=None, center=False, how=None) 
PlotInOne=pd.DataFrame(pd.concat([pd.Series(smoothed), pd.Series(y_test.values)], axis=1)) 
plt.figure(); PlotInOne.plot(); plt.legend(loc='best') 

使用發現然而,一個數據集的迴歸模型後,用moving average filter平滑噪聲預測值的圖似乎(如下所示)非常粗糙(藍線)。

橙色線是實際值的圖。

enter image description here

我們如何計算以上所示的標準預測(藍線)的在劇情偏差並將其傳遞作爲區間參數到該窗口運行在移動平均?目前,我將移動窗口的大小手動設置爲50,但我想要傳遞標準偏差的值。

smoothed=pd.rolling_mean(modelPred_test, 50, min_periods=50, freq=None, center=False, how=None) 

回答

0
pd.rolling_mean(modelPred_test,windows=round(np.std(modelPred_test))) 

您可以插入標準偏差爲最小窗口以及〜

0
smoothed=pd.rolling(modelPred_test, 50, min_periods=50, freq=None, center=False, how=None).std()