I'm trying to gap-fill half-hourly carbon flux data. I want to identify the most parsimonious LSTM model through train-test-validate cross-validation: train a model with all available inputs, then prune inputs until the score stops improving. For each model I use k-fold CV to split the data into 90% training and 10% validation, and the training portion is split further into train and test sets inside model.fit(). I'm using early stopping to help minimize runtime, and ModelCheckpoint to save the best weights (the epoch with the lowest 'val_loss'). I then want to load those weights and compute the validation score (MSE) on the 10% hold-out, so the model is validated externally with the weights that performed best on the test set. The problem: Keras fails to load the model weights when using the MXNet backend.
Here is my code, which trains the LSTM on 9 factors with 13 time steps (the 6 hours leading up to each observation):
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from functools import partial
from multiprocessing import Pool
from sklearn.neural_network import MLPRegressor as MPR
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import RepeatedKFold
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping,ModelCheckpoint
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
session = tf.Session(config=config)
def TimeShape(rolls,X1):
    X = np.zeros(shape = (X1.shape[0],rolls+1,X1.shape[1]))
    X[:,0,:] = X1
    if rolls > 0:
        for roll in range(0,rolls):
            X2 = np.roll(X1,(roll+1),axis=0)
            X[:,roll+1,:] = X2
    return(X)
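# Note on TimeShape: np.roll(X1, k, axis=0) shifts rows down by k, so the output
# satisfies X[t, k, :] == X1[t-k, :] for t >= k. For t < k the values wrap around
# from the end of the series, which is why the first time_steps+1 rows are dropped
# later with Xscale[time_steps+1:,:,:].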
def LSTM_Model(time_steps,inputs,load=None):
    model = Sequential()
    model.add(LSTM(12, input_shape=(time_steps+1,inputs),return_sequences=True,init='normal', activation='tanh'))
    model.add(LSTM(6,init='normal', activation='tanh'))
    model.add(Dense(1, init='normal',activation='linear'))
    NUM_GPU = 1 # or the number of GPUs available on your machine
    gpu_list = []
    for i in range(NUM_GPU): gpu_list.append('gpu(%d)' % i)
    model.compile(loss='mean_squared_error', optimizer='adam',context=gpu_list) # - Add if using MXNET
    return(model)

class LossHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.train_losses = []
        self.test_losses = []
    def on_epoch_end(self, batch, logs={}):
        self.train_losses.append(logs.get('loss'))
        self.test_losses.append(logs.get('val_loss'))
class LSTM_Optimize:
    def __init__(self,Path,y_var):
        # **Read and prep data**
        self.Master = pd.read_csv(Path,delimiter = ',',header = 0,na_values = -9999)
        self.Master = self.Master.set_index(pd.DatetimeIndex(pd.to_datetime(self.Master['datetime'])))
        self.Master['DOY'] = self.Master.index.dayofyear*1.0
        self.Master['HR'] = self.Master.index.hour*1.0
        self.Data = self.Master[np.isfinite(self.Master[y_var])]
        self.Data = self.Data.interpolate().bfill()
        self.Data = self.Data.interpolate().ffill()
        # **Normalize the Y variable**
        # The pipeline takes care of X, but not Y; I've found the models work better when Y is normalized too
        self.y = self.Data[y_var].values
        self.YStandard = StandardScaler()
        self.YScaled = self.YStandard.fit(self.y.reshape(-1, 1))
        Yscale = self.YScaled.transform(self.y.reshape(-1, 1))
        self.y = np.ndarray.flatten(Yscale)
        self.Ytru = self.YScaled.inverse_transform(self.y.reshape(-1,1))
    def Run(self,Inputs):
        # Prepare the input data
        time_steps = 12
        X = self.Data[Inputs]
        input_shape = len(Inputs)
        self.XStandard = StandardScaler()
        self.XScaled = self.XStandard.fit(X)
        Xscale = self.XScaled.transform(X)
        Xscale = TimeShape(time_steps,Xscale)
        Xscale = Xscale[time_steps+1:,:,:]
        self.y = self.y[time_steps+1:]
        ES = EarlyStopping(monitor='val_loss', min_delta=0.0, patience=25, verbose=1, mode='auto')
        CH = ModelCheckpoint(filepath='weights.hdf5',monitor='val_loss', verbose=0, save_best_only=True)
        HS = LossHistory()
        MSE = []
        kf = RepeatedKFold(n_splits=10,n_repeats=2)
        batch_size = 25
        Mod = LSTM_Model(time_steps,input_shape)
        plt.figure(figsize = (7,7))
        for train,test in kf.split(Xscale,self.y):
            Mod.fit(Xscale[train],self.y[train],batch_size=batch_size, nb_epoch=1000,validation_split=0.1,
                    shuffle=True,callbacks=[ES,CH,HS],verbose=0)
            Y = Mod.predict(Xscale[test],batch_size = batch_size)
            Mod.load_weights('weights.hdf5')
            Y = Mod.predict(Xscale[test],batch_size = batch_size)
            MSE.append(metrics.mean_squared_error(self.y[test],Y))
            plt.plot(HS.test_losses,linestyle='--')
            plt.plot(HS.train_losses)
        print(Mod.summary())
        print(np.asanyarray(MSE).mean())
Path = 'FluxData.csv'
%matplotlib inline
start_time = time.time()
if __name__ == '__main__':
    CH4_Model = ['Sedge','Shrubby','Temp','VWC','ustar','wind_speed','air_pressure',
                 'PPFD_Avg','NR_Wm2_Avg','AirTC_Avg']
    y_var = 'ch4_flux'
    Model = CH4_Model
    Best = LSTM_Optimize(Path,y_var)
    Best.Run(Model)
    print()
    print("--- %s seconds ---" % (time.time() - start_time))
Here are a few rows of a working sample of my dataset; the actual series has thousands of observations:
datetime,co2_flux,ch4_flux,ustar,wind_speed,AirTC_Avg,air_pressure,AirTC_Min,RH,PPFD_Avg,NR_Wm2_Avg,VWC,Temp,Sedge,Shrubby
7/11/2016 8:00,-0.337747167,0.011732699,0.404379747,3.887986435,15.07,101118.6513,15.03,92.7,414.2,225.1,0.5895,7.950660426,0.001292044,0.823794007
7/11/2016 8:30,-1.021087283,0.010256442,0.424094541,3.94983083,14.89,101144.0926,14.84,92.8,339.7,177.1,0.5895,8.24119905,0.001058732,0.826866339
7/11/2016 9:00,-0.146511388,0.008503355,0.456274817,4.687202214,14.71,101177.3176,14.63,93.4,354.4,183.7,0.5895,8.146344257,0.000474955,0.84272365
7/11/2016 9:30,0.144368521,0.009458078,0.462915317,4.810986576,14.27,101203.9191,14.2,93.3,370.2,188.4,0.5895,7.995179025,0.00147768,0.854715683
7/11/2016 10:00,1.471425801,0.014895985,0.47095652,5.098075355,13.7,1.9171,13.62,94.3,462.9,233.9,0.5895,7.521166721,4.64E-05,0.871581919
7/11/2016 10:30,0.889911286,0.01564225,0.487227522,4.969666239,13.13,101277.0195,13.04,96,309.9,155.2,0.5895,7.923818563,8.14E-06,0.880709962
When I run this with the TensorFlow backend, everything works fine and I get my results. However, if I try to run it with the MXNet backend, it fails to load the saved model weights and I get this traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-14c6597a2feb> in <module>()
114 Model = CH4_Model
115 Best = LSTM_Optimize(Path,y_var)
--> 116 Best.Run(Model)
117 print()
118 print("--- %s seconds ---" % (time.time() - start_time))
<ipython-input-1-14c6597a2feb> in Run(self, Inputs)
96 shuffle=True,callbacks=[ES,CH,HS],verbose=0)
97 Y = Mod.predict(Xscale[test],batch_size = batch_size)
---> 98 Mod.load_weights('weights.hdf5')
99 Y = Mod.predict(Xscale[test],batch_size = batch_size)
100 MSE.append(metrics.mean_squared_error(self.y[test],Y))
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights(self, filepath, by_name)
2718 self.load_weights_from_hdf5_group_by_name(f)
2719 else:
-> 2720 self.load_weights_from_hdf5_group(f)
2721
2722 if hasattr(f, 'close'):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/engine/topology.py in load_weights_from_hdf5_group(self, f)
2804 weight_values[0] = w
2805 weight_value_tuples += zip(symbolic_weights, weight_values)
-> 2806 K.batch_set_value(weight_value_tuples)
2807
2808 def load_weights_from_hdf5_group_by_name(self, f):
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in batch_set_value(tuples)
2205 """
2206 for p, w in tuples:
-> 2207 set_value(p, w)
2208
2209
/usr/local/lib/python3.5/dist-packages/Keras-1.2.2-py3.5.egg/keras/backend/mxnet_backend.py in set_value(x, value)
2193 if isinstance(value, Number):
2194 value = [value]
-> 2195 x.bind(mx.nd.array(value))
2196
2197
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in array(source_array, ctx, dtype)
1295 raise TypeError('source_array must be array like object')
1296 arr = empty(source_array.shape, ctx, dtype)
-> 1297 arr[:] = source_array
1298 return arr
1299
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in __setitem__(self, key, value)
384 _internal._set_value(float(value), out=self)
385 elif isinstance(value, (np.ndarray, np.generic)):
--> 386 self._sync_copyfrom(value)
387 else:
388 raise TypeError(
/usr/local/lib/python3.5/dist-packages/mxnet-0.11.0-py3.5.egg/mxnet/ndarray.py in _sync_copyfrom(self, source_array)
556 print(self.shape)
557 raise ValueError('Shape inconsistent: expected %s vs got %s'%(
--> 558 str(self.shape), str(source_array.shape)))
559 check_call(_LIB.MXNDArraySyncCopyFromCPU(
560 self.handle,
ValueError: Shape inconsistent: expected () vs got (1,)
Why am I using MXNet? It seems to be faster than TensorFlow, and I will have to run train-test-validation on many models with different inputs, different numbers of nodes, and different hyperparameters. By using multiprocessing to train several different models in parallel, I have been able to significantly speed up Keras models with the MXNet backend. With the TensorFlow backend, however, I get thread-lock errors when I try to do multiprocessing.
For context, I'm running this on a p2.xlarge instance with the Deep Learning AMI Ubuntu Linux - 2.3_Sep2017 (ami-d6ee1dae).
Any ideas would be greatly appreciated!
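One thing that might side-step the HDF5 round-trip entirely (sketch only, untested with the MXNet backend): keep the best weights in memory with a custom callback, so the arrays handed back to the model have exactly the shapes of its parameters.

# Hypothetical workaround: track the best weights in memory instead of
# saving/loading weights.hdf5.
class BestWeights(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.best_loss = np.inf
        self.best_weights = None
    def on_epoch_end(self, epoch, logs={}):
        val_loss = logs.get('val_loss')
        if val_loss is not None and val_loss < self.best_loss:
            self.best_loss = val_loss
            self.best_weights = self.model.get_weights()

# Usage inside Run(), replacing the ModelCheckpoint + load_weights pair:
#   BW = BestWeights()
#   Mod.fit(Xscale[train], self.y[train], ..., callbacks=[ES, BW, HS], verbose=0)
#   Mod.set_weights(BW.best_weights)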
Hey, did you ever figure this out? – HalaKuwatly
It isn't supported with MXNet, so I gave up on it and switched to TensorFlow. I figured out how to implement TensorFlow multiprocessing to train-test-validate multiple models simultaneously. –
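For reference, a minimal sketch of the kind of TensorFlow multiprocessing setup mentioned above (illustrative only: it assumes the top-level tf.Session setup is removed, that LSTM_Optimize.Run() returns the mean CV MSE, and that the memory fraction and pool size are placeholders). Building the session inside each spawned worker avoids sharing the parent process's TensorFlow state, a common cause of thread-lock errors when forking after TensorFlow has been initialized.

# Sketch: one fresh process per candidate model, each with its own TF session.
import multiprocessing as mp

def train_one(inputs):
    import tensorflow as tf
    import keras.backend as K
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.3  # share the single K80
    K.set_session(tf.Session(config=config))
    opt = LSTM_Optimize('FluxData.csv', 'ch4_flux')
    return inputs, opt.Run(inputs)  # assumes Run() returns the mean CV MSE

if __name__ == '__main__':
    candidate_sets = [CH4_Model, CH4_Model[:-1], CH4_Model[:-2]]  # illustrative subsets
    ctx = mp.get_context('spawn')  # 'spawn' avoids inheriting the parent's CUDA/TF state
    with ctx.Pool(processes=3) as pool:
        for inputs, mse in pool.map(train_one, candidate_sets):
            print(len(inputs), 'inputs -> mean CV MSE:', mse)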