
I seem to get this error whenever I use the ModelCheckpoint callback in Keras.

From a GitHub issue I read that the solution would be to use model.get_weights(), but I only store the weights implicitly, since I only keep the best ones (via the checkpoint).

Keras only seems to be able to save weights to HDF5 (h5), which makes me wonder whether there is any other way to store them using the Keras API, and if so, how? If not, how else can I store them?

A made-up example to reproduce the problem:

#!/usr/bin/python 


import glob, os 
import sys 
from os import listdir 
from os.path import isfile, join 
import numpy as np 
import warnings 
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D 
from keras.utils import np_utils 
from keras import metrics 
import keras 
from keras import backend as K 
from keras.models import Sequential 
from keras.optimizers import SGD, Adam 
from keras.layers.core import Dense, Activation, Lambda, Reshape,Flatten 
from keras.layers import Conv1D,Conv2D,MaxPooling2D, MaxPooling1D, Reshape 
#from keras.utils.visualize_util import plot 
from keras.models import Model 
from keras.layers import Input, Dense 
from keras.layers.merge import Concatenate, Add 
import h5py 
import random 
import tensorflow as tf 
import math 
from keras.callbacks import CSVLogger 
from keras.callbacks import ModelCheckpoint 


if len(sys.argv) < 6: 
    print "Missing Arguments!" 
    print "python keras_convolutional_feature_extraction.py <workspace> <total_frames> <fbank-dim> <window-height> <batch_size>" 
    print "Example:" 
    print "python keras_convolutional_feature_extraction.py deltas 15 40 5 100" 
    sys.exit() 


total_frames = int(sys.argv[2]) 
total_frames_with_deltas = total_frames*3 
dim = int(sys.argv[3]) 
window_height = int(sys.argv[4]) 
inserted_batch_size = int(sys.argv[5]) 
stride = 1 
splits = ((dim - window_height)+1)/stride 

#input_train_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_train_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(inserted_batch_size)+"_fws_input" 
#output_train_data ="/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_train_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(inserted_batch_size)+"_fws_output" 
#input_test_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_test_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(1)+"_fws_input" 
#output_test_data = "/media/carl/E2302E68302E443F/"+str(sys.argv[1])+"/fbank/org_test_total_frames_"+str(total_frames)+"_dim_"+str(dim)+"_winheig_"+str(window_height)+"_batch_"+str(1)+"_fws_output" 

#train_files =[f for f in listdir(input_train_data) if isfile(join(input_train_data, f))] 
#test_files =[f for f in listdir(input_test_data) if isfile(join(input_test_data, f))] 

#print len(train_files) 
np.random.seed(100) 
print "hallo" 
def train_generator(): 
    while True: 
#  input = random.choice(train_files) 
#  h5f = h5py.File(input_train_data+'/'+input, 'r') 
#  train_input = h5f['train_input'][:] 
#  train_output = h5f['train_output'][:] 
#  h5f.close() 
     train_input = np.random.randint(100,size=((inserted_batch_size,splits*total_frames_with_deltas,window_height,3))) 
     train_list_list = [] 
     train_input = train_input.reshape((inserted_batch_size,splits*total_frames_with_deltas,window_height,3)) 
     train_input_list = np.split(train_input,splits*total_frames_with_deltas,axis=1) 
     for i in range(len(train_input_list)): 
      train_input_list[i] = train_input_list[i].reshape(inserted_batch_size,window_height,3) 


     #for i in range(len(train_input_list)): 
     # train_input_list[i] = train_input_list[i].reshape(inserted_batch_size,33,window_height,1,3) 

     train_output = np.random.randint(5, size = (1,total_frames,5)) 
     middle = int(math.ceil(total_frames/2)) 

     train_output = train_output[:,middle:middle+1,:].reshape((inserted_batch_size,1,5)) 
     #print train_output.shape 
     #print len(train_input_list) 
     #print train_input_list[0].shape 
     yield (train_input_list, train_output) 
print "hallo" 
def test_generator(): 
    while True: 
#  input = random.choice(test_files) 
#  h5f = h5py.File(input_test_data+'/'+input, 'r') 
#  test_input = h5f['test_input'][:] 
#  test_output = h5f['test_output'][:] 
#  h5f.close() 
     test_input = np.random.randint(100,size=((inserted_batch_size,splits*total_frames_with_deltas,window_height,3))) 
     test_input = test_input.reshape((inserted_batch_size,splits*total_frames_with_deltas,window_height,3)) 
     test_input_list = np.split(test_input,splits*total_frames_with_deltas,axis=1) 
     #test_input_list = np.split(test_input,45,axis=3) 

     for i in range(len(test_input_list)): 
      test_input_list[i] = test_input_list[i].reshape(inserted_batch_size,window_height,3) 

     #for i in range(len(test_input_list)): 
     # test_input_list[i] = test_input_list[i].reshape(inserted_batch_size,33,window_height,1,3) 

     test_output = np.random.randint(5, size = (1,total_frames,5)) 

     middle = int(math.ceil(total_frames/2)) 

     test_output = test_output[:,middle:middle+1,:].reshape((inserted_batch_size,1,5)) 

     yield (test_input_list, test_output) 
print "hallo" 

def fws(): 
    #print "Inside" 
    # Params: 
    # batch , lr, decay , momentum, epochs 
    # 
    #Input shape: (batch_size,40,45,3) 
    #output shape: (1,15,50) 
    # number of unit in conv_feature_map = splitd 
    next(train_generator()) 
    model_output = [] 
    list_of_input = [Input(shape=(8,3)) for i in range(splits*total_frames_with_deltas)] 
    output = [] 

    #Conv 
    skip = total_frames_with_deltas 
    for steps in range(total_frames_with_deltas): 
     conv = Conv1D(filters = 100, kernel_size = 8) 
     column = 0 
     for _ in range(splits): 
      #print "column " + str(column) + "steps: " + str(steps) 
      output.append(conv(list_of_input[(column*skip)+steps])) 
      column = column + 1 

    #print len(output) 
    #print splits*total_frames_with_deltas 


    conv = [] 
    for section in range(splits): 
     column = 0 
     skip = splits 
     temp = [] 
     for _ in range(total_frames_with_deltas): 
      temp.append(output[((column*skip)+section)]) 
      column = column + 1 
     conv.append(Add()(temp)) 
     #print len(conv) 



    output_conc = Concatenate()(conv) 
    #print output_conc.get_shape 
    output_conv = Reshape((splits, -1))(output_conc) 
    #print output_conv.get_shape 

    #Pool 
    pooled = MaxPooling1D(pool_size = 6, strides = 2)(output_conv) 
    reshape = Reshape((1,-1))(pooled) 

    #Fc 
    dense1 = Dense(units = 1024, activation = 'relu', name = "dense_1")(reshape) 
    #dense2 = Dense(units = 1024, activation = 'relu', name = "dense_2")(dense1) 
    dense3 = Dense(units = 1024, activation = 'relu', name = "dense_3")(dense1) 
    final = Dense(units = 5, activation = 'relu', name = "final")(dense3) 

    model = Model(inputs = list_of_input , outputs = final) 
    sgd = SGD(lr=0.1, decay=1e-1, momentum=0.9, nesterov=True) 
    model.compile(loss="categorical_crossentropy", optimizer=sgd , metrics = ['accuracy']) 
    print "compiled" 

    model_yaml = model.to_yaml() 
    with open("model.yaml", "w") as yaml_file: 
     yaml_file.write(model_yaml) 

    print "Model saved!" 

    log= CSVLogger('/home/carl/kaldi-trunk/dnn/experimental/yesno_cnn_50_training_total_frames_'+str(total_frames)+"_dim_"+str(dim)+"_window_height_"+str(window_height)+".csv") 
    filepath='yesno_cnn_50_training_total_frames_'+str(total_frames)+"_dim_"+str(dim)+"_window_height_"+str(window_height)+"weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5" 
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_weights_only=True, mode='max') 


    print "log" 
    #plot_model(model, to_file='model.png') 
    print "Fit" 
    hist_current = model.fit_generator(train_generator(), 
         steps_per_epoch=444,#len(train_files), 
         epochs = 10000, 
         verbose = 1, 
         validation_data = test_generator(), 
         validation_steps=44,#len(test_files), 
         pickle_safe = True, 
         workers = 4, 
         callbacks = [log,checkpoint]) 

fws() 

The script is run with: python name_of_script.py yesno 50 40 8 1

This gives me the following full traceback:

[email protected]:~/Dropbox$ python mini.py yesno 50 40 8 1 
Using TensorFlow backend. 
Couldn't import dot_parser, loading of dot files will not be possible. 
hallo 
hallo 
hallo 
compiled 
Model saved! 
log 
Fit 
/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:2252: UserWarning: Expected no kwargs, you passed 1 
kwargs passed to function are ignored with Tensorflow backend 
    warnings.warn('\n'.join(msg)) 
Epoch 1/10000 
2017-05-26 13:01:45.851125: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-26 13:01:45.851345: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations. 
2017-05-26 13:01:45.851392: W tensorflow/core/platform/cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations. 
443/444 [============================>.] - ETA: 4s - loss: 100.1266 - acc: 0.3138Epoch 00000: saving model to yesno_cnn_50_training_total_frames_50_dim_40_window_height_8weights-improvement-00-0.48.hdf5 
Traceback (most recent call last): 
    File "mini.py", line 205, in <module> 

    File "mini.py", line 203, in fws 

    File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", line 88, in wrapper 
    return func(*args, **kwargs) 
    File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1933, in fit_generator 
    callbacks.on_epoch_end(epoch, epoch_logs) 
    File "/usr/local/lib/python2.7/dist-packages/keras/callbacks.py", line 77, in on_epoch_end 
    callback.on_epoch_end(epoch, logs) 
    File "/usr/local/lib/python2.7/dist-packages/keras/callbacks.py", line 411, in on_epoch_end 
    self.model.save_weights(filepath, overwrite=True) 
    File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2503, in save_weights 
    save_weights_to_hdf5_group(f, self.layers) 
    File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2746, in save_weights_to_hdf5_group 
    f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers] 
    File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2684) 
    File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2642) 
    File "/usr/local/lib/python2.7/dist-packages/h5py/_hl/attrs.py", line 93, in __setitem__ 
    self.create(name, data=value, dtype=base.guess_dtype(value)) 
    File "/usr/local/lib/python2.7/dist-packages/h5py/_hl/attrs.py", line 183, in create 
    attr = h5a.create(self._id, self._e(tempname), htype, space) 
    File "h5py/_objects.pyx", line 54, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2684) 
    File "h5py/_objects.pyx", line 55, in h5py._objects.with_phil.wrapper (/tmp/pip-4rPeHA-build/h5py/_objects.c:2642) 
    File "h5py/h5a.pyx", line 47, in h5py.h5a.create (/tmp/pip-4rPeHA-build/h5py/h5a.c:1904) 
RuntimeError: Unable to create attribute (Object header message is too large) 
Can't see the traceback, could you add the error description? –

Added it ... or it can be seen in the pastebin link –

Answers

3

If you look at the amount of data Keras is trying to save under the layer_names attribute of the output HDF5 file being created, you will find that it takes more than 64 KB.

np.asarray([layer.name.encode('utf8') for layer in model.layers]).nbytes 
>> 77100 

Quoting from https://support.hdfgroup.org/HDF5/faq/limits.html:

Is there an object header limit and how does that affect HDF5 ?

There is a limit (in HDF5-1.8) of the object header, which is 64 KB. The datatype for a dataset is stored in the object header, so there is therefore a limit on the size of the datatype that you can have. (See HDFFV-1089)

The code above was copied (almost exactly) from the traceback:

File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 2746, in save_weights_to_hdf5_group 
f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers] 

I used numpy's asarray method to get the figure quickly, but h5py arrives at a similar number (I guess); if you want to find the exact figure, see https://github.com/h5py/h5py/blob/master/h5py/_hl/attrs.py#L102

Anyway, you will either need to implement your own methods to save/load the weights (or use one of the existing workarounds), or you need to give really short names to ALL the layers in your model :), something like this:

list_of_input = [Input(shape=(8,3), name=('i%x' % i)) for i in range(splits*total_frames_with_deltas)] 
conv = Conv1D(filters = 100, kernel_size = 8, name='cv%x' % steps) 
conv.append(Add(name='add%x' % section)(temp)) 
output_conc = Concatenate(name='ct')(conv) 
output_conv = Reshape((splits, -1), name='rs1')(output_conc) 
pooled = MaxPooling1D(pool_size = 6, strides = 2, name='pl')(output_conv) 
reshape = Reshape((1,-1), name='rs2')(pooled) 
dense1 = Dense(units = 1024, activation = 'relu', name = "d1")(reshape) 
dense2 = Dense(units = 1024, activation = 'relu', name = "d2")(dense1) 
dense3 = Dense(units = 1024, activation = 'relu', name = "d3")(dense1) 
final = Dense(units = 5, activation = 'relu', name = "fl")(dense3) 

You must not forget to name ALL of the layers, because the (numpy) string array into which the layer names are converted uses the size of its longest string for every single string in it when it is saved!

After renaming the layers as described above (which takes almost 26 kB), the model weights were saved successfully. Hope this elaborate answer helps someone.
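As a quick sanity check before training, the same expression Keras uses internally can be wrapped in a small helper to see whether the layer_names attribute will fit under the 64 KB limit; the helper name and threshold constant below are made up for illustration:

import numpy as np 

MAX_ATTR_BYTES = 64 * 1024  # HDF5 1.8 object-header limit quoted above 

def check_layer_names_size(model): 
    # same expression save_weights_to_hdf5_group builds for the layer_names attribute 
    names = np.asarray([layer.name.encode('utf8') for layer in model.layers]) 
    print("layer_names attribute needs %d bytes" % names.nbytes) 
    return names.nbytes <= MAX_ATTR_BYTES 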

UPDATE: I have just made a PR to Keras that should fix the issue without implementing any custom loading/saving methods, see 7508

1

A simple, though perhaps not the most elegant, solution could be to run a while loop with epochs = 1 (see the sketch after this list):

  1. Get the weights at the end of every epoch, along with the accuracy and the loss, using model.get_weights
  2. Save the weights to file 1
  3. If the accuracy is greater than in the previous epoch (i.e. loop iteration), store the weights in a different file (file 2)
  4. Run the loop again, loading the weights from file 1
  5. Break the loop by setting a manual early stopping so that it breaks if the loss does not improve for a certain number of loops
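A minimal sketch of that loop, assuming the model and generators from the question are already built and compiled; the file names (file1.p, file2.p) and the patience value are made up for illustration:

import pickle 

best_acc = -1.0 
bad_loops = 0 
patience = 5  # made-up manual early-stopping patience 

while bad_loops < patience: 
    hist = model.fit_generator(train_generator(), steps_per_epoch=444, epochs=1, 
                               verbose=1, validation_data=test_generator(), 
                               validation_steps=44) 
    acc = hist.history['val_acc'][-1] 
    weights = model.get_weights()                  # plain list of numpy arrays, no HDF5 involved 
    pickle.dump(weights, open("file1.p", "wb"))    # file 1: always the latest weights 
    if acc > best_acc:                             # file 2: only the best weights so far 
        best_acc = acc 
        bad_loops = 0 
        pickle.dump(weights, open("file2.p", "wb")) 
    else: 
        bad_loops += 1 
    model.set_weights(pickle.load(open("file1.p", "rb")))  # resume the next loop from file 1 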
1

An improvement would be to use get_weights() together with numpy.save.

This is not the best solution, because it will save several files, but it actually works.

The problem is that you won't have the optimizer saved with its current state. But you can perhaps work around that by using a smaller learning rate after loading.
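A rough illustration of that learning-rate workaround (not part of the original answer), assuming a TensorFlow backend and an already compiled model; the value 0.001 is just a placeholder:

from keras import backend as K 

# shrink the optimizer's learning rate after the weights have been restored 
K.set_value(model.optimizer.lr, 0.001) 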

A custom callback using numpy.save:

storedLoss = float('inf')  # global the callback compares against; start "infinitely bad" 

def myCallback(epoch, logs): 
    global storedLoss 
    # do your comparisons here using the "logs" var. 
    print(logs) 

    if logs['loss'] < storedLoss: 
        storedLoss = logs['loss'] 
        for i in range(len(model.layers)): 
            WandB = model.layers[i].get_weights() 

            if len(WandB) > 0:  # necessary because some layers have no weights 
                np.save("W-" + str(i), WandB[0], False) 
                np.save("B-" + str(i), WandB[1], False) 

    # remember that get_weights and set_weights use a list: [weights, biases] 
    # it may happen (not sure) that there is no bias, so you may have to check len(WandB) == 1. 

The logs var brings a dictionary with the named metrics, such as "loss" and "accuracy" (if you are using it).

You can store the losses within your callback in a global var and compare whether each one is better or worse than the previous.

When fitting, use the lambda callback:

from keras.callbacks import LambdaCallback 
model.fit(...,callbacks=[LambdaCallback(on_epoch_end=myCallback)]) 

In the example above I used LambdaCallback, which has more possibilities than just on_epoch_end.

For loading, do a similar loop:

# you have to create (and compile) the model first and then set the layers 
def loadModel(model): 
    for i in range(len(model.layers)): 
        WandBForCheck = model.layers[i].get_weights() 

        if len(WandBForCheck) > 0:  # necessary because some layers have no weights 
            # the file names must match the prefixes used when saving above 
            W = np.load("W-" + str(i) + ".npy") 
            B = np.load("B-" + str(i) + ".npy") 
            model.layers[i].set_weights([W, B]) 
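A minimal usage sketch, assuming the model has been rebuilt and compiled exactly as in the question before restoring the weights:

# rebuild and compile the model as for training, then put the saved weights back 
loadModel(model) 
# optionally continue training from the restored state 
model.fit_generator(train_generator(), steps_per_epoch=444, epochs=10, verbose=1, 
                    validation_data=test_generator(), validation_steps=44, 
                    callbacks=[LambdaCallback(on_epoch_end=myCallback)]) 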
1

See the follow-up at https://github.com/fchollet/keras/issues/6766 and https://github.com/farizrahman4u/keras-contrib/pull/90

I looked at the YAML, and the root cause is probably that you have so many inputs. A few inputs with multiple dimensions are preferable to many separate inputs, particularly if you can use scanning and batch operations to do everything efficiently.

Now, ignoring that completely, here is how you can save and load your model if it has too much stuff to save as JSON efficiently:

You could pass save_weights_only=True. That won't save the optimizer weights, so it is not a great solution.

I just put together a PR that saves the model weights and the optimizer weights, but not the configuration. When you want to load, first instantiate and compile the model as you would when preparing to train it, then use load_all_weights to load the model and optimizer weights into that model. I'll try to merge it soon so you can use it from the master branch.

You could use it something like this:

from keras.callbacks import LambdaCallback 
from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights 
# do some stuff to create and compile model 
# use `save_all_weights` as a callback to checkpoint your model and optimizer weights 
model.fit(..., callbacks=[LambdaCallback(on_epoch_end=lambda epoch, logs: save_all_weights(model, "checkpoint-{:05d}.h5".format(epoch)))]) 
# use `load_all_weights` to load model and optimizer weights into an existing model 
# if not compiled (no `model.optimizer`), this will just load model weights 
load_all_weights(model, 'checkpoint-1337.h5') 

So, I don't endorse the model, but if you want to get it saving and loading anyway, this should probably work for you.

As a side note, if you want to save the weights in a different format, something like this would work.

pickle.dump([K.get_value(w) for w in model.weights], open("save.p", "wb")) 
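Loading those weights back is not shown above; a minimal sketch of the reverse step, assuming the model has been rebuilt with the same architecture so the weight order matches:

import pickle 

# set_weights expects the arrays in the same order that model.weights produced them 
model.set_weights(pickle.load(open("save.p", "rb"))) 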

Cheers