0
嗨我試圖訓練一個modell,當我得到這個錯誤信息:如果我理解它的權利,說我沒有VRAM,但我有一個華碩GTX1080 A8G遊戲應該有足夠的Vram 。我在所有工作之前嘗試過,但突然間無處可去。 我深深網:圖形卡的OOM CUDA/Tensorflow
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 29 11:52:11 2017
@author: tobia
"""
#importing pre_processing libaries
import numpy as np
from keras.models import load_model
import os
#importing Deep Learning Libaries
from keras import layers
from keras.models import Sequential
from keras.callbacks import TensorBoard
from keras.layers import Flatten,Dense,Conv2D,MaxPooling2D,Dropout,BatchNormalization,Activation
def load_data():
key_values = np.empty((0,8),dtype = 'uint8')
picture_data = np.empty((0,60,80), dtype = 'uint8')
for i in range(len(os.listdir('data/key_values'))):
buffer = np.load('data/key_values/values_{0}.npy'.format(i+1))
key_values = np.append(key_values,buffer,axis = 0)
buffer_2 = np.load('data/video/video_{}.npy'.format(i+1))
picture_data = np.append(picture_data,buffer_2,axis = 0)
picture_data = picture_data.reshape((len(key_values),60,80,1))
"""
train_data = np.load("data/Processed/train_data.npy")
train = train_data[:]
picture_data = np.array([i[1] for i in train]).reshape(-1,60,80,1)
key_values = np.array([i[0] for i in train])
key_values = np.squeeze(key_values)
"""
# key_values = np.reshape(key_values,(len(key_values[:]),1,7))
# picture_data = np.reshape(picture_data,(len(picture_data[:]),1,60,80,1))
return key_values,picture_data
class Network:
def __init__(self):
pass
def model_1(self,picture_data,key_values):
model = Sequential()
model.add(Conv2D(96, 11,input_shape = (60,80,1),activation = "relu"))
model.add(MaxPooling2D(pool_size = 3,strides =1))
model.add(BatchNormalization(axis = 1))
model.add(Flatten())
model.add(Dense(units = 8, activation ="softmax"))
model.compile(optimizer ='adam', loss = 'categorical_crossentropy',metrics = ['accuracy'])
model.summary()
return model
def start(self,picture_data,key_values):
model = self.model_1(picture_data,key_values)
tbCallBack= TensorBoard(log_dir = "./logs",histogram_freq=0,write_graph=True,write_images=True)
model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack])
def predict_key(self,live_image,model):
self.model = model
x = self.model.predict(live_image,batch_size =3)
return x
input_k = input("Start new Training press: N or to contiune learning press C")
if(input_k == 'N'):
key_values,picture_data= load_data()
test = Network()
test.start(picture_data,key_values)
elif(input_k == 'C'):
model = load_model('Models/Modell.h5')
visual = TensorBoard(log_dir = "./logs",histogram_freq=0,write_graph=True,write_images=True)
key_values,picture_data = load_data()
model.fit(picture_data,key_values,batch_size = 1000,epochs=1,validation_split = 0.1,callbacks = [visual])
model.save("Models/Modell.h5")
錯誤消息:
File "<ipython-input-1-73951c078cac>", line 1, in <module>
runfile('C:/Users/tobia/Desktop/Ai_Star/ai_train.py', wdir='C:/Users/tobia/Desktop/Ai_Star')
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module>
test.start(picture_data,key_values)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 66, in start
model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack])
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 870, in fit
initial_epoch=initial_epoch)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1507, in fit
initial_epoch=initial_epoch)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1156, in _fit_loop
outs = f(ins_batch)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 2269, in __call__
**self.session_kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 789, in run
run_metadata_ptr)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\client\session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
ResourceExhaustedError: OOM when allocating tensor with shape[313344,8]
[[Node: gradients/dense_1/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, _class=["loc:@dense_1/MatMul"], transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](flatten_1/Reshape, gradients/dense_1/Softmax_grad/mul_1)]]
Caused by op 'gradients/dense_1/MatMul_grad/MatMul_1', defined at:
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module>
main()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 227, in main
kernel.start()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2808, in run_ast_nodes
if self.run_code(code, result):
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-73951c078cac>", line 1, in <module>
runfile('C:/Users/tobia/Desktop/Ai_Star/ai_train.py', wdir='C:/Users/tobia/Desktop/Ai_Star')
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module>
test.start(picture_data,key_values)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 66, in start
model.fit(picture_data,key_values,batch_size = 1000,epochs =10,validation_split = 0.1,callbacks = [tbCallBack])
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 870, in fit
initial_epoch=initial_epoch)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1490, in fit
self._make_train_function()
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\training.py", line 1014, in _make_train_function
self.total_loss)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\optimizers.py", line 405, in get_updates
grads = self.get_gradients(loss, params)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\optimizers.py", line 71, in get_gradients
grads = K.gradients(loss, params)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 2307, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 540, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 346, in _MaybeCompile
return grad_fn() # Exit early
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 540, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_grad.py", line 825, in _MatMulGrad
grad_b = math_ops.matmul(a, grad, transpose_a=True)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1816, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1217, in _mat_mul
transpose_b=transpose_b, name=name)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
...which was originally created as op 'dense_1/MatMul', defined at:
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module>
main()
[elided 20 identical lines from previous traceback]
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 81, in <module>
test.start(picture_data,key_values)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 64, in start
model = self.model_1(picture_data,key_values)
File "C:/Users/tobia/Desktop/Ai_Star/ai_train.py", line 57, in model_1
model.add(Dense(units = 8, activation ="softmax"))
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\models.py", line 476, in add
output_tensor = layer(self.outputs[0])
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\engine\topology.py", line 596, in __call__
output = self.call(inputs, **kwargs)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\layers\core.py", line 843, in call
output = K.dot(inputs, self.kernel)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\keras\backend\tensorflow_backend.py", line 976, in dot
out = tf.matmul(x, y)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1816, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1217, in _mat_mul
transpose_b=transpose_b, name=name)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\tobia\Anaconda3\envs\StarCraft\lib\site-packages\tensorflow\python\framework\ops.py", line 1269, in __init__
self._traceback = _extract_stack()
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[313344,8]
[[Node: gradients/dense_1/MatMul_grad/MatMul_1 = MatMul[T=DT_FLOAT, _class=["loc:@dense_1/MatMul"], transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](flatten_1/Reshape, gradients/dense_1/Softmax_grad/mul_1)]]
對此:「我想它之前的一切工作,但突然冒出它不會工作了。」也許你的GPU有分配內存的殭屍進程。你通常可以用'nvidia-smi'來觀察。在這種情況下,一個可能的解決方案是重新啓動。 –
您可能想看看https://stackoverflow.com/documentation/tensorflow/10621/tensorflow-gpu-setup/31879/control-the-gpu-memory-allocation#t=201706291458179435665來管理CUDA內存。 – npf