這是示例MNIST代碼我運行:Tensorflow深MNIST:資源耗盡:OOM具有形狀分配張量時[10000,32,28,28]
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
import tensorflow as tf
sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1,28,28,1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.initialize_all_variables()
config = tf.ConfigProto()
config.gpu_options.allocator_type = 'BFC'
with tf.Session(config = config) as s:
sess.run(init)
for i in range(20000):
batch = mnist.train.next_batch(50)
if i%100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x:batch[0], y_: batch[1], keep_prob: 1.0})
print("step %d, training accuracy %g"%(i, train_accuracy))
train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
print("test accuracy %g"%accuracy.eval(feed_dict={
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
的GPU我使用的是:GeForce GTX 750 Ti
錯誤:
...
...
...
step 19900, training accuracy 1
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (256): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (512): Total Chunks: 1, Chunks in use: 0 768B allocated for chunks. 1.20MiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (1024): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (2048): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (4096): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8192): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (16384): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (32768): Total Chunks: 1, Chunks in use: 0 36.8KiB allocated for chunks. 4.79MiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (65536): Total Chunks: 1, Chunks in use: 0 78.5KiB allocated for chunks. 4.79MiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (131072): Total Chunks: 1, Chunks in use: 0 200.0KiB allocated for chunks. 153.1KiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (262144): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (524288): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (1048576): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (2097152): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (4194304): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (8388608): Total Chunks: 1, Chunks in use: 0 11.86MiB allocated for chunks. 390.6KiB client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (16777216): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (33554432): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (67108864): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (134217728): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:639] Bin (268435456): Total Chunks: 0, Chunks in use: 0 0B allocated for chunks. 0B client-requested for chunks. 0B in use in bin. 0B client-requested in use in bin.
I tensorflow/core/common_runtime/bfc_allocator.cc:656] Bin for 957.03MiB was 256.00MiB, Chunk State:
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a40000 of size 1280
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a40500 of size 1280
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a40a00 of size 31488
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48500 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48600 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48700 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48800 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48900 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a48a00 of size 4096
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a49a00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a49b00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a49c00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a49d00 of size 3328
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a4aa00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a4ab00 of size 204800
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a7cb00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x601a7cc00 of size 12845056
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026bcc00 of size 4096
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026bdc00 of size 40960
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026c7c00 of size 31488
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cf700 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cf800 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cf900 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cfa00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cfb00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cfc00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cfd00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cfe00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026cff00 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026d0000 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026d0100 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026d0500 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026d0600 of size 3328
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026d1300 of size 40960
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6026db300 of size 80128
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x602702600 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x602734700 of size 204800
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x603342700 of size 4096
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x603343700 of size 3328
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334d700 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334d800 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334d900 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334da00 of size 3328
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334e700 of size 3328
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334f400 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334f500 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x60334f600 of size 204800
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x603381600 of size 204800
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6033b3600 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6033b3700 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6033b3800 of size 12845056
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x603ff3800 of size 12845056
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c33800 of size 4096
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c34800 of size 4096
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c35800 of size 40960
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c3f800 of size 40960
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c49800 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c49900 of size 256
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x604c49a00 of size 13053184
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6058bc700 of size 31360000
I tensorflow/core/common_runtime/bfc_allocator.cc:674] Chunk at 0x6076a4b00 of size 1801385216
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x6026d0200 of size 768
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x6026eec00 of size 80384
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x602702700 of size 204800
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x602766700 of size 12435456
I tensorflow/core/common_runtime/bfc_allocator.cc:683] Free at 0x603344400 of size 37632
I tensorflow/core/common_runtime/bfc_allocator.cc:689] Summary of in-use Chunks by size:
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 32 Chunks of size 256 totalling 8.0KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 2 Chunks of size 1280 totalling 2.5KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 5 Chunks of size 3328 totalling 16.2KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 5 Chunks of size 4096 totalling 20.0KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 2 Chunks of size 31488 totalling 61.5KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 4 Chunks of size 40960 totalling 160.0KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 80128 totalling 78.2KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 4 Chunks of size 204800 totalling 800.0KiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 3 Chunks of size 12845056 totalling 36.75MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 13053184 totalling 12.45MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 31360000 totalling 29.91MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:692] 1 Chunks of size 1801385216 totalling 1.68GiB
I tensorflow/core/common_runtime/bfc_allocator.cc:696] Sum Total of in-use chunks: 1.76GiB
I tensorflow/core/common_runtime/bfc_allocator.cc:698] Stats:
Limit: 1898266624
InUse: 1885507584
MaxInUse: 1885907712
NumAllocs: 2387902
MaxAllocSize: 1801385216
W tensorflow/core/common_runtime/bfc_allocator.cc:270] **********************************************************xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
W tensorflow/core/common_runtime/bfc_allocator.cc:271] Ran out of memory trying to allocate 957.03MiB. See logs for memory state.
W tensorflow/core/framework/op_kernel.cc:968] Resource exhausted: OOM when allocating tensor with shape[10000,32,28,28]
Traceback (most recent call last):
File "trainer_deepMnist.py", line 109, in <module>
x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 559, in eval
return _eval_using_default_session(self, feed_dict, self.graph, session)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 3648, in _eval_using_default_session
return session.run(tensors, feed_dict)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 710, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 908, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 958, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 978, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.ResourceExhaustedError: OOM when allocating tensor with shape[10000,32,28,28]
[[Node: Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](Reshape, Variable_2/read)]]
Caused by op u'Conv2D', defined at:
File "trainer_deepMnist.py", line 61, in <module>
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
File "trainer_deepMnist.py", line 46, in conv2d
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 394, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2320, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
self._traceback = _extract_stack()
我看了相關的同樣的問題,有些問題的GitHub(here,here),但不明白我應該怎麼改我的代碼來解決這個問題。