# Example script: read CSV rows one at a time through a TensorFlow
# queue-based input pipeline and fetch them in a session.

# Queue holding the names of the CSV files to read.
filename_queue = tf.train.string_input_producer(["file0.csv", "file1.csv"])

# The reader produces one (key, value) pair per call, taken from the queue.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[1], [1], [1], [1], [1]]
col1, col2, col3, col4, col5 = tf.decode_csv(
    value, record_defaults=record_defaults)

# The first four columns are combined into one tensor; col5 is fetched
# separately below as the label.
# NOTE(review): tf.pack was renamed tf.stack in later TensorFlow releases --
# confirm which version this script targets before changing it.
features = tf.pack([col1, col2, col3, col4])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        # Each sess.run() call fetches one record, so this loop reads 1200
        # records in total.
        for i in range(1200):
            # Retrieve a single instance:
            example, label = sess.run([features, col5])
    finally:
        # Ask the queue-runner threads to stop and wait for them, so they
        # are not left running when the session closes.
        coord.request_stop()
        coord.join(threads)
問題1:for循環中的1200代表什麼意義?數據中的記錄數是多少?
def read_my_file_format(filename_queue):
    """Read one record from ``filename_queue`` and return it decoded.

    This is a template: ``tf.SomeReader``, ``tf.some_decoder`` and
    ``some_processing`` appear to be placeholders to be replaced with the
    reader, decoder and preprocessing that match the actual file format.

    Args:
        filename_queue: Queue of file names passed to the reader's ``read``.

    Returns:
        A ``(processed_example, label)`` pair.
    """
    record_reader = tf.SomeReader()
    # The key returned by read() is not used by this function.
    _, raw_record = record_reader.read(filename_queue)
    decoded_example, label = tf.some_decoder(raw_record)
    return some_processing(decoded_example), label
def input_pipeline(filenames, batch_size, num_epochs=None):
    """Build a shuffled, batched input pipeline over ``filenames``.

    Args:
        filenames: File names handed to ``tf.train.string_input_producer``.
        batch_size: Number of examples per batch returned by
            ``tf.train.shuffle_batch``.
        num_epochs: Forwarded to ``tf.train.string_input_producer``;
            defaults to ``None``.

    Returns:
        A ``(example_batch, label_batch)`` pair produced by
        ``tf.train.shuffle_batch``.
    """
    filename_queue = tf.train.string_input_producer(
        filenames, num_epochs=num_epochs, shuffle=True)
    example, label = read_my_file_format(filename_queue)
    # min_after_dequeue defines how big a buffer we will randomly sample
    #   from -- bigger means better shuffling but slower start up and more
    #   memory used.
    # capacity must be larger than min_after_dequeue and the amount larger
    #   determines the maximum we will prefetch.  Recommendation:
    #   min_after_dequeue + (num_threads + a small safety margin) * batch_size
    min_after_dequeue = 10000
    capacity = min_after_dequeue + 3 * batch_size
    # min_after_dequeue must be passed explicitly; without it the call was
    # left unterminated and the shuffle buffer size computed above was unused.
    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=batch_size, capacity=capacity,
        min_after_dequeue=min_after_dequeue)
    return example_batch, label_batch
我很感激任何澄清,因爲這是我第一次使用TensorFlow。謝謝您的幫助!
這很有道理!所以還有2個問題。問題1:如果我有100條記錄,並且我的訓練批量大小爲80,那麼我可以讓input_pipeline返回80條記錄(使用80作爲batch_size參數,對吧?),然後保留其餘20條記錄用於測試嗎?基本上,你是否知道一種方法,可以讓我跟蹤哪80條記錄被用於訓練,這樣我就可以用其餘的記錄進行測試(當然是在洗牌之後)。問題2:基本上,當我在我的代碼(位於另一個文件中)中初始化並運行Session來進行訓練、測試等時,我應該在調用input_pipeline之後再輸入數據嗎?謝謝! –