2017-08-06 44 views
0

我已經承擔了一個項目,我必須使用卷積網絡,將輸出一個圖像,而不是logit類預測。爲此我適配了我從https://github.com/aymericdamien/TensorFlow-Examples下載的CNN代碼Tensorflow卷積網絡,返回一個圖像(沒有logits)

我的輸入數據是從二進制文件中讀取的64x64圖像。二進制文件由兩個64x64圖像的記錄組成。我需要最小化成本函數,這是第二幅圖像和網絡64x64輸出的差異。

這是我寫讀取輸入數據的模塊:

import tensorflow as tf 

# various initialization variables 
BATCH_SIZE = 128 
N_FEATURES = 9 

# This function accepts a tensor of size [batch_size, 2 ,record_size] 
# and segments in into two tensors of size [batch_size, record] along the second dimension 
# IMPORTANT: to be executed within an active session 
def segment_batch(batch_p, batch_size, n_input): 
    batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor 
    batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input])   # GT data tensor 
    optical = tf.reshape([batch_xs], [batch_size, n_input]) 
    gt = tf.reshape([batch_ys], [batch_size, n_input]) 

    return [optical, gt] 



def batch_generator(filenames, record_size, batch_size): 
    """ filenames is the list of files you want to read from. 
    record_bytes: The size of a record in bytes 
    batch_size: The size a data batch (examples/batch) 
    """ 

    filename_queue = tf.train.string_input_producer(filenames) 
    reader = tf.FixedLengthRecordReader(record_bytes=2*record_size) #  record size is double the value given (optical + ground truth images) 
    _, value = reader.read(filename_queue) 


    # read in the data (UINT8) 
    content = tf.decode_raw(value, out_type=tf.uint8) 



    # The bytes read represent the image, which we reshape 
    # from [depth * height * width] to [depth, height, width]. 
    # read optical data slice 
    depth_major = tf.reshape(
    tf.strided_slice(content, [0], 
        [record_size]), 
    [1, 64, 64]) 

    # read GT (ground truth) data slice 
    depth_major1 = tf.reshape(
    tf.strided_slice(content, [record_size], 
        [2*record_size]), 
    [1, 64, 64]) 

    # Optical data 
    # Convert from [depth, height, width] to [height, width, depth]. 
    uint8image = tf.transpose(depth_major, [1, 2, 0]) 
    uint8image = tf.reshape(uint8image, [record_size]) # reshape into a single-dimensional vector 
    uint8image = tf.cast(uint8image, tf.float32) # cast into a float32 
    uint8image = uint8image/255 # normalize 

    # Ground Truth data 
    # Convert from [depth, height, width] to [height, width, depth]. 
    gt_image = tf.transpose(depth_major1, [1, 2, 0]) 
    gt_image = tf.reshape(gt_image, [record_size]) # reshape into a single-dimensional vector 
    gt_image = tf.cast(gt_image, tf.float32) # cast into a float32 
    gt_image = gt_image/255 # normalize 

    # stack them into a single features tensor 
    features = tf.stack([uint8image, gt_image]) 

    # minimum number elements in the queue after a dequeue, used to ensure 
    # that the samples are sufficiently mixed 
    # I think 10 times the BATCH_SIZE is sufficient 
    min_after_dequeue = 10 * batch_size 

    # the maximum number of elements in the queue 
    capacity = 20 * batch_size 

    # shuffle the data to generate BATCH_SIZE sample pairs 
    data_batch = tf.train.shuffle_batch([features], batch_size=batch_size, 
            capacity=capacity, min_after_dequeue=min_after_dequeue) 

    return data_batch 

這是我實現的主要代碼:

from __future__ import print_function 

# Various initialization variables 
DATA_PATH_OPTICAL_TRAIN = 'data/building_ground_truth_for_training.bin' 
DATA_PATH_EVAL = 'data/building_ground_truth_for_eval.bin' 

import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
import time 

# custom imports 
import data_reader2 


# Parameters 
learning_rate = 0.001 
training_iters = 200000 
batch_size = 128 
epochs = 10 
display_step = 10 
rows = 64 
cols = 64 

# Network Parameters 
n_input = 4096 # optical image data (img shape: 64*64) 
n_classes = 4096 # output is an image of same resolution as initial image 
dropout = 0.75 # Dropout, probability to keep units 

# input data parameters 
record_size = 64**2 
total_bytes_of_optical_binary_file = 893329408 # total size of binary file containing training data ([64z64 optical] [64x64 GT]) 

# create the data batches (queue) 
# Accepts two parameters. The tensor containing the binary files and the size of a record 
data_batch = data_reader2.batch_generator([DATA_PATH_OPTICAL_TRAIN],record_size, batch_size) # train set 
data_batch_eval = data_reader2.batch_generator([DATA_PATH_EVAL],record_size, batch_size) # train set 

############################################################## 
######################### FUNCTIONS ########################## 
############################################################## 

# extract optical array from list 
# A helper function. Data returned from segment_batch is a list which contains two arrays. 
# The first array contains the optical data while the second contains the ground truth data 
def extract_optical_from_list(full_batch): 
    optical = full_batch[0] # extract array from list 
    return optical 

# extract ground truth array from list 
# A helper function. Data returned from segment_batch is a list which contains two arrays. 
# The first array contains the optical data while the second contains the ground truth data 
def extract_gt_from_list(full_batch): 
    gt = full_batch[1] # extract array from list 
    return gt 

# This function accepts a tensor of size [batch_size, 2 ,record_size] 
# and segments in into two tensors of size [batch_size, record] along the second dimension 
# IMPORTANT: to be executed within an active session 
def segment_batch(batch_p): 
    batch_xs = tf.slice(batch_p, [0,0,0], [batch_size,1,n_input]) # optical data tensor 
    batch_ys = tf.slice(batch_p, [0,1,0], [batch_size,1,n_input])   # GT data tensor 
    optical = tf.reshape([batch_xs], [batch_size, n_input]) 
    gt = tf.reshape([batch_ys], [batch_size, n_input]) 

    return [optical, gt] 

# Create some wrappers for simplicity 
def conv2d(x, W, b, strides=1): 
# Conv2D wrapper, with bias and relu activation 
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') 
    x = tf.nn.bias_add(x, b) 
    return tf.nn.relu(x) 


def maxpool2d(x, k=2): 
    # MaxPool2D wrapper 
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 
         padding='SAME') 


# Create model 
def conv_net(x, weights, biases, dropout): 
    # Reshape input picture into 64x64 subimages [rows, rows, cols, channels] 
    x1 = tf.reshape(x, shape=[-1, rows, cols, 1]) # this is the 4-dimensional that tf.conv2D expects as Input 

    # Convolution Layer 
    conv1 = conv2d(x1, weights['wc1'], biases['bc1']) 
    # Max Pooling (down-sampling) 
    conv1 = maxpool2d(conv1, k=2) 

    # Convolution Layer 
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) 
    # Max Pooling (down-sampling) 
    conv2 = maxpool2d(conv2, k=2) 

    # Fully connected layer 
    # Reshape conv2 output to fit fully connected layer input 
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]]) 
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) 
    fc1 = tf.nn.relu(fc1) 
    # Apply Dropout 
    #fc1 = tf.nn.dropout(fc1, dropout) 

    # Output image (edge), prediction 
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) 

    # Add print operation 
    out = tf.Print(out, [out], message="This is out: ") 

    return [out, x] 

# Store layers weight & bias 
weights = { 
    # 5x5 conv, 1 input, 32 outputs 
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])), 
    # 5x5 conv, 32 inputs, 64 outputs 
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 
    # fully connected, 7*7*64 inputs, 1024 outputs 
    'wd1': tf.Variable(tf.random_normal([16*16*64, 1024])), 
    # 1024 inputs, 10 outputs (class prediction) 
    'out': tf.Variable(tf.random_normal([1024, n_classes])) 
} 

biases = { 
    'bc1': tf.Variable(tf.random_normal([32])), 
    'bc2': tf.Variable(tf.random_normal([64])), 
    'bd1': tf.Variable(tf.random_normal([1024])), 
    'out': tf.Variable(tf.random_normal([n_classes])) 
} 


#################################################################### 
##################### PLACEHOLDERS ################################# 
#################################################################### 
# tf Graph input (only pictures) 
X = tf.placeholder_with_default(extract_optical_from_list(segment_batch(data_batch)), [batch_size, n_input]) 
#################################################################### 
##################### END OF PLACEHOLDERS ########################## 
#################################################################### 

# tf Graph input 
keep_prob = tf.Variable(dropout) #dropout (keep probability) 

# Construct model 
pred = conv_net(extract_optical_from_list(X), weights, biases, keep_prob) # x[0] is the optical data 
y_true = extract_gt_from_list(extract_gt_from_list(X)) # y_true is the ground truth data 

# Define loss and optimizer 
cost = tf.reduce_mean(tf.pow(y_true - pred[0], 2)) 
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost) 


# Initializing the variables 
init = tf.global_variables_initializer() 

# Launch the graph 
with tf.Session() as sess: 
    sess.run(init) 
    step = 1 
    # Keep training until reach max iterations 
    while step * batch_size < training_iters: 
    print("Optimizing") 
    sess.run(optimizer) 
    print("Iter " + str(step*batch_size)) 
    step += 1 
print("Optimization Finished!") 

很多與形狀調整後我設法解決了語法錯誤的張量。不幸的是,它剛剛開始執行Graph的優化部分。由於我沒有辦法調試這個(在使用Tensorflow調試器時發現非常稀有的信息),我真的很遺憾什麼問題出現了!如果對Tensorflow有更多經驗的人可以指出這段代碼有什麼問題,它會對我有很大的幫助。

提前致謝

+0

我的意思是「它只是掛起」?什麼都沒發生?你等多久了?隊列必須首先初始化並填充_capacity_圖像。爲了「調試」,您可以嘗試將容量降至較低的數字,以檢查這是否確實是錯誤。 – aseipel

回答

0

您需要啓動隊列運行程序以從隊列中獲取用於優化的數據。

.... 
coord = tf.train.Coordinator() 
with tf.Session() as sess: 
    sess.run(init) 
    tf.train.start_queue_runners(sess=sess, coord=coord) 
    .... 
# also use tf.nn.sparse_softmax_cross_entropy_with_logits for cost 
+0

嘖嘖,謝謝。這真是我很愚蠢! – divined

+0

太棒了!你可以接受答案。 –