無法處理我的CNN中的維度錯誤

我是tensorflow的新手，我試圖偏離mnist數據集並嘗試一些與衆不同的東西。我正在處理情感數據集CK+，似乎無法修改我的代碼以成功運行此數據集。對於那些想複製我的作品的人，我找到了經過處理的圖像和標籤here。您可以在ck +縮放文件夾中找到圖像，並在處理的文件夾中找到標籤。無法處理我的CNN中的維度錯誤

我們正在處理大小爲[256 x 256]的265幅圖像。

所以，這裏是我的代碼：

import os 
import tensorflow as tf 
import sys 
import urllib 
import numpy as np 
from PIL import Image 
import glob 
train = [] 
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif 
    img=np.asarray(Image.open(filename)) 
    img_flat = img.reshape(img.size) 
    train.append(img_flat) 

### MNIST EMBEDDINGS ### 
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2] 

if sys.version_info[0] >= 3: 
    from urllib.request import urlretrieve 
else: 
    from urllib import urlretrieve 

LOGDIR = 'log3/' 
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/' 

### MNIST EMBEDDINGS ### 
mnist = tf.contrib.learn.datasets.mnist.read_data_sets(train_dir=LOGDIR + 'data', one_hot=True) 
### Get a sprite and labels file for the embedding projector ### 
urlretrieve(GITHUB_URL + 'labels_1024.tsv', LOGDIR + 'labels_1024.tsv') 
urlretrieve(GITHUB_URL + 'sprite_1024.png', LOGDIR + 'sprite_1024.png') 

# Add convolution layer 
def conv_layer(input, size_in, size_out, name="conv"): 
    with tf.name_scope(name): 
    #w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W") 
    #b = tf.Variable(tf.zeros([size_out]), name="B") 
    w = tf.Variable(tf.truncated_normal([4, 4, size_in, size_out], stddev=0.1), name="W") 
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B") 
    conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME") 
    act = tf.nn.relu(conv + b) 
    tf.summary.histogram("weights", w) 
    tf.summary.histogram("biases", b) 
    tf.summary.histogram("activations", act) 
    return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") 


# Add fully connected layer 
def fc_layer(input, size_in, size_out, name="fc"): 
    with tf.name_scope(name): 
    w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W") 
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B") 
    act = tf.nn.relu(tf.matmul(input, w) + b) 
    tf.summary.histogram("weights", w) 
    tf.summary.histogram("biases", b) 
    tf.summary.histogram("activations", act) 
    return act 


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam): 

    tf.reset_default_graph() 
    tf.set_random_seed(1) 
    sess = tf.Session() 

    # Setup placeholders, and reshape the data 
    x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x") 
    x_image = tf.reshape(x, [-1, 256, 256, 1]) 
    tf.summary.image('input', x_image, 3) 
    y = tf.placeholder(tf.float32, shape=[None, ], name="labels") 

    if use_two_conv: 
    conv1 = conv_layer(x_image, 1, 32, "conv1") 
    conv_out = conv_layer(conv1, 32, 64, "conv2") 
    else: 
    conv1 = conv_layer(x_image, 1, 64, "conv") 
    conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") 

    flattened = tf.reshape(conv_out, [-1, 16 * 16 * 16]) 


    if use_two_fc: 
    fc1 = fc_layer(flattened, 16 * 16 * 16, 40, "fc1") 
    embedding_input = fc1 
    embedding_size = 40 
    logits = fc_layer(fc1, 40, 1, "fc2") 
    else: 
    embedding_input = flattened 
    embedding_size = 7*7*64 
    logits = fc_layer(flattened, 7*7*64, 10, "fc") 

    with tf.name_scope("xent"): 
    xent = tf.reduce_mean(
     tf.nn.softmax_cross_entropy_with_logits(
      logits=logits, labels=y), name="xent") 
    tf.summary.scalar("xent", xent) 

    with tf.name_scope("train"): 
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent) 

    with tf.name_scope("accuracy"): 
    correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1)) 
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 
    tf.summary.scalar("accuracy", accuracy) 

    summ = tf.summary.merge_all() 


    embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding") 
    assignment = embedding.assign(embedding_input) 
    saver = tf.train.Saver() 

    sess.run(tf.global_variables_initializer()) 
    writer = tf.summary.FileWriter(LOGDIR + hparam) 
    writer.add_graph(sess.graph) 

    config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig() 
    embedding_config = config.embeddings.add() 
    embedding_config.tensor_name = embedding.name 
    embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png' 
    embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv' 
    # Specify the width and height of a single thumbnail. 
    embedding_config.sprite.single_image_dim.extend([256, 256]) 
    tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config) 

    for i in range(300): 
    if i % 5 == 0: 
     [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train, y: ckp_labels}) 
     writer.add_summary(s, i) 
     print ("train accuracy:", train_accuracy) 
    sess.run(train_step, feed_dict={x: train, y: ckp_labels}) 

def make_hparam_string(learning_rate, use_two_fc, use_two_conv): 
    conv_param = "conv2" if use_two_conv else "conv1" 
    fc_param = "fc2" if use_two_fc else "fc1" 
    return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param) 

def main(): 
    # You can try adding some more learning rates 
    #for learning_rate in [1E-3, 1E-4, 1E-5]: 
    for learning_rate in [1E-4]: 

    # Include "False" as a value to try different model architectures 
    #for use_two_fc in [True, False]: 
    for use_two_fc in [True]: 
     #for use_two_conv in [True, False]: 
     for use_two_conv in [True]: 
     # Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2") 
     hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv) 
     print('Starting run for %s' % hparam) 
     sys.stdout.flush() # this forces print-ed lines to show up. 

     # Actually run with the new settings 
     mnist_model(learning_rate, use_two_fc, use_two_conv, hparam) 


if __name__ == '__main__': 
    main()

，這裏是我收到的錯誤：

InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[16960,1] labels_size=[1,265] 
    [[Node: xent/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](xent/Reshape, xent/Reshape_1)]]

什麼是真正令人困惑我的是爲什麼我的logits形狀[16960,1 ]。任何幫助將非常感激。

來源

2017-05-04 madsthaks

首先，Y應該是形狀（batch_size時）的：（你的情況是這樣（265）;也許它也可以與（265工作，1））

y = tf.placeholder(tf.float32, shape=[None], name="labels")

而且的Y的一個不被-hot編碼，你必須使用tf.nn.sparse_softmax_cross_entropy_with_logits而不是softmax_cross_entropy_with_logits。

然後：在你的端第二CONV層（和最大poolings），圖像大小是（256×256）/ 2/2 =（64,64）。深度爲64時，每個樣本得到64 * 64 * 64個值。但你做flattened = tf.reshape(conv_out, [-1, 16 * 16 * 16])，它給你一個形狀張量[265*2^6, 16*16*16]（265 * 2^6 = 16960，這是它來自哪裏）。將其替換爲flattened = tf.reshape(conv_out, [-1, 64*64*64])。

更遠，logits = fc_layer(fc1, 40, 1, "fc2")也是一個錯誤，你應該有logits = fc_layer(fc1, 40, num_classes, "fc2")，你似乎有num_classes = 8。

這些變化應該給你的形狀logits（265，num_classes），這是你想要的tf.nn.sparse_softmax_cross_entropy_with_logits。

您還有其他變化，使對地方use_two_fc還是use_two_conv都是假的情況下，我會告訴你的數字出來。在每一步中，你應該更仔細地處理所有張量的形狀，必要時打印它們以檢查它們是否真的是你想要的。也許可以使用更多的變量，如num_classes_ batch_size等，以確保事物是連貫的，並且它會更具可讀性。

來源

2017-05-04 12:55:20 gdelab

嘿，你的評論幫助我解決了我最初的問題，並且我專注於尋找關於如何爲CNN設置我的維度的更多信息。現在，我在後來將conv中的過濾器更改爲（17,17），並將其平展爲'flattened = tf.reshape（conv_out，[-1,55 * 55 * 64]）'。我的數學檢查出來了，但是我得到了這個錯誤：'InvalidArgumentError（請參閱上面的回溯）：輸入重塑是一個有6553600個值的張量，但所需的形狀需要193600的倍數。但它最終會使用'[-1,64 * 64 * 64]'工作。無法理解發生了什麼 – madsthaks

對不起，我不明白你改變了什麼。也許它會更清晰，如果你吃你的問題或開始一個新的，用你所有的當前代碼 – gdelab

無法處理我的CNN中的維度錯誤

回答

相關問題