0
我的項目目標是檢查我的數據集的神經網絡權重圖。當我將MNIST tensorflow代碼應用於我的數據集時,爲什麼我會得到「nan」損失
我跟着MNIST example code工作正常。
MNIST數據集具有784(28 * 28)pixel_data
輸入和10 class_data
輸出。 我的數據集有72(8 * 9)pixel_data
輸入和4 class_data
輸出。 我做了代碼來處理我的數據集相同的格式作爲MNIST數據集 ,但是當我訓練時,損失繼續給出「NAN」值。您可以檢查my code and dataset in my github。
import os
import glob
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random as ran
#데이터를 8x9이미지 형태로 display
def display_digit(num):
print(y_data[num])
label = y_data[num].argmax(axis=0)
image = x_data[num].reshape([8,9])
plt.title('Example: %d Label: %d' % (num, label))
plt.imshow(image, cmap=plt.get_cmap('gray_r'))
plt.show()
#데이터를 vector형태로 dispaly
def display_mult_flat(start, stop):
images = x_data[start].reshape([1,72])
for i in range(start+1,stop):
images = np.concatenate((images, x_data[i].reshape([1,72])))
plt.imshow(images, cmap=plt.get_cmap('gray_r'))
plt.show()
#y_data을 oneshot방법으로 표현
def oneshot(n):
if n=="1":
return [1,0,0,0]
elif n=="2":
return [0,1,0,0]
elif n=="3":
return [0,0,1,0]
elif n=="4":
return [0,0,0,1]
# input, out data 반환, (MNIST에서 사용되는 형태)
def Get_data():
Glass_dir='./glass_data/'
csv_filenames = [i for i in glob.glob('./glass_data/*.{}'.format('csv'))]
y_data=[]
x_data=[]
for filename in csv_filenames:
y=oneshot(filename[13])
csv_file = pd.read_csv(filename)
df = pd.DataFrame(csv_file).T
df.columns = df.iloc[0]
df = df[1:]
df = df.ffill()
for i in range(len(df.index)):
y_data.append(y)
for row in df.iterrows():
index, data = row
x_data.append(data.tolist())
combined = list(zip(x_data, y_data))
ran.shuffle(combined)
x_data[:], y_data[:] = zip(*combined)
y_data=np.array(y_data)
x_data=np.array(x_data, dtype=np.float32)
return x_data, y_data
#각 class의 5개씩을 test로 사용
def Get_testdata():
Glass_dir='./glass_data/'
csv_filenames = [i for i in glob.glob('./glass_data/*.{}'.format('csv'))]
y_test=[]
x_test=[]
for filename in csv_filenames:
y=oneshot(filename[13])
csv_file = pd.read_csv(filename)
df = pd.DataFrame(csv_file).T
df.columns = df.iloc[0]
df = df[1:]
df = df.ffill()
for i in range(5):
y_test.append(y)
df2=df.head()
for row in df2.iterrows():
index, data = row
x_test.append(data.tolist())
y_test=np.array(y_test)
x_test=np.array(x_test, dtype=np.float32)
return x_test, y_test
# In[3]:
x_data, y_data = Get_data()
# In[4]:
x_data
# In[5]:
y_data
# In[6]:
display_digit(ran.randint(0, x_data.shape[0]))
# In[7]:
display_mult_flat(0,200)
# In[8]:
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, 72])
y_ = tf.placeholder(tf.float32, shape=[None, 4])
W = tf.Variable(tf.zeros([72,4]))
b = tf.Variable(tf.zeros([4]))
y = tf.nn.softmax(tf.matmul(x,W) + b)
print(y)
# In[9]:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
# In[10]:
x_train, y_train = Get_data()
x_test, y_test= Get_testdata()
LEARNING_RATE = 0.01
TRAIN_STEPS = 2500
# In[11]:
init = tf.global_variables_initializer()
sess.run(init)
# In[12]:
training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# In[13]:
for i in range(TRAIN_STEPS+1):
sess.run(training, feed_dict={x: x_train, y_: y_train})
if i%100 == 0:
print('Training Step:' + str(i) + ' Accuracy = ' + str(sess.run(accuracy, feed_dict={x: x_train, y_: y_train})) + ' Loss = ' + str(sess.run(cross_entropy, {x: x_train, y_: y_train})))
# In[14]:
for i in range(4):
plt.subplot(2, 5, i+1)
weight = sess.run(W)[:,i]
plt.title(i)
plt.imshow(weight.reshape([8,9]), cmap=plt.get_cmap('seismic'))
frame1 = plt.gca()
frame1.axes.get_xaxis().set_visible(False)
frame1.axes.get_yaxis().set_visible(False)
# In[15]:
plt.show()
作爲損耗設定'sess.run(cross_entropy,{X:x_train,Y_:y_train})',我會通過調查啓動'cross_entropy'函數中的哪一行代碼會引發'nan'。從那裏你可以改進你的問題。這裏複製/粘貼的大部分代碼與問題無關。縮小可能性並編輯您的問題。 – Eskapp