2017-10-29 121 views
0

問題:用 TensorFlow 預測正弦函數的結果與 Keras 不同。

下面是用 Keras 編寫的代碼,用於對正弦函數進行迴歸,它運行得很好。

import numpy as np 
from keras.layers import Dense, Activation 
from keras.models import Sequential 
import matplotlib.pyplot as plt 
import math 
import time 

# Fit sin(x) with a small fully-connected Keras network and plot the fit
# after every 1000 epochs of full-batch SGD.

# Training data: two full periods of sin(x), sampled every 0.1.
x = np.arange(0, math.pi*2*2, 0.1)
y = np.sin(x)

# 1 input -> 10 units (tanh) -> 3 units (tanh) -> 1 linear output.
model = Sequential([
    Dense(10, input_shape=(1,)),
    Activation('tanh'),
    Dense(3),
    Activation('tanh'),
    Dense(1),
])

model.compile(loss='mean_squared_error', optimizer='SGD', metrics=['mean_squared_error'])

# time.clock() was removed in Python 3.8; time.time() exists on Python 2 and 3.
t1 = time.time()
for i in range(40):
    # batch_size=len(x) makes every epoch a single full-batch gradient step.
    model.fit(x, y, epochs=1000, batch_size=len(x), verbose=0)

    # The network ends in Dense(1), so predict() yields one column per sample.
    # Flatten it to match y's 1-D shape: subtracting a 1-D array from a column
    # would broadcast to an N x N matrix and the mean would not be the MSE.
    predictions = model.predict(x).ravel()
    mse = np.mean(np.square(predictions - y))
    elapsed = time.time() - t1
    # Function-call form of print works on both Python 2 and Python 3.
    print("%s   %s  t:  %s" % (i, mse, elapsed))

    # Clear the current axes before redrawing; this replaces the
    # plt.hold(False)/plt.hold(True) pair, which newer Matplotlib lacks.
    plt.cla()
    plt.plot(x, y, 'b', x, predictions, 'r--')
    plt.ylabel('Y/Predicted Value')
    plt.xlabel('X Value')
    # Pass the title as one formatted string instead of a list of fragments.
    plt.title("%d Loss: %s t: %s" % (i, mse, elapsed))
    plt.pause(0.001)
plt.savefig("fig2.png")
plt.show()

我嘗試使用較底層的 API 編寫相同的代碼,以瞭解神經網絡是如何工作的。下面是我用 TensorFlow 編寫的正弦函數迴歸代碼:

import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
import math 

# Fit sin(x) with a hand-built 1 -> 10 -> 3 -> 1 tanh network in TensorFlow
# (graph mode) and plot the fit every 1000 gradient steps.
# The model, initial values, loss and optimiser are kept exactly as posted.

# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])

# training data: two full periods sampled every 0.1, as a single column
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)

# Model parameters
# NOTE(review): every weight starts at 0.3 and every bias at -0.3, so all
# units within a layer start out identical; the accompanying text reports
# that this version does not fit the sine.
W1 = tf.Variable(tf.ones([1,10])*.3, dtype=tf.float32)
b1 = tf.Variable(tf.ones([10])*(-.3), dtype=tf.float32)
W2 = tf.Variable(tf.ones([10,3])*.3, dtype=tf.float32)
b2 = tf.Variable(tf.ones([3])*(-.3), dtype=tf.float32)
W3 = tf.Variable(tf.ones([3,1])*.3, dtype=tf.float32)
b3 = tf.Variable(tf.ones([1])*(-.3), dtype=tf.float32)

# x has one column and W1 one row, so the element-wise product broadcasts
# to one column per hidden unit.
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
# W3 has a single column, so the sum over axis 1 adds up one value per row.
linear_model = tf.reduce_sum(tf.matmul(layer2, W3), 1, keep_dims=True) + b3

# loss
# Sum (not mean) of the squared errors over all samples.
loss = tf.reduce_sum(tf.square(linear_model - y))
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # assign the initial values defined above

fig, ax = plt.subplots()

# The targets depend only on x_train, so evaluate them once instead of on
# every iteration.
y_train = sess.run(y_train_tf, {x: x_train})
feed = {x: x_train, y: y_train}

for i in range(40000):
    # One gradient step; the prediction is fetched in the same run call.
    f_predict, _ = sess.run([linear_model, train], feed_dict=feed)
    if i % 1000 != 999:
        continue

    # Diagnostics are only reported every 1000th step, so fetch them only then.
    (curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2,
     curr_W3, curr_b3, curr_loss) = sess.run(
        [layer1, layer2, W1, b1, W2, b2, W3, b3, loss], feed)

    # Function-call form of print works on both Python 2 and Python 3.
    print("step  %s" % i)
    print("W1: %s b1: %s" % (curr_W1, curr_b1))
    print("W2: %s b2: %s" % (curr_W2, curr_b2))
    print("W3: %s b3: %s" % (curr_W3, curr_b3))
    print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
    print("linear_model: %s loss: %s" % (f_predict, curr_loss))
    print(" ")

    # Clear the axes before redrawing; this replaces the
    # plt.hold(False)/plt.hold(True) pair, which newer Matplotlib lacks.
    ax.clear()
    ax.plot(x_plot, y_train[:])
    ax.plot(x_plot, f_predict, 'o-')
    ax.set(xlabel='X Value', ylabel='Y/Predicted Value',
           title="%d Loss: %s" % (i, curr_loss))
    plt.pause(0.001)

sess.close()

fig.savefig("fig1.png")
plt.show()
不過,它不能正常工作。

我不明白差異在哪裏。Keras 代碼的學習率默認爲 0.01,優化器是一樣的,網絡也是一樣的。我不知道我的錯誤在哪裏。

回答

1

這是答案!我忘了爲權重選擇合適的初始值!應使用隨機初始化,例如 tf.random_normal([1,10], stddev=0.03)。

import tensorflow as tf 
import numpy as np 
import matplotlib.pyplot as plt 
import math 

# Fit sin(x) with a hand-built 1 -> 10 -> 3 -> 1 tanh network in TensorFlow
# (graph mode), log the loss for TensorBoard, and plot the fit every 1000
# gradient steps.

# Model input and output
x = tf.placeholder(tf.float32, [None, 1])

# training data: two full periods sampled every 0.1, as a single column
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
# Targets are computed inside the graph from the same placeholder.
y_train_tf = tf.sin(x)

# Model parameters: small random normal initial values (stddev 0.03).
W1 = tf.Variable(tf.random_normal([1,10], stddev=0.03), dtype=tf.float32, name='W1')
b1 = tf.Variable(tf.random_normal([10], stddev=0.03), dtype=tf.float32, name='b1')
W2 = tf.Variable(tf.random_normal([10,3], stddev=0.03), dtype=tf.float32, name='W2')
b2 = tf.Variable(tf.random_normal([3], stddev=0.03), dtype=tf.float32, name='b2')
W3 = tf.Variable(tf.random_normal([3,1], stddev=0.03), dtype=tf.float32, name='W3')
b3 = tf.Variable(tf.random_normal([1], stddev=0.03), dtype=tf.float32, name='b3')

# x has one column and W1 one row, so the element-wise product broadcasts
# to one column per hidden unit.
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
# W3 has a single column, so the sum over axis 1 adds up one value per row.
linear_model = tf.reduce_sum(tf.matmul(layer2, W3) + b3, 1, keep_dims=True)

# loss: mean of the squared errors over all samples
loss = tf.losses.mean_squared_error(y_train_tf,linear_model)

tf.summary.scalar('loss', loss)
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
# Merge all the summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train_tensorboard',sess.graph)

sess.run(init)  # assign the random initial values defined above

fig, ax = plt.subplots()

feed = {x: x_train}

for i in range(40000):
    # One gradient step; summary and prediction come from the same run call.
    summary, f_predict, _ = sess.run([merged, linear_model, train], feed_dict=feed)
    train_writer.add_summary(summary, i)
    if i % 1000 != 999:
        continue

    # Diagnostics are only reported every 1000th step, so fetch them only then.
    (y_train, curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2,
     curr_W3, curr_b3, curr_loss) = sess.run(
        [y_train_tf, layer1, layer2, W1, b1, W2, b2, W3, b3, loss], feed)

    # Function-call form of print works on both Python 2 and Python 3.
    print("step  %s" % i)
    print("W1: %s b1: %s" % (curr_W1, curr_b1))
    print("W2: %s b2: %s" % (curr_W2, curr_b2))
    print("W3: %s b3: %s" % (curr_W3, curr_b3))
    print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
    print("linear_model: %s loss: %s" % (f_predict, curr_loss))
    print(" ")

    # Clear the axes before redrawing; this replaces the
    # plt.hold(False)/plt.hold(True) pair, which newer Matplotlib lacks.
    ax.clear()
    ax.plot(x_plot, y_train[:])
    ax.plot(x_plot, f_predict, 'g--')
    ax.set(xlabel='X Value', ylabel='Y/Predicted Value',
           title="%d Loss: %s" % (i, curr_loss))
    plt.pause(0.001)

# Flush buffered summary events to disk and release the session.
train_writer.close()
sess.close()

fig.savefig("fig1.png")
plt.show()