0

我有下面的代碼段。當我運行 __theano_build__() 方法時,它拋出如下錯誤。爲什麼此方法會在參數個數方面引發錯誤?

File "rnn_theano.py", line 28, in __init__ 
self.__theano_build__() 

    File "rnn_theano.py", line 45, in __theano_build__ 
non_sequences=[U, V, W1, W12, W2], 

    File "/usr/local/lib/python2.7/dist-packages/theano/scan_module/scan.py", line 745, in scan 
condition, outputs, updates =  scan_utils.get_updates_and_outputs(fn(*args)) 

TypeError: forward_prop_step() takes exactly 8 arguments (7 given) 

下面是在Theano的代碼。它基本上是一個雙隱分層迴歸神經網絡

import numpy as np 
import theano as theano 
import theano.tensor as T 
from utils import * 
import operator 

class RNNTheano:
    """Recurrent neural network with two hidden layers, built on Theano.

    Parameters are stored as Theano shared variables so that the compiled
    SGD step can update them in place.  ``__theano_build__`` compiles the
    forward pass, loss, BPTT gradients and the SGD update function.
    """

    def __init__(self, word_dim, hidden_dim=100, bptt_truncate=4):
        """
        :param word_dim: size of the vocabulary (input/output dimension).
        :param hidden_dim: size of each of the two hidden layers.
        :param bptt_truncate: number of steps to which backpropagation
            through time is truncated.
        """
        # Assign instance variables
        self.word_dim = word_dim
        self.hidden_dim = hidden_dim
        self.bptt_truncate = bptt_truncate
        # Randomly initialize the network parameters using the common
        # Uniform(-1/sqrt(fan_in), +1/sqrt(fan_in)) scheme.
        U = np.random.uniform(-np.sqrt(1./word_dim), np.sqrt(1./word_dim), (hidden_dim, word_dim))
        V = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (word_dim, hidden_dim))
        W1 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W12 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        W2 = np.random.uniform(-np.sqrt(1./hidden_dim), np.sqrt(1./hidden_dim), (hidden_dim, hidden_dim))
        # Theano: wrap the parameters in shared variables so the compiled
        # functions can read and update them.
        self.U = theano.shared(name='U', value=U.astype(theano.config.floatX))
        self.V = theano.shared(name='V', value=V.astype(theano.config.floatX))
        self.W1 = theano.shared(name='W1', value=W1.astype(theano.config.floatX))
        self.W12 = theano.shared(name='W12', value=W12.astype(theano.config.floatX))
        self.W2 = theano.shared(name='W2', value=W2.astype(theano.config.floatX))
        # We store the Theano graph here
        self.theano = {}
        self.__theano_build__()

    def forward_prop_step(self, x_t, s_t1_prev, s_t2_prev, U, V, W1, W12, W2):
        """One time step of forward propagation.

        :param x_t: index of the current input word (used to select a
            column of U, i.e. a one-hot product).
        :param s_t1_prev: previous state of the first hidden layer.
        :param s_t2_prev: previous state of the second hidden layer.
        :param U, V, W1, W12, W2: the network parameters, passed through
            scan's ``non_sequences``.
        :return: tuple ``(o_t, s_t1, s_t2)`` — output distribution and the
            two new hidden states.
        """
        s_t1 = T.tanh(U[:, x_t] + W1.dot(s_t1_prev))
        s_t2 = T.tanh(W12.dot(s_t1) + W2.dot(s_t2_prev))
        o_t = T.nnet.softmax(V.dot(s_t2))
        # BUG FIX: return a tuple, not a list.  Returning a list makes
        # theano.scan try to coerce the result into a single tensor output,
        # which no longer matches the three entries declared in
        # ``outputs_info`` and causes the argument-count TypeError.
        return o_t[0], s_t1, s_t2

    def __theano_build__(self):
        """Compile the forward pass, loss, gradients and SGD step."""
        U, V, W1, W12, W2 = self.U, self.V, self.W1, self.W12, self.W2
        x = T.ivector('x')
        y = T.ivector('y')

        # Scan over the input sequence; the first output (o) has no
        # recurrence (outputs_info=None), the two hidden states start at 0.
        [o, s1, s2], updates = theano.scan(
            self.forward_prop_step,
            sequences=x,
            outputs_info=[None,
                          dict(initial=T.zeros(self.hidden_dim)),
                          dict(initial=T.zeros(self.hidden_dim))],
            non_sequences=[U, V, W1, W12, W2],
            truncate_gradient=self.bptt_truncate,
            strict=False)

        prediction = T.argmax(o, axis=1)
        o_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Gradients (truncated BPTT is handled by scan's truncate_gradient)
        dU = T.grad(o_error, U)
        dV = T.grad(o_error, V)
        dW1 = T.grad(o_error, W1)
        dW12 = T.grad(o_error, W12)
        dW2 = T.grad(o_error, W2)

        # Assign functions
        self.forward_propagation = theano.function([x], o)
        self.predict = theano.function([x], prediction)
        self.ce_error = theano.function([x, y], o_error)
        self.bptt = theano.function([x, y], [dU, dV, dW1, dW12, dW2])

        # SGD
        learning_rate = T.scalar('learning_rate')
        # BUG FIX: the original list was missing the comma after the W1
        # update pair, so ``(self.W1, ...)`` was being *called* with the
        # W12 tuple as argument — a TypeError at graph-build time.
        self.sgd_step = theano.function(
            [x, y, learning_rate], [],
            updates=[(self.U, self.U - learning_rate * dU),
                     (self.V, self.V - learning_rate * dV),
                     (self.W1, self.W1 - learning_rate * dW1),
                     (self.W12, self.W12 - learning_rate * dW12),
                     (self.W2, self.W2 - learning_rate * dW2)])

    def calculate_total_loss(self, X, Y):
        """Sum of cross-entropy losses over all (x, y) sequence pairs."""
        return np.sum([self.ce_error(x, y) for x, y in zip(X, Y)])

    def calculate_loss(self, X, Y):
        """Average per-word loss: total loss divided by the word count."""
        num_words = np.sum([len(y) for y in Y])
        return self.calculate_total_loss(X, Y) / float(num_words)
+1

看起來 `forward_prop_step` 並沒有任何理由*需要*作爲實例方法:它不使用任何對象屬性,所有數據都通過參數傳入。也許它應該是 `@staticmethod` –

+0

我在 forward_prop_step 的聲明之前嘗試加上 @staticmethod,但它仍然拋出相同的錯誤。 –

+3

如果您將其設爲靜態方法,則需要確保刪除自身參數作爲第一個參數。 –

回答

2

嘗試改變

return [o_t[0], s_t1, s_t2] 

return o_t[0], s_t1, s_t2 

我認爲前者(返回列表)會使該方法的返回值被 Theano 強制合併成單個張量,而後者(返回元組)明確地返回三個對象,與 outputs_info 中聲明的三項一一對應。

相關問題