I used sigmoid for all layers and the output and got a final error of 0.00012, but when I use ReLU, which should in theory be better, I get close to the worst possible result. Can anyone explain why this happens? I am using the fully working code from a website, a very simple 2-layer implementation (given below). Why does ReLU perform worse than sigmoid here?
import numpy as np

#test
#avg(nonlin(np.dot(nonlin(np.dot([0,0,1],syn0)),syn1)))
#returns list >> [predicted_output, confidence]

def nonlin(x, deriv=False):  # Sigmoid
    # With deriv=True, x is expected to already be a sigmoid *output*,
    # so x*(1-x) equals the derivative s'(z) = s(z)*(1-s(z)).
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))
def relu(x, deriv=False):  # ReLU
    if deriv:
        # Derivative mask: 1 where the activation is positive, else 0.
        # Returns a new array rather than overwriting x in place.
        return (x > 0).astype(float)
    return np.maximum(x, 0)
X = np.array([[0,0,1],
              [0,0,0],
              [0,1,1],
              [1,0,1],
              [1,0,0],
              [0,1,0]])

y = np.array([[0],[1],[0],[0],[1],[1]])
np.random.seed(1)
# randomly initialize our weights with mean 0
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1
def avg(i):
    # Map a sigmoid output to [predicted_label, confidence].
    if i > 0.5:
        confidence = i
        return [1, float(confidence)]
    else:
        confidence = 1.0 - float(i)
        return [0, confidence]
for j in xrange(500000):
    # Feed forward through layers 0, 1, and 2
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))
    l2 = nonlin(np.dot(l1, syn1))
    #print 'this is', l2, '\n'

    # how much did we miss the target value?
    l2_error = y - l2
    #print l2_error, '\n'

    if (j % 100000) == 0:
        print "Error:" + str(np.mean(np.abs(l2_error)))
        print syn1

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l2_delta = l2_error*nonlin(l2, deriv=True)

    # how much did each l1 value contribute to the l2 error (according to the weights)?
    l1_error = l2_delta.dot(syn1.T)

    # in what direction is the target l1?
    # were we really sure? if so, don't change too much.
    l1_delta = l1_error * nonlin(l1, deriv=True)

    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)

print "Final Error:" + str(np.mean(np.abs(l2_error)))
def p(l):
    return avg(nonlin(np.dot(nonlin(np.dot(l, syn0)), syn1)))
So p(x) is the prediction function after training, where x is a 1×3 matrix of input values.
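For example, a call could look like this (a hypothetical input; the exact confidence depends on the trained weights):

    print p([1,0,0])  # -> something like [1, 0.99...], i.e. label 1 with high confidence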
What results are you getting?
If you want a more detailed reply, please post the code that uses ReLU.
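Since the exact ReLU run is not posted, here is a minimal sketch of what the swap described in the question presumably looks like (an assumption: relu replaces nonlin only in the hidden layer, while the output layer keeps the sigmoid so l2 stays in (0, 1) for the 0/1 targets):

    # Hypothetical ReLU variant of the training loop above.
    for j in xrange(500000):
        l0 = X
        l1 = relu(np.dot(l0, syn0))    # hidden layer now uses ReLU
        l2 = nonlin(np.dot(l1, syn1))  # sigmoid output for 0/1 targets

        l2_error = y - l2
        l2_delta = l2_error * nonlin(l2, deriv=True)

        l1_error = l2_delta.dot(syn1.T)
        l1_delta = l1_error * relu(l1, deriv=True)  # ReLU mask: 1 where l1 > 0

        syn1 += l1.T.dot(l2_delta)
        syn0 += l0.T.dot(l1_delta)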