我想編寫一個類似於sklearn.linear_model.LinearRegression
的LinearRegression模型。首先,我訓練使用sklearn.linear_model.LinearRegression
標準線性迴歸模型:爲什麼我的線性迴歸的批梯度下降不收斂?
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# training data
train_x = np.array([1,2,3,4,5,6], dtype=np.float64).reshape(6,1)
train_y = np.array([1,2,3,3.25,3.5,3.8], dtype=np.float64)
# test data
predict_x = np.arange(0, 7, 0.1)
predict_x = predict_x.reshape(predict_x.size, 1)
# Simple regression
model1 = linear_model.LinearRegression()
model1.fit(train_x, train_y);
print model1.coef_, model1.intercept_
# Quadratic regression
model2 = linear_model.LinearRegression()
model2.fit(np.concatenate((train_x, train_x**2), axis=1), train_y);
print model2.coef_, model2.intercept_
# Five-order polynomial regression
model5 = linear_model.LinearRegression()
model5.fit(np.concatenate((train_x, train_x**2, train_x**3, train_x**4, train_x**5), axis=1), train_y);
print model5.coef_, model5.intercept_
# Predict
predict_y1 = model1.predict(predict_x)
predict_y2 = model2.predict(np.concatenate((predict_x, predict_x**2), axis=1))
predict_y5 = model5.predict(np.concatenate((predict_x, predict_x**2, predict_x**3, predict_x**4, predict_x**5), axis=1))
# plot
plt.figure(figsize = (10,10))
plt.scatter(train_x, train_y, color='black')
plt.plot(predict_x, predict_y1, color='blue', label='underfitting')
plt.plot(predict_x, predict_y2, color='green', label='fair')
plt.plot(predict_x, predict_y5, color='red', label='overfitting')
plt.axis([0,7,0,5])
plt.legend(loc=2)
plt.show()
然後,我取得良好的效果:
[0.53571429] 0.883333333333
[1.34821429 -0.11607143] -0.2
[-8.52333333 7.0625 -2.30833333 0.3375 -0.01833333] 4.45
之後,我執行我的模型MyLinearRegression
。首先,我選擇批量梯度下降和一個固定的迭代數來測試我的代碼是否正確。
# center data
def center_matrix(X):
assert(isinstance(X, np.ndarray))
X_offset = np.average(X, axis=0)
return X - X_offset, X_offset
class MyLinearRegression(object):
def __init__(self):
self.coef_ = None
self.intercept_ = None
self.learning_rate = None
def fit(self, X, y):
n_samples, n_features = X.shape
n_samples_, = y.shape
assert(n_samples == n_samples_)
X, X_offset = center_matrix(X)
y, y_offset = center_matrix(y)
self.coef_ = np.ones((n_features,), dtype=np.float64)
self.learning_rate = -0.0001
error = None
# using fixed iteration number
for epoch in np.arange(500000):
y_hat = X.dot(self.coef_)
error_ = y_hat-y
if error is not None and sum(error_**2) > sum(error**2): # if square error is increasing, then half learning_rate.
self.learning_rate /= 2.
continue
error = error_
coef = self.coef_ + self.learning_rate * (X.T.dot(error))
if np.isfinite(coef).all(): # if overflow happen, half learning_rate.
self.coef_ = coef
else:
self.learning_rate /= 2.
self.intercept_ = y_offset - self.coef_.dot(X_offset.T)
return self
def predict(self, X):
n_samples, n_features = X.shape
assert(n_features == self.coef_.size)
return X.dot(self.coef_) + self.intercept_
# Simple regression
my_model1 = MyLinearRegression()
my_model1.fit(train_x, train_y)
print my_model1.coef_, my_model1.intercept_
# Quadratic regression
my_model2 = MyLinearRegression()
my_model2.fit(np.concatenate((train_x, train_x**2), axis=1), train_y);
print my_model2.coef_, my_model2.intercept_
# Five-order polynomial regression
my_model5 = MyLinearRegression()
my_model5.fit(np.concatenate((train_x, train_x**2, train_x**3, train_x**4, train_x**5), axis=1), train_y);
print my_model5.coef_, my_model5.intercept_
# predict
my_predict_y1 = my_model1.predict(predict_x)
my_predict_y2 = my_model2.predict(np.concatenate((predict_x, predict_x**2), axis=1))
my_predict_y5 = my_model5.predict(np.concatenate((predict_x, predict_x**2, predict_x**3, predict_x**4, predict_x**5), axis=1))
# plot
plt.figure(figsize = (10,10))
plt.scatter(train_x, train_y, color='black')
plt.plot(predict_x, my_predict_y1, color='blue', label='underfitting')
plt.plot(predict_x, my_predict_y2, color='green', label='fair')
plt.plot(predict_x, my_predict_y5, color='red', label='overfitting')
plt.axis([0,7,0,5])
plt.legend(loc=2)
plt.show()
然後,我得到壞的結果:
[0.53571433] 0.883333191266
[1.34821275 -0.11607122] -0.199997815791
[-1.95681250e + 00 -2.20847875e + 01 -1.48602362e + 02 -9.20144807e + 02 -5.56577136e + 03] 11678151.1386
我可以對MyLinearRegression
my_model1
和my_model2
的好成績,而關閉這些上sklearn.linear_model.LinearRegression
。但是,無論我如何調整learning_rate
和迭代號碼,my_model5
都不會收斂。誰能幫忙?
你爲什麼要在每一步更新學習率是多少?它應該是固定的。 –
如果學習速度太快,GD可能會保持揮杆而不收斂。如果它太小,可能會收斂兩次。所以,我開始學習一個很大的價值,並且隨着錯誤的縮小而降低。 – expoter