3
這裏是批梯度下降算法的R(theoretical details here)的實現:爲什麼我的邏輯迴歸實現如此緩慢?
logreg = function(y, x) {
x = as.matrix(x)
x = apply(x, 2, scale)
x = cbind(1, x)
m = nrow(x)
n = ncol(x)
alpha = 2/m
# b = matrix(rnorm(n))
# b = matrix(summary(lm(y~x))$coef[, 1])
b = matrix(rep(0, n))
h = 1/(1 + exp(-x %*% b))
J = -(t(y) %*% log(h) + t(1-y) %*% log(1 -h))
derivJ = t(x) %*% (h-y)
niter = 0
while(1) {
niter = niter + 1
newb = b - alpha * derivJ
h = 1/(1 + exp(-x %*% newb))
newJ = -(t(y) %*% log(h) + t(0-y) %*% log(1 -h))
while((newJ - J) >= 0) {
print("inner while...")
# step adjust
alpha = alpha/1.15
newb = b - alpha * derivJ
h = 1/(1 + exp(-x %*% newb))
newJ = -(t(y) %*% log(h) + t(1-y) %*% log(1 -h))
}
if(max(abs(b - newb)) < 0.001) {
break
}
b = newb
J = newJ
derivJ = t(x) %*% (h-y)
}
b
v = exp(-x %*% b)
h = 1/(1 + v)
w = h^2 * v
# # hessian matrix of cost function
hess = t(x) %*% diag(as.vector(w)) %*% x
seMat = sqrt(diag(solve(hess)))
zscore = b/seMat
cbind(b, zscore)
}
nr = 5000
nc = 3
# set.seed(17)
x = matrix(rnorm(nr*nc, 0, 999), nr)
x = apply(x, 2, scale)
# y = matrix(sample(0:1, nr, repl=T), nr)
h = 1/(1 + exp(-x %*% rnorm(nc)))
y = round(h)
y[1:round(nr/2)] = sample(0:1, round(nr/2), repl=T)
testglm = function() {
for(i in 1:20) {
res = summary(glm(y~x, family=binomial))$coef
}
print(res)
}
testlogreg = function() {
for(i in 1:20) {
res = logreg(y, x)
}
print(res)
}
print(system.time(testlogreg()))
print(system.time(testglm()))
算法給了我正確的結果,但它是慢十倍。
print(system.time(testlogreg()))
[,1] [,2]
[1,] -0.0358877 -1.16332
[2,] 0.1904964 6.09873
[3,] -0.1428953 -4.62629
[4,] -0.9151143 -25.33478
user system elapsed
4.013 1.037 5.062
#////////////////////////////////////////////////////
print(system.time(testglm()))
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0360447 0.0308617 -1.16794 2.42829e-01
x1 0.1912254 0.0312500 6.11922 9.40373e-10
x2 -0.1432585 0.0309001 -4.63618 3.54907e-06
x3 -0.9178177 0.0361598 -25.38226 3.95964e-142
user system elapsed
0.482 0.040 0.522
但是,如果我沒有計算標準誤差和Z值,那麼它比GLM快一點:
#////////////////////////////////////////////////////
print(system.time(testlogreg()))
[,1]
[1,] -0.0396199
[2,] 0.2281502
[3,] -0.3941912
[4,] 0.8456839
user system elapsed
0.404 0.001 0.405
#////////////////////////////////////////////////////
print(system.time(testglm()))
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.0397529 0.0309169 -1.28580 1.98514e-01
x1 0.2289063 0.0312998 7.31336 2.60551e-13
x2 -0.3956140 0.0319847 -12.36884 3.85328e-35
x3 0.8483669 0.0353760 23.98144 4.34358e-127
user system elapsed
0.474 0.000 0.475
因此很明顯,SE和z值的計算需要大量的時間,但glm如何做呢?我如何改進我的實施?