Logistic regression using SciPy

I am trying to code up logistic regression in Python using the SciPy fmin_bfgs function, but am running into some problems. I wrote functions for the logistic (sigmoid) transformation and for the cost function, and those work fine (I have tested them against the optimized parameter values found by canned software, and they match). I am not so sure about my implementation of the gradient function, although it looks reasonable to me.

Here is the code:

# purpose: logistic regression 
import numpy as np 
import scipy.optimize 

# prepare the data 
data = np.loadtxt('data.csv', delimiter=',', skiprows=1) 
vY = data[:, 0] 
mX = data[:, 1:] 
intercept = np.ones(mX.shape[0]).reshape(mX.shape[0], 1) 
mX = np.concatenate((intercept, mX), axis = 1) 
iK = mX.shape[1] 
iN = mX.shape[0] 

# logistic transformation 
def logit(mX, vBeta): 
    return((1/(1.0 + np.exp(-np.dot(mX, vBeta))))) 

# test function call 
vBeta0 = np.array([-.10296645, -.0332327, -.01209484, .44626211, .92554137, .53973828, 
    1.7993371, .7148045 ]) 
logit(mX, vBeta0) 

# cost function 
def logLikelihoodLogit(vBeta, mX, vY): 
    return(-(np.sum(vY*np.log(logit(mX, vBeta)) + (1-vY)*(np.log(1-logit(mX, vBeta)))))) 
logLikelihoodLogit(vBeta0, mX, vY) # test function call 

# gradient function 
def likelihoodScore(vBeta, mX, vY): 
    return(np.dot(mX.T, 
        ((np.dot(mX, vBeta) - vY)/ 
        np.dot(mX, vBeta)).reshape(iN, 1)).reshape(iK, 1)) 

likelihoodScore(vBeta0, mX, vY).shape # test function call 

# optimize the function (without gradient) 
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit, 
            x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 
              1.8, .71]), 
            args = (mX, vY), gtol = 1e-3) 

# optimize the function (with gradient) 
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogit, 
            x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 
              1.8, .71]), fprime = likelihoodScore, 
            args = (mX, vY), gtol = 1e-3) 
  • The first optimization (without the gradient) ends with a whole lot of complaints about division by zero.

  • The second optimization (with the gradient) ends with a matrices-not-aligned error, which probably means I have the way the gradient should be returned wrong (a quick shape check is sketched after this list).
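
For reference, fmin_bfgs expects the fprime callable to return a 1-D array with the same shape as x0. A rough sketch of how the mismatch can be seen, assuming the functions and data defined in the code above:

# quick shape check (assumes mX, vY, vBeta0 and likelihoodScore are defined as above) 
vGrad = likelihoodScore(vBeta0, mX, vY) 
print(vGrad.shape)   # (8, 1): a 2-D column vector 
print(vBeta0.shape)  # (8,): the 1-D shape the optimizer actually works with 
# flattening the return value (e.g. vGrad.ravel()) removes the alignment error, 
# but the formula itself still has to match the gradient of the cost function 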

Any help with this is appreciated. In case anyone wants to try it, the data is included below.

low,age,lwt,race,smoke,ptl,ht,ui 
0,19,182,2,0,0,0,1 
0,33,155,3,0,0,0,0 
0,20,105,1,1,0,0,0 
0,21,108,1,1,0,0,1 
0,18,107,1,1,0,0,1 
0,21,124,3,0,0,0,0 
0,22,118,1,0,0,0,0 
0,17,103,3,0,0,0,0 
0,29,123,1,1,0,0,0 
0,26,113,1,1,0,0,0 
0,19,95,3,0,0,0,0 
0,19,150,3,0,0,0,0 
0,22,95,3,0,0,1,0 
0,30,107,3,0,1,0,1 
0,18,100,1,1,0,0,0 
0,18,100,1,1,0,0,0 
0,15,98,2,0,0,0,0 
0,25,118,1,1,0,0,0 
0,20,120,3,0,0,0,1 
0,28,120,1,1,0,0,0 
0,32,121,3,0,0,0,0 
0,31,100,1,0,0,0,1 
0,36,202,1,0,0,0,0 
0,28,120,3,0,0,0,0 
0,25,120,3,0,0,0,1 
0,28,167,1,0,0,0,0 
0,17,122,1,1,0,0,0 
0,29,150,1,0,0,0,0 
0,26,168,2,1,0,0,0 
0,17,113,2,0,0,0,0 
0,17,113,2,0,0,0,0 
0,24,90,1,1,1,0,0 
0,35,121,2,1,1,0,0 
0,25,155,1,0,0,0,0 
0,25,125,2,0,0,0,0 
0,29,140,1,1,0,0,0 
0,19,138,1,1,0,0,0 
0,27,124,1,1,0,0,0 
0,31,215,1,1,0,0,0 
0,33,109,1,1,0,0,0 
0,21,185,2,1,0,0,0 
0,19,189,1,0,0,0,0 
0,23,130,2,0,0,0,0 
0,21,160,1,0,0,0,0 
0,18,90,1,1,0,0,1 
0,18,90,1,1,0,0,1 
0,32,132,1,0,0,0,0 
0,19,132,3,0,0,0,0 
0,24,115,1,0,0,0,0 
0,22,85,3,1,0,0,0 
0,22,120,1,0,0,1,0 
0,23,128,3,0,0,0,0 
0,22,130,1,1,0,0,0 
0,30,95,1,1,0,0,0 
0,19,115,3,0,0,0,0 
0,16,110,3,0,0,0,0 
0,21,110,3,1,0,0,1 
0,30,153,3,0,0,0,0 
0,20,103,3,0,0,0,0 
0,17,119,3,0,0,0,0 
0,17,119,3,0,0,0,0 
0,23,119,3,0,0,0,0 
0,24,110,3,0,0,0,0 
0,28,140,1,0,0,0,0 
0,26,133,3,1,2,0,0 
0,20,169,3,0,1,0,1 
0,24,115,3,0,0,0,0 
0,28,250,3,1,0,0,0 
0,20,141,1,0,2,0,1 
0,22,158,2,0,1,0,0 
0,22,112,1,1,2,0,0 
0,31,150,3,1,0,0,0 
0,23,115,3,1,0,0,0 
0,16,112,2,0,0,0,0 
0,16,135,1,1,0,0,0 
0,18,229,2,0,0,0,0 
0,25,140,1,0,0,0,0 
0,32,134,1,1,1,0,0 
0,20,121,2,1,0,0,0 
0,23,190,1,0,0,0,0 
0,22,131,1,0,0,0,0 
0,32,170,1,0,0,0,0 
0,30,110,3,0,0,0,0 
0,20,127,3,0,0,0,0 
0,23,123,3,0,0,0,0 
0,17,120,3,1,0,0,0 
0,19,105,3,0,0,0,0 
0,23,130,1,0,0,0,0 
0,36,175,1,0,0,0,0 
0,22,125,1,0,0,0,0 
0,24,133,1,0,0,0,0 
0,21,134,3,0,0,0,0 
0,19,235,1,1,0,1,0 
0,25,95,1,1,3,0,1 
0,16,135,1,1,0,0,0 
0,29,135,1,0,0,0,0 
0,29,154,1,0,0,0,0 
0,19,147,1,1,0,0,0 
0,19,147,1,1,0,0,0 
0,30,137,1,0,0,0,0 
0,24,110,1,0,0,0,0 
0,19,184,1,1,0,1,0 
0,24,110,3,0,1,0,0 
0,23,110,1,0,0,0,0 
0,20,120,3,0,0,0,0 
0,25,241,2,0,0,1,0 
0,30,112,1,0,0,0,0 
0,22,169,1,0,0,0,0 
0,18,120,1,1,0,0,0 
0,16,170,2,0,0,0,0 
0,32,186,1,0,0,0,0 
0,18,120,3,0,0,0,0 
0,29,130,1,1,0,0,0 
0,33,117,1,0,0,0,1 
0,20,170,1,1,0,0,0 
0,28,134,3,0,0,0,0 
0,14,135,1,0,0,0,0 
0,28,130,3,0,0,0,0 
0,25,120,1,0,0,0,0 
0,16,95,3,0,0,0,0 
0,20,158,1,0,0,0,0 
0,26,160,3,0,0,0,0 
0,21,115,1,0,0,0,0 
0,22,129,1,0,0,0,0 
0,25,130,1,0,0,0,0 
0,31,120,1,0,0,0,0 
0,35,170,1,0,1,0,0 
0,19,120,1,1,0,0,0 
0,24,116,1,0,0,0,0 
0,45,123,1,0,0,0,0 
1,28,120,3,1,1,0,1 
1,29,130,1,0,0,0,1 
1,34,187,2,1,0,1,0 
1,25,105,3,0,1,1,0 
1,25,85,3,0,0,0,1 
1,27,150,3,0,0,0,0 
1,23,97,3,0,0,0,1 
1,24,128,2,0,1,0,0 
1,24,132,3,0,0,1,0 
1,21,165,1,1,0,1,0 
1,32,105,1,1,0,0,0 
1,19,91,1,1,2,0,1 
1,25,115,3,0,0,0,0 
1,16,130,3,0,0,0,0 
1,25,92,1,1,0,0,0 
1,20,150,1,1,0,0,0 
1,21,200,2,0,0,0,1 
1,24,155,1,1,1,0,0 
1,21,103,3,0,0,0,0 
1,20,125,3,0,0,0,1 
1,25,89,3,0,2,0,0 
1,19,102,1,0,0,0,0 
1,19,112,1,1,0,0,1 
1,26,117,1,1,1,0,0 
1,24,138,1,0,0,0,0 
1,17,130,3,1,1,0,1 
1,20,120,2,1,0,0,0 
1,22,130,1,1,1,0,1 
1,27,130,2,0,0,0,1 
1,20,80,3,1,0,0,1 
1,17,110,1,1,0,0,0 
1,25,105,3,0,1,0,0 
1,20,109,3,0,0,0,0 
1,18,148,3,0,0,0,0 
1,18,110,2,1,1,0,0 
1,20,121,1,1,1,0,1 
1,21,100,3,0,1,0,0 
1,26,96,3,0,0,0,0 
1,31,102,1,1,1,0,0 
1,15,110,1,0,0,0,0 
1,23,187,2,1,0,0,0 
1,20,122,2,1,0,0,0 
1,24,105,2,1,0,0,0 
1,15,115,3,0,0,0,1 
1,23,120,3,0,0,0,0 
1,30,142,1,1,1,0,0 
1,22,130,1,1,0,0,0 
1,17,120,1,1,0,0,0 
1,23,110,1,1,1,0,0 
1,17,120,2,0,0,0,0 
1,26,154,3,0,1,1,0 
1,20,106,3,0,0,0,0 
1,26,190,1,1,0,0,0 
1,14,101,3,1,1,0,0 
1,28,95,1,1,0,0,0 
1,14,100,3,0,0,0,0 
1,23,94,3,1,0,0,0 
1,17,142,2,0,0,1,0 
1,21,130,1,1,0,1,0 

Answers


Here is the answer I sent back to the SciPy list, where this question was cross-posted. Thanks go to @tiago for his answer. Basically, I reworked the likelihood function. In addition, I added a call to the check_grad function.

#===================================================== 
# purpose: logistic regression 
import numpy as np 
import scipy as sp 
import scipy.optimize 

import matplotlib as mpl 
import os 

# prepare the data 
data = np.loadtxt('data.csv', delimiter=',', skiprows=1) 
vY = data[:, 0] 
mX = data[:, 1:] 
# mX = (mX - np.mean(mX))/np.std(mX) # standardize the data; if required 

intercept = np.ones(mX.shape[0]).reshape(mX.shape[0], 1) 
mX = np.concatenate((intercept, mX), axis = 1) 
iK = mX.shape[1] 
iN = mX.shape[0] 

# logistic transformation 
def logit(mX, vBeta): 
    return((np.exp(np.dot(mX, vBeta))/(1.0 + np.exp(np.dot(mX, vBeta))))) 

# test function call 
vBeta0 = np.array([-.10296645, -.0332327, -.01209484, .44626211, .92554137, .53973828, 
    1.7993371, .7148045 ]) 
logit(mX, vBeta0) 

# cost function 
def logLikelihoodLogit(vBeta, mX, vY): 
    return(-(np.sum(vY*np.log(logit(mX, vBeta)) + (1-vY)*(np.log(1-logit(mX, vBeta)))))) 
logLikelihoodLogit(vBeta0, mX, vY) # test function call 

# different parametrization of the cost function 
def logLikelihoodLogitVerbose(vBeta, mX, vY): 
    return(-(np.sum(vY*(np.dot(mX, vBeta) - np.log((1.0 + np.exp(np.dot(mX, vBeta))))) + 
        (1-vY)*(-np.log((1.0 + np.exp(np.dot(mX, vBeta)))))))) 
logLikelihoodLogitVerbose(vBeta0, mX, vY) # test function call 

# gradient function 
def likelihoodScore(vBeta, mX, vY): 
    return(np.dot(mX.T, 
        (logit(mX, vBeta) - vY))) 
likelihoodScore(vBeta0, mX, vY).shape # test function call 
sp.optimize.check_grad(logLikelihoodLogitVerbose, likelihoodScore, 
         vBeta0, mX, vY) # check that the analytical gradient is close to 
               # numerical gradient 

# optimize the function (without gradient) 
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogitVerbose, 
            x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 
              1.8, .71]), 
            args = (mX, vY), gtol = 1e-3) 

# optimize the function (with gradient) 
optimLogit = scipy.optimize.fmin_bfgs(logLikelihoodLogitVerbose, 
            x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 
              1.8, .71]), fprime = likelihoodScore, 
            args = (mX, vY), gtol = 1e-3) 
#===================================================== 
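
The reworked cost function (logLikelihoodLogitVerbose) never calls np.log on the output of logit directly, which is what sidesteps the log(0) evaluations that tripped up the original version. As a side note (an addition, not part of the original answer), the same expression can be written with scipy.special.expit and np.logaddexp, which handle the exponential over/underflow internally. A minimal sketch, assuming mX and vY are loaded as above and with illustrative names:

# equivalent, numerically stable variants (sketch only) 
import numpy as np 
from scipy.special import expit 

def logitStable(mX, vBeta): 
    # expit(z) = 1/(1 + exp(-z)), computed without overflow 
    return expit(np.dot(mX, vBeta)) 

def logLikelihoodLogitStable(vBeta, mX, vY): 
    vXb = np.dot(mX, vBeta) 
    # -(sum_i [y_i * x_i'beta - log(1 + exp(x_i'beta))]), the same value as logLikelihoodLogitVerbose 
    return -np.sum(vY * vXb - np.logaddexp(0, vXb)) 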

Your problem is that the function you are trying to minimize, logLikelihoodLogit, returns NaN for values very close to your initial estimate. It also tries to evaluate the logarithm of negative numbers and runs into other problems. fmin_bfgs doesn't know about any of this; it will try to evaluate the function at such values and get into trouble.

I suggest using a bounded optimization instead. You can use scipy's optimize.fmin_l_bfgs_b for this. It uses a similar algorithm to fmin_bfgs, but it supports bounds in the parameter space. You call it in much the same way, just add a bounds keyword. Here is a simple example of how you would call fmin_l_bfgs_b:

from scipy.optimize import fmin_bfgs, fmin_l_bfgs_b 

# list of bounds: each item is a tuple with the (lower, upper) bounds 
bd = [(0, 1.), ...] 

test = fmin_l_bfgs_b(logLikelihoodLogit, x0=x0, args=(mX, vY), bounds=bd, 
         approx_grad=True) 

Here I am using an approximate gradient (it seemed to work fine with your data), but you can pass fprime as in your example (I didn't have time to check its correctness). You know your parameter space better than I do; just make sure you build the bounds array for all the meaningful values your parameters can take.
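
If no genuine bounds are known for a parameter, fmin_l_bfgs_b also accepts None for either side of the (lower, upper) tuple. A hedged sketch (not from the original answer; it reuses iK, mX, vY and logLikelihoodLogit from the question, and note that leaving everything unbounded does nothing about the NaN issue itself):

# (None, None) means no lower and no upper bound for that parameter 
bd = [(None, None)] * iK 
test = fmin_l_bfgs_b(logLikelihoodLogit, 
         x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 1.8, .71]), 
         args = (mX, vY), bounds = bd, approx_grad = True) 
vBetaHat, dObj, dcInfo = test  # estimates, objective value at the optimum, info dict 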


Tiago, thanks for your answer. I reworked the likelihood to avoid the kind of numerical difficulties you pointed out, and it works now (I will post it as an answer shortly). I wouldn't be very happy if I had to supply bounds for a simple logit problem. :) – tchakravarty


I was facing the same issue. When I experimented with the different algorithm implementations available in scipy.optimize.minimize, I found that Newton Conjugate Gradient proved helpful for finding the optimal logistic regression parameters for my data set. It can be called as:

Result = scipy.optimize.minimize(fun = logLikelihoodLogit, 
           x0 = np.array([-.1, -.03, -.01, .44, .92, .53, 1.8, .71]), 
           args = (mX, vY), 
           method = 'TNC', 
           jac = likelihoodScore) 
optimLogit = Result.x
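
scipy.optimize.minimize returns an OptimizeResult object, so it can be worth checking the convergence flag and message in addition to reading off Result.x. A small follow-up sketch (not part of the original answer), assuming the call above has run; note that jac (here likelihoodScore) should return a 1-D array of length iK, which the reworked gradient from the accepted answer does:

# inspect the OptimizeResult returned by scipy.optimize.minimize 
print(Result.success)  # True if the 'TNC' routine reports convergence 
print(Result.message)  # termination message from the optimizer 
print(Result.fun)      # negative log-likelihood at the solution 
print(Result.x)        # estimated coefficients (intercept first, given how mX was built) 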