2017-07-03 118 views
0

我想做一個廣義最小二乘擬合,找到通過一些(x,y)數據點的最佳擬合線。我可以通過scipy做到這一點,但我在應用重量時遇到了問題。我想從原始擬合的殘差中獲得權重,並嘗試使用權重通過最小二乘法進行重新安裝。權重應該是殘差的倒數,但由於-1 < residuals < 1這只是一個例子,我可以將殘差用作權重。 (x,y)數據點在其他地方計算,目標是找到最佳擬合線y = x/alpha +截距的alpha(1 /斜率)和截距。我的代碼如下:如何在這個scipy最小平方優化程序中應用權重?

import numpy as np 
from scipy.optimize import least_squares 

ydata = [9.7372923, 10.0587245, 10.3838510, 10.6931371, 10.9616260, 11.1833220, 11.3806770, 11.5248917, 11.7353000] 
xdata = np.array([j+5 for j in range(len(ydata))]) 

def get_weights(resid): 
    """ 
    This function calculates the weights per (x,y) by using the inverse of the squared residuals divided by the total sum of the inverse of the squared residuals. 
    This might be incorrect but should work for the sake of example. 
    """ 
    total = sum([abs(resid[i]) for i in range(len(resid))]) 
    fract = np.array([resid[i]/total for i in range(len(resid))]) 
    return fract 

def get_least_squares_fit(xs, ys): 
    """ 
    This function finds alpha (1/slope) and the y-intercept in the equation 
    of a line y = x/alpha + intercept = mx + b 
    """ 
    ## SPECIFY INITIAL GUESS OF OPTIMIZED PARAMETERS 
    params_guess = [1/3, 9] ## ps = [alpha, intercept] 
    ## DEFINE FITTING FUNCTIONS 
    fit_func = lambda ps,x : x/ps[0] + ps[1] 
    err_func = lambda ps,x,y : fit_func(ps,x) - y 
    ## GET OPTIMIZED PARAMETERS, RESIDUALS & WEIGHTS 
    res = least_squares(err_func, params_guess, args=(xs, ys)) 
    alpha, intercept, residuals = res.x[0], res.x[1], res.fun 
    weights = get_weights(residuals) 
    return alpha, intercept, residuals, weights 

alpha, intercept, residuals, weights = get_least_squares_fit(xdata, ydata) 

print(alpha, intercept) 
>> 4.03378447722 8.6198247828 

print(residuals) 
>> [ 0.12206326 0.04853721 -0.02868313 -0.09006308 -0.11064582 -0.08443567 
-0.03388451 0.06980694 0.1073048 ] 

編輯:我得到使用scipy curve_fit,其中有sigmaabsolute_sigma相同的結果。我猜這是繼續下去的最好方法。我仍然試圖弄清楚。

+0

難道是足以達到你的整體目標上的原始數據進行相對迴歸,擬合平方相對誤差最低的總和,而不是安裝到最低平方絕對誤差的總和? –

+0

這個例子就足夠了。我更關心如何應用權重而不是權重本身;絕對廣場只是我的一個猜測。 – mikey

回答

0

我相信這是正確的。這個想法是每個殘差應該乘以相應的權重。最小化的功能是這些產品的總和。我沒有使用外部模塊來進行最小二乘擬合,而是使用了良好的ol'scipy.optimize.minimize,它爲未加權的最小二乘擬合(get_gls_fit(..., weights=None, ...))和外部模塊的結果提供了相同的結果。

import numpy as np 
from scipy import optimize 
## same xdata and ydata as stated in question 

def guess_initial_parameters(xs, ys): 
    """ 
    xs   : type<list> or type<array> 
    ys   : type<list> or type<array> 
    """ 
    ## GUESS SLOPE 
    slope = (ys[-1]-ys[0])/(xs[-1]-xs[0]) 
    alpha = 1/slope 
    ## GUESS INTERCEPT 
    intercept = np.mean([ys[-1] - xs[-1]/alpha, ys[0] - xs[0]/alpha]) 
    return [alpha, intercept] 

def update_weights(residuals, power=1): 
    """ 
    residuals : type<list> or type<array> 
    power  : type<float> 
    """ 
    ## INVERT RESIDUALS 
    invs = [1/residuals[idr] for idr in range(len(residuals))] 
    ## NORMALIZE RESIDUALS 
    invs = [abs(inv)**power for inv in invs] 
    total = sum(invs) 
    return [invs[idv]/total for idv in range(len(invs))] 

def fit_func(ps, xs): 
    """ 
    ps   : [alpha, intercept] 
    xs   : type<list> or type<array> 
    """ 
    ## FIT TO EQUATION OF LINE 
    return [xs[idx]/ps[0] + ps[1] for idx in range(len(xs))] ## alpha = 1/slope 

def get_residuals(ps, xs, ys): 
    """ 
    ps   : [alpha, intercept] 
    xs   : type<list> or type<array> 
    ys   : type<list> or type<array> 
    """ 
    ## GET LINEAR FIT 
    ys_trial = fit_func(ps, xs) 
    ## GET RESIDUALS 
    residuals = [(ys[idx] - ys_trial[idx])**2 for idx in range(len(ys))] 
    return residuals 

def err_func(ps, xs, ys, wts): 
    """ 
    ps   : [alpha, intercept] 
    xs   : type<list> or type<array> 
    ys   : type<list> or type<array> 
    wts   : type<list> or type<array> 
    """ 
    ## GET RESIDUALS 
    residuals = get_residuals(ps, xs, ys) 
    ## SUM WEIGHTED RESIDUALS 
    return sum([wts[idr] * residuals[idr] for idr in range(len(residuals))]) 

def get_gls_fit(xs, ys, ps_init, weights=None, power=2, routine='Nelder-Mead'): 
    """ 
    xs   : type<list> or type<array> 
    ys   : type<list> or type<array> 
    ps_init  : [alpha, intercept] 
    weights  : None or type<list> or type<array> 
    power  : type<float> 
    routine  : 'Nelder-Mead' 
    """ 
    ## GET INITIAL PARAMETER GUESS 
    if type(ps_init) == (list or np.array): 
     pass 
    elif ps_init == 'estimate': 
     ps_init = guess_initial_parameters(xs, ys) 
    else: 
     raise ValueError("ps_init = type<list> or type<array> or 'estimate'") 
    ## GET WEIGHTS 
    if weights is None: 
     wts = np.ones(len(xs)) 
     print(">>>>>>>>>>>\nORDINARY LEAST SQUARES (OLS) FIT:") 
    else: 
     wts = weights[:] 
     print(">>>>>>>>>>>\nGENERALIZED LEAST SQUARES (GLS) FIT:") 
    ## MINIMIZE SUM OF WEIGHTED RESIDUALS 
    ans = optimize.minimize(err_func, x0=ps_init, args=(xs, ys, wts,), method=routine) 
    ## GET OPTIMIZED PARAMETERS 
    alpha, intercept = ans.x[0], ans.x[1] 
    ## GET RESIDUALS 
    residuals = np.array(get_residuals([alpha, intercept], xs, ys)) 
    ## GET UPDATED WEIGHTS FOR REFITTING 
    wts_upd = np.array(update_weights(residuals, power)) 
    ## PRINT & RETURN RESULTS 
    print("\n ALPHA = %.3f, INTERCEPT = %.3f" %(alpha, intercept)) 
    print("\n RESIDUALS: \n", residuals) 
    print("\n WEIGHTS (used): \n", wts) 
    print("\n WEIGHTS (updated): \n", wts_upd, "\n\n") 
    return [alpha, intercept], residuals, wts_upd 

輸出:

[alpha_og, intercept_og], residuals_og, wts_og = get_gls_fit(xdata, ydata, ps_init='estimate') 
[alpha_up, intercept_up], residuals_up, wts_up = get_gls_fit(xdata, ydata, ps_init=[alpha_og, intercept_og], weights=wts_og) 

>>>>>>>>>>> 
ORDINARY LEAST SQUARES (OLS) FIT: 

    ALPHA = 4.034, INTERCEPT = 8.620 

    RESIDUALS: 
[ 0.01489916 0.00235566 0.00082289 0.00811204 0.01224353 0.00713032 
    0.0011486 0.00487199 0.01151256] 

    WEIGHTS (used): 
[ 1. 1. 1. 1. 1. 1. 1. 1. 1.] 

    WEIGHTS (updated): 
[ 0.00179424 0.07177589 0.58819594 0.00605264 0.002657 0.00783406 
    0.3019051 0.01678001 0.00300511] 


>>>>>>>>>>> 
GENERALIZED LEAST SQUARES (GLS) FIT: 

    ALPHA = 3.991, INTERCEPT = 8.621 

    RESIDUALS: 
[ 1.86965273e-02 4.34039033e-03 7.51041961e-05 4.53922546e-03 
    7.27337443e-03 3.18112777e-03 1.00990091e-05 1.06473505e-02 
    2.05510268e-02] 

    WEIGHTS (used): 
[ 0.00179424 0.07177589 0.58819594 0.00605264 0.002657 0.00783406 
    0.3019051 0.01678001 0.00300511] 

    WEIGHTS (updated): 
[ 2.86578120e-07 5.31749819e-06 1.77597366e-02 4.86184846e-06 
    1.89362088e-06 9.89925930e-06 9.82216884e-01 8.83653134e-07 
    2.37190819e-07]