2016-11-10 19 views
0

我很困惑爲什麼我得到錯誤ValueError: setting an array element with a sequence.當我試圖最小化的函數顯式返回並接受一個向量。使用scipy優化2次迭代後的ValueError儘管顯式返回1d數組

這裏是我的實現,這是多項Logistic迴歸從http://czep.net/stat/mlelr.pdf

from __future__ import division 
import os 
import re 
import numpy as np 
import pandas as pd 
from collections import defaultdict, Counter, OrderedDict 
cont_data = pd.read_csv('data.csv', sep=' ') 
cont_data['age'] = [np.mean(map(int, x.split('_'))) for x in cont_data['age']] 

d = cont_data.drop('age', axis=1) 

# vector n contains the number of members in each group 
n = d.sum(axis=1) 

K = 1 # we are using 0 based indexing 
J = 2 

# matrix y is the input data with the baseline class dropped 
y = d.drop('none', axis=1) 

# design matrix 
X = np.array([[age ** k for k in xrange(K + 2)] for age in cont_data.age]) 


# calculate pi following equations 24 and 25 


def calculate_pi_i_j(i, j, b): 
    try: 
     return np.exp(np.sum([X[i][k] * b[k][j] for k in xrange(K)])) 
    except Exception, e: 
     assert False, (e, i, j, k, b) 


def calc_prob(i, j, b): 
    num = calculate_pi_i_j(i, j, b) 
    denom = 1 + np.sum([calculate_pi_i_j(i, j, b) for j in xrange(J - 1)]) 
    return num/denom 


def calculate_pi(b): 
    # calculate pi for the first 2 columns 
    pi = np.array([[calc_prob(i, j, b) for j in xrange(J - 1)] for i in xrange(len(n))]) 
    # calculate pi for the last column where J=J 
    pi_j = np.array([1/(1 + np.sum([calculate_pi_i_j(i, j, b) for j in xrange(J - 1)])) for i in xrange(len(n))]) 
    return np.hstack([pi, pi_j.reshape(len(n), 1)]) 


# equation to optimize 
def eqn_32(b): 
    # b comes in as a vector 
    b = b.reshape(3, 2) 
    pi = calculate_pi(b) 
    r = [] # will hold the result of the gradient calculation for each member of beta 
    try: 
     for k in xrange(b.shape[0]): 
      for j in xrange(b.shape[1]): 
       r.append(np.sum([(y.iloc[i][j] * X[i][k]) * (n.iloc[i] * pi[i][j] * X[i][k]) for i in xrange(len(n))])) 
    except Exception, e: 
     assert False, (e, k, j, b) 
    print r 
    return np.array(r) 


from scipy.optimize import * 
# starting values for coefficients 
b = np.zeros(6) 
#minimize(eqn_32, x0=b, method='Nelder-Mead') 
fmin_bfgs(eqn_32, x0=b) 

適應那裏data.csv包含:

age ster other none 
15_19 3 61 232 
20_24 80 137 400 
25_29 216 131 301 
30_34 268 76 203 
35_39 197 50 188 
40_44 150 24 164 
45_49 91 10 183 

正如你所看到的,b最初的6陣列,並且返回值r是一個6成員列表,然後將其轉換爲數組。當我使用調試打印語句運行代碼時可以看到這一點:

[249525.5, 130873.5, 270069249.5, 100471089.0, 342109256925.5, 94938609711.0] 
[249525.50185910985, 130873.49902491644, 270069251.5121727, 100471088.25143206, 342109259474.41266, 94938609003.652222] 
--------------------------------------------------------------------------- 
ValueError        Traceback (most recent call last) 
<ipython-input-8-edd793c643e2> in <module>() 
    67 b = np.zeros(6) 
    68 #minimize(eqn_32, x0=b, method='Nelder-Mead') 
---> 69 fmin_bfgs(eqn_32, x0=b) 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in fmin_bfgs(f, x0, fprime, args, gtol, norm, epsilon, maxiter, full_output, disp, retall, callback) 
    791    'return_all': retall} 
    792 
--> 793  res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts) 
    794 
    795  if full_output: 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options) 
    845  else: 
    846   grad_calls, myfprime = wrap_function(fprime, args) 
--> 847  gfk = myfprime(x0) 
    848  k = 0 
    849  N = len(x0) 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in function_wrapper(*wrapper_args) 
    287  def function_wrapper(*wrapper_args): 
    288   ncalls[0] += 1 
--> 289   return function(*(wrapper_args + args)) 
    290 
    291  return ncalls, function_wrapper 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in approx_fprime(xk, f, epsilon, *args) 
    620 
    621  """ 
--> 622  return _approx_fprime_helper(xk, f, epsilon, args=args) 
    623 
    624 

/Users/ifiddes/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.pyc in _approx_fprime_helper(xk, f, epsilon, args, f0) 
    560   ei[k] = 1.0 
    561   d = epsilon * ei 
--> 562   grad[k] = (f(*((xk + d,) + args)) - f0)/d[k] 
    563   ei[k] = 0.0 
    564  return grad 

ValueError: setting an array element with a sequence. 

爲什麼在2次迭代後失敗?

回答

1

我認爲eqn_32必須返回一個標量。

相關問題