0
以下代碼來自 「用於數據分析的Python」,第11章,組轉換和分析。 我顯示每個庫的版本如下。如何與groupby一起使用ols?
# -*- coding: utf-8 -*-
""" Created on Sun Jun 4 13:33:47 2017
"Python for Data Analysis",chp 11,group transforms and analysis.
"""
import numpy as np # np.__version__'1.12.1'
import pandas as pd # pd.__version__ '0.20.2'
import random; random.seed(a=0,version=2)
import statsmodels.api as sm # statsmodels.__version__ '0.8.0'
import string
# generate tickers from random
N=1000
def rands(n):
choices=string.ascii_uppercase
return (''.join([random.choice(choices) for _ in range(n)]))
tickers=np.array([rands(5) for _ in range(N)])
# generate data for tickers
M=500
df=pd.DataFrame({'Momentum': np.random.randn(M)/200+0.03,
'Value':np.random.randn(M)/200+0.08,
'ShortInterest':np.random.randn(M)/200-0.02},
index=tickers[:M])
# create industry
ind_names=np.array(['Financial','Tech'])
sampler=np.random.randint(low=0,high=len(ind_names),size=N, dtype='l')
industries=pd.Series(ind_names[sampler],index=tickers,
name='industry')
#%% factor analysis
fac1,fac2,fac3=np.random.rand(3,1000)
ticker_subset=tickers.take(np.random.permutation(N)[:1000])
port=pd.Series(0.7*fac1-1.2*fac2+0.3*fac3+np.random.rand(1000),
index=ticker_subset)
factors=pd.DataFrame({'f1':fac1,'f2':fac2,'f3':fac3},
index=ticker_subset)
by_ind=port.groupby(industries)
這部分來自本書,而pd.ols已折舊。
#%% use pd.ols, which is depreciated.
# AttributeError: module 'pandas' has no attribute 'ols'
def beta_exposure(chuck,factors=None):
return pd.ols(y=chuck, x=factors).beta
exposures_pd=by_ind.apply(beta_exposure,factors=factors)
print('\nexposures_pd\n',exposures_pd.unstack())
我想使用sm.OLS,但我在爲x選擇對應的行時遇到困難。我應該如何處理它?
#%% use sm.OLS, which is not show in the book.
def exposure(chuck,factors):
y=np.array(chuck).reshape(len(chuck),1)
# The following code is wrong, as the rows number is not the corresponding rows as y
# I use [:len(chuck)] just to keep x have same rows number as y.
x=factors[['f1','f2','f3']][:len(chuck)]
print(x[:5])
print(x.shape)
sx=sm.OLS(y,x).fit()
print(sx.summary())
return sm.OLS(y,x).fit()
exposures_sm=exposure(port, factors)