2013-11-01 50 views





(['region'] ['Quradate'][-1:-1])-(['region'] ['Quradate'][-2:-2]) 
& (['region'] ['Quradate'][-1:-1])-(['region'] ['Quradate'][-5:-5]) 



                Score1  Score2 
region           Quradate   
North_Central-Birmingham-Tuscaloosa-Anniston 2010-01-15    47   50 
              2010-04-15    45   60 
              2010-07-15    45   40 
              2010-10-15    42   43 
              2011-01-15    46   44 
              2011-04-15    45   45 
              2011-07-15    45   45 
              2011-10-15    43   46 
              2012-01-15    51   55 
              2012-04-15    53   56 
              2012-07-15    51   57 
              2012-10-15    52   58 
              2013-01-15    50   50 
              2013-04-15    55   55 
              2013-07-15    55   56 
              2013-10-15    51   66 
North_Huntsville-Decatur-Florence   2010-01-15    55   55 




def find_diffs(region, score_cols=('Score1', 'Score2')):
    """Return quarter-over-quarter and year-over-year score changes for one region.

    Parameters
    ----------
    region : pandas.DataFrame
        Rows for a single region. Must have a ``Quradate`` column (anything
        ``pandas.to_datetime`` accepts) and the columns named in
        ``score_cols``.
        NOTE(review): the sample data shows ``Quradate`` as an index level;
        this assumes the caller has already done ``reset_index()`` -- confirm.
    score_cols : sequence of str, optional
        Numeric columns to difference. Defaults to ``Score1`` and ``Score2``.

    Returns
    -------
    pandas.DataFrame
        The rows selected for the quarter comparison followed by the rows
        selected for the year comparison, each passed through ``.diff()``
        and tagged in an ``id`` column with ``'quarter_diff'`` or
        ``'year_diff'``. Because ``.diff()`` is used, the earlier row of each
        pair is all-NaN and the most-recent row carries the change. If the
        comparison date is missing from ``region`` only a NaN row is produced
        for that comparison.
    """
    import pandas as pd

    score_cols = list(score_cols)
    dates = pd.to_datetime(region['Quradate'])

    most_recent_date = dates.max()
    # Calendar offsets land exactly on the previous quarterly/yearly date;
    # a fixed number of days (365/4, 365) drifts and misses it.
    last_quarter = most_recent_date - pd.DateOffset(months=3)
    last_year = most_recent_date - pd.DateOffset(years=1)

    # Sort chronologically so that diff() always yields "recent minus earlier".
    ordered = region.assign(Quradate=dates).sort_values('Quradate', kind='mergesort')

    def _diff_against(earlier_date, label):
        # Element-wise membership test; Python's `or` cannot combine Series.
        mask = ordered['Quradate'].isin([earlier_date, most_recent_date])
        # Restrict to the score columns: diff() is undefined for text columns.
        diffs = ordered.loc[mask, score_cols].diff()
        diffs['id'] = label
        return diffs

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([
        _diff_against(last_quarter, 'quarter_diff'),
        _diff_against(last_year, 'year_diff'),
    ])





我不得不修改計算 diff 的那幾行:補上缺少的「(」,並把「OR」改成小寫,但仍然無法運作。例如:`quarter_score_diff = region[(region.Quradate == most_recent_date) or (region.Quradate == last_quarter)].diff()` 會得到:`----> 8 quarter_score_diff = region[(region.Quradate == most_recent_date) or (region.Quradate == last_quarter)].diff()`、`9 quarter_score_diff['id'] = 'quarter_diff'`、`ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()`(具有多個元素的陣列的真值不明確)– dartdog


我也試過刪掉原本多餘的「)」,例如:`quarter_score_diff = region[region.Quradate == most_recent_date or region.Quradate == last_quarter].diff()`,但仍然得到相同的 ValueError – dartdog


希望澄清:http://stackoverflow.com/questions/19756108/selecting-a-new-dataframe-via-a-multi-indexed-frame-in-pandas-using-index-names – dartdog



# First, create some sample data: 14 quarter-end dates for each of 4 regions.
# An explicit offset object is used instead of the 'Q' string alias, which is
# deprecated in newer pandas releases; the generated dates are the same.
Dates = pd.date_range('2010-1-1', periods=14, freq=pd.offsets.QuarterEnd(startingMonth=12))
Regions = ['Western', 'Eastern', 'Southern', 'Norhtern']
SCORES = ['Score1', 'Score2', 'Score3', 'Score4', 'Score5']

_columns = {'Regions': [elem for elem in Regions for _ in range(len(Dates))]}
for _score in SCORES:
    # One random value per (region, date) row.
    _columns[_score] = np.random.rand(len(Regions) * len(Dates))
df = pd.DataFrame(_columns, index=list(Dates) * len(Regions))

# Create a dictionary to hold the results.
# Its keys are the regions; each maps to a dictionary keyed by score name,
# whose value is a two-element list: element 0 is "most recent minus previous
# quarter" and element 1 is "most recent minus one year earlier".
ValuesDict = {region: {score: [0, 0] for score in SCORES} for region in df.Regions.unique()}

# Now group the data.
dfGrouped = df.groupby('Regions')

# Iterate through the groups. Each group is sorted by its date index so the
# last row is the newest observation; with quarterly data the previous quarter
# is then at position -2 and the observation one year earlier is four rows
# further back, at position -5.
for region_name, region_frame in dfGrouped:
    region_frame = region_frame.sort_index()
    for score in SCORES:
        values = region_frame[score]
        latest = values.iloc[-1]
        ValuesDict[region_name][score][0] = latest - values.iloc[-2]
        ValuesDict[region_name][score][1] = latest - values.iloc[-5]

