問題出在列名稱上——相減的兩個 DataFrame 必須具有相同的列名稱。
另外，要建立這些 DataFrame，您可以使用布林索引（boolean indexing）。
import pandas as pd
import io
# Sample data embedded as text so the example runs without an input file.
# The u prefix keeps the literal unicode on Python 2, which io.StringIO requires.
temp=u"""
1,a1453905960,a,95.4500,95.0900,95.0980,433810,s,95.4500,95.0900,95.0980,433810
1,a1453906020,b,95.4700,94.9500,95.4500,934980,d,85.4520,94.100,95.7980,433810
2,a1453906080,f,95.1000,94.8700,95.0900,791657,e,95.4500,934980,9.4400,85.4520
2,a1453906140,c,4.0300,94.7000,94.9620,763531,w,95.1000,94.8700,95.0900,791657
2,a1453906200,f,95.0300,94.8200,94.8918,501298,r,95.1000,94.8700,95.0900,791657"""
# Column labels must be unique: current pandas raises ValueError for duplicate
# entries in `names`, and with the original duplicated 'filename' label the
# first filename column was displayed with the second column's values.
# The two filename columns therefore get distinct labels (left / right side).
headings = ['injection', 'treatment', 'filename_l', 'l0', 'l1', 'l2', 'l3',
            'filename_r', 'r0', 'r1', 'r2', 'r3']
#after testing replace io.StringIO(temp) to filename
df = pd.read_csv(io.StringIO(temp), names=headings)
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(df)
injection treatment filename l0 l1 l2 l3 filename \
0 1 a1453905960 s 95.45 95.09 95.0980 433810 s
1 1 a1453906020 d 95.47 94.95 95.4500 934980 d
2 2 a1453906080 e 95.10 94.87 95.0900 791657 e
3 2 a1453906140 w 4.03 94.70 94.9620 763531 w
4 2 a1453906200 r 95.03 94.82 94.8918 501298 r
r0 r1 r2 r3
0 95.450 95.09 95.098 433810.000
1 85.452 94.10 95.798 433810.000
2 95.450 934980.00 9.440 85.452
3 95.100 94.87 95.090 791657.000
4 95.100 94.87 95.090 791657.000
#convert values to floats
value_cols = ['l0','l1','l2','l3','r0','r1','r2','r3']
df[value_cols] = df[value_cols].astype(float)
#select data by column injection to 2 new dataframes by boolean indexing
# NOTE: these two selections only demonstrate boolean indexing; both names are
# rebound by the subtraction below, which operates on every row of df.
df_os = df[df['injection'] == 1]
# print(df_os)
df_od = df[df['injection'] == 2]
# print(df_od)
#subtract and assign to a different array to normalize data to baseline
# axis=0 subtracts the baseline column row by row from each selected column
df_os = df[['l1','l2','l3']].sub(df['l0'], axis=0)
df_od = df[['r1','r2','r3']].sub(df['r0'], axis=0)
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(df_os)
l1 l2 l3
0 -0.36 -0.3520 433714.55
1 -0.52 -0.0200 934884.53
2 -0.23 -0.0100 791561.90
3 90.67 90.9320 763526.97
4 -0.21 -0.1382 501202.97
# Show the r1..r3 columns after the r0 baseline was subtracted.
# Function-call form of print works on both Python 2 and Python 3.
print(df_od)
r1 r2 r3
0 -0.360 -0.352 433714.550
1 8.648 10.346 433724.548
2 934884.550 -86.010 -9.998
3 -0.230 -0.010 791561.900
4 -0.230 -0.010 791561.900
#set column names in df_od by df_os
# DataFrame arithmetic aligns on column labels, so both frames need the same
# labels for l1/r1, l2/r2 and l3/r3 to be paired in the subtraction.
df_od.columns = df_os.columns
ioc = df_os - df_od
# Function-call form of print works on both Python 2 and Python 3.
print(ioc)
l1 l2 l3
0 0.000 0.0000 0.000
1 -9.168 -10.3660 501159.982
2 -934884.780 86.0000 791571.898
3 90.900 90.9420 -28034.930
4 0.020 -0.1282 -290358.930
# DataFrame.sub is the method form of the `-` operator; the printed result is
# the same as for `df_os - df_od`.
ioc = df_os.sub(df_od)
# Function-call form of print works on both Python 2 and Python 3.
print(ioc)
l1 l2 l3
0 0.000 0.0000 0.000
1 -9.168 -10.3660 501159.982
2 -934884.780 86.0000 791571.898
3 90.900 90.9420 -28034.930
4 0.020 -0.1282 -290358.930
或者，您也可以通過 values 將 DataFrames 轉換為 numpy arrays，然後相減：
# .values gives plain numpy arrays, so the subtraction is purely positional
# and does not depend on the column labels of either frame.
ioc = df_os.values - df_od.values
# Function-call form of print works on both Python 2 and Python 3.
print(ioc)
[[ 0.00000000e+00 0.00000000e+00 0.00000000e+00]
[ -9.16800000e+00 -1.03660000e+01 5.01159982e+05]
[ -9.34884780e+05 8.60000000e+01 7.91571898e+05]
[ 9.09000000e+01 9.09420000e+01 -2.80349300e+04]
[ 2.00000000e-02 -1.28200000e-01 -2.90358930e+05]]
# Wrap the numpy result back into a DataFrame with explicit column labels.
# Function-call form of print works on both Python 2 and Python 3.
print(pd.DataFrame(ioc, columns=['a','b','c']))
a b c
0 0.000 0.0000 0.000
1 -9.168 -10.3660 501159.982
2 -934884.780 86.0000 791571.898
3 90.900 90.9420 -28034.930
4 0.020 -0.1282 -290358.930
你必須具體展示並說明「沒有工作」(didn't work) 是什麼意思；另外，這行 'df.groupby(['injection'])' 什麼都不做 – EdChum
正如 @EdChum 所寫：請顯示「沒有工作」的部分是什麼樣子。例如，顯示 'sub()' 的結果。 – CaptSolo