0
給定一個任意長度的索引列表,例如 [6, 12],在這些索引處拆分 DataFrame 的最佳方法是什麼,以便最後得到三個不同的 DataFrame?
給定索引列表,如何在這些索引處拆分 DataFrame?
給定一個任意長度的索引列表,例如 [6, 12],在這些索引處拆分 DataFrame 的最佳方法是什麼,以便最後得到三個不同的 DataFrame?
給定索引列表,如何在這些索引處拆分 DataFrame?
這是處理非整數索引的更一般的策略。
import pandas as pd
import numpy as np
# Seed NumPy's global RNG so the sample frame is reproducible.
np.random.seed(0)
# 100 rows of standard-normal draws; column 'A' becomes the (float) index,
# leaving 'B' as the only data column.
df = pd.DataFrame(
    data=np.random.randn(100, 2),
    columns=['A', 'B'],
).set_index('A')
Out[32]:
B
A
1.7641 0.4002
0.9787 2.2409
1.8676 -0.9773
0.9501 -0.1514
-0.1032 0.4106
0.1440 1.4543
0.7610 0.1217
0.4439 0.3337
1.4941 -0.2052
0.3131 -0.8541
-2.5530 0.6536
0.8644 -0.7422
2.2698 -1.4544
0.0458 -0.1872
1.5328 1.4694
... ...
0.9209 0.3187
0.8568 -0.6510
-1.0342 0.6816
-0.8034 -0.6895
-0.4555 0.0175
-0.3540 -1.3750
-0.6436 -2.2234
0.6252 -1.6021
-1.1044 0.0522
-0.7396 1.5430
-1.2929 0.2671
-0.0393 -1.1681
0.5233 -0.1715
0.7718 0.8235
2.1632 1.3365
[100 rows x 1 columns]
# Say you want -1 and 1 as the split points.
# Pad the cut points with -inf/+inf so every index value falls into a bin.
cutoffs = np.array([-np.inf, -1, 1, np.inf])
# Label each row with the interval that contains its index value.
df['group_level'] = pd.cut(df.index.values, bins=cutoffs)
# Grouping on that label then gives one sub-DataFrame per interval.
# observed=False is spelled out because the key is categorical: newer pandas
# warns when it is omitted, and this pins the behaviour used here.
grouped = df.groupby('group_level', observed=False)
for group_name, group_df in grouped:
    print(group_name)
    print(group_df)
(-inf, -1]
B group_level
A
-2.5530 0.6536 (-inf, -1]
-1.0486 -1.4200 (-inf, -1]
-1.7063 1.9508 (-inf, -1]
-1.2528 0.7775 (-inf, -1]
-1.6139 -0.2127 (-inf, -1]
-1.6302 0.4628 (-inf, -1]
-1.1651 0.9008 (-inf, -1]
-1.0708 1.0545 (-inf, -1]
-1.2705 0.9694 (-inf, -1]
-1.1731 1.9436 (-inf, -1]
-1.4913 0.4394 (-inf, -1]
-1.3159 -0.4616 (-inf, -1]
-1.1475 -0.4378 (-inf, -1]
-1.2254 0.8444 (-inf, -1]
-1.0002 -1.5448 (-inf, -1]
-1.0342 0.6816 (-inf, -1]
-1.1044 0.0522 (-inf, -1]
-1.2929 0.2671 (-inf, -1]
(-1, 1]
B group_level
A
0.9787 2.2409 (-1, 1]
0.9501 -0.1514 (-1, 1]
-0.1032 0.4106 (-1, 1]
0.1440 1.4543 (-1, 1]
0.7610 0.1217 (-1, 1]
0.4439 0.3337 (-1, 1]
0.3131 -0.8541 (-1, 1]
0.8644 -0.7422 (-1, 1]
0.0458 -0.1872 (-1, 1]
0.1549 0.3782 (-1, 1]
-0.8878 -1.9808 (-1, 1]
-0.3479 0.1563 (-1, 1]
-0.3873 -0.3023 (-1, 1]
-0.5097 -0.4381 (-1, 1]
-0.8955 0.3869 (-1, 1]
... ... ...
-0.7448 -0.8264 (-1, 1]
-0.0985 -0.6635 (-1, 1]
-0.4980 1.9295 (-1, 1]
0.9494 0.0876 (-1, 1]
0.9209 0.3187 (-1, 1]
0.8568 -0.6510 (-1, 1]
-0.8034 -0.6895 (-1, 1]
-0.4555 0.0175 (-1, 1]
-0.3540 -1.3750 (-1, 1]
-0.6436 -2.2234 (-1, 1]
0.6252 -1.6021 (-1, 1]
-0.7396 1.5430 (-1, 1]
-0.0393 -1.1681 (-1, 1]
0.5233 -0.1715 (-1, 1]
0.7718 0.8235 (-1, 1]
[66 rows x 2 columns]
(1, inf]
B group_level
A
1.7641 0.4002 (1, inf]
1.8676 -0.9773 (1, inf]
1.4941 -0.2052 (1, inf]
2.2698 -1.4544 (1, inf]
1.5328 1.4694 (1, inf]
1.2303 1.2024 (1, inf]
1.1394 -1.2348 (1, inf]
1.4883 1.8959 (1, inf]
1.1788 -0.1799 (1, inf]
1.8832 -1.3478 (1, inf]
1.9229 1.4805 (1, inf]
1.8676 0.9060 (1, inf]
2.3831 0.9445 (1, inf]
1.1266 -1.0799 (1, inf]
1.1880 0.3169 (1, inf]
2.1632 1.3365 (1, inf]