如果您仍在尋找答案,下面這個範例或許能幫上忙。
import pandas as pd
import numpy as np
import datetime
# Build ten rows of random sample data: two integer columns in [0, 100) and a
# timestamp that is a random whole number of hours (0-99) after 2000-01-01.
df = pd.DataFrame(
    {
        "col1": np.random.randint(100, size=10),
        "col2": np.random.randint(100, size=10),
        "timestamp": [
            datetime.datetime(2000, 1, 1) + datetime.timedelta(hours=int(offset))
            for offset in np.random.randint(100, size=10)
        ],
    },
    columns=["col1", "col2", "timestamp"],
)
# Order the rows chronologically and renumber them 0..n-1.
df = df.sort_values(by="timestamp")
df = df.reset_index(drop=True)
# Bin left edges: start at the earliest timestamp and step every 6 hours up to
# the latest one. Change freq to whatever granularity is reasonable (d, h, m, s).
bins = pd.date_range(
    start=df["timestamp"].iloc[0],
    end=df["timestamp"].iloc[-1],
    freq="6h",
)
# Pair every left edge with the edge one step later -> list of (start, end).
# The last pair ends after the latest timestamp, so every row lands in a bin.
startend = list(zip(bins, bins + bins.freq))
# define a function that finds bin index
def time_in_range(x, intervals=None):
    """Return the index of the first interval that contains ``x``.

    Each interval is a ``(start, end)`` pair and is treated as closed on both
    ends, so a value lying exactly on a boundary shared by two consecutive
    intervals is assigned to the earlier one.

    Args:
        x: Value to locate; it must be comparable with the interval bounds.
        intervals: Optional iterable of ``(start, end)`` pairs. When omitted,
            the module-level ``startend`` list is searched.

    Returns:
        The zero-based position of the first matching interval, or ``None``
        when ``x`` lies outside every interval.
    """
    if intervals is None:
        intervals = startend
    for ind, (start, end) in enumerate(intervals):
        if start <= x <= end:
            return ind
    # No interval matched; make the fall-through result explicit.
    return None
# Add bin index to column named index
df['index'] = df.timestamp.apply(time_in_range)
# Group by bin index and compute sum and count for col1 and col2.
# The columns are selected with a list: selecting them with a bare tuple
# (groupby(...)["col1", "col2"]) is rejected by current pandas.
df = df.groupby('index')[["col1", "col2"]].agg(['sum', 'count'])
# Flatten the (column, statistic) header before merging, because pandas refuses
# to merge frames whose columns have a different number of levels.
# Order is: col1 sum, col1 count, col2 sum, col2 count. Both counts describe
# the same rows, so only one is kept (as "count").
df.columns = ["col1", "delete", "col2", "count"]
df = df.drop(columns="delete").reset_index()
# Create output df2 (with bins); reset_index exposes the bin number as "index"
df2 = pd.DataFrame(startend, columns=["start", "end"]).reset_index()
# Join the two dataframes on the bin number; bins without rows get zeros
df3 = pd.merge(df2, df, how='outer', on='index').fillna(0)
# Final adjustments: the bin number was only needed as the join key
df3.drop(['index'], axis=1, inplace=True)
輸出:
<table border="1" class="dataframe"> <thead> <tr style="text-align: right;"> <th></th> <th>start</th> <th>end</th> <th>col1</th> <th>col2</th> <th>count</th> </tr> </thead> <tbody> <tr> <th>0</th> <td>2000-01-01 21:00:00</td> <td>2000-01-02 03:00:00</td> <td>89.0</td> <td>136.0</td> <td>2.0</td> </tr> <tr> <th>1</th> <td>2000-01-02 03:00:00</td> <td>2000-01-02 09:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>2</th> <td>2000-01-02 09:00:00</td> <td>2000-01-02 15:00:00</td> <td>69.0</td> <td>27.0</td> <td>1.0</td> </tr> <tr> <th>3</th> <td>2000-01-02 15:00:00</td> <td>2000-01-02 21:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>4</th> <td>2000-01-02 21:00:00</td> <td>2000-01-03 03:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>5</th> <td>2000-01-03 03:00:00</td> <td>2000-01-03 09:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>6</th> <td>2000-01-03 09:00:00</td> <td>2000-01-03 15:00:00</td> <td>108.0</td> <td>57.0</td> <td>2.0</td> </tr> <tr> <th>7</th> <td>2000-01-03 15:00:00</td> <td>2000-01-03 21:00:00</td> <td>35.0</td> <td>85.0</td> <td>2.0</td> </tr> <tr> <th>8</th> <td>2000-01-03 21:00:00</td> <td>2000-01-04 03:00:00</td> <td>102.0</td> <td>92.0</td> <td>2.0</td> </tr> <tr> <th>9</th> <td>2000-01-04 03:00:00</td> <td>2000-01-04 09:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>10</th> <td>2000-01-04 09:00:00</td> <td>2000-01-04 15:00:00</td> <td>0.0</td> <td>0.0</td> <td>0.0</td> </tr> <tr> <th>11</th> <td>2000-01-04 15:00:00</td> <td>2000-01-04 21:00:00</td> <td>91.0</td> <td>3.0</td> <td>1.0</td> </tr> </tbody></table>
你能提供初始樣本數據嗎? – Wen