import datetime as dt
# Sample timestamps, all on 1970-01-01 within hour 0; only the minute varies.
# Note that the minutes are not in ascending order.
dates = [
    dt.datetime(1970, 1, 1, 0, minute)
    for minute in (2, 3, 12, 7, 8, 9, 13)
]
def group_dates(dates, delta):
    """Split ``dates`` into runs of neighbouring timestamps.

    The input is walked once, in the order given (it is not sorted here).
    A new group is started whenever the distance between an item and the
    item immediately before it is larger than ``delta``.

    Args:
        dates: Iterable of ``datetime`` objects.
        delta: ``timedelta`` giving the largest distance (inclusive) allowed
            between two consecutive items of the same group.

    Returns:
        A list of lists of datetimes, preserving input order. An empty
        input produces an empty list.
    """
    limit = delta.total_seconds()
    grouped = []
    prev = None
    for dte in dates:
        # abs(): a step backwards in time that is larger than ``delta`` must
        # open a new group too; a signed comparison would treat every
        # negative difference as "close enough".
        if grouped and abs((dte - prev).total_seconds()) <= limit:
            grouped[-1].append(dte)
        else:
            grouped.append([dte])
        prev = dte
    return grouped
def td(l):
    """Return the arithmetic mean of the datetimes in ``l``.

    Args:
        l: Non-empty sized collection of naive ``datetime`` objects
            (``len(l)`` is used for the division).

    Returns:
        A naive ``datetime`` located at the average of the inputs.

    Raises:
        ZeroDivisionError: If ``l`` is empty.
    """
    epoch = dt.datetime(1970, 1, 1)
    # Average the offsets as timedeltas and add the result back to the epoch.
    # This avoids the float round-trip through datetime.utcfromtimestamp(),
    # which is deprecated since Python 3.12.
    mean_offset = sum((d - epoch for d in l), dt.timedelta()) / len(l)
    return epoch + mean_offset
from pprint import pprint as pp
# Pretty-print the average timestamp of every two-minute group.
pp(list(map(td, group_dates(dates, dt.timedelta(minutes=2)))))
爲了避免不必要的函數調用,可以先檢查每個子列表的長度(len),只有一個元素時直接返回該元素:
# Single-element groups are passed through as-is, so td() is only called
# when there is actually something to average.
pp([td(sub) if len(sub) > 1 else sub[0]
    for sub in group_dates(dates, dt.timedelta(minutes=2))])
# Output:
# [datetime.datetime(1970, 1, 1, 0, 2, 30),
#  datetime.datetime(1970, 1, 1, 0, 12),
#  datetime.datetime(1970, 1, 1, 0, 8),
#  datetime.datetime(1970, 1, 1, 0, 13)]
或者在遍歷的過程中逐個產生(yield)結果:
def group_dates(dates, delta):
    """Lazily yield the average timestamp of each run of neighbouring dates.

    The input is walked once, in the order given (it is not sorted here).
    Consecutive items whose distance is at most ``delta`` are collected into
    one group; when a larger gap is met, ``td`` is called on the finished
    group and its result is yielded.

    Args:
        dates: Iterable of ``datetime`` objects.
        delta: ``timedelta`` giving the largest distance (inclusive) allowed
            between two consecutive items of the same group.

    Yields:
        ``td(group)`` for every group, in input order. Nothing is yielded
        for an empty input.
    """
    limit = delta.total_seconds()
    group = []  # a list: appending is O(1), unlike rebuilding a tuple
    prev = None
    for dte in dates:
        # abs(): a step backwards in time larger than ``delta`` must close
        # the current group too; a signed comparison would never split on
        # negative differences.
        if group and abs((dte - prev).total_seconds()) > limit:
            yield td(group)
            group = []
        group.append(dte)
        prev = dte
    # Flush the trailing group. The guard keeps an empty input from reaching
    # td(), and avoids the RuntimeError that a bare next() on an exhausted
    # iterator causes inside a generator.
    if group:
        yield td(group)
# Materialise the generator so the averaged timestamps can be printed.
pp(list(group_dates(dates, delta=dt.timedelta(minutes=2))))
# Output:
# [datetime.datetime(1970, 1, 1, 0, 2, 30),
#  datetime.datetime(1970, 1, 1, 0, 12),
#  datetime.datetime(1970, 1, 1, 0, 8),
#  datetime.datetime(1970, 1, 1, 0, 13)]
一些計時:
In [28]: dates = [
dt.datetime(1970, 1, 1, 0, 2),
dt.datetime(1970, 1, 1, 0, 3),
dt.datetime(1970, 1, 1, 0, 4),
dt.datetime(1970, 1, 1, 0, 7),
dt.datetime(1970, 1, 1, 0, 8),
dt.datetime(1970, 1, 1, 0, 9),
dt.datetime(1970, 1, 1, 0, 15),
dt.datetime(1970, 1, 1, 0, 22),
dt.datetime(1970, 1, 1, 0, 24),
dt.datetime(1970, 1, 1, 0, 27)
]
In [41]: for i in range(10000):
dates.append(dates[-1]+dt.timedelta(minutes=choice([1,2,3,4])))
....:
In [42]: timeit [td(sub) if len(sub) > 1 else sub[0] for sub in group_dates(dates,dt.timedelta(minutes=2))]
100 loops, best of 3: 15.8 ms per loop
In [43]: timeit reduce_datetime_list_by_delta(dates, delta)
100 loops, best of 3: 16.9 ms per loop
In [44]: timeit timestamps = map(avgtm, groupby(dates, key=grouper(delta)))
10 loops, best of 3: 18.8 ms per loop
In [45]: timeit (list(group_dates_iter(dates, delta = dt.timedelta(minutes=2))))
10 loops, best of 3: 18.4 ms per loop
確認一下:你的目標是遍歷初始列表,並去掉與當前值的時間差在 timedelta 以內的所有條目嗎? – wnnmaw
你說的 timedelta 鄰域(neighborhood)是什麼意思? 在預期的輸出中,你給第一個和第三個值加了30秒。 – tgdn
@tgdn 鄰域(neighborhood)指的是一組彼此接近的值 – JuanPablo