from numba import njit
import pandas as pd, numpy as np
@njit
def dfill(g, v, n):
    """Fill missing values per group by repeating the last observed step.

    Rows are visited in order. Within each group an observed value is
    emitted unchanged and the difference to the group's previous observed
    value is remembered; a missing value (NaN) is replaced by the group's
    previously emitted value plus that remembered difference. A group that
    has not yet seen two observed values has no difference to apply, so
    its missing rows stay NaN.

    Kept as a plain scalar loop over arrays; this file decorates the
    function with numba's ``@njit``.

    Parameters
    ----------
    g : 1-D integer array
        Group code of every row, expected in ``[0, n)``. Negative codes
        (``pd.factorize`` uses -1 for a missing key) mark rows that belong
        to no group.
    v : 1-D numeric array
        Values aligned with ``g``; NaN marks a value to fill.
    n : int
        Number of distinct groups.

    Returns
    -------
    1-D float array of length ``g.size`` with the filled values.
    """
    step = np.full(n, np.nan)       # last observed difference, per group
    last_seen = np.full(n, np.nan)  # last observed (non-NaN) value, per group
    current = np.full(n, np.nan)    # last emitted value, per group
    out = np.full(g.size, np.nan)
    for i in range(g.size):
        grp = g[i]
        val = v[i]
        if grp < 0:
            # A negative code would wrap around and corrupt the state of
            # the last group; such rows are passed through untouched.
            out[i] = val
            continue
        if np.isnan(val):
            current[grp] += step[grp]
            out[i] = current[grp]
        else:
            step[grp] = val - last_seen[grp]
            last_seen[grp] = val
            current[grp] = val
            out[i] = val
    return out
# Encode every internal_id as an integer group code (g); u holds the distinct ids.
g, u = pd.factorize(df.internal_id.values)
# Build a copy of df whose membership column is dfill's output; the result is
# not bound to a name, so it is only displayed when run interactively.
df.assign(membership=dfill(g, df.membership.values, u.size))
internal_id time_id membership
0 a 2017-01-01 10.0
1 a 2017-02-01 20.0
2 a 2017-03-01 30.0
3 a 2017-04-01 40.0
4 b 2017-01-01 10.0
5 b 2017-02-01 15.0
6 b 2017-03-01 20.0
7 b 2017-04-01 25.0
8 c 2017-01-01 10.0
9 c 2017-02-01 12.0
10 c 2017-03-01 14.0
11 c 2017-04-01 16.0
計時
程式碼見下方
pir john scott
10 1.0 61.777804 69.453241
30 1.0 173.136378 183.454086
100 1.0 518.193661 589.781918
300 1.0 1336.147050 1497.396670
1000 1.0 2502.225163 2621.045714
![enter image description here](https://i.stack.imgur.com/07SAD.png)
@njit
def dfill(g, v, n):
    """Fill missing values per group by repeating the last observed step.

    Rows are visited in order. Within each group an observed value is
    emitted unchanged and the difference to the group's previous observed
    value is remembered; a missing value (NaN) is replaced by the group's
    previously emitted value plus that remembered difference. A group that
    has not yet seen two observed values has no difference to apply, so
    its missing rows stay NaN.

    Kept as a plain scalar loop over arrays; this file decorates the
    function with numba's ``@njit``.

    Parameters
    ----------
    g : 1-D integer array
        Group code of every row, expected in ``[0, n)``. Negative codes
        (``pd.factorize`` uses -1 for a missing key) mark rows that belong
        to no group.
    v : 1-D numeric array
        Values aligned with ``g``; NaN marks a value to fill.
    n : int
        Number of distinct groups.

    Returns
    -------
    1-D float array of length ``g.size`` with the filled values.
    """
    step = np.full(n, np.nan)       # last observed difference, per group
    last_seen = np.full(n, np.nan)  # last observed (non-NaN) value, per group
    current = np.full(n, np.nan)    # last emitted value, per group
    out = np.full(g.size, np.nan)
    for i in range(g.size):
        grp = g[i]
        val = v[i]
        if grp < 0:
            # A negative code would wrap around and corrupt the state of
            # the last group; such rows are passed through untouched.
            out[i] = val
            continue
        if np.isnan(val):
            current[grp] += step[grp]
            out[i] = current[grp]
        else:
            step[grp] = val - last_seen[grp]
            last_seen[grp] = val
            current[grp] = val
            out[i] = val
    return out
def pir(d):
    """Return a copy of ``d`` whose ``membership`` column is filled by ``dfill``.

    The ``internal_id`` column is factorized into integer group codes,
    which are handed to ``dfill`` together with the raw ``membership``
    values and the number of distinct ids.
    """
    codes, uniques = pd.factorize(d.internal_id.values)
    filled = dfill(codes, d.membership.values, uniques.size)
    return d.assign(membership=filled)
def john(d):
    """Return a copy of ``d`` with ``membership`` extended per ``internal_id``.

    Each group's column is forward-filled and then offset by the running
    sum of its shifted, forward-filled differences (0 where that sum is
    undefined).

    NOTE(review): the offset is added on every row, not only on missing
    ones, so a group with three or more consecutive observed values
    appears to get its later observed values shifted as well - confirm
    the input never has that shape.
    """
    def _extend(x):
        step = x.diff().shift().ffill()
        offset = step.cumsum().fillna(0)
        return x.ffill() + offset

    filled = d.groupby('internal_id')['membership'].transform(_extend)
    return d.assign(membership=filled)
def scott(d):
    """Return ``d`` with its gaps filled from a per-group running sum.

    For every non-key column of each ``internal_id`` group, missing
    entries are replaced by the column's largest difference, the group's
    first row (smallest index label) is blanked, and the cumulative sum is
    taken. Values already present in ``d`` win; only its missing cells
    are taken from that computed frame.
    """
    def _running_fill(x):
        largest_step = x.diff().max()
        not_first_row = x.index != x.index.min()
        return x.fillna(largest_step).where(not_first_row).cumsum()

    d2 = d.groupby('internal_id').transform(_running_fill)
    return d.combine_first(d2)
# Benchmark: time every implementation on progressively larger frames and
# show each timing relative to the fastest implementation for that size.
from timeit import timeit  # called below; not imported at the top of the file

results = pd.DataFrame(
    index=[10, 30, 100, 300, 1000],
    columns='pir john scott'.split(),
    dtype=float,
)
k = len(df)
for i in results.index:
    # Stack i copies of df and append the copy number to every id so that
    # each copy contributes its own set of groups.
    r = np.arange(1, i + 1).repeat(k).astype(str)
    d = pd.concat([df] * i, ignore_index=True)
    d.internal_id += r
    for j in results.columns:
        stmt = '{}(d)'.format(j)
        # timeit runs the statement in its own namespace, so the frame and
        # the function under test are pulled in from __main__.
        setp = 'from __main__ import d, {}'.format(j)
        results.at[i, j] = timeit(stmt, setp, number=10)
# Divide every row by its minimum: the fastest implementation reads 1.0.
results.div(results.min(axis=1), axis=0)
已將計時結果加入[我的回答](https://stackoverflow.com/a/45874258/4909087),供所有相關人士參考。 –