我正在嘗試提高以下流程的計算效率。我創建了一個帶有示例數據的玩具示例,方便大家檢視。第一種方法的運行時間是第二種方法的一半。我的目標是改善循環的運行時間。
如何改進第一種方法的運行時間?
library(sqldf)
# Toy data: three ids (1, 2, 5) with 4, 3 and 6 rows, and a 0/1 flag per row.
id <- rep(c(1, 2, 5), times = c(4, 3, 6))
qn <- c(0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0)
# Naming the columns in the constructor gives the same two numeric columns as
# building a matrix with cbind() and renaming afterwards.
d <- data.frame(id = id, qn = qn)
# Method 1 (base R): for every id, find the position -- counted within that
# id's own rows -- of the first row whose qn equals 1.
un <- unique(d$id)
x <- proc.time()
# Split qn once by id instead of re-filtering the whole data frame for every
# id. Using `un` as the factor levels keeps the groups in order of first
# appearance, so the result lines up with `un`.
qn_by_id <- split(d$qn, factor(d$id, levels = un))
# match() yields NA for an id that has no qn == 1; assigning the zero-length
# result of head(which(...), 1) into the matrix would stop with an error.
first_hit <- vapply(qn_by_id, function(q) match(1, q), integer(1))
holder <- matrix(unname(first_hit), ncol = 1)
proc.time() - x
f <- sqldf("select id, count(qn) from d group by id", drv = "SQLite")
# The query has no ORDER BY, so line its rows up with `un` before binding the
# per-id positions next to them.
f <- f[match(un, f$id), , drop = FALSE]
f <- cbind(f, holder)
#################################
# Method 2 (SQL): for every id, fetch that id's rows with a separate sqldf
# query and record the within-id position of the first row whose qn equals 1.
# One query per id is retained on purpose: this is the comparison baseline.
un <- unique(d$id)
# NA marks "no qn == 1 found for this id".
holder <- matrix(NA_real_, length(un), 1)
x <- proc.time()
# seq_along() runs zero iterations when there are no ids, whereas
# 1:length(un) would iterate over c(1, 0).
for (i in seq_along(un)) {
  query <- paste("select * from d where id = ", un[i])
  rows <- sqldf(query, drv = "SQLite")
  # match() returns NA when nothing matches; min(which(...)) on an empty
  # result returns Inf and raises a warning.
  holder[i, ] <- match(1, rows$qn)
}
proc.time() - x
f <- sqldf("select id, count(qn) from d group by id", drv = "SQLite")
# The query has no ORDER BY, so line its rows up with `un` before binding the
# per-id positions next to them.
f <- f[match(un, f$id), , drop = FALSE]
f <- cbind(f, holder)
我試圖找出每個 ID 中第一個 1 出現的位置(即 qn 等於 1 的第一行在該 ID 各行中的序號)。
預期輸出如下:
# id first
# 1: 1 3
# 2: 2 2
# 3: 5 3
最好描述一下你想要做什麼,而不是只展示你目前是怎麼做的。最終目標是什麼? – MrFlick
感謝您的快速回覆!我想找出每個 ID 中第一個 1 出現的位置。 –