我有兩個data.tables有34列,其中列是完全相同的。Data.table循環效率
Month SpId1 SpId2 ... SpId33
編輯:這是從Reproducible Example
AltSuitSp1 <- data.table(structure(list(Month = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,12L, 12L, 12L), .Label = c("1", "10", "11", "12", "2", "3", "4","5", "6", "7", "8", "9"), class = "factor"), SpdSpSuit = c(0,0, 0, 0, 0, 0, 0, 0, 0, 0), SpdIncSuit = c(0, 0, 0,0, 0, 0, 0, 0, 0, 0), SpdGrowSuit = c(0.4625, 0.4625, 0.4625, 0.4625, 0.4625, 0.4625, 0.4625,0.4625, 0.4625, 0.4625), RzbSpSuit = c(0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333,0.283333333, 0.283333333, 0.283333333, 0.283333333), RzbIncSuit = c(0.34,0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34), RzbGrowSuit = c(0.283333333,0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333), FMSSpSuit = c(0.34,0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34, 0.34), FMSIncSuit = c(0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425), FMSGrowSuit = c(0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333), BhsSpSuit = c(0.283333333,0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.233333333, 0.233333333, 0.233333333), BhsIncSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BhsGrowSuit = c(0.283333333,0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.283333333, 0.233333333, 0.233333333, 0.233333333), BrtSpSuit = c(0.866666667,0.866666667, 0.866666667, 0.866666667, 0.866666667, 0.866666667,0.866666667, 0.54, 0.54, 0.54), BrtIncSuit = c(0.8, 0.8, 0.8, 0.8, 0.8,0.8, 0.8, 0.43, 0.43, 0.43), BrtGrSuit = c(0.8, 0.8, 0.8, 0.8, 0.8, 0.8,0.8, 0.86, 0.86, 0.86), CcfSpSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CcfIncSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),CcfGrSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), GsfSpSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), GsfIncSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), GsfGrSuit = c(0, 0, 0, 0, 0,0, 0, 0, 0, 0), RbtSpSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), RbtIncSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), RbtGrSuit = c(0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.925, 0.925, 0.925), SmbSpSuit = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.675, 0.675,0.675), SmbIncSuit = c(0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.75, 0.766666667,0.766666667, 0.766666667), SmbGrSuit = c(0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.0875, 0.0875, 0.0875), StbSpSuit = c(0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425, 0.425), StbIncSuit = c(0, 0,0, 0, 0, 0, 0, 0, 0, 0), StbGrSuit = c(0, 0, 0, 0, 0,0, 0, 0, 0, 0), HbcSpSuit = c(0, 0, 0, 0, 0, 0, 0,0, 0, 0), HbcIncSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0,0), HbcGrSuit = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.425, 0.425, 0.425)), .Names = c("Month", "SpdSpSuit", "SpdIncSuit", "SpdGrowSuit","RzbSpSuit", "RzbIncSuit", "RzbGrowSuit", "FMSSpSuit", "FMSIncSuit","FMSGrowSuit", "BhsSpSuit", "BhsIncSuit", "BhsGrowSuit", "BrtSpSuit","BrtIncSuit", "BrtGrSuit", "CcfSpSuit", "CcfIncSuit", "CcfGrSuit","GsfSpSuit", "GsfIncSuit", "GsfGrSuit", "RbtSpSuit", "RbtIncSuit","RbtGrSuit", "SmbSpSuit", "SmbIncSuit", "SmbGrSuit", "StbSpSuit","StbIncSuit", "StbGrSuit", "HbcSpSuit", "HbcIncSuit", "HbcGrSuit"), class = c("data.table", "data.frame"), row.names = c(NA, -10L)))
AltSuitDates <- data.table(structure(list(Month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 10L, 11L,12L), SpdSpT = c(NA, NA, NA, NA, NA, 1L, 1L, NA, NA, NA), SpdIncT = c(NA,NA, NA, NA, NA, 1L, 1L, NA, NA, NA), SpdGrT = c(1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L), RzbSpT = c(NA, NA, NA, 1L, 1L, 1L, NA,NA, NA, NA), RzbIncT = c(NA, NA, NA, 1L, 1L, 1L, NA, NA, NA,NA), RzbGrT = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), FmsSpT = c(NA,NA, 1L, 1L, NA, NA, NA, NA, NA, NA), FmsIncT = c(NA, NA, 1L,1L, 1L, NA, NA, NA, NA, NA), FMSGrT = c(1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L), BhsSpT = c(NA, NA, NA, 1L, 1L, 1L, NA, NA, NA,NA), BhsIncT = c(NA, NA, NA, 1L, 1L, 1L, NA, NA, NA, NA), BhsGrT = c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BRTsp = c(1L, 1L, 1L, NA,NA, NA, NA, 1L, 1L, 1L), BRTinc = c(1L, 1L, 1L, 1L, NA, NA, NA,1L, 1L, 1L), BRTgr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), CCFsp = c(NA, NA, NA, NA, 1L, 1L, 1L, NA, NA, NA), CCFinc = c(NA,NA, NA, NA, 1L, 1L, 1L, NA, NA, NA), CCFgr = c(1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L), GSFsp = c(NA, NA, NA, NA, 1L, 1L, 1L,NA, NA, NA), GSFinc = c(NA, NA, NA, NA, 1L, 1L, 1L, NA, NA, NA), GSFgr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), RBTsp = c(1L,1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA), RBTinc = c(1L, 1L, 1L, 1L,1L, 1L, 1L, NA, NA, NA), RBTgr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L), SMBsp = c(NA, NA, NA, 1L, 1L, 1L, 1L, NA, NA, NA), SMBinc = c(NA, NA, NA, 1L, 1L, 1L, 1L, NA, NA, NA), SMBgr = c(1L,1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), STBsp = c(NA, NA, NA, NA,NA, 1L, 1L, NA, NA, NA), STBinc = c(NA, NA, NA, NA, NA, 1L, 1L,NA, NA, NA), STBgr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), HBCsp = c(NA, NA, NA, 1L, 1L, 1L, NA, NA, NA, NA), HBCinc = c(NA,NA, NA, 1L, 1L, 1L, NA, NA, NA, NA), HBCgr = c(1L, 1L, 1L, 1L,1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("Month", "SpdSpT", "SpdIncT","SpdGrT", "RzbSpT", "RzbIncT", "RzbGrT", "FmsSpT", "FmsIncT","FMSGrT", "BhsSpT", "BhsIncT", "BhsGrT", "BRTsp", "BRTinc", "BRTgr","CCFsp", "CCFinc", "CCFgr", "GSFsp", "GSFinc", "GSFgr", "RBTsp","RBTinc", "RBTgr", "SMBsp", "SMBinc", "SMBgr", "STBsp", "STBinc","STBgr", "HBCsp", "HBCinc", "HBCgr"), class = c("data.table","data.frame"), row.names = c(NA, -10L)))
凡SPID是一個物種標識符使用再生功能的樣本數據。一個DT是500萬行以上(AltSuitSp1),另一個是12(AltSuitDates)。我使用12行長的DT(相當於12個月)來更新較大的DT。目前我使用嵌套如果,否則,如果結構在for循環檢查的條件和更新更大的基於DT關閉小DT(請參見下面的代碼)
h <- 1
n <- length(AltSuitSp1[,Month])
stm <- AltSuitSp1[,Month] # AltSuitSp1 is the 5+ million row DT
cond1 <- which(stm == 1) # list of all rows of AltSuitSp1 where the Month is = 1
cond2 <- which(stm == 2) # list of all rows of AltSuitSp1 where the Month is = 2
...
cond12 <- which(stm == 12)
for (h in seq(n)){
if (any(cond1 == h)){
set(AltSuitSp1,h,2:34,(AltSuitSp1[h,2:34,with=F] * AltSuitDates[1,2:34,with=F]))
}else if (any(cond2 == h)){
set(AltSuitSp1,h,2:34,(AltSuitSp1[h,2:34,with=F] * AltSuitDates[2,2:34,with=F]))
}else if ...
}else if (any(cond12)){
set(AltSuitSp1,h,2:34,(AltSuitSp1[h,2:34,with=F] * AltSuitDates[12,2:34,with=F]))
}else
break
}
現在,我已經運行該代碼1分鐘,並檢查看看h有多遠。目前我每秒看到大約29-30個循環,而h已經提前到大約1800次迭代。然而,即使在每秒30次循環(這相當慢:Using Set in DT),此代碼大約需要2天才能完成。但是,正如下面的輸出所顯示的那樣,它正在做我希望/期望它做的事情。
AltSuitSp1Results <- data.table(structure(list(Month = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,12L, 12L, 12L), .Label = c("1", "10", "11", "12", "2", "3", "4","5", "6", "7", "8", "9"), class = "factor"), SpdSpSuit = c(NA,NA, NA, NA, NA, NA, NA, 0, 0, 0), SpdIncSuit = c(NA, NA, NA,NA, NA, NA, NA, 0, 0, 0), SpdGrowSuit = c(0, 0, 0, 0, 0, 0, 0,0.4625, 0.4625, 0.4625), RzbSpSuit = c(NA, NA, NA, NA, NA, NA,NA, 0.283333333, 0.283333333, 0.283333333), RzbIncSuit = c(NA,NA, NA, NA, NA, NA, NA, 0.34, 0.34, 0.34), RzbGrowSuit = c(0,0, 0, 0, 0, 0, 0, 0.283333333, 0.283333333, 0.283333333), FMSSpSuit = c(NA,NA, NA, NA, NA, NA, NA, 0.34, 0.34, 0.34), FMSIncSuit = c(NA,NA, NA, NA, NA, NA, NA, 0.425, 0.425, 0.425), FMSGrowSuit = c(0,0, 0, 0, 0, 0, 0, 0.283333333, 0.283333333, 0.283333333), BhsSpSuit = c(NA,NA, NA, NA, NA, NA, NA, 0.233333333, 0.233333333, 0.233333333), BhsIncSuit = c(NA, NA, NA, NA, NA, NA, NA, 0, 0, 0), BhsGrowSuit = c(0,0, 0, 0, 0, 0, 0, 0.233333333, 0.233333333, 0.233333333), BrtSpSuit = c(0.866666667,0.866666667, 0.866666667, 0.866666667, 0.866666667, 0.866666667,0.866666667, 0, 0, 0), BrtIncSuit = c(0.8, 0.8, 0.8, 0.8, 0.8,0.8, 0.8, 0.43, 0.43, 0.43), BrtGrSuit = c(0, 0, 0, 0, 0, 0,0, 0.86, 0.86, 0.86), CcfSpSuit = c(NA, NA, NA, NA, NA, NA, NA,0, 0, 0), CcfIncSuit = c(NA, NA, NA, NA, NA, NA, NA, 0, 0, 0),CcfGrSuit = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), GsfSpSuit = c(NA,NA, NA, NA, NA, NA, NA, 0, 0, 0), GsfIncSuit = c(NA, NA,NA, NA, NA, NA, NA, 0, 0, 0), GsfGrSuit = c(0, 0, 0, 0, 0,0, 0, 0, 0, 0), RbtSpSuit = c(NA, NA, NA, NA, NA, NA, NA,0, 0, 0), RbtIncSuit = c(NA, NA, NA, NA, NA, NA, NA, 0, 0,0), RbtGrSuit = c(0, 0, 0, 0, 0, 0, 0, 0.925, 0.925, 0.925), SmbSpSuit = c(NA, NA, NA, NA, NA, NA, NA, 0.675, 0.675,0.675), SmbIncSuit = c(NA, NA, NA, NA, NA, NA, NA, 0.766666667,0.766666667, 0.766666667), SmbGrSuit = c(0, 0, 0, 0, 0, 0,0, 0.0875, 0.0875, 0.0875), StbSpSuit = c(NA, NA, NA, NA,NA, NA, NA, 0.425, 0.425, 0.425), StbIncSuit = c(NA, NA,NA, NA, NA, NA, NA, 0, 0, 0), StbGrSuit = c(0, 0, 0, 0, 0,0, 0, 0, 0, 0), HbcSpSuit = c(NA, NA, NA, NA, NA, NA, NA,0, 0, 0), HbcIncSuit = c(NA, NA, NA, NA, NA, NA, NA, 0, 0,0), HbcGrSuit = c(0, 0, 0, 0, 0, 0, 0, 0.425, 0.425, 0.425)), .Names = c("Month", "SpdSpSuit", "SpdIncSuit", "SpdGrowSuit","RzbSpSuit", "RzbIncSuit", "RzbGrowSuit", "FMSSpSuit", "FMSIncSuit","FMSGrowSuit", "BhsSpSuit", "BhsIncSuit", "BhsGrowSuit", "BrtSpSuit","BrtIncSuit", "BrtGrSuit", "CcfSpSuit", "CcfIncSuit", "CcfGrSuit","GsfSpSuit", "GsfIncSuit", "GsfGrSuit", "RbtSpSuit", "RbtIncSuit","RbtGrSuit", "SmbSpSuit", "SmbIncSuit", "SmbGrSuit", "StbSpSuit","StbIncSuit", "StbGrSuit", "HbcSpSuit", "HbcIncSuit", "HbcGrSuit"), class = c("data.table", "data.frame"), row.names = c(NA, -10L)))
很明顯,我不會以有效的方式解決這個問題,而是在做一些草率的編程。但是,我正在努力弄清楚我可以優化我的代碼的位置。我是否試圖重塑內置DT功能DT?我在其中一個圈子裏;我錯過了一個可以矢量化的地方:R Inferno?
基本上,我需要根據AltSuitDates DT更新AltSuitSp1 DT中的列2:34,使用Month列作爲條件來知道從AltSuitDates DT使用哪一行來更新AltSuitSp1。任何幫助表示讚賞。
如果您將2個數據表合併爲1,則不需要循環。沒有示例數據,除此之外很難說。 –
@DeanMacGregor,不包括示例數據讓我進入第9圈!感謝提醒我包含示例數據。 – duHaas
@duHaas,在你輸入數據之前,你能不能把你的數據as.data.frame?我認爲'dputing'數據表存在問題。以前嘗試使用代碼,並確認它是做你想做的,或者解釋它爲什麼不做。 – BrodieG