2014-03-06 32 views
0
rm(list = ls()) 
a <- seq(from = 1, to = 50000, by = 1) 
b <- seq(from = 1, to = 10000, by = 2) 
c <- seq(from = 1, to = 10000, by = 3) 

two <- rep(NA, length(a)) 
three <- rep(NA, length(a)) 

system.time(
    for (i in seq_along(a)) 
    { 
    if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0) 
    {  
     two[i] <- tail(which(a[i] > b),1) 
     three[i] <- tail(which(a[i] > c),1) 
    } 
    else  
    {  
     two[i] <- NA 
     three[i] <- NA 
    } 
    } 
) 
build_b <- b[two] 
build_c <- c[three] 

我想要做的是找到bc看起來像在a時。因此,我在預測向量twothree中的內存以節省一些時間,因此我可以跟蹤這些事件的索引。循環完成後,根據剛剛計算的索引建立新的向量。目前該操作需要大約10秒來計算。我的問題是如何加快這一操作?我怎麼能加快以下循環?

謝謝!

+1

首先,你不需要'else'部分,因爲'two'和'three'被初始化爲'NA'。然後,'length(tail(which(a [i]> b),1))'可以被'any(a [i]> b)替換' – Pop

+0

'a','b'和'c'被排序或者在你的例子中就是這種情況)? – sgibb

+0

@ sgibb他們排序是。他們實際上是時代。 @ Pop NA引入了else語句,以便我可以在其中放置某些東西,例如「0」,或任何我想要的 –

回答

3

這裏是另一種解決方案使用findInterval

## assume a, b and c are sorted 
two <- findInterval(a-1L, b) 
three <- findInterval(a-1L, c) 

two[two==0] <- NA 
three[three==0] <- NA 

build_b <- b[two] 
build_c <- c[three] 

這裏有點風向標:

a <- seq(from = 1, to = 50000, by = 1) 
b <- seq(from = 1, to = 10000, by = 2) 
c <- seq(from = 1, to = 10000, by = 3) 

pops <- function(a, b, c) { 

    two <- rep(NA, length(a)) 
    three <- rep(NA, length(a)) 

    for (i in seq_along(a)) 
    { 
    if (length(tail(which(a[i] > b),1)) != 0 & length(tail(which(a[i] > c),1)) != 0) 
    {  
     two[i] <- tail(which(a[i] > b),1) 
     three[i] <- tail(which(a[i] > c),1) 
    } 
    else  
    {  
     two[i] <- NA 
     three[i] <- NA 
    } 
    } 
    return(list(b=b[two], c=c[three])) 
} 

droopy <- function(a, b, c) { 

    two <- rep(NA, length(a)) 
    three <- rep(NA, length(a)) 

    for (i in seq_along(a)) 
    { 
    if (any(u <- (a[i] > b)) & any(v <- (a[i] > c))) 
    {  
     two[i] <- sum(u) 
     three[i] <- sum(v) 
    } 
    else  
    {  
     two[i] <- NA 
     three[i] <- NA 
    } 
    } 
    return(list(b=b[two], c=c[three])) 
} 

sgibb <- function(a, b, c) { 
    ## assume a, b and c are sorted 
    two <- findInterval(a-1L, b) 
    three <- findInterval(a-1L, c) 

    two[two==0] <- NA 
    three[three==0] <- NA 

    return(list(b=b[two], c=c[three])) 
} 

基準:

library("rbenchmark") 
benchmark(pops(a, b, c), droopy(a, b, c), sgibb(a, b, c), order="relative", replications=2) 
#    test replications elapsed relative user.self sys.self user.child sys.child 
#3 sgibb(a, b, c)   2 0.010  1.0  0.008 0.004   0   0 
#2 droopy(a, b, c)   2 8.639 863.9  8.613 0.000   0   0 
#1 pops(a, b, c)   2 26.838 2683.8 26.753 0.004   0   0 

identical(pops(a, b, c), sgibb(a, b, c)) 
## TRUE 
identical(droopy(a, b, c), sgibb(a, b, c)) 
## TRUE 
1

一種可能性:

a <- seq(from = 1, to = 50000, by = 1) 
b <- seq(from = 1, to = 10000, by = 2) 
c <- seq(from = 1, to = 10000, by = 3) 

two <- integer(length(a)) 
three <- integer(length(a)) 

system.time(
{ 
    for (i in seq_along(a)) 
    { 
    if (any(u <- (a[i] > b)) & any(v <- (a[i] > c))) 
    {  
     two[i] <- sum(u) 
     three[i] <- sum(v) 
    } 
    else  
    {  
     two[i] <- NA 
     three[i] <- NA 
    } 
    } 
}) 

build_b <- b[two] 
build_c <- c[three]