2017-06-21 33 views
-1

請任何一個可以幫助我,使這個功能運行得更快:速度了研發功能

st=c(0 ,0, 9,39,44 ,100, 0, 0, 8,26 ,100, 0, 0, 6, 9,16,20,24,29,35,37,47,54,73 ,100, 0, 0, 6,35,44 ,100, 0, 0,10,16,27,40,51,91, 100, 0, 0,3, 7,28,69,71,75, 100, 0, 0,19 ,100, 0, 0, 7,24,29,35 ,100, 0, 0, 8,11,14,15,18,31,32,33,50,53,56,62,79,80,82,87,88,89, 100, 0, 0, 2,7,31,34,40 ,100, 0, 0,10,41,51,76 ,100, 0, 0, 4,32,41,46 ,100, 0, 0,19,26,59,76,83,88,92 ,100, 0, 0,11,27,51, 100, 0, 0, 5, 7,45,56,78,3 ,100, 0, 0, 3,12,23,46,53,72 ,100) 

id=c(1:length(st)) 
data=cbind(id,st) 

a=gRbase::combnPrim(c(0:100), 4, simplify = T) 
c=a[,a[1,]==0] 
d=c[,c[4,]==100] 
list_d=c(unname(as.data.frame(d))) 

    p=c() 

f=function(dataf,dec,...){ 
    cc<-vector("list", 3) 
    dataf=data.frame(dataf) 
    for(j in 1:3){ 
     cc[[j]] <-c(dec[j],dec[j+1]); 
     for(k in 1:nrow(dataf)){ 
      if(round(dataf[k,"st"],digits=3)>= round(cc[[j]][1],digits=3) && round(dataf[k,"st"],digits=3) <= round(cc[[j]][2],digits=3)){ 
       dataf[k,"p"]=j 
      } 
     } 
    } 
    return(dataf) 
} 

l=vector("list",length(list_d)) 

library(doSNOW) 
cluster = makeCluster(4, type = "SOCK") 
registerDoSNOW(cluster) 

system.time(l<-foreach(i= 1:length(list_d)) %dopar% f(data,list_d[[i]])) 
stopCluster(cluster) 

,這是結果的分析:

enter image description here

!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

+4

你嘗試過什麼?你是否分析了功能並確定了瓶頸?你沒有提供數據來首先運行它。 –

+1

請給我們dataf和dec –

+2

函數做什麼?算法是什麼?你應該擺脫R循環並嘗試矢量化。順便說一句,不要嘗試分析並行化的循環。分析沒有看到工人發生了什麼。您應該使用'%do%'進行配置。 – Roland

回答

1

據我所知,你的函數決定了哪個區間有些值,dataf $ st包含這些值,dec包含了區間的值。

在這種情況下,這個工程:加快

# original function 
f=function(dataf,dec,...){ 
    cc<-vector("list", 3) 
    dataf=data.frame(dataf) 
    for(j in 1:3){ 
    cc[[j]] <-c(dec[j],dec[j+1]); 
    for(k in 1:nrow(dataf)){ 
     if(round(dataf[k,"st"],digits=3)>= round(cc[[j]][1],digits=3) && round(dataf[k,"st"],digits=3) <= round(cc[[j]][2],digits=3)){   
     dataf[k,"p"]=j 
     } 
    } 
    } 
    return(dataf) 
} 

# my proposal 
f2=function(dataf,dec,...){ 
    dec <- round(dec,digits=3) 
    dataf=data.frame(dataf) 
    dataf$p <- sapply(round(dataf$st,digits=3), function(x){match(TRUE,x<dec)-1}) 
    dataf$p[dataf$p==0] <- NA 
    return(dataf) 
} 

dec <- 1:4 
df1 <- data.frame(st = sample(1:50000,1000)/10000,stringsAsFactors=FALSE) 
original <- f(df1,dec) 
new  <- f2(df1,dec) 
all.equal(original,new) # TRUE, results are the same 

# now let's check the speed 
library(microbenchmark) 
microbenchmark(original = f(df1,dec), 
       new = f2(df1,dec), 
       times = 10) 

# Unit: milliseconds 
#  expr  min  lq  mean median  uq  max neval 
# original 82.025627 82.600381 84.834773 84.059917 86.214891 89.83188 10 
# new  2.115691 2.142234 2.234287 2.192843 2.289462 2.49650 10 

40倍

+0

感謝這看起來很酷 –