2015-04-02 26 views
3

是否有更快的(容易擴展的)和更簡潔的方式從x中獲取y?查找列表元素交集的長度

x <- list(c("a", "b", "c", "d"), 
c("a", "b", "e"), 
c("x", "y"), 
c("z", "x")) 

y <- vector(mode = "list", length = length(x)) 

for(i in 1:length(x)){ 
for(j in 1:length(x)){ 
    y[[i]] <- append(y[[i]], length(intersect(x[[i]], x[[j]])))}} 

y <- do.call(rbind, y) 

回答

2
un <- unique(unlist(x)) 
crossprod(sapply(x,function(y)un%in%y)) 
#  [,1] [,2] [,3] [,4] 
# [1,] 4 2 0 0 
# [2,] 2 3 0 0 
# [3,] 0 0 2 1 
# [4,] 0 0 1 2 


microbenchmark::microbenchmark(user1389960(), times = 1000) 
# Unit: microseconds 
#   expr  min  lq mean median  uq  max neval 
# user1389960() 172.631 181.5195 243.918 187.1015 198.716 45083.95 1000 
microbenchmark::microbenchmark(eipi10(), times = 1000) 
# Unit: microseconds 
#  expr  min  lq  mean median  uq  max neval 
# eipi10() 218.625 225.9635 246.9797 234.469 245.4545 1175.439 1000 
microbenchmark::microbenchmark(Julius(), times = 1000) 
# Unit: microseconds 
#  expr min  lq  mean median  uq  max neval 
# Julius() 30.322 32.511 37.61541 34.0175 37.957 1026.268 1000 
microbenchmark::microbenchmark(ColonelBeauvel(), times = 1000) 
# Unit: microseconds 
#    expr  min  lq  mean median  uq  max neval 
# ColonelBeauvel() 162.103 169.548 183.9076 175.683 183.677 1052.435 1000 
1

這是清潔的,但不是更快:

sapply(x, function(a) { 
    sapply(x, function(b) length(intersect(a,b))) 
}) 

時序:

microbenchmark::microbenchmark(sapply(x, function(a) { 
    sapply(x, function(b) length(intersect(a,b)))})) 

    min  lq  mean median  uq  max neval 
377.513 392.2505 406.1243 404.318 416.22 511.877 100 

microbenchmark::microbenchmark(for(i in 1:length(x)){ 
    for(j in 1:length(x)){ 
    y[[i]] <- append(y[[i]], length(intersect(x[[i]], x[[j]])))}}) 

    min  lq  mean median  uq  max neval 
350.471 375.7305 422.0248 388.695 414.41 2386.736 100 
1

我會用mapply

n = length(x) 
matrix(mapply(function(u,v) length(intersect(u,v)), rep(x, n), rep(x, each=n)), ncol=n) 

#  [,1] [,2] [,3] [,4] 
#[1,] 4 2 0 0 
#[2,] 2 3 0 0 
#[3,] 0 0 2 1 
#[4,] 0 0 1 2