您可以使用 sparseMatrix 的 i、j、x 三元組記號來建立稀疏矩陣:
library(Matrix)
# Demo data: n binary columns that have to be placed into n randomly chosen
# columns of an nr x nc sparse matrix.
set.seed(1)
# Dimensions are kept small so the example fits in laptop memory.
nc <- 10
nr <- 100
n <- 5
# `replace` is spelled out in full instead of relying on partial matching.
df <- data.frame(replicate(n, sample(0:1, nr, replace = TRUE)))
var_nums <- sample(seq_len(nc), size = n)

# Original approach: start from an all-zero sparse matrix and overwrite one
# target column per iteration.
x <- Matrix(0, nrow = nr, ncol = nc, sparse = TRUE)
# seq_len(n) is empty when n == 0, whereas 1:n would iterate over c(1, 0).
for (i in seq_len(n)) {
  x[, var_nums[i]] <- df[, i]
}

# New version: build the matrix in one call from (i, j, x) triplets.
# which(..., arr.ind = TRUE) returns one (row, column) pair per cell of df
# equal to 1, in column-major order.
hits <- which(df == 1, arr.ind = TRUE)
i <- unname(hits[, 1L])
# Translate each df column position into its destination column in the matrix.
j <- var_nums[hits[, 2L]]
y <- sparseMatrix(i = i, j = j, x = 1, dims = c(nrow(df), nc))
# Both constructions must describe the same matrix.
all.equal(x, y, check.attributes = FALSE)
比較兩種做法的速度:
# Loop-based baseline for the benchmark.
#
# Reads the globals `x` (matrix to fill), `df` (source columns), `var_nums`
# (destination column for each column of `df`) and `n` (number of columns to
# copy). The assignment modifies a function-local copy of `x`, so the global
# `x` is left untouched.
#
# Returns the filled copy of `x`.
f1 <- function() {
  # seq_len(n) yields an empty sequence for n == 0; 1:n would yield c(1, 0)
  # and run the body twice with invalid indices.
  for (i in seq_len(n)) {
    x[, var_nums[i]] <- df[, i]
  }
  x
}
# Vectorized construction for the benchmark: build the sparse matrix in a
# single sparseMatrix() call from (i, j, x) triplets.
#
# Reads the globals `df` (source columns), `var_nums` (destination column for
# each column of `df`) and `nc` (number of columns of the result).
#
# Returns a sparse matrix with nrow(df) rows and nc columns holding 1 wherever
# the corresponding cell of `df` equals 1.
f2 <- function() {
  # One (row, column) pair per cell equal to 1. Taking the positions directly
  # keeps i and j the same length even if df holds values other than 0/1, and
  # avoids depending on a separately maintained row count.
  hits <- which(df == 1, arr.ind = TRUE)
  sparseMatrix(
    i = unname(hits[, 1L]),
    # Map each df column position to its destination column.
    j = var_nums[hits[, 2L]],
    x = 1,
    dims = c(nrow(df), nc)
  )
}
# Time the loop-based fill (f1) against the triplet construction (f2) on the
# small data set; the pasted output below reports 100 evaluations of each.
microbenchmark::microbenchmark(f1(), f2())
Unit: milliseconds
expr min lq mean median uq max neval cld
f1() 4.594229 4.694205 5.010071 4.770475 4.891649 12.666554 100 b
f2() 1.274745 1.298663 1.464237 1.329534 1.392146 7.153076 100 a
嘗試更大的資料量:
# Rerun the comparison on a larger problem. f1() and f2() read these globals
# when they are called, so rebinding them here is enough to switch data sets.
nc <- 100
nr <- 10000
n <- 50
set.seed(1)
# `replace` is spelled out in full instead of relying on partial matching.
df <- data.frame(replicate(n, sample(0:1, nr, replace = TRUE)))
var_nums <- sample(seq_len(nc), size = n)
# Fresh all-zero matrix for f1() to fill.
x <- Matrix(0, nrow = nr, ncol = nc, sparse = TRUE)
# Confirm both implementations still agree before timing them.
all.equal(f1(), f2(), check.attributes = FALSE)
# A single evaluation each: the pasted output below shows f1() taking about
# 21.6 seconds at this size.
microbenchmark::microbenchmark(f1(), f2(), times = 1)
Unit: milliseconds
expr min lq mean median uq max neval
f1() 21605.60251 21605.60251 21605.60251 21605.60251 21605.60251 21605.60251 1
f2() 60.87275 60.87275 60.87275 60.87275 60.87275 60.87275 1