以下是另一種方法:
# write a simple function
# Build a block "similarity" matrix from a grouping specification string.
#
# `string` is a single character value in which groups are separated by
# commas and the members of a group are joined by "+", e.g. "1+2,3,4+5+6".
# Every member must be a positive integer index; surrounding whitespace is
# ignored and empty groups are skipped.
#
# Returns an n x n numeric matrix (n = largest index mentioned) holding 1 at
# [i, j] whenever i and j belong to the same group and 0 elsewhere. Indices
# that appear in no group get an all-zero row and column. A string with no
# indices yields a 0 x 0 matrix. Malformed members raise an error.
similarity <- function(string) {
  stopifnot(is.character(string), length(string) == 1L, !is.na(string))

  groups <- strsplit(string, ",", fixed = TRUE)[[1]]

  # Parse each group into its integer members explicitly instead of
  # rewriting "+" to ":" and evaluating the text: that treated "1+3" as the
  # range 1:3 and executed arbitrary input as R code.
  index_sets <- lapply(groups, function(group) {
    members <- trimws(strsplit(group, "+", fixed = TRUE)[[1]])
    members <- members[nzchar(members)]
    if (!all(grepl("^[0-9]+$", members))) {
      stop("invalid index in group '", group, "'", call. = FALSE)
    }
    as.integer(members)
  })

  all_indices <- unlist(index_sets)
  if (length(all_indices) == 0L) {
    return(matrix(0, nrow = 0L, ncol = 0L))
  }
  if (anyNA(all_indices) || any(all_indices < 1L)) {
    stop("indices must be positive integers", call. = FALSE)
  }

  # Size by the largest index rather than by the last character of the
  # string, so multi-digit and unordered indices work.
  n <- max(all_indices)

  # matrix() always yields a matrix; mat.or.vec(1, 1) would return a vector.
  mat <- matrix(0, nrow = n, ncol = n)
  for (idx in index_sets) {
    mat[idx, idx] <- 1
  }
  mat
}
# Use that function
> similarity("1+2,3,4+5+6")
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 1 0 0 0 0
[2,] 1 1 0 0 0 0
[3,] 0 0 1 0 0 0
[4,] 0 0 0 1 1 1
[5,] 0 0 0 1 1 1
[6,] 0 0 0 1 1 1
# Using other string
> similarity("1+2,3,5+6+7, 8")
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1 1 0 0 0 0 0 0
[2,] 1 1 0 0 0 0 0 0
[3,] 0 0 1 0 0 0 0 0
[4,] 0 0 0 0 0 0 0 0
[5,] 0 0 0 0 1 1 1 0
[6,] 0 0 0 0 1 1 1 0
[7,] 0 0 0 0 1 1 1 0
[8,] 0 0 0 0 0 0 0 1
我已編輯答案使其執行得更快(即,刪除了不必要的 'data.frame' 轉換)。