# Sample input: one row per group, holding the group mean and the lower and
# upper bounds of its confidence interval.
test_data <- data.frame(
  mean     = c(1.48, 1.59, 1.81, 1.94),
  CI_lower = c(1.29, 1.38, 1.54, 1.62),
  CI_upper = c(1.56, 1.84, 2.3, 2.59)
)

    mean CI_lower CI_upper 
1 1.48  1.29  1.56 
2 1.59  1.38  1.84 
3 1.81  1.54  2.30 
4 1.94  1.62  2.59 


# Desired result: the same three numeric columns plus a `letters` column
# holding the grouping notation for each row.
final <- data.frame(
  mean     = c(1.48, 1.59, 1.81, 1.94),
  CI_lower = c(1.29, 1.38, 1.54, 1.62),
  CI_upper = c(1.56, 1.84, 2.3, 2.59),
  letters  = c("a", "ab", "ab", "b")
)

    mean CI_lower CI_upper letters 
1 1.48  1.29  1.56  a 
2 1.59  1.38  1.84  ab 
3 1.81  1.54  2.30  ab 
4 1.94  1.62  2.59  b 


# Asker's attempt: give every qualifying PAIR of rows its own letter and
# attach that letter to both rows of the pair. Because letters are assigned
# per pair (not per group of mutually overlapping rows), the sample data
# yields "ab", "acd", "bce", "de" rather than a compact "a", "ab", "ab", "b".

# same[i, j] is TRUE when CI_lower[i] < CI_upper[j].
same <- outer(test_data$CI_lower, test_data$CI_upper, "-")
same <- same < 0
# Keep only the strict lower triangle so each pair (i > j) appears once and
# a row is never paired with itself.
same <- lower.tri(same, diag = FALSE) & same

# One row per qualifying pair, holding the (row, col) indices of the pair.
same_ind <- which(same, arr.ind = TRUE)

# One slot per row of test_data; each slot starts as NA and collects the
# indices of the pairs that row takes part in.
groups <- as.list(as.numeric(rep(NA, nrow(test_data))))

# seq_len() keeps the loop from running when no pair qualifies
# (1:0 would iterate over c(1, 0) and fail on the first subscript).
for (i in seq_len(nrow(same_ind))) {
  group_pos <- as.numeric(same_ind[i, ])
  for (i2 in group_pos) {
    groups[[i2]] <- c(groups[[i2]], i)
  }
}

# Turn each row's pair indices into letters and glue them into one string.
# vapply() guarantees a character vector even for empty input.
letters_notation <- vapply(groups, function(x) {
  x <- x[!is.na(x)] # drop the NA placeholder used to create the slot
  x <- letters[x]
  paste0(x, collapse = "")
}, character(1))


mean CI_lower CI_upper letters 
1 1.48  1.29  1.56  ab 
2 1.59  1.38  1.84  acd 
3 1.81  1.54  2.30  bce 
4 1.94  1.62  2.59  de 



我不確定自己是否理解了所需的輸出 – 2014-12-04 12:40:18


或者說,這代表什麼意思? 我需要的是 c("a","ab","ab","b")。 這表示前 3 個條目的 CI(信賴區間)彼此重疊,最後 3 個條目的 CI 也彼此重疊,而第一個和最後一個條目並不重疊。 – 2014-12-04 12:44:31


我有一個可能非常高效的解決方案,雖然輸出不完全是你想要的,但你可以由此判斷哪些條目彼此重疊;不確定是否該把它發佈出來 – 2014-12-04 13:03:59





# Compact letter notation for overlapping confidence intervals.
# Builds a graph whose vertices are the rows of test_data and whose edges join
# rows with overlapping intervals, finds the maximal cliques (sets of rows that
# all overlap one another), gives each clique a letter, and concatenates, for
# every row, the letters of the cliques it belongs to.
library(igraph)

test_data <- data.frame(
  mean     = c(1.48, 1.59, 1.81, 1.94),
  CI_lower = c(1.29, 1.38, 1.54, 1.62),
  CI_upper = c(1.56, 1.84, 2.3, 2.59)
)

n <- nrow(test_data)

# g[i, j] starts as CI_lower[i] - CI_upper[j]; after negating `< 0` it is TRUE
# when interval i begins at or beyond the end of interval j.
g <- outer(test_data$CI_lower, test_data$CI_upper, "-")
g <- !(g < 0)
g <- g + t(g) # symmetrise: a cell is 1 when either interval lies past the other
g <- g != 1 # TRUE for every remaining pair, including the diagonal
rownames(g) <- seq_len(n) # change row names
colnames(g) <- seq_len(n) # change column names

# Re-arrange data into an "edge list" for use in igraph (i.e. which groups are
# "connected") - Solution from "David Eisenstat"
same <- which(g) # linear (column-major) indices of the TRUE cells
g2 <- data.frame(N1 = ((same - 1) %% n) + 1, N2 = ((same - 1) %/% n) + 1)
# Sorting by the first endpoint makes vertices get created in row order;
# simplify() then removes the self-loops and duplicate edges.
g2 <- g2[order(g2[[1]]), ]
g3 <- simplify(graph_from_data_frame(g2, directed = FALSE))

# Calculate the maximal cliques - these are groupings where every node is
# connected to all others - Solution from "majom"
cliq <- max_cliques(g3)
# Plain numeric vertex ids; recent igraph versions reject vertex sequences in
# the sort()/c() calls below.
cliq2 <- lapply(cliq, as.numeric)

# Reorder by level order - Solution from "MrFlick"
# Each clique becomes one ROW (sorted ids, zero-padded to a common width) so
# that order() ranks the cliques themselves, column by column.
ml <- max(lengths(cliq2))
reord <- do.call(order, data.frame(
  do.call(rbind, lapply(cliq2, function(x) {
    c(sort(x), rep.int(0, ml - length(x)))
  }))
))
cliq <- cliq[reord]

# Generate labels to factor levels
lab.txt <- vector(mode = "list", n) # empty list, one slot per row
lab <- letters[seq_along(cliq)] # clique labels
for (i in seq_along(cliq)) { # loop to concatenate clique labels
  for (j in as.integer(cliq[[i]])) {
    lab.txt[[j]] <- paste0(lab.txt[[j]], lab[i])
  }
}

# Flatten to a character vector, one label string per row of test_data.
unlist(lab.txt)


[1] "a" "ab" "ab" "b" 

在目前的 igraph 版本中,`reord <- ...` 似乎無法運作: Error in parse_op_args(..., what = "a vertex", is_fun = is_igraph_vs, : Not a vertex sequence – ckluss 2015-07-12 10:24:02


不知道這是否是個好方法,但可以這樣修復:先執行 `cliq2 <- lapply(cliq, as.numeric)`,再執行 `reord <- do.call(... cliq2 ...)` – ckluss 2015-07-12 12:27:27


這似乎奏效了。謝謝! – 2015-09-10 22:01:17



# Convert test_data to a data.table in place and key it on the interval
# columns; foverlaps() expects the table it matches against to be keyed on
# its interval columns.
setDT(test_data)
setkey(test_data, CI_lower, CI_upper)

# Self-join on the intervals; which = TRUE returns the overlap indices
# (xid, yid) instead of the joined rows.
Overlaps <- foverlaps(test_data, test_data, type = "any", which = TRUE)

# For each row (xid), paste together the letters of every row it overlaps
# with (yid); the trailing [] prints the updated table.
test_data[, overlaps := Overlaps[, paste(letters[yid], collapse = ""), by = xid]$V1][]
# mean CI_lower CI_upper overlaps
# 1: 1.48  1.29  1.56  abc <~~ not overlapping with d
# 2: 1.59  1.38  1.84  abcd
# 3: 1.81  1.54  2.30  abcd
# 4: 1.94  1.62  2.59  bcd <~~ not overlapping with a

謝謝!這確實非常接近。你是否也有好的/高效的辦法來消除冗餘?基本上我猜,只要刪除在所有條目中都出現的字母,再把剩下的字母從 a、b、c……開始重新編號,應該總是可行的。 – 2014-12-04 13:25:52