2012-12-08 76 views
0

我試圖合併兩個表,並按兩個表共有的組提取對應的值。(標題:在 R 中按組合併兩個表)

我有 test1 表:

structure(list(Similarity = c(999L, 888L, 756L, 879L, 567L, 567L), Peak = c(797L, 833L,999L, 798L, 834L, 444L), Name = structure(c(2L, 5L, 6L, 1L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane", "Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"),Sample1 = c(22237L, 86032349L, NA, NA, NA, 3333L), Sample2 = c(444567L, 
NA, NA, NA, 115127L, 22222L)), .Names = c("Similarity", "Peak", "Name", "Sample1", "Sample2"), class = "data.frame", row.names = c(NA, -6L)) 

和 test2 表:

structure(list(Similarity = c(757L, 859L, 999L, 879L, 577L), 
Peak = c(798L, 797L, 999L, 834L, 833L), Name = structure(c(1L, 2L, 5L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane","Tetradecene", "dodecadiene"), class ="factor"),Sample3 = c(NA, 115127L, NA, NA, 86032349L), Sample4 = c(NA, 43359706L, NA,115127L, NA)),.Names = c("Similarity", "Peak", "Name", "Sample3", "Sample4"), class = "data.frame",row.names = c(NA, -5L)) 

當我結合這兩個表:

cbind(test1,test2) 

structure(list(Row.names = structure(c("1", "2", "3", "4", "5"), class ="AsIs"),Similarity.x = c(999L, 888L, 756L, 879L, 567L), Peak.x = c(797L, 833L, 999L, 798L, 834L), Name.x = structure(c(2L, 5L, 6L, 1L, 3L),.Label=c("Benzene","Cyclopentane","Hexadecane", "Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"), Sample1 = c(22237L, 86032349L, NA, NA, NA), Sample2 = c(444567L, NA, NA, NA, 115127L), Similarity.y = c(757L, 859L, 999L,879L, 577L), Peak.y = c(798L, 797L, 999L, 834L, 833L), Name.y = structure(c(1L,2L, 5L, 3L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane", "Tetradecene", "dodecadiene"), class = "factor"), Sample3 = c(NA, 115127L, NA, NA, 86032349L), Sample4 = c(NA, 43359706L, NA,115127L, NA)), .Names = c("Row.names", "Similarity.x", "Peak.x","Name.x", "Sample1", "Sample2", "Similarity.y", "Peak.y", "Name.y","Sample3", "Sample4"), row.names = c(NA, -5L), class = "data.frame") 

我需要將名稱(Name)相同的行合併為一行,保留各自對應的樣本值,並提取最大的相似度(Similarity)。

期望的結果是這樣的:

structure(list(Similarity = c(757L, 859L, 999L, 879L, 577L, 567L), Peak = c(798L, 797L, 999L, 834L, 833L, 444L), Name = structure(c(1L, 2L, 6L, 3L, 5L, 4L), .Label = c("Benzene", "Cyclopentane", "Hexadecane","Nafhtalene", "Tetradecene", "dodecadiene"), class = "factor"), Sample1 = c(NA, 22237L, NA, NA, 86032349L, 3333L), Sample2 = c(NA,444567L, NA, 115127L, NA, 22222L), Sample3 = c(NA, 115127L,NA, NA, 86032349L, NA), Sample4 = c(NA, 43359706L, NA, 115127L,NA, NA)), .Names = c("Similarity", "Peak", "Name", "Sample1", "Sample2", "Sample3", "Sample4"), class ="data.frame", row.names = c(NA,-6L)) 

有什麼建議?

回答

1
> x <- merge(test1, test2, by='Name', all=TRUE) 
> x$Similarity <- pmax(x$Similarity.x, x$Similarity.y, na.rm=TRUE) 
> x$Peak <- x$Peak.x 

> x[c('Name','Similarity', 'Peak', 'Sample1', 'Sample2', 'Sample3', 'Sample4')] 
      Name Similarity Peak Sample1 Sample2 Sample3 Sample4 
1  Benzene  879 798  NA  NA  NA  NA 
2 Cyclopentane  999 797 22237 444567 115127 43359706 
3 Hexadecane  879 834  NA 115127  NA 115127 
4 Nafhtalene  567 444  3333 22222  NA  NA 
5 Tetradecene  888 833 86032349  NA 86032349  NA 
6 dodecadiene  999 999  NA  NA  NA  NA 
+0

謝謝馬修! – Francisco