我想找出這兩個表之間的重疊:從資料框(data frame)的兩列中提取重疊(overlapping)的行對
> dput(data1)
structure(list(Name.x = c("MDH1", "MDH1", "IDH2", "IDH2", "IDH2",
"IDH2", "IDH2", "IDH2", "IDH2", "SCOALB", "SCOALB", "CSY4", "CSY4",
"CSY4", "CSY4", "CSY4", "FUM1", "FUM1", "IDH6", "IDH6", "IDH6",
"ODC1-1", "ODC1-1", "ODC1-1", "ODC1-1", "ODC1-1", "ODC2-1", "ODC2-1",
"ODC2-1", "ACO2", "IDH1", "IDH1", "IDH1", "IDH1", "ODC2-2"),
Name.y = c("SCOALB", "SCOALA-1", "CSY4", "IDH6", "ODC1-1",
"ODC2-1", "IDH1", "ODC2-2", "ODC1-2", "SCOALA-1", "SCOALA-2",
"IDH6", "SDH2-1", "IDH1", "IDH5", "ICDH", "ODC1-1", "ODC1-2",
"ACO2", "IDH1", "IDH5", "ODC2-1", "IDH1", "IDH5", "ODC2-2",
"ODC1-2", "IDH1", "ODC2-2", "ODC1-2", "IDH1", "IDH5", "SCOALA-2",
"ODC2-2", "ODC1-2", "ODC1-2")), .Names = c("Name.x", "Name.y"
), class = "data.frame", row.names = c(NA, -35L))
> dput(data2)
structure(list(Protein1 = structure(c(3L, 7L, 18L, 19L, 7L, 19L,
6L, 18L, 6L, 18L, 18L, 19L, 9L, 8L, 19L, 18L, 9L, 7L, 18L, 12L,
8L, 19L, 5L, 29L, 12L, 29L, 12L, 18L, 7L, 17L, 6L, 5L, 9L, 19L,
12L, 3L, 19L, 16L, 18L, 17L, 16L, 17L, 9L, 29L, 12L, 7L, 29L,
18L, 16L, 18L, 29L, 8L, 17L, 16L, 17L, 12L, 6L, 8L, 17L, 29L,
9L, 17L, 29L, 19L, 8L, 17L, 29L, 9L, 9L, 16L, 29L, 29L, 19L,
19L, 19L, 29L, 12L, 19L, 17L, 29L, 17L, 16L, 16L, 19L, 16L, 4L,
1L, 5L, 17L, 9L, 18L, 18L, 6L, 4L, 8L, 16L, 16L, 29L, 7L, 12L,
8L, 4L, 29L, 12L, 5L), .Label = c("ACO2", "ACO3", "CSY4", "FUM1",
"ICDH", "IDH1", "IDH2", "IDH5", "IDH6", "LPD1", "LPD2", "MDH1",
"MDH2", "ME1", "ME2", "ODC1-1", "ODC1-2", "ODC2-1", "ODC2-2",
"PDC1a-1", "PDC1a-2", "PDC1b", "PDC2-1", "PDC2-2", "SCoALa-1",
"SCoALa-2", "SCoALb", "SDH1-1", "SDH2-1", "SDH2-2", "SDH2-3",
"SDH3-1", "SDH4", "SDH5", "SDH6", "SDH7a", "SDH7b", "SDH8"), class = "factor"),
Protein2 = structure(c(1L, 6L, 7L, 17L, 1L, 16L, 3L, 9L,
1L, 5L, 17L, 9L, 8L, 7L, 18L, 18L, 5L, 3L, 16L, 3L, 5L, 8L,
4L, 7L, 5L, 3L, 6L, 6L, 5L, 3L, 5L, 3L, 3L, 6L, 7L, 3L, 7L,
9L, 1L, 8L, 5L, 16L, 7L, 6L, 4L, 7L, 4L, 3L, 3L, 12L, 1L,
1L, 9L, 7L, 7L, 9L, 6L, 6L, 5L, 8L, 1L, 17L, 29L, 3L, 8L,
6L, 9L, 9L, 6L, 12L, 5L, 19L, 12L, 5L, 1L, 16L, 1L, 19L,
4L, 18L, 12L, 1L, 4L, 4L, 6L, 3L, 1L, 1L, 1L, 4L, 4L, 8L,
4L, 1L, 3L, 8L, 16L, 12L, 4L, 12L, 4L, 4L, 17L, 8L, 5L), .Label = c("ACO2",
"ACO3", "CSY4", "FUM1", "ICDH", "IDH1", "IDH2", "IDH5", "IDH6",
"LPD1", "LPD2", "MDH1", "MDH2", "ME1", "ME2", "ODC1-1", "ODC1-2",
"ODC2-1", "ODC2-2", "PDC1a-1", "PDC1a-2", "PDC1b", "PDC2-1",
"PDC2-2", "SCoALa-1", "SCoALa-2", "SCoALb", "SDH1-1", "SDH2-1",
"SDH2-2", "SDH2-3", "SDH3-1", "SDH4", "SDH5", "SDH6", "SDH7a",
"SDH7b", "SDH8"), class = "factor")), .Names = c("Protein1",
"Protein2"), class = "data.frame", row.names = c(1L, 4L, 6L,
12L, 22L, 25L, 28L, 33L, 44L, 48L, 51L, 52L, 53L, 60L, 68L, 70L,
72L, 76L, 86L, 109L, 110L, 119L, 133L, 144L, 146L, 158L, 170L,
197L, 202L, 206L, 211L, 213L, 226L, 227L, 237L, 271L, 272L, 286L,
290L, 297L, 304L, 305L, 306L, 319L, 323L, 327L, 347L, 348L, 351L,
357L, 370L, 372L, 373L, 378L, 379L, 392L, 406L, 410L, 414L, 417L,
419L, 437L, 442L, 445L, 448L, 455L, 457L, 462L, 471L, 479L, 482L,
483L, 488L, 503L, 509L, 522L, 536L, 563L, 618L, 620L, 623L, 628L,
630L, 644L, 647L, 666L, 668L, 673L, 676L, 678L, 679L, 690L, 691L,
694L, 698L, 703L, 709L, 714L, 715L, 722L, 723L, 724L, 727L, 739L,
740L))
每個 df 中都有兩列用來存儲字符串,兩個表之間的字符串存在重疊。但是,每一對內部的順序可能不同:同一對中的某個字符串可能出現在 data1 的第一列,而在 data2 中卻出現在第二列。
如何找出這兩個數據集之間共有多少對,以及其中有多少對是重疊的?
請分享你期望得到的輸出。 – RUser
你的意思是要把 'A,B' 與 'B,A' 視為匹配嗎? – zx8754