2016-10-11 45 views
2

我想將某一列中具有相同「基礎」字符串的行進行分組/取子集,並將每一組繪製在同一張圖上。如果能把所有圖都放進同一個 PDF 文件、每張圖各佔單獨一頁,那就更好了。對其中一列中具有相似字符串的行取子集,並將它們繪製在一起

數據(dput() 的輸出;後文回答中的代碼以 df 指代這個數據框):

structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 370500, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1091361.9, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1512409.6, 
0, 0, 0, 0, 0, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 4231358.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 5995680.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 2266775, 0, 0, 0, 0, 0, 0, 6864490.1, 0, 0, 
0, 0, 0, 0), `84` = c(0, 0, 0, 0, 1783350, 0, 0, 0, 1177650, 
0, 0, 0, 0, 0, 0, 0, 0, 4316664.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 9262556.7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 2831286.1, 0, 0, 0, 0, 0, 0, 10643218.2, 
0, 0, 0, 0, 0, 0), `110` = c(0, 0, 0, 0, 1778743.3, 0, 0, 0, 
1465966.7, 0, 0, 0, 0, 0, 0, 0, 0, 3111700, 0, 0, 1955337.5, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5584784.4, 5584784.4, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3092525, 0, 
0, 0, 0, 0, 0, 7847143.8, 0, 0, 0, 0, 0, 0), `134` = c(0, 0, 
0, 0, 1121869.4, 0, 0, 0, 1439430.6, 0, 0, 0, 0, 0, 0, 0, 0, 
2854250, 0, 0, 0, 0, 0, 0, 914890, 0, 0, 847880, 0, 0, 0, 0, 
0, 0, 0, 8191800, 0, 0, 0, 0, 0, 0, 1830904.5, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1650150, 0, 0, 837130, 0, 0, 0, 4925095.1, 0, 
0, 0, 0, 0, 0), `165` = c(0, 0, 0, 0, 1432775, 0, 0, 0, 1394186.1, 
0, 1120183.3, 0, 0, 0, 0, 0, 0, 2262421.7, 0, 0, 0, 615660, 0, 
0, 1292795.8, 0, 0, 712622.5, 0, 0, 0, 0, 0, 0, 0, 2683469.4, 
0, 0, 0, 0, 0, 0, 2318485.5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1561800, 
0, 0, 0, 0, 0, 0, 4382993.7, 0, 0, 763460, 0, 0, 0), `199` = c(0, 
0, 0, 0, 1314220, 0, 0, 0, 1439718.8, 0, 1929266.7, 0, 0, 0, 
1101800, 0, 0, 2759366.7, 0, 0, 0, 1291728.6, 0, 0, 2489775.6, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2858345.8, 0, 0, 0, 1819542.1, 
0, 0, 1497640.3, 0, 0, 0, 1300250, 0, 0, 0, 0, 0, 0, 1566875, 
0, 0, 0, 0, 0, 0, 4625895.6, 0, 0, 1308158.3, 0, 0, 0), `234` = c(1257250, 
0, 0, 0, 0, 0, 0, 0, 1276080, 0, 1848500, 0, 0, 0, 1529350, 0, 
0, 2155275, 0, 0, 0, 2023041.9, 0, 0, 1966447.7, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 1184200, 1184200, 0, 0, 1652350, 0, 0, 2018581.7, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1835225, 0, 0, 0, 0, 0, 0, 4639414.7, 
0, 0, 720715, 0, 0, 0), `257` = c(0, 0, 0, 0, 0, 669442.5, 0, 
0, 1253026.7, 0, 960410, 960410, 0, 0, 1258267.5, 0, 0, 1707392.5, 
0, 0, 0, 563280, 0, 0, 2403237.9, 0, 0, 0, 1044100, 0, 2075700, 
0, 0, 0, 0, 0, 5718450, 0, 0, 1704550, 0, 0, 1350286.9, 0, 0, 
0, 0, 2011700, 0, 0, 0, 0, 0, 1739500, 0, 0, 0, 0, 0, 0, 4612520.8, 
4612520.8, 0, 0, 0, 0, 0), `362` = c(0, 1593500, 0, 0, 0, 1610625.3, 
0, 0, 1234902.5, 0, 0, 1481036.8, 0, 0, 1583647.5, 0, 0, 1752089.2, 
0, 0, 0, 0, 0, 0, 2410809.2, 0, 0, 0, 654940, 0, 0, 0, 0, 0, 
0, 0, 7014905.6, 0, 0, 0, 0, 0, 1165672.1, 0, 0, 0, 0, 0, 0, 
0, 1029910, 0, 0, 2153087.5, 0, 0, 0, 422920, 0, 0, 0, 7495855.9, 
0, 0, 0, 0, 0), `433` = c(0, 0, 0, 0, 0, 1340283.9, 0, 0, 1268996.9, 
0, 0, 1416683.3, 0, 0, 1047862.5, 0, 0, 1819653.8, 0, 0, 0, 0, 
0, 0, 2227565.7, 0, 0, 0, 763765, 0, 0, 1595430, 0, 0, 0, 0, 
4894549, 0, 0, 0, 0, 0, 1061375.4, 0, 0, 0, 0, 0, 2251950, 0, 
1042130, 0, 0, 2055300, 0, 0, 0, 696278.3, 0, 0, 0, 5353797.8, 
0, 0, 0, 0, 0), `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2020300, 
2020300, 0, 0, 0, 0, 0, 0, 7681526, 0, 0, 0, 0, 0), `581` = c(0, 
0, 1749237.5, 0, 0, 0, 2421665.8, 0, 0, 1773262.5, 0, 0, 2251004.3, 
0, 0, 2570175, 0, 0, 3379756.9, 0, 0, 0, 2054455.6, 0, 0, 2518270.8, 
0, 0, 0, 0, 0, 0, 2917968.2, 0, 0, 0, 0, 7004350, 0, 0, 1451600, 
0, 0, 1394411, 0, 0, 0, 0, 0, 2507858.3, 0, 2377012.5, 0, 0, 
3719165.4, 0, 0, 0, 1472870.3, 0, 0, 9666916.1, 0, 0, 1730300, 
0, 0), `652` = c(0, 0, 476910, 476910, 0, 0, 1149078.8, 1149078.8, 
0, 1082468.7, 0, 0, 882769.7, 0, 0, 1370449.4, 1370449.4, 0, 
1529049, 1529049, 0, 0, 943632.2, 0, 0, 916587.8, 0, 0, 0, 988261.1, 
0, 0, 1778007.1, 1778007.1, 0, 0, 0, 3087304.8, 3087304.8, 0, 
782860, 782860, 0, 510158.5, 510158.5, 0, 0, 0, 0, 1503750, 0, 
1100677.5, 1100677.5, 0, 1669260, 1669260, 0, 0, 770733.2, 0, 
0, 4939242.8, 4939242.8, 0, 643564.4, 643564.4, 0), `733` = c(0, 
0, 0, 1095060, 0, 0, 0, 1674089.3, 0, 1252101.3, 0, 0, 1259111, 
0, 0, 0, 2429293.3, 0, 0, 2326928.3, 0, 0, 1259216.5, 0, 0, 1238837.5, 
0, 0, 0, 1224858.3, 0, 0, 0, 2952529.9, 0, 0, 0, 0, 4626414.7, 
0, 0, 1121440, 0, 0, 1025386.2, 0, 0, 0, 0, 1917900, 0, 0, 2197533.3, 
0, 0, 2840155.5, 0, 0, 1054285.7, 0, 0, 0, 7516814.2, 0, 0, 1329434.4, 
0), `818` = c(0, 0, 0, 720551.1, 0, 0, 0, 714662.7, 0, 617012.9, 
0, 0, 549850.8, 0, 0, 0, 1197460, 0, 0, 771979.2, 0, 0, 585847.5, 
585847.5, 0, 875475.4, 0, 0, 0, 576774, 0, 0, 0, 1147389.8, 0, 
0, 0, 0, 2292421.7, 0, 0, 755258.3, 0, 0, 0, 0, 0, 0, 0, 858930, 
0, 0, 1242668.3, 0, 0, 1580088.3, 0, 0, 641938.6, 641938.6, 0, 
0, 3838660.4, 0, 0, 733140.8, 733140.8), `896` = c(0, 0, 0, 590480, 
0, 0, 0, 817087.6, 0, 569869.5, 0, 0, 650822.5, 650822.5, 0, 
0, 1624052.5, 0, 0, 682570.8, 0, 0, 0, 1538800, 0, 690488.6, 
690488.6, 0, 0, 797923.9, 0, 0, 0, 1204889.3, 0, 0, 0, 0, 2184432.2, 
0, 0, 676654.7, 0, 0, 0, 210680, 0, 0, 0, 791152.5, 0, 0, 1599855.8, 
0, 0, 1358543.8, 0, 0, 0, 931288, 0, 0, 4683895.2, 0, 0, 0, 1202806 
), `972` = c(0, 0, 0, 799116.4, 0, 0, 0, 759169.9, 0, 408845, 
0, 0, 0, 948980, 0, 0, 968766.7, 0, 0, 675349.7, 0, 0, 0, 0, 
0, 0, 1811117.6, 0, 0, 609098.5, 0, 0, 0, 1073749.1, 0, 0, 0, 
0, 2392258.9, 0, 0, 743580, 0, 0, 0, 1020485, 0, 0, 0, 446596.7, 
0, 0, 1178583, 0, 0, 1438261.7, 0, 0, 0, 1133057.9, 0, 0, 4445814.7, 
0, 0, 0, 1057776.9), `1039` = c(0, 0, 0, 447255.3, 0, 0, 0, 609409.1, 
0, 304340, 0, 0, 0, 0, 0, 0, 694232.8, 0, 0, 473015.3, 0, 0, 
0, 0, 0, 0, 419524.9, 0, 0, 447760.6, 0, 0, 0, 932513.5, 0, 0, 
0, 0, 1251960.5, 0, 0, 276560, 0, 0, 0, 259640, 0, 0, 0, 354995, 
0, 0, 1570222.5, 0, 0, 1021822, 0, 0, 0, 811614, 0, 0, 2941698.2, 
0, 0, 0, 1199942.5), Gene = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4")), .Names = c("10", 
"34", "59", "84", "110", "134", "165", "199", "234", "257", "362", 
"433", "506", "581", "652", "733", "818", "896", "972", "1039", 
"Gene"), row.names = c("AT1G04170_1", "AT1G04170_2", "AT1G04170_3", 
"AT1G04170_4", "AT1G08520_1", "AT1G08520_2", "AT1G08520_3", "AT1G08520_4", 
"AT1G10670_1", "AT1G10670_2", "AT1G53500_1", "AT1G53500_2", "AT1G53500_3", 
"AT1G53500_4", "AT1G54270_1", "AT1G54270_2", "AT1G54270_3", "AT1G80480_1", 
"AT1G80480_2", "AT1G80480_3", "AT2G16950_1", "AT2G16950_2", "AT2G16950_3", 
"AT2G16950_4", "AT3G03960_1", "AT3G03960_2", "AT3G03960_3", "AT3G57290_1", 
"AT3G57290_2", "AT3G57290_3", "AT3G63460_1", "AT3G63460_2", "AT3G63460_3", 
"AT3G63460_4", "AT4G20890_1", "AT4G20890_2", "AT4G20890_3", "AT4G20890_4", 
"AT4G20890_5", "AT4G20980_1", "AT4G20980_2", "AT4G20980_3", "AT4G24190_1", 
"AT4G24190_2", "AT4G24190_3", "AT4G24190_4", "AT4G29670_1", "AT4G29670_2", 
"AT4G29670_3", "AT4G29670_4", "AT5G23740_1", "AT5G23740_2", "AT5G23740_3", 
"AT5G23860_1", "AT5G23860_2", "AT5G23860_3", "AT5G40450_1", "AT5G40450_2", 
"AT5G40450_3", "AT5G40450_4", "AT5G62700_1", "AT5G62700_2", "AT5G62700_3", 
"ATCG00780_1", "ATCG00780_2", "ATCG00780_3", "ATCG00780_4"), class = "data.frame") 

我想按相同的「基礎」字符串對行取子集,這裏的「基礎」指 Gene 列中下劃線之前的那部分字符串(例如 AT1G04170_1 中的 AT1G04170)。

我嘗試將 subset 和 grep 函數結合使用,但只有在我手動給出字符串時才有效。逐個字符串手動處理會是非常大的工作量。

+1

是的,它有效。這正是我需要的。謝謝! –

回答

3

你可以用 gather() 將數據轉換爲長格式,用 separate() 將 Gene 列拆分爲 label 和 number 兩列;爲了讓圖形更美觀,可以用 na_if() 將 y 列中的 0 值替換爲 NA,並用 factor() 指定 x 軸的順序:

library(dplyr)
library(tidyr)

# Reshape the wide data frame `df` (one column per x value plus `Gene`) into
# long format: one row per Gene/x pair, with the measurement stored in `y`.
# pivot_longer() is the current replacement for the superseded gather().
long_df <- df %>%
  pivot_longer(cols = -Gene, names_to = "x", values_to = "y") %>%
  # Split e.g. "AT1G04170_1" into label "AT1G04170" and number "1". The
  # separator is spelled out instead of relying on separate()'s default of
  # splitting on any run of non-alphanumeric characters.
  separate(Gene, into = c("label", "number"), sep = "_") %>%
  mutate(
    # Zeros become NA so that they are not drawn as points.
    y = na_if(y, 0),
    # Fix the levels in order of appearance so the x axis follows the
    # original column order rather than an alphabetical sort of the names.
    x = factor(x, levels = unique(x))
  )

然後,參照 this answer,你可以先創建圖形 p,再使用 dplyr 的 do() 或 purrr 對各組逐一迭代,爲每個 label 生成一張圖:

library(ggplot2)

# Template plot reused for every label group: x against y, coloured by the
# number split off the Gene name. na.rm = TRUE drops the rows whose y was set
# to NA silently instead of warning about removed points on every plot.
p <- ggplot(data = long_df, aes(x = x, y = y, color = number)) +
  geom_point(na.rm = TRUE)

# Using dplyr's do(): the named expression is evaluated once per label group,
# with `.` standing for that group's rows, which replace the data of the
# template plot p. The results are stored in the list column `plots`.
res <- do(
  group_by(long_df, label),
  plots = p %+% . + facet_wrap(~label)
)

# Using purrr's map(): slice_rows() and by_slice() were moved out of purrr
# (into the purrrlyr package), so the rows are split by label and the pieces
# are mapped over instead.
library(purrr)
res <- long_df %>%
  split(.$label) %>%
  # One plot per label: swap that label's rows into the template plot p.
  map(~ p %+% .x + facet_wrap(~label)) %>%
  # Back to a two-column table (label, plots) so that res$plots still works.
  tibble::enframe(name = "label", value = "plots")

其中給出:

#Source: local data frame [19 x 2] 
#Groups: <by row> 
# 
## A tibble: 19 × 2 
#  label plots 
#*  <chr> <list> 
#1 AT1G04170 <S3: gg> 
#2 AT1G08520 <S3: gg> 
#3 AT1G10670 <S3: gg> 
#4 AT1G53500 <S3: gg> 
#5 AT1G54270 <S3: gg> 
#6 AT1G80480 <S3: gg> 
#7 AT2G16950 <S3: gg> 
#8 AT3G03960 <S3: gg> 
#9 AT3G57290 <S3: gg> 
#10 AT3G63460 <S3: gg> 
#11 AT4G20890 <S3: gg> 
#12 AT4G20980 <S3: gg> 
#13 AT4G24190 <S3: gg> 
#14 AT4G29670 <S3: gg> 
#15 AT5G23740 <S3: gg> 
#16 AT5G23860 <S3: gg> 
#17 AT5G40450 <S3: gg> 
#18 AT5G62700 <S3: gg> 
#19 ATCG00780 <S3: gg> 

然後,你可以訪問其中的每一張圖,例如:

# `[[` extracts the first ggplot object itself; `[` would instead return a
# one-element list wrapping it.
res$plots[[1]]

其中給出:

enter image description here


要將所有圖保存到同一個 PDF 文件中(每張圖各佔一頁),只需執行:

pdf("all.pdf")
invisible(lapply(res$plots, print))
dev.off()

相關問題