2014-02-11 45 views
4

我目前正在嘗試使用ggplot2製作一個「熱圖」來顯示一系列p值,但無法弄清楚如何自訂實際的顏色分配和圖例。主題:在ggplot的geom_tile中設定p值的顏色和圖例格式。

# Melt the data, then turn var2 and var1 into ordered factors whose levels
# follow the order in which each label first appears (keeps the row/column
# order of the original data on the plot axes).
sampledata.m <- melt(sampledata)
sampledata.m[c("var2", "var1")] <- lapply(
  sampledata.m[c("var2", "var1")],
  function(column) {
    labels <- as.character(column)
    factor(labels, levels = unique(labels), ordered = TRUE)
  }
)

這樣做是爲了保持我的變量順序。

# Heat map of p-values: one tile per (var2, var1) pair, filled by `value`.
# Fix: the original scale_fill_gradientn() call ended with one closing
# parenthesis too many, which made the whole expression a syntax error.
# NOTE(review): `colours` has 3 entries while `values` has 5 positions;
# the scales documentation describes `values` as one position per colour,
# so the two vectors should probably have the same length -- confirm.
p <- ggplot(sampledata.m, aes(var2, var1)) +
    geom_tile(aes(fill = value), colour = "transparent") +
    scale_fill_gradientn(colours=c("light green","dark green", "black"),
    values=rescale(c(0,0.0003,0.05,0.5,1)),limits=c(0,1))
# NOTE(review): `base_size` is not defined anywhere in this snippet; it must
# be assigned before this statement runs.
p + theme_bw(base_size = base_size) + labs(x = "", y = "") +
    scale_x_discrete(expand = c(0,0)) +
    theme(legend.position = "bottom", axis.ticks = element_blank(),
    axis.text.x = element_text(size = base_size * 0.8, angle = 310,
    hjust = 0, colour = "black"))

這會產生一張好看的圖,但圖例和顏色漸變並未反映我所指定的重新縮放(rescale)。如果這個問題有簡單的解法,請原諒我的無知,我才寫了大約兩週的R。理想情況下,我希望我的圖和圖例能仿照這篇論文的配色方案和圖例標籤:http://www.ncbi.nlm.nih.gov/pubmed/22496159

# dput()-style literal of the sample data: a data.frame with 119 rows
# (row.names = c(NA, -119L)) and four columns:
#   var1     - factor with levels "A".."E"
#   var2     - factor with levels "1".."24"
#   variable - factor whose only level is "pvalue"
#   value    - numeric values between 0 and 1
# NOTE(review): presumably this is the already-melted `sampledata.m` used by
# the plotting code; the literal is not assigned to a name here -- confirm.
structure(list(var1 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L 
), .Label = c("A", "B", "C", 
"D", "E"), class = "factor"), var2 = structure(c(1L, 
5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 
8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 
4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 
20L, 6L, 21L, 11L, 2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 
13L, 24L, 1L, 5L, 23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 
2L, 22L, 10L, 3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L, 24L, 1L, 5L, 
23L, 18L, 9L, 8L, 14L, 12L, 20L, 6L, 21L, 11L, 2L, 22L, 10L, 
3L, 19L, 16L, 4L, 7L, 15L, 17L, 13L), .Label = c("1", "2", 
"3", "4", "5", "6", "7", "8", 
"9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", 
"22", "23", "24"), class = "factor"), variable = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L), .Label = "pvalue", class = "factor"), 
value = c(0.810172671, 0.596026338, 0.076550169, 0.908670635, 
0.300418653, 0.051553286, 0.124196482, 0.601568833, 0.058431468, 
0.341726981, 0.876674726, 0.002698295, 0.812059425, 0.068199656, 
0.758383287, 0.60362134, 0.89265723, 0.246111936, 0.156348035, 
0.909574522, 0.020202377, 0.388843992, 0.769441835, 0.102272916, 
0.38895717, 0.882296525, 0.792438683, 0.000491393, 0.004233434, 
0.202424095, 0.426941568, 0.08520186, 0.763036306, 0.602828564, 
0.037278697, 0.121642743, 0.669123606, 0.974328438, 0.834329923, 
0.050413697, 0.078476666, 0.387647156, 0.000540422, 0.379576632, 
0.361428444, 0.502439758, 0.001326035, 0.027652693, 0.188885638, 
0.579244445, 0.471985778, 0.677458228, 0.119307242, 0.364857868, 
0.238260538, 0.53472206, 0.204344281, 0.291888993, 0.295809688, 
0.00029, 0.005476157, 0.960975822, 0.00029, 0.055915429, 
0.618284682, 0.040605253, 0.521649682, 0.421086546, 0.164333061, 
0.755528982, 0.306854182, 0.012832628, 0.270393143, 0.946675764, 
0.59227376, 0.112658388, 0.429091426, 0.01662083, 0.017342483, 
0.065817234, 0.012140224, 0.359828816, 0.031969725, 0.00029, 
0.14555102, 0.18865081, 0.00029, 0.064107531, 0.505257768, 
0.070224536, 0.017082975, 0.375864198, 0.00029, 0.104103689, 
0.898979883, 0.004879605, 0.003597954, 0.036722932, 0.849058218, 
0.00029, 0.003739938, 0.00029, 0.00029, 0.00029, 0.008179017, 
0.193870353, 0.460181712, 0.389475522, 0.00029, 0.8785017, 
0.070414642, 0.584977921, 0.990764677, 0.767253318, 0.002234906, 
0.051331823, 0.00446149, 0.234477639, 0.275139791)), .Names = c("var1", "var2", "variable", "value"), row.names = c(NA, -119L), class = "data.frame") 
+0

+1,因爲您附上了數據。一些建議:1 - 保持數據名稱一致(你有'sampledata.m',然後是'pdata.m' - 已在編輯中修復); 2 - 列出所需的軟件包(您需要reshape2、scales以及ggplot2); 3 - 不要使用示例中未定義的變量(在本例中爲'base_size') – alexwhan

回答

3

我不會逐一討論您已有的主題(theme)設定。據我瞭解,問題的關鍵在於填充漸變的刻度。您可以在scale_fill_gradient()中使用對數變換來設定:

# Same tile heat map, but with a two-colour gradient on a log-transformed
# fill scale and explicit legend breaks.
# NOTE(review): a break at 0 has no finite position on a log scale; confirm
# whether it is intended.
p <- ggplot(data = sampledata.m, mapping = aes(x = var2, y = var1)) +
  geom_tile(mapping = aes(fill = value), colour = "transparent") +
  scale_fill_gradient(
    low = "light green",
    high = "black",
    trans = "log",
    breaks = c(0, 0.001, 0.05, 0.5)
  )

enter image description here

+0

感謝您提供提示和代碼建議。它工作出色。 – diaferiaj

+1

但是我也有類似的問題。我還使用scale_fill_gradient2()繪製關聯數據,其中藍色與低值相關,黑色中等,紅色表示高值。是否可以調整「中」值的定義,以便邊距更緊? – diaferiaj

0
# Small example: two numeric series (a, b) keyed by the label column N.
dt <- data.frame(
  N = letters[5:11],
  a = c(0.01, 0.05, 0.1, 0.5, 1, 5, 10),
  b = c(10, 20, 50, 100, 200, 1000, 2000)
)

# Melt to long format; the variable column is named "Cls" and the value
# column "Val".
dt.mlt <- melt(dt, variable.name = "Cls", value.name = "Val")

# Tile per (N, Cls) filled by Val on a log-transformed diverging scale, with
# the numeric value printed on each tile. geom_text() inherits data, x and y
# from the plot-level mapping.
ggplot(dt.mlt, aes(x = N, y = Cls, fill = Val)) +
  geom_tile() +
  geom_text(aes(label = Val)) +
  scale_fill_gradient2(
    low = "green",
    mid = "black",
    high = "red",
    trans = "log",
    breaks = c(0, 0.01, 0.1, 1, 10, 100, 1000)
  )

enter image description here

但是,如果我在scale_fill_gradient2中添加了midpoint=10,圖片將變爲:

enter image description here