2012-09-10 38 views
3

我對一些值進行了聚類,並據此將它們分組。然後我使用 ggplot2 繪製密度圖,並把各個聚類的曲線疊加在一起。示例圖像如下:(圖片:ggplot2 中按聚類分組着色的密度圖)

對於羣集中的每個組,我繪製一個密度圖並覆蓋它們。密度圖中的着色對應於聚類中的分組。

我的問題是,我已經根據分組手動分割數據,並將它們放在它們自己的單獨文本表中(請參閱下面的代碼)。這是非常低效的,對於大數據集可能會非常繁瑣。我如何動態繪製ggplot2中的密度圖而不將它們分離到各自的文本表中?

原始輸入表在被拆分之前如下所示:

# Example input table: 13 rows keyed by `file`, with the max/min/avg/lowest
# score columns used for the density plots.
# The table is passed through `text =` so that read.table() creates and
# closes its own connection; an explicit textConnection() is already open
# when handed over and is therefore never closed by read.table().
scores <- read.table(text = "
file  max  min  avg    lowest
132   5112.0  6520.0  5728.0   5699.0
133   4720.0  6064.0  5299.0   5277.0
5   4617.0  5936.0  5185.0   5165.0
1   4384.0  5613.0  4917.0   4895.0
1010  5008.0  6291.0  5591.0   5545.0
104   4329.0  5554.0  4858.0   4838.0
105   4636.0  5905.0  5193.0   5165.0
35   4304.0  5578.0  4842.0   4831.0
36   4360.0  5580.0  4891.0   4867.0
37   4444.0  5663.0  4979.0   4952.0
31   4328.0  5559.0  4858.0   4839.0
39   4486.0  5736.0  5031.0   5006.0
32   4334.0  5558.0  4864.0   4843.0
", header = TRUE)

我用來生成圖的代碼如下。請注意,把基礎圖形(base graphics)與 grid 結合的部分仍無法正常工作:

library(ggplot2) 
library(grid) 

# Base-graphics layout on a 2 x 3 grid: figure 1 occupies the whole first
# column, figures 2-5 fill the remaining cells row by row.
layout(matrix(c(1, 2, 3, 1, 4, 5), nrow = 2, ncol = 3, byrow = TRUE))

# Start a new grid page and push a viewport whose layout has `x` rows and
# `y` columns, so later plots can be placed into individual cells.
#   x: number of layout rows
#   y: number of layout columns
vp.setup <- function(x, y) {
  grid.newpage()
  cell_grid <- grid.layout(x, y)
  pushViewport(viewport(layout = cell_grid))
}

# Build a viewport that addresses a single cell of the enclosing layout.
#   x: layout row of the cell
#   y: layout column of the cell
vp.layout <- function(x, y) {
  cell <- viewport(layout.pos.row = x, layout.pos.col = y)
  cell
}

# Open a fresh grid page laid out as 2 rows by 3 columns.
vp.setup(2, 3)

# Inline tables are read with `text =` so that read.table() opens and closes
# its own connection; an explicit textConnection() would be left open.

# Per-file average value used as the clustering input.
file_vals <- read.table(text = "
file  avg_vals
133   1.5923
132   1.6351
1010  1.6532
104   1.6824
105   1.6087
39   1.8694
32   1.9934
31   1.9919
37   1.8638
36   1.9691
35   1.9802
1   1.7283
5   1.7637
", header = TRUE)

# Hand-split score rows labelled as the "red" cluster.
red <- read.table(text = "
file  max  min  avg    lowest
31   4328.0  5559.0  4858.0   4839.0
32   4334.0  5558.0  4864.0   4843.0
36   4360.0  5580.0  4891.0   4867.0
35   4304.0  5578.0  4842.0   4831.0
", header = TRUE)

# Hand-split score rows labelled as the "blue" cluster.
blue <- read.table(text = "
file  max  min  avg    lowest
133   4720.0  6064.0  5299.0   5277.0
105   4636.0  5905.0  5193.0   5165.0
104   4329.0  5554.0  4858.0   4838.0
132   5112.0  6520.0  5728.0   5699.0
1010  5008.0  6291.0  5591.0   5545.0
", header = TRUE)

# Hand-split score rows labelled as the "green" cluster.
green <- read.table(text = "
file  max  min  avg    lowest
39   4486.0  5736.0  5031.0   5006.0
37   4444.0  5663.0  4979.0   4952.0
5   4617.0  5936.0  5185.0   5165.0
1   4384.0  5613.0  4917.0   4895.0
", header = TRUE)


# Perform cluster ----
# Hierarchical clustering of the files on their average value, cut into
# three groups.
d <- dist(file_vals$avg_vals, method = "euclidean")
# "ward.D" is the current name of the method formerly spelled "ward"; the
# old spelling is still accepted but prints a rename message (R >= 3.1.0).
fit <- hclust(d, method = "ward.D")
plot(fit, labels = file_vals$file)
groups <- cutree(fit, k = 3)

# Colour palette; the dendrogram rectangles take their border colours from it.
cols <- c(
  "red", "blue", "green", "purple", "orange", "magenta", "brown",
  "chartreuse4", "darkgray", "cyan1"
)
rect.hclust(fit, k = 3, border = cols)


# Density plots ----
# Colours are keyed by cluster label so each cluster keeps its own colour no
# matter how the labels are ordered once they become a factor (alphabetical
# ordering would otherwise pair "Blue" with "red", and so on).
cluster_colours <- c(Red = "red", Blue = "blue", Green = "green")
cluster_tables <- list(Red = red, Blue = blue, Green = green)

# Build the overlaid per-cluster density plot for one score column.
#   column: name of the score column to plot ("max", "min", "avg", "lowest")
#   limits: length-2 numeric vector handed to xlim()
# Returns the ggplot object; the x axis keeps the "<column>_vals" label.
plot_cluster_density <- function(column, limits) {
  per_cluster <- lapply(names(cluster_tables), function(label) {
    data.frame(Cluster = label, value = cluster_tables[[label]][[column]])
  })
  dat <- do.call(rbind, per_cluster)
  # Fix the level order explicitly so the legend reads Red, Blue, Green.
  dat$Cluster <- factor(dat$Cluster, levels = names(cluster_colours))

  ggplot(dat, aes(x = value)) +
    geom_density(aes(fill = Cluster), alpha = 0.3) +
    xlim(limits) +
    scale_fill_manual(values = cluster_colours) +
    labs(x = paste0(column, "_vals"), fill = "Clusters")
}

# The plot objects are named *_plot so they do not mask base::max()/min().
max_plot <- plot_cluster_density("max", c(3500, 5500))
print(max_plot, vp = vp.layout(1, 2))

min_plot <- plot_cluster_density("min", c(5000, 7000))
print(min_plot, vp = vp.layout(1, 3))

avg_plot <- plot_cluster_density("avg", c(4000, 6000))
print(avg_plot, vp = vp.layout(2, 2))

lowest_plot <- plot_cluster_density("lowest", c(4000, 6000))
print(lowest_plot, vp = vp.layout(2, 3))

回答

1

通過這種方式,您可以使用4個面板自動創建所需的繪圖。

首先,數據:

# Inline tables are read with `text =` so that read.table() opens and closes
# its own connection; an explicit textConnection() would be left open.

# Score table: one row per `file` value with max/min/avg/lowest columns.
scores <- read.table(text = "
file  max  min  avg    lowest
132   5112.0  6520.0  5728.0   5699.0
133   4720.0  6064.0  5299.0   5277.0
5   4617.0  5936.0  5185.0   5165.0
1   4384.0  5613.0  4917.0   4895.0
1010  5008.0  6291.0  5591.0   5545.0
104   4329.0  5554.0  4858.0   4838.0
105   4636.0  5905.0  5193.0   5165.0
35   4304.0  5578.0  4842.0   4831.0
36   4360.0  5580.0  4891.0   4867.0
37   4444.0  5663.0  4979.0   4952.0
31   4328.0  5559.0  4858.0   4839.0
39   4486.0  5736.0  5031.0   5006.0
32   4334.0  5558.0  4864.0   4843.0
", header = TRUE)

# Per-file average value used as the clustering input.
file_vals <- read.table(text = "
file  avg_vals
133   1.5923
132   1.6351
1010  1.6532
104   1.6824
105   1.6087
39   1.8694
32   1.9934
31   1.9919
37   1.8638
36   1.9691
35   1.9802
1   1.7283
5   1.7637
", header = TRUE)

兩個數據幀可以被合併成一個單一的一個:

# Join the score table with the per-file averages on the shared `file` key.
dat <- merge(x = scores, y = file_vals, by = "file")

擬合:

# Hierarchical clustering of the merged rows on `avg_vals`, cut into three
# groups.
d <- dist(dat$avg_vals, method = "euclidean")
# "ward.D" is the current name of the method formerly spelled "ward"; the
# old spelling is still accepted but prints a rename message (R >= 3.1.0).
fit <- hclust(d, method = "ward.D")
groups <- cutree(fit, k = 3)
# Colour palette indexed by cluster number.
cols <- c(
  "red", "blue", "green", "purple", "orange", "magenta", "brown",
  "chartreuse4", "darkgray", "cyan1"
)

添加列與顏色名(基於擬合):

# Label every row with the palette colour of its cluster number.
dat[["group"]] <- cols[groups]

將數據從寬格式重塑爲長格式:

# Stack the four score columns into a single `value` column; `times` supplies
# the label stored for each source column, and `file`/`avg_vals` are dropped.
# Explicit row names are supplied because `group` is not unique per row.
dat_re <- reshape(
  dat,
  direction = "long",
  varying = c("max", "min", "avg", "lowest"),
  v.names = "value",
  times = c("max", "min", "avg", "lowest"),
  idvar = "group",
  drop = c("file", "avg_vals"),
  new.row.names = seq(nrow(scores) * 4)
)

繪圖:

# Overlaid density per cluster, one facet per score type.
# `group` already holds colour names, so each colour is mapped to itself by
# name. An unnamed `values` vector is assigned in alphabetical level order,
# which would paint e.g. the "blue" group red.
fill_values <- cols[cols %in% dat_re$group]
names(fill_values) <- fill_values

p <- ggplot(dat_re, aes(x = value)) +
  geom_density(aes(fill = group), alpha = 0.3) +
  scale_fill_manual(values = fill_values) +
  labs(fill = "Clusters") +
  facet_wrap(~ time)

print(p)

enter image description here

+0

感謝您的回答,但我要如何爲各個圖設置刻度並加上 x 軸?這些圖不一定適用相同的 x 軸範圍。 – Harpal

+0

您可以使用 `facet_wrap` 的另一個參數,讓每個面板的 x 軸範圍取決於該面板中的數據範圍:`facet_wrap(~ time, scales = "free_x")`。請注意,ggplot2 不能爲各個面板分別手動指定不同的 x 軸範圍。如果所有面板使用相同的 x 軸範圍就足夠了,可以在您的圖中加上:`+ coord_cartesian(xlim = c(3500, 7000))`。 –