2015-08-27 28 views
1

我有一個數據集,看起來像這樣:řGGPLOT2 geom_area()不工作

頭():

      ViolDesc RESULTDTTM count cumsum 
1 Adequate Handwashing/Where/When/How 2007-03-05  1  1 
2 Adequate Handwashing/Where/When/How 2007-07-09  1  2 
3 Adequate Handwashing/Where/When/How 2007-07-16  1  3 
4 Adequate Handwashing/Where/When/How 2007-07-17  3  6 
5 Adequate Handwashing/Where/When/How 2007-07-23  3  9 
6 Adequate Handwashing/Where/When/How 2007-07-27  1  10 
7 Adequate Handwashing/Where/When/How 2007-07-30  1  11 
8 Adequate Handwashing/Where/When/How 2007-07-31  1  12 
9 Adequate Handwashing/Where/When/How 2007-08-07  1  13 
10 Adequate Handwashing/Where/When/How 2007-08-13  2  15 

dput()的50行頭樣品:

structure(list(ViolDesc = c("Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How", "Adequate Handwashing/Where/When/How", 
"Adequate Handwashing/Where/When/How"), RESULTDTTM = structure(c(1173052800, 
1183939200, 1184544000, 1184630400, 1185148800, 1185494400, 1185753600, 
1185840000, 1186444800, 1186963200, 1187049600, 1187136000, 1187654400, 
1187827200, 1187913600, 1188172800, 1188432000, 1188864000, 1188950400, 
1189036800, 1189468800, 1189641600, 1189987200, 1190246400, 1190332800, 
1190764800, 1190851200, 1190937600, 1191283200, 1191369600, 1191542400, 
1192406400, 1192492800, 1192665600, 1193011200, 1193270400, 1193702400, 
1193788800, 1193875200, 1193961600, 1194220800, 1194307200, 1194393600, 
1194480000, 1194566400, 1194912000, 1195084800, 1195171200, 1195430400, 
1195516800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    count = c(1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 3L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 2L, 
    2L, 1L, 1L, 3L, 3L, 1L, 1L, 2L), cumsum = c(1L, 2L, 3L, 6L, 
    9L, 10L, 11L, 12L, 13L, 15L, 17L, 18L, 19L, 22L, 23L, 24L, 
    27L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 
    40L, 42L, 43L, 44L, 47L, 48L, 49L, 50L, 52L, 53L, 55L, 58L, 
    60L, 62L, 64L, 65L, 66L, 69L, 72L, 73L, 74L, 76L)), .Names = c("ViolDesc", 
"RESULTDTTM", "count", "cumsum"), row.names = c(NA, 50L), class = "data.frame") 

dput()50行尾樣本:

structure(list(ViolDesc = c("Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's.", 
"Washing fruits and veg's.", "Washing fruits and veg's.", "Washing fruits and veg's." 
), RESULTDTTM = structure(c(1342656000, 1344556800, 1345680000, 
1350518400, 1350950400, 1351036800, 1354147200, 1360022400, 1360195200, 
1363305600, 1365638400, 1366070400, 1369872000, 1369958400, 1376265600, 
1376524800, 1383696000, 1385942400, 1388016000, 1389744000, 1396224000, 
1400803200, 14, 1403568000, 1405641600, 1406505600, 1407283200, 
1407801600, 1410739200, 1411603200, 1411689600, 1413849600, 1414368000, 
1415145600, 1416787200, 1417651200, 1418860800, 1420675200, 1424908800, 
1425254400, 1425427200, 1425600000, 1426118400, 1428451200, 1429056000, 
1430438400, 1430870400, 1433376000, 1438214400, 1438819200), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), count = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L), cumsum = c(92L, 
93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 
105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 
116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 
127L, 128L, 129L, 130L, 131L, 132L, 134L, 135L, 136L, 137L, 138L, 
139L, 140L, 142L, 143L)), .Names = c("ViolDesc", "RESULTDTTM", 
"count", "cumsum"), row.names = 15178:15227, class = "data.frame") 

c列lasses:

> sapply(r, class) 
$ViolDesc 
[1] "character" 

$RESULTDTTM 
[1] "POSIXct" "POSIXt" 

$count 
[1] "integer" 

$cumsum 

[1] "integer" 

大約有15種不同ViolDesc秒。

如果我做ggplot(data=r, aes(x=RESULTDTTM, y=cumsum)) + geom_line(aes(color=ViolDesc)) %>% print

我得到:

enter image description here

我只需要這條線圖,成爲一個堆疊面積圖。然而,當我這樣做:ggplot(data=r, aes(x=RESULTDTTM, y=cumsum, fill=ViolDesc)) + geom_area() %>% print,我得到:

enter image description here

正如你所看到的,它成爲所有尖尖的,而不是在所有喜歡geom_line()。有人知道爲什麼嗎?我覺得代碼非常簡單,以至於數據框必定有問題?

+0

我做了隨機數據的一些嘗試,這在我看來,geom_area需要的y值各組每個x存在於數據集中,因爲否則它不知道如何堆疊。你的數據是這種情況嗎? – Heroka

+0

我的猜測是,你的X不符,您的不同因素之間,嘗試你能在哪裏使用'nrow(R)'闡述切換爲nrow(R) – erasmortg

+0

? –

回答

2

您的數據有x個值並不總是匹配。 ggplot假定你在每個時間點有一個缺失值的變量有0,但另一個有一個度量。

我們可以通過使用每個時間點上每個值的累積最大值對這些值進行插值來解決這個問題。

下面介紹如何使用dplyrtidyr來做到這一點。

先裝包:

library(tidyr) 
library(dplyr) 
library(ggplot2) 

然後在數據讀取和修復的日期格式:

z <- read.csv("sample.csv") 
z$RESULTDTTM <- as.POSIXct(z$RESULTDTTM, format = "%m/%d/%Y") 

現在,我們想使數據「寬」 - 一行每次點,填寫時間點與每一列的累計最高,然後使之「長」再次繪製:

z <- z %>% spread(ViolDesc, cumsum) %>% #this makes the data wide 
      group_by(RESULTDTTM) %>%  #this makes a group for each time point 
      summarise_each(funs(sum(., na.rm = TRUE)), -RESULTDTTM, -count) %>% 
      #this line grouped time points together by summing - some time points had two measures 
      mutate_each(funs(cummax(.)), -RESULTDTTM) %>% #this takes the cumulative maximum 
      gather(var, value, -RESULTDTTM) #this shapes the data back to long 

現在我們就可以繪製出來:

ggplot(data = z, aes(x = RESULTDTTM, y = value)) + 
     geom_area(aes(colour = var, fill = var), position = 'stack') 

這給了我這個圖: enter image description here

+0

有趣它給了我上面的第二個陰謀(即尖刺)..現在看看這個。另外你的ggplot版本是什麼? –

+0

我在這裏上傳的數據:http://www.sharecsv.com/s/581ce3a8b4b6eb6e51f6dc2ff999d499/sample.csv。數據框中沒有空數據,所以我正在尋找其他方法來找出問題所在。 '任何(is.na(r))' –

+0

這工作!你的回答遠遠超過我的頭(我從來沒有用過tidyr),但我能夠複製你的照片。非常感謝你,特別是對於及時的反饋。 –