2013-05-17 53 views
1

我有以下data_frame結構已從csv文件(附加)讀取。基本上,這對每個運營商(A M D L J)總結他們的得分是優秀的,好的,好的,差的還是可怕的。其他領域的日期和得分手(我計劃稍後使用,但目前不需要)。重塑/重塑數據框來創建規範化的條形圖和餅圖

我正在努力的是如何減少這種數據的格式,使我可以繪製一個條形圖(標準化除以每個操作員的總數)和條形圖。我該如何將這個數據框減少到像下面這樣允許我使用geom_bar。

Operator Score Count 
A  Good 11 
A  Poor 5 
A  Ok 3 
A  Terrible 0 
A  Excellent 0 
D  Good 36 
D  Poor 50 
D  Ok 10 
D  Terrible 1 
D  Excellent 0 

我知道我可以根據運營商子初始數據幀,然後從總結

dfA = subset(df, Operator=='A') 
summary(dfA) 

得到的數字,但我想這個過程自動化(即自動重塑數據幀到上面的結構,我可以使用ggplot2來顯示結果)。但是,我不知道從哪裏開始解決這個問題

structure(list(Operator = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
3L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 5L, 2L, 2L, 2L, 
2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 
4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 1L, 1L, 1L, 5L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 2L, 
2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 
3L, 3L, 1L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 2L, 4L, 4L, 4L, 4L, 
3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 2L, 2L, 2L, 
2L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 
3L, 5L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 
3L, 3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 5L, 2L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "D", "J", "L", "M"), class = "factor"), 
    ROI_Score = structure(c(3L, 1L, 1L, 2L, 1L, 3L, 1L, 3L, 3L, 
    2L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 
    3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 
    1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 
    3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 
    3L, 1L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 1L, 1L, 3L, 3L, 
    1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 
    1L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 
    3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 
    1L, 3L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 
    1L, 3L, 1L, 3L, 1L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 2L, 
    1L, 3L, 2L, 3L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 3L, 
    3L, 1L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 
    3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 
    1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 1L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 
    1L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 
    1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 3L, 1L, 
    3L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 3L, 3L, 
    3L, 2L, 3L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 
    4L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 
    3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 3L, 1L, 1L, 2L, 
    3L, 1L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
    1L, 1L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 3L, 3L, 3L, 1L, 
    2L, 3L, 3L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 2L, 
    2L, 3L, 1L, 3L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L), .Label = c("Good", 
    "OK", "Poor", "Terrible"), class = "factor"), Date = structure(c(3L, 
    3L, 5L, 5L, 5L, 7L, 3L, 3L, 9L, 9L, 9L, 11L, 11L, 3L, 3L, 
    5L, 5L, 5L, 7L, 7L, 7L, 11L, 11L, 11L, 3L, 15L, 15L, 21L, 
    13L, 17L, 17L, 19L, 21L, 13L, 13L, 13L, 15L, 15L, 17L, 17L, 
    17L, 19L, 19L, 19L, 21L, 21L, 30L, 30L, 23L, 25L, 25L, 25L, 
    27L, 27L, 27L, 29L, 29L, 29L, 23L, 23L, 25L, 25L, 25L, 27L, 
    27L, 27L, 30L, 30L, 30L, 30L, 30L, 32L, 32L, 36L, 2L, 36L, 
    36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L, 36L, 36L, 39L, 39L, 
    2L, 2L, 32L, 34L, 34L, 36L, 41L, 41L, 41L, 43L, 1L, 38L, 
    38L, 41L, 42L, 43L, 38L, 38L, 41L, 41L, 41L, 42L, 42L, 42L, 
    43L, 43L, 1L, 1L, 1L, 38L, 42L, 42L, 42L, 42L, 1L, 1L, 1L, 
    3L, 3L, 7L, 3L, 3L, 3L, 5L, 7L, 11L, 3L, 3L, 3L, 3L, 5L, 
    5L, 5L, 7L, 7L, 7L, 9L, 9L, 11L, 11L, 11L, 13L, 15L, 17L, 
    19L, 19L, 21L, 21L, 13L, 21L, 13L, 13L, 13L, 15L, 17L, 17L, 
    17L, 19L, 19L, 21L, 21L, 21L, 29L, 29L, 29L, 30L, 23L, 25L, 
    29L, 29L, 23L, 23L, 23L, 25L, 25L, 25L, 27L, 27L, 30L, 30L, 
    30L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L, 34L, 
    34L, 34L, 36L, 36L, 2L, 2L, 2L, 43L, 1L, 38L, 41L, 41L, 42L, 
    42L, 42L, 43L, 43L, 1L, 1L, 43L, 1L, 42L, 1L, 1L, 1L, 32L, 
    32L, 36L, 2L, 36L, 36L, 36L, 39L, 39L, 34L, 34L, 34L, 36L, 
    36L, 36L, 39L, 39L, 2L, 2L, 32L, 34L, 34L, 36L, 10L, 4L, 
    6L, 6L, 10L, 10L, 10L, 12L, 4L, 4L, 12L, 12L, 6L, 6L, 6L, 
    8L, 8L, 8L, 12L, 12L, 14L, 16L, 14L, 14L, 18L, 20L, 14L, 
    18L, 18L, 18L, 14L, 14L, 14L, 16L, 16L, 16L, 22L, 22L, 22L, 
    28L, 28L, 31L, 28L, 28L, 28L, 31L, 31L, 31L, 33L, 33L, 33L, 
    35L, 35L, 35L, 37L, 37L, 37L, 33L, 33L, 33L, 35L, 37L, 37L, 
    40L, 40L, 32L, 32L, 32L, 2L, 2L, 39L, 39L, 32L, 32L, 32L, 
    34L, 34L, 34L, 36L, 36L, 2L, 2L, 2L, 6L, 6L, 10L, 10L, 10L, 
    10L, 4L, 4L, 6L, 6L, 8L, 8L, 8L, 10L, 10L, 12L, 4L, 8L, 8L, 
    8L, 8L, 12L, 4L, 4L, 4L, 4L, 8L, 12L, 16L, 16L, 14L, 16L, 
    18L, 18L, 20L, 20L, 20L, 14L, 14L, 20L, 20L, 22L, 22L, 14L, 
    16L, 18L, 18L, 18L, 18L, 24L, 24L, 24L, 26L, 26L, 31L, 31L, 
    24L, 26L, 26L, 26L, 26L, 24L, 24L, 24L, 24L, 31L, 31L, 40L, 
    37L, 33L, 33L, 33L, 33L, 35L, 35L, 35L, 37L, 37L, 37L, 37L, 
    40L), .Label = c("01/02/2013", "01/03/2013", "04/02/2013", 
    "04/03/2013", "05/02/2013", "05/03/2013", "06/02/2013", "06/03/2013", 
    "07/02/2013", "07/03/2013", "08/02/2013", "08/03/2013", "11/02/2013", 
    "11/03/2013", "12/02/2013", "12/03/2013", "13/02/2013", "13/03/2013", 
    "14/02/2013", "14/03/2013", "15/02/2013", "15/03/2013", "18/02/2013", 
    "18/03/2013", "19/02/2013", "19/03/2013", "20/02/2013", "20/03/2013", 
    "21/02/2013", "22/02/2013", "22/03/2013", "25/02/2013", "25/03/2013", 
    "26/02/2013", "26/03/2013", "27/02/2013", "27/03/2013", "28/01/2013", 
    "28/02/2013", "28/03/2013", "29/01/2013", "30/01/2013", "31/01/2013" 
    ), class = "factor"), Scorer = structure(c(2L, 2L, 3L, 3L, 
    2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 
    2L, 2L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 2L, 
    1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 1L, 3L, 2L, 2L, 
    3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 
    3L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 2L, 
    2L, 2L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
    3L, 1L, 3L, 1L, 3L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 3L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 
    1L, 1L, 1L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 1L, 3L, 
    2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 3L, 3L, 2L, 1L, 
    2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    1L, 3L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 
    1L, 3L, 3L, 1L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, 3L, 1L, 2L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 
    2L, 3L, 3L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
    2L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 
    3L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 2L, 1L, 
    3L, 2L, 1L, 3L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 
    1L, 3L, 2L, 1L, 1L, 3L, 1L, 3L, 2L, 2L, 1L, 3L, 2L, 1L, 3L, 
    3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 3L, 3L, 
    1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 1L, 
    2L, 2L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L), .Label = c("", "B", "G"), class = "factor")), .Names = c("Operator", 
"ROI_Score", "Date", "Scorer"), row.names = c(NA, -412L), class = "data.frame") 
+0

用'as.data.frame創建表(表( df $ ROI_Score,df $ Operator))'。我沒有按照你的繪圖要求... – Chase

回答

1

下面是使用data.table準備資料:

require(data.table) 
dt <- data.table(df) 
ops <- as.character(unique(dt$Operator)) 
scr <- as.character(unique(dt$ROI_Score)) 
oo <- setkey(dt[, .N, by="Operator,ROI_Score"], Operator, 
       ROI_Score)[CJ(ops, scr)][is.na(N), N:= 0L] 

而且這裏是你如何能得到標準化的條形圖與此數據:

繪製該
oo[, N.norm := N/sum(N), by=Operator] 

一種方法是用X =運營商:

require(ggplot2) 
ggplot(data = oo, aes(x = Operator, y = N.norm)) + 
     geom_bar(positon="stack", stat="identity", aes(fill = ROI_Score)) 

enter image description here

+0

非常感謝。很多小R技巧我不知道 – moadeep

+0

如果我在data.table說日期(2014年1月,2013年2月,2013年3月等)中有一個額外的列,並使用'dts < - unique(dt $ Date)'。我怎樣才能得到每個運營商和每個月的相對頻率。例如'oo < - setkey(dt [,.N,by =「Operator,ROI_Score,Date」],Operator,ROI_Score,Date)[CJ(ops,scr,dts)] [is.na(N),N := 0L]''oo [,N.norm:= N/sum(N),by = Operator,Date]' – moadeep

1

你可以簡單地做這樣的事情來準備數據:

data.frame(table(Operator=df$Operator, Score=df$ROI_Score)) 

其中給出:

Operator Score Freq 
1   A  Good 11 
2   D  Good 36 
3   J  Good 54 
4   L  Good 44 
5   M  Good 28 
6   A  OK 3 
7   D  OK 10 
8   J  OK 9 
9   L  OK 4 
10  M  OK 7 
11  A  Poor 5 
12  D  Poor 50 
13  J  Poor 56 
14  L  Poor 67 
15  M  Poor 27 
16  A Terrible 0 
17  D Terrible 1 
18  J Terrible 0 
19  L Terrible 0 
20  M Terrible 0