2017-08-24 41 views
-2

我正在使用鑽石數據集。每種分類變量組合的平均價格 - R

> dput(diamonds_2[1:100,]) 
structure(list(carat = structure(c(4L, 2L, 4L, 10L, 12L, 5L, 
5L, 7L, 3L, 4L, 11L, 4L, 3L, 12L, 1L, 13L, 11L, 11L, 11L, 11L, 
11L, 4L, 4L, 12L, 12L, 4L, 5L, 11L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 12L, 7L, 14L, 14L, 14L, 7L, 7L, 13L, 10L, 13L, 13L, 6L, 
10L, 5L, 4L, 13L, 3L, 3L, 11L, 11L, 11L, 11L, 11L, 16L, 11L, 
11L, 11L, 23L, 9L, 13L, 12L, 12L, 5L, 5L, 11L, 11L, 11L, 11L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 19L, 7L, 5L, 5L, 5L, 5L, 13L, 
45L, 61L, 45L, 46L, 53L, 45L, 45L, 71L, 48L, 55L), .Label = c("0.2", 
"0.21", "0.22", "0.23", "0.24", "0.25", "0.26", "0.27", "0.28", 
"0.29", "0.3", "0.31", "0.32", "0.33", "0.34", "0.35", "0.36", 
"0.37", "0.38", "0.39", "0.4", "0.41", "0.42", "0.43", "0.5", 
"0.51", "0.52", "0.53", "0.54", "0.55", "0.56", "0.57", "0.58", 
"0.59", "0.6", "0.61", "0.62", "0.63", "0.64", "0.65", "0.66", 
"0.67", "0.68", "0.69", "0.7", "0.71", "0.72", "0.73", "0.74", 
"0.75", "0.76", "0.77", "0.78", "0.79", "0.8", "0.81", "0.82", 
"0.83", "0.84", "0.85", "0.86", "0.87", "0.88", "0.89", "0.9", 
"0.91", "0.92", "0.93", "0.94", "0.95", "0.96", "0.97", "0.98", 
"0.99", "1", "1.01", "1.02", "1.03", "1.04", "1.05", "1.06", 
"1.07", "1.08", "1.09", "1.1", "1.11", "1.12", "1.13", "1.14", 
"1.15", "1.16", "1.17", "1.18", "1.19", "1.2", "1.21", "1.22", 
"1.23", "1.24", "1.25", "1.27", "1.28", "1.29", "1.31", "1.5", 
"1.51", "1.52"), class = "factor"), color = structure(c(2L, 2L, 
2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L, 7L, 
7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L, 2L, 
1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L, 5L, 
3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L, 4L, 
6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L, 1L, 
2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L, 3L, 
2L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"), 
    clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 3L, 4L, 
    5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 3L, 
    3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 
    2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L, 
    4L, 2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L, 
    7L, 7L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L, 
    7L, 7L, 7L, 7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L, 
    3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"), 
    price = c(481, 481, 492, 558, 568, 579, 579, 590, 590, 601, 
    610, 621, 642, 660, 671, 671, 700, 729, 729, 729, 729, 740, 
    750, 750, 750, 761, 772, 793, 793, 793, 951, 951, 951, 951, 
    951, 951, 951, 951, 952, 952, 952, 952, 952, 952, 952, 952, 
    952, 952, 953, 953, 953, 953, 953, 953, 953, 954, 954, 954, 
    954, 954, 958, 958, 958, 958, 958, 959, 959, 959, 959, 959, 
    959, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 
    960, 960, 960, 960, 960, 960, 960, 960, 1, 1, 1, 2, 2, 2, 
    2, 2, 3, 3), cut_new = structure(c(1L, 1L, 2L, 1L, 2L, 3L, 
    3L, 3L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 
    3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 
    2L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L, 
    1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 3L, 2L, 
    2L, 2L, 3L, 1L), .Label = c("Above average", "Below average", 
    "Very Good"), class = "factor")), .Names = c("carat", "color", 
"clarity", "price", "cut_new"), row.names = c(NA, 100L), class = "data.frame") 

現在我想計算平均價格,讓每顆鑽石除了自身的價格之外,旁邊還顯示其所屬 color 與 cut_new 組合的平均價格。

enter image description here

我嘗試了以下代碼,但始終無法得到正確的結果:

Attempt 1:

# Goal: show, next to every diamond's own price, the mean price of its
# (color, cut_new) group.
head(diamonds_2)
diamonds_2 <- x <- as.data.frame(diamonds_2)
# NOTE(review): if price were stored as a factor, as.numeric() would return
# the level codes; as.numeric(as.character(...)) would be needed instead.
diamonds_2$price <- as.numeric(diamonds_2$price)

# tapply() would give a color-by-cut_new matrix with no key columns to merge
# on, so build the group means in long format instead: one row per
# (color, cut_new) combination. The result is deliberately not called `mean`,
# which would shadow base::mean().
avg_price <- aggregate(
  price ~ color + cut_new,
  data = diamonds_2,
  FUN = mean,
  na.rm = TRUE
)
names(avg_price)[names(avg_price) == "price"] <- "avg_price"

# Join on BOTH grouping columns so each diamond picks up the mean of its own
# group. merge() sorts the result by the key columns.
combine <- merge(diamonds_2, avg_price, by = c("color", "cut_new"))

Attempt 2:

# Mean price per (color, cut_new) combination. Grouping by color alone
# averages across all cuts, which is not the combination asked for.
# This yields one row per group; merge it back onto diamonds_2 by
# c("color", "cut_new") to show the group mean beside each diamond.
results <- summaryBy(price ~ color + cut_new, data = diamonds_2, FUN = mean)

有什麼辦法能讓其中一種做法正常運作嗎?

謝謝

+0

`dtplyr` 是什麼? – mtoto

+0

@mtoto 它是 Hadley 開發的 data.table/dplyr 混合體:https://github.com/hadley/dtplyr –

+0

您是否看過[下面](https://stackoverflow.com/a/45857150/4836511)的解決方案? – Prradep

回答

-1

假設你的輸入數據集是 df,可以用下面的代碼片段,依據 color 和 cut_new 變量計算每種組合的平均價格:

library(dplyr) 

# Mean price for every (color, cut_new) combination present in df.
# summarise() removes only the last grouping level, so the result would stay
# grouped by color; ungroup() returns a plain tibble so later verbs do not
# silently operate per color.
df %>%
  group_by(color, cut_new) %>%
  summarise(AvgPrice = mean(price)) %>%
  ungroup()

# # A tibble: 20 x 3 
# color  cut_new  AvgPrice 
# <fctr>  <fctr>  <dbl> 
# 1  1 Above average 956.7500 
# 2  1 Below average 952.0000 
# 3  1  Very Good 933.5714 
# 4  2 Above average 647.1250 
# 5  2 Below average 499.3333 
# 6  2  Very Good 720.0000 
# 7  3 Above average 797.0000 
# 8  3 Below average 318.3333 
# 9  3  Very Good 921.6000 
# 10  4 Above average 766.4000 
# 11  4  Very Good 574.0000 
# 12  5 Above average 800.5000 
# 13  5 Below average 953.7500 
# 14  5  Very Good 801.0000 
# 15  6 Above average 886.3333 
# 16  6 Below average 841.5000 
# 17  6  Very Good 829.0000 
# 18  7 Above average 796.7500 
# 19  7 Below average 659.0000 
# 20  7  Very Good 720.2000