2017-07-27 63 views
-2

我有這樣一個數據幀,包含三個不同的列:

# Example input: 17 observations described by three factor columns.
# Label spellings (including "comapny...") are data values and are kept verbatim.
df <- data.frame(
  col1 = factor(
    c(1L, 1L, 2L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 4L),
    levels = 1:4,
    labels = c("stock1", "stock2", "stock3", "stock4")
  ),
  col2 = factor(
    c(4L, 5L, 7L, 6L, 5L, 5L, 5L, 6L, 6L, 8L, 8L, 4L, 3L, 3L, 1L, 2L, 3L),
    levels = 1:8,
    labels = c(
      "comapny1", "comapny1+comapny4", "comapny4", "company1",
      "company2", "company2+company1", "company3", "company4"
    )
  ),
  # The first 12 rows belong to predictor1, the remaining 5 to predictor2.
  col3 = factor(
    rep(c("predictor1", "predictor2"), times = c(12L, 5L)),
    levels = c("predictor1", "predictor2")
  )
)

我想計算這三列中每種組合出現的頻率。

預期輸出

# Expected output: one row per observed (col1, col2, col3) combination of the
# input `df`, with `frequency` holding how many input rows share that
# combination (the frequencies sum to the 17 input rows).
# The col2 labels use the same spelling as the input data ("comapny...");
# the earlier "comapany..." spelling matched no value in `df`, so the expected
# result could never be reproduced from the input.
df2 <- data.frame(
  col1 = factor(
    c(1L, 1L, 1L, 2L, 4L, 1L, 1L, 3L, 3L, 1L, 2L, 1L),
    levels = 1:4,
    labels = c("stock1", "stock2", "stock3", "stock4")
  ),
  col2 = factor(
    c(1L, 2L, 3L, 3L, 3L, 4L, 5L, 5L, 6L, 6L, 7L, 8L),
    levels = 1:8,
    labels = c(
      "comapny1", "comapny1+comapny4", "comapny4", "company1",
      "company2", "company2+company1", "company3", "company4"
    )
  ),
  # The first 5 combinations belong to predictor2, the remaining 7 to predictor1.
  col3 = factor(
    rep(c("predictor2", "predictor1"), times = c(5L, 7L)),
    levels = c("predictor1", "predictor2")
  ),
  frequency = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 2L)
)

該怎麼做呢?

回答

2

我們可以使用count

library(dplyr)
# Tally the rows for every observed combination of col1, col2 and col3;
# the tally is returned in column `n`.
df %>%
  count(col1, col2, col3)
# A tibble: 12 x 4
#      col1              col2       col3     n
#    <fctr>            <fctr>     <fctr> <int>
#  1 stock1          comapny1 predictor2     1
#  2 stock1 comapny1+comapny4 predictor2     1
#  3 stock1          comapny4 predictor2     1
#  4 stock1          company1 predictor1     2
#  5 stock1          company2 predictor1     3
#  6 stock1 company2+company1 predictor1     1
#  7 stock1          company4 predictor1     2
#  8 stock2          comapny4 predictor2     1
#  9 stock2          company3 predictor1     1
# 10 stock3          company2 predictor1     1
# 11 stock3 company2+company1 predictor1     2
# 12 stock4          comapny4 predictor2     1

或用data.table

library(data.table)
# Convert df to a data.table by reference, then count rows (.N) within each
# (col1, col2, col3) group.
setDT(df)
df[, .N, by = .(col1, col2, col3)]
相關問題