2017-09-21 50 views
0

我有以下數據集(標題:用 dplyr::count() 對多列計數):

# Example data: six character columns of ten values each, with NA marking a
# missing value. The tibble classes are attached by hand via structure().
dat <- structure(
  list(
    C86_1981 = c(
      "Outer London", "Buckinghamshire", NA, "Ross and Cromarty",
      "Cornwall and Isles of Scilly", NA, "Kirkcaldy", "Devon", "Kent",
      "Renfrew"
    ),
    C96_1981 = c(
      "Outer London", "Buckinghamshire", NA, "Ross and Cromarty",
      "Not known/missing", NA, "Kirkcaldy", NA, NA, NA
    ),
    C00_1981 = c(
      "Outer London", "Inner London", "Lancashire", "Ross and Cromarty",
      NA, "Humberside", "Kirkcaldy", NA, NA, NA
    ),
    C04_1981 = c(
      "Kent", NA, NA, "Ross and Cromarty", NA, "Humberside",
      "Not known/missing", NA, NA, "Renfrew"
    ),
    C08_1981 = c(
      "Kent", "Oxfordshire", NA, "Ross and Cromarty",
      "Cornwall and Isles of Scilly", "Humberside", "Dunfermline", NA, NA,
      "Renfrew"
    ),
    C12_1981 = c(
      "Kent", NA, NA, "Ross and Cromarty", "Cornwall and Isles of Scilly",
      "Humberside", "Dunfermline", NA, NA, "Renfrew"
    )
  ),
  # Compact form for automatic row names 1..10.
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

我想對每一列執行 dplyr::count()。預期結果:

# A tibble: 10 x 3 
         C86_1981 dat86_n dat96_n ... 
          <chr> <int> <int> 
1    Buckinghamshire  1  1 
2 Cornwall and Isles of Scilly  1  NA 
3      Devon  1  NA 
4       Kent  1  NA 
5     Kirkcaldy  1  1 
6     Outer London  1  1 
7      Renfrew  1  NA 
8   Ross and Cromarty  1  1 
9       <NA>  2  5 
10   Not known/missing  NA  1 

目前我是手動完成這一步,然後用 dplyr::full_join() 合併結果:

library("tidyverse") 

# Tally each column on its own, naming the count column after its source.
dat86_n <- rename(count(dat, C86_1981), dat86_n = n)
dat96_n <- rename(count(dat, C96_1981), dat96_n = n)
# ...

# Combine the per-column tallies, matching the first table's value column
# against the second table's value column.
dat_counts <- dat86_n %>%
  full_join(dat96_n, by = c("C86_1981" = "C96_1981"))
  # ...

這樣可行,但如果我的數據之後有任何變動,就不夠穩健。我希望能以編程方式做到這一點。

我試過一個循環:

# Attempt 1: apply count() to every column of `dat` via lapply().
# This fails: per the error output below, count() ends up being applied to a
# character vector (a single column) rather than to a table.
lapply(dat, count) 
# Error in UseMethod("groups") : 
# no applicable method for 'groups' applied to an object of class "character" 

(purrr::map() 給出了同樣的錯誤。)我認爲這個錯誤是因爲 count() 需要一個 tbl,並把變量作爲單獨的參數,所以我也試了:

# Attempt 2: loop over the columns but always pass `dat` as the table and the
# loop variable `x` as the thing to count.
# This fails too: per the error output below, count() looks for a column
# literally named `x` in `dat`.
lapply(dat, function(x) { 
    count(dat, x) 
}) 
# Error in grouped_df_impl(data, unname(vars), drop) : 
# Column `x` is unknown 

同樣,purrr::map() 也給出了同樣的錯誤。我也試過 summarise_all() 的變體:

# Attempt 3: hand count() to summarise_all() so that it runs once per column.
# This fails: per the error output below, count() is again applied to a
# character vector rather than to a table.
dat %>% 
    summarise_all(count) 
    # Error in summarise_impl(.data, dots) : 
    # Evaluation error: no applicable method for 'groups' applied to an object of class "character". 

我覺得我漏掉了某些顯而易見的東西,解決方案應該很簡單。特別歡迎 dplyr 的解決方案,因爲這是我最常用的工具。

回答

2

如果同時使用 tidyr 包,下面的代碼就能做到:

# Stack every column into (name, city) pairs, tally each pair, then spread
# the tallies back out so each original column gets its own count column.
dat %>%
  tidyr::gather(name, city) %>%
  dplyr::group_by(name, city) %>%
  dplyr::count() %>%
  dplyr::ungroup() %>%
  tidyr::spread(name, n)

結果:

# A tibble: 15 x 7 
          city C00_1981 C04_1981 C08_1981 C12_1981 C86_1981 C96_1981 
*      <chr> <int> <int> <int> <int> <int> <int> 
1    Buckinghamshire  NA  NA  NA  NA  1  1 
2 Cornwall and Isles of Scilly  NA  NA  1  1  1  NA 
3      Devon  NA  NA  NA  NA  1  NA 
4     Dunfermline  NA  NA  1  1  NA  NA 
5     Humberside  1  1  1  1  NA  NA 
6     Inner London  1  NA  NA  NA  NA  NA 
7       Kent  NA  1  1  1  1  NA 
8     Kirkcaldy  1  NA  NA  NA  1  1 
9     Lancashire  1  NA  NA  NA  NA  NA 
10   Not known/missing  NA  1  NA  NA  NA  1 
11     Outer London  1  NA  NA  NA  1  1 
12     Oxfordshire  NA  NA  1  NA  NA  NA 
13      Renfrew  NA  1  1  1  1  NA 
14   Ross and Cromarty  1  1  1  1  1  1 
15       <NA>  4  5  3  4  2  5 
+0

是的!謝謝!一個小調整:你可以省略 `ungroup()`,因爲 `count()` 會替你完成,除此之外非常完美。 – Phil

2

@友leee 剛好比我早一步回答 ;)

使用 tidyverse:

library(tidyverse) 

# Reshape to long (year, county) pairs, count the rows in each pair, then
# spread the counts so every original column becomes a column of counts.
df <- dat %>%
  gather(year, county) %>%
  group_by(year, county) %>%
  summarise(no = n()) %>%
  spread(year, no)

# A tibble: 15 x 7 
         county C00_1981 C04_1981 C08_1981 C12_1981 C86_1981 C96_1981 
*      <chr> <int> <int> <int> <int> <int> <int> 
1    Buckinghamshire  NA  NA  NA  NA  1  1 
2 Cornwall and Isles of Scilly  NA  NA  1  1  1  NA 
3      Devon  NA  NA  NA  NA  1  NA 
4     Dunfermline  NA  NA  1  1  NA  NA 
5     Humberside  1  1  1  1  NA  NA 
6     Inner London  1  NA  NA  NA  NA  NA 
7       Kent  NA  1  1  1  1  NA 
8     Kirkcaldy  1  NA  NA  NA  1  1 
9     Lancashire  1  NA  NA  NA  NA  NA 
10   Not known/missing  NA  1  NA  NA  NA  1 
11     Outer London  1  NA  NA  NA  1  1 
12     Oxfordshire  NA  NA  1  NA  NA  NA 
13      Renfrew  NA  1  1  1  1  NA 
14   Ross and Cromarty  1  1  1  1  1  1 
15       <NA>  4  5  3  4  2  5 
+1

仍值得讚賞。謝謝:) – Phil

+0

謝謝 @Phil,總是需要分數來提高聲譽! ;) – sorearm