2017-11-11 90 views
0

R - 依區間(breaks)切分並按組統計出現次數。我有一個數據幀,看起來像這樣:

# Example data used by the snippets below: a base data.frame built from a
# dput()-style literal, one row per city.
# Columns (all visible in the literal):
#   Geocode    - character codes, one per row (presumably a municipality
#                identifier - confirm against the data source)
#   Location   - character labels of the form "City, RO"
#   Region     - character region name: "Norte", "Sul" or "Nordeste"
#   Population - integer population figure for the row
# row.names = c(NA, 30L) declares 30 rows, so every column vector must hold
# exactly 30 entries - TODO confirm this for Region, whose values appear to
# have been edited by hand after the dput().
# NOTE(review): several Location values look like they lost accented
# characters (e.g. "Ji-Paran, RO", "So Miguel Do Guapor, RO") - confirm the
# encoding of the original source before relying on these names.
dat <- structure(list(Geocode = c("1100015", "1100023", "1100031", "1100049", 
"1100056", "1100064", "1100072", "1100080", "1100098", "1100106", 
"1100114", "1100122", "1100130", "1100148", "1100155", "1100189", 
"1100205", "1100254", "1100262", "1100288", "1100296", "1100304", 
"1100320", "1100338", "1100346", "1100379", "1100403", "1100452", 
"1100502", "1100601"), Location = c("Alta Floresta D'oeste, RO", 
"Ariquemes, RO", "Cabixi, RO", "Cacoal, RO", "Cerejeiras, RO", 
"Colorado Do Oeste, RO", "Corumbiara, RO", "Costa Marques, RO", 
"Espigo D'oeste, RO", "Guajar-Mirim, RO", "Jaru, RO", "Ji-Paran, RO", 
"Machadinho D'oeste, RO", "Nova Brasilndia D'oeste, RO", "Ouro Preto Do Oeste, RO", 
"Pimenta Bueno, RO", "Porto Velho, RO", "Presidente Mdici, RO", 
"Rio Crespo, RO", "Rolim De Moura, RO", "Santa Luzia D'oeste, RO", 
"Vilhena, RO", "So Miguel Do Guapor, RO", "Nova Mamor, RO", "Alvorada D'oeste, RO", 
"Alto Alegre Dos Parecis, RO", "Alto Paraso, RO", "Buritis, RO", 
"Novo Horizonte Do Oeste, RO", "Cacaulandia, RO"), Region = c("Norte", 
"Norte", "Norte", "Norte", "Norte", "Norte", "Norte", "Norte", 
"Norte", "Norte", "Sul", "Sul", "Sul", "Sul", "Sul", 
"Sul", "Sul", "Sul", "Sul", "Sul", "Nordeste", "Nordeste", 
"Nordeste", "Nordeste", "Nordeste", "Nordeste", "Nordeste", "Nordeste", "Nordeste", 
"Nordeste"), Population = c(25578L, 104401L, 6355L, 87226L, 17986L, 
18817L, 8842L, 16651L, 32385L, 46632L, 55738L, 130419L, 37167L, 
21592L, 39924L, 37512L, 502748L, 22557L, 3750L, 56242L, 8532L, 
91801L, 23933L, 27600L, 17063L, 13940L, 20210L, 37838L, 10276L, 
6367L)), .Names = c("Geocode", "Location", "Region", "Population" 
), row.names = c(NA, 30L), class = "data.frame") 

這顯示了一些城市的人口,以及該城市所屬的地區。

我需要依斷點(breaks=c(0,50000,100000))對人口進行切分,然後統計落在每個區間內的城市數量:既要有整體(所有地區)的統計,也要有按地區分開的統計。

所得數據幀應該是這樣的(隨機的假設值):

Class     Region  Count 
[0-50000]    Norte  7 
[50000-100000]   Norte  3 
[>100000]    Norte  0 
[0-50000]    Sul   5 
[50000-100000]   Sul   4 
[>100000]    Sul   1 
[0-50000]    Nordeste  4 
[50000-100000]   Nordeste  5 
[>100000]    Nordeste  1 
[0-50000]    All   16 
[50000-100000]   All   12 
[>100000]    All   2 

感謝任何幫助。

回答

2

通過使用 `cut` 和 `dplyr`:

# Attach dplyr up front; the pipeline below relies on its verbs and on %>%.
library(dplyr)

# Label every city with its population class. cut() builds right-closed
# intervals by default, so the classes are (0, 50000], (50000, 100000] and
# (100000, Inf].
dat[["Class"]] <- cut(
  dat[["Population"]],
  breaks = c(0, 50000, 100000, Inf),
  labels = c("0-50000", "50000-100000", ">100000")
)

# Number of cities per population class within each region.
d1 <- dat %>%
  group_by(Class, Region) %>%
  summarise(count = n())

# Number of cities per population class over the whole data set, tagged with
# Region "All" so the rows line up with the per-region counts.
d2 <- dat %>%
  group_by(Class) %>%
  summarise(count = n(), Region = "All")

# Per-region rows first, then the overall rows.
bind_rows(d1, d2)

      Class Region count 
     <fctr> <chr> <int> 
1  0-50000 Nordeste  9 
2  0-50000 Norte  8 
3  0-50000  Sul  6 
4 50000-100000 Nordeste  1 
5 50000-100000 Norte  1 
6 50000-100000  Sul  2 
7  >100000 Norte  1 
8  >100000  Sul  2 
9  0-50000  All 23 
10 50000-100000  All  4 
11  >100000  All  3 
+2

或者,在創建 `Class` 之後,`bind_rows(count(dat, Region, Class), count(dat, Class, Region = "all"))` 也可以做到。 – jazzurro

+0

@jazzurro很棒:-)謝謝 – Wen

+0

不客氣。 :) – jazzurro

1

這裏是一個快速而粗糙的方法;之後我可能會更新它,使其更簡潔,並避免必須使用 bind_rows()。

嘗試以下操作:

library(tidyverse)

# Classify every city once, so both summaries below share a single definition
# of the population classes (the classification used to be copy-pasted into
# each pipeline). case_when() uses the first condition that matches, so each
# branch only needs its upper bound: a city with exactly 50000 inhabitants
# falls in "0-50000" and one with exactly 100000 in "50000-100000". A missing
# Population matches no branch and is left as NA.
dat_binned <- dat %>%
  mutate(
    population_breaks = case_when(
      Population <= 50000 ~ "0-50000",
      Population <= 100000 ~ "50000-100000",
      Population > 100000 ~ ">100000"
    )
  )

# Number of cities per population class within each region. The result stays
# grouped by population_breaks.
dat_1 <- dat_binned %>%
  group_by(population_breaks) %>%
  count(Region)

# Number of cities per population class across all regions, labelled "All" so
# the rows line up with the per-region counts. count() with no arguments
# counts by the existing grouping variable.
dat_2 <- dat_binned %>%
  group_by(population_breaks) %>%
  count() %>%
  mutate(Region = "All")

# Stack the per-region and overall counts into one table.
bind_rows(dat_1, dat_2)

其中返回:

# A tibble: 11 x 3 
# Groups: population_breaks [3] 
    population_breaks Region  n 
       <chr> <chr> <int> 
1   0-50000 Nordeste  9 
2  50000-100000 Nordeste  1 
3   >100000 Norte  1 
4   0-50000 Norte  8 
5  50000-100000 Norte  1 
6   >100000  Sul  2 
7   0-50000  Sul  6 
8  50000-100000  Sul  2 
9   >100000  All  3 
10   0-50000  All 23 
11  50000-100000  All  4