2017-08-08 67 views
1

我有如下的樣本數據,想為反向的關係分配對應的標籤:

# Sample table: one row per ordered customer pair, with the relation of the
# first customer to the second. Unknown relations are stored here as the
# literal string 'NA', not as a true missing value.
customerNames.1 <- c('A', 'B', 'A', 'C', 'E', 'F')
customerNames.2 <- c('B', 'A', 'C', 'A', 'F', 'E')
Relation <- c('Father', 'NA', 'Spouse', 'NA', 'Mother', 'NA')
Datasample <- data.table(customerNames.1, customerNames.2, Relation)

# data looks like 

    customerNames.1 customerNames.2 Relation 
1:    A    B Father 
2:    B    A  NA 
3:    A    C Spouse 
4:    C    A  NA 
5:    E    F Mother 
6:    F    E  NA 

每一行表示 customerNames.1 相對於 customerNames.2 的關係。這裡 A 到 B 的關係已經存在,是主要來源;我還想記錄 B 到 A 的關係(可以由前者推導出來,例如:父親 → 子女、配偶 → 配偶、母親 → 子女)。

我該如何得到如下的結果:

customerNames.1 customerNames.2 Relation 
1:    A    B Father 
2:    B    A Child 
3:    A    C Spouse 
4:    C    A Spouse 
5:    E    F Mother 
6:    F    E Child 

回答

1

使用 tidyverse,你可以這樣計算缺失的關係。這可能不是最簡單的方法,但我認為它可行。

先取出來源表(關係已知的資料列),據此推導出反向的關係,再把它聯結(join)到缺少關係的資料列上,以補全整張表格。

library(dplyr)

# Sample data: one row per ordered customer pair, with the relation of the
# first customer to the second. Unknown relations are real missing values
# (NA_character_), so is.na() and drop_na() can detect them.
customerNames.1 <- c("A", "B", "A", "C", "E", "F")
customerNames.2 <- c("B", "A", "C", "A", "F", "E")
Relation <- c(
  "Father", NA_character_, "Spouse", NA_character_, "Mother", NA_character_
)

# tibble() replaces data_frame(), which is deprecated.
Datasample <- tibble(customerNames.1, customerNames.2, Relation)
Datasample
#> # A tibble: 6 x 3 
#> customerNames.1 customerNames.2 Relation 
#>    <chr>   <chr> <chr> 
#> 1    A    B Father 
#> 2    B    A  <NA> 
#> 3    A    C Spouse 
#> 4    C    A  <NA> 
#> 5    E    F Mother 
#> 6    F    E  <NA> 

# Source table: keep only the rows without any missing value, i.e. the pairs
# whose relation is already known.
sourceTab <- Datasample %>%
  tidyr::drop_na()
sourceTab
#> # A tibble: 3 x 3 
#> customerNames.1 customerNames.2 Relation 
#>    <chr>   <chr> <chr> 
#> 1    A    B Father 
#> 2    A    C Spouse 
#> 3    E    F Mother 
# Derive the inverse relations: swap the two customer columns, then translate
# each relation through a lookup (Father/Mother -> Child, Spouse -> Spouse).
# Any relation that is not in the lookup becomes NA.
computeTab <- sourceTab %>%
  select(
    customerNames.1 = customerNames.2,
    customerNames.2 = customerNames.1,
    Relation
  ) %>%
  mutate(
    Relation = unname(
      c(Father = "Child", Mother = "Child", Spouse = "Spouse")[
        as.character(Relation)
      ]
    )
  )
computeTab
#> # A tibble: 3 x 3 
#> customerNames.1 customerNames.2 Relation 
#>    <chr>   <chr> <chr> 
#> 1    B    A Child 
#> 2    C    A Spouse 
#> 3    F    E Child 
# Complete the original table: attach the derived inverse relation to each
# pair, keep the original relation when it is present and fall back to the
# derived one otherwise (NA if neither exists).
completedTab <- Datasample %>%
  left_join(computeTab, by = c("customerNames.1", "customerNames.2")) %>%
  mutate(Relation = coalesce(Relation.x, Relation.y)) %>%
  select(starts_with("customer"), Relation)
completedTab
#> # A tibble: 6 x 3 
#> customerNames.1 customerNames.2 Relation 
#>    <chr>   <chr> <chr> 
#> 1    A    B Father 
#> 2    B    A Child 
#> 3    A    C Spouse 
#> 4    C    A Spouse 
#> 5    E    F Mother 
#> 6    F    E Child 
# Alternative without the join: stack the known relations on top of the
# derived inverse ones. Binding sourceTab to completedTab instead would repeat
# every known relation, because completedTab already contains those rows.
# Pairs with no known relation in either direction are not included here.
bind_rows(sourceTab, computeTab) 
#> # A tibble: 6 x 3 
#> customerNames.1 customerNames.2 Relation 
#>    <chr>   <chr> <chr> 
#> 1    A    B Father 
#> 2    A    C Spouse 
#> 3    E    F Mother 
#> 4    B    A Child 
#> 5    C    A Spouse 
#> 6    F    E Child 
+0

是的,這個方法可行! – Pb89