2016-12-16 108 views
1

如何按組填寫缺失值?使用另一個數據框填寫並填寫組中缺失的數據

我有一個df與recomendations和排名,我需要插入默認recomendations時,我沒有至少4。

輸入例:

library(tidyverse) 

fixed_recomendations <- data.frame(recomendation_id = 50:54, name = paste("recomendation", 50:54, sep = "_"), stringsAsFactors = FALSE) 

content_id <- c(1,1,2,rep(3, 6)) 
rank <- c(1, 2, 1, 1:6) 
recomendation_id <- c(1:9) 
name <- paste("recomendation", recomendation_id, sep = "_") 
df <- data.frame(content_id, rank, recomendation_id, name, stringsAsFactors = FALSE) 

# content_id rank recomendation_id name 
# 1   1  1     recomendation_1 
# 1   2  2     recomendation_2 
# 2   1  3     recomendation_3 
# 3   1  4     recomendation_4 
# 3   2  5     recomendation_5 
# 3   3  6     recomendation_6 
# 3   4  7     recomendation_7 
# 3   5  8     recomendation_8 
# 3   6  9     recomendation_9 

我試圖完全做到這一點/填充,但它不尊重的羣體和它也削減等級範圍之外的值。

df %>% 
    complete(content_id, rank = 1:4, 
      fill = list(
      recomendation_id = fixed_recomendations$recomendation_id, 
      name = fixed_recomendations$name 
      )) 

# content_id rank recomendation_id name 
# 1   1  1    recomendation_1 
# 1   2  2    recomendation_2 
# 1   3  50    recomendation_50 
# 1   4  51    recomendation_51 
# 2   1  3    recomendation_3 
# 2   2  52    recomendation_52 
# 2   3  53    recomendation_53 
# 2   4  54    recomendation_54 
# 3   1  4    recomendation_4 
# 3   2  5    recomendation_5 
# 3   3  6    recomendation_6 
# 3   4  7    recomendation_7 

所需的輸出:

# content_id rank recomendation_id name 
# 1   1  1    recomendation_1 
# 1   2  2    recomendation_2 
# 1   3  50    recomendation_50 
# 1   4  51    recomendation_51 
# 2   1  3    recomendation_3 
# 2   2  50    recomendation_50 
# 2   3  51    recomendation_51 
# 2   4  52    recomendation_52 
# 3   1  4    recomendation_4 
# 3   2  5    recomendation_5 
# 3   3  6    recomendation_6 
# 3   4  7    recomendation_7 
# 3   5  8    recomendation_8 
# 3   6  9    recomendation_9 

回答

0

我不知道這是否是最好的方法,但是我最終解決的兩個步驟,首先,我創建NA一個tibble值使用完整的,然後我過濾的NA值,並採用分體式這組更新/ map_df:

add_missing <- function(x){ 
    for(i in 1:nrow(x)){ 
    x[i,]$recomendation_id = fixed_recomendations[i,]$recomendation_id 
    x[i,]$name = fixed_recomendations[i,]$name  
    } 
    x 
} 

df_missing <- df %>% 
    complete(content_id, rank = 1:4) %>% 
    filter(is.na(recomendation_id)) %>% 
    split(.$content_id) %>% 
    map_df(add_missing) 

rbind(df, df_missing) %>% arrange(content_id, rank) 

> content_id rank recomendation_id    name 
1   1 1    1 recomendation_1 
2   1 2    2 recomendation_2 
3   1 3    50 recomendation_50 
4   1 4    51 recomendation_51 
5   2 1    3 recomendation_3 
6   2 2    50 recomendation_50 
7   2 3    51 recomendation_51 
8   2 4    52 recomendation_52 
9   3 1    4 recomendation_4 
10   3 2    5 recomendation_5 
11   3 3    6 recomendation_6 
12   3 4    7 recomendation_7 
13   3 5    8 recomendation_8 
14   3 6    9 recomendation_9 
1

我沒有使用相同的,你做的包的套裝,但這裏是使用foreach,我在一兩分鐘颳起瞭解決方案。我可以使用基本的for循環來完成它,但foreach中的.combine功能是有用的,而且它可以並行化以防萬一您的真實問題很大。

fixed_recomendations <- data.frame(recomendation_id = 50:54, name = paste("recomendation", 50:54, sep = "_"), stringsAsFactors = FALSE) 

content_id <- c(1,1,2,rep(3, 6)) 
rank <- c(1, 2, 1, 1:6) 
recomendation_id <- c(1:9) 
name <- paste("recomendation", recomendation_id, sep = "_") 
df <- data.frame(content_id, rank, recomendation_id, name, stringsAsFactors = FALSE) 


tab <- table(content_id) 
library(foreach) 
defaults <- foreach (i = 1:length(tab), .combine = rbind) %do%{ 
    if (tab[i]<4){ 
    foreach(j = 1:(4-tab[i]), .combine = rbind) %do% { 
     data.frame(names(tab)[i], j + tab[i], fixed_recomendations$recomendation_id[j], fixed_recomendations$name[j]) 
    } 
    } 
} 
colnames(defaults) <- colnames(df) 
df.new <- rbind(df, defaults) 
df.new <- df.new[with(df.new, order(content_id, rank)),] 
df.new 

    content_id rank recomendation_id    name 
1   1 1    1 recomendation_1 
2   1 2    2 recomendation_2 
12   1 3    50 recomendation_50 
11   1 4    51 recomendation_51 
3   2 1    3 recomendation_3 
23   2 2    50 recomendation_50 
21   2 3    51 recomendation_51 
22   2 4    52 recomendation_52 
4   3 1    4 recomendation_4 
5   3 2    5 recomendation_5 
6   3 3    6 recomendation_6 
7   3 4    7 recomendation_7 
8   3 5    8 recomendation_8 
9   3 6    9 recomendation_9