2016-07-21 226 views
0

多個 data frame 中值的條件替換 [R]

我有兩個看起來像這樣的 data frame:

# Sample data: ten consecutive days, with some attribute values missing (NA).
data_frame_1 <- data.frame(
  DATE = seq(from = as.Date("2016-01-01"), to = as.Date("2016-01-10"), by = "1 day"),
  Att1 = c(1, 3, 4, 5, NA, 4, 5, NA, NA, 9),
  Att2 = c(4, 5, 6, 7, 3, 4, NA, 7, 2, NA)
)

> data_frame_1 
     DATE Att1 Att2 
1 2016-01-01 1 4 
2 2016-01-02 3 5 
3 2016-01-03 4 6 
4 2016-01-04 5 7 
5 2016-01-05 NA 3 
6 2016-01-06 4 4 
7 2016-01-07 5 NA 
8 2016-01-08 NA 7 
9 2016-01-09 NA 2 
10 2016-01-10 9 NA 


# Reference data: same ten days, fully populated; used as the source of fill values.
data_frame_2 <- data.frame(
  DATE = seq(from = as.Date("2016-01-01"), to = as.Date("2016-01-10"), by = "1 day"),
  Att1 = c(3, 3, 21, 5, 8, 4, 5, 11, 5, 9),
  Att2 = c(23, 7, 9, 13, 3, 4, 9, 7, 2, 12)
)

> data_frame_2 
     DATE Att1 Att2 
1 2016-01-01 3 23 
2 2016-01-02 3 7 
3 2016-01-03 21 9 
4 2016-01-04 5 13 
5 2016-01-05 8 3 
6 2016-01-06 4 4 
7 2016-01-07 5 9 
8 2016-01-08 11 7 
9 2016-01-09 5 2 
10 2016-01-10 9 12 

現在我想對 data_frame_1 的每一列,在日期不早於(即大於或等於)2016-01-08 且值為 NA 的位置,用 data_frame_2 中對應的值來替換,使最終結果看起來是這樣的:

> data_frame_1_mod 
     DATE Att1 Att2 
1 2016-01-01 1 4 
2 2016-01-02 3 5 
3 2016-01-03 4 6 
4 2016-01-04 5 7 
5 2016-01-05 NA 3 
6 2016-01-06 4 4 
7 2016-01-07 5 NA 
8 2016-01-08 11 7 
9 2016-01-09 5 2 
10 2016-01-10 9 12 

在 R 中實現這種替換最快的方式是什麼?

編輯

這個方法我試過

#' Fill late missing values of one column from a second data frame
#'
#' For a single column, every value of `data1` that is `NA` on a row whose
#' `DATE` is on or after `date` is replaced by the value at the same row
#' position in `data2`. Rows are matched by position, not by `DATE`, so both
#' inputs must have the same number of rows in the same order.
#'
#' @param column Name (character scalar) of the column to process; it must
#'   exist in both `data1` and `data2`.
#' @param date Cut-off date; only rows with `data1$DATE >= date` are filled.
#' @param data1 Data frame with a `DATE` column and the column to be filled.
#' @param data2 Data frame supplying the replacement values.
#' @return A two-column data frame: `DATE` (taken from `data1`) and the
#'   filled column, named after `column`.
replace_func <- function(column, date, data1, data2) {
  # Guard against silent recycling when the inputs are not row-aligned.
  stopifnot(nrow(data1) == nrow(data2))

  filled <- data1[[column]]
  # which() drops NA conditions (e.g. a missing DATE), leaving those rows as-is.
  fill_rows <- which(data1$DATE >= date & is.na(filled))
  # Indexed assignment keeps the column's class (Date, factor, ...), whereas
  # ifelse() would strip attributes and return bare numbers.
  filled[fill_rows] <- data2[[column]][fill_rows]

  result <- data.frame(DATE = data1$DATE, filled, stringsAsFactors = FALSE)
  names(result) <- c("DATE", column)
  result
}


# Fill each attribute column separately (one two-column frame per attribute) ...
all_frames <- lapply(
  c("Att1", "Att2"),
  replace_func,
  date = as.Date("2016-01-08"),
  data1 = data_frame_1,
  data2 = data_frame_2
)
# ... then fold the per-attribute frames back together by merging on their shared column.
data_frame_1_mod <- Reduce(function(lhs, rhs) merge(lhs, rhs, all.x = TRUE), all_frames)


> data_frame_1_mod 
     DATE Att1 Att2 
1 2016-01-01 1 4 
2 2016-01-02 3 5 
3 2016-01-03 4 6 
4 2016-01-04 5 7 
5 2016-01-05 NA 3 
6 2016-01-06 4 4 
7 2016-01-07 5 NA 
8 2016-01-08 11 7 
9 2016-01-09 5 2 
10 2016-01-10 9 12 

我想知道有沒有更好的辦法可以做到這一點。

回答

1

這裏是一個基於 data.table 的快速解決方案:

library(data.table)
## Convert both data.frames to data.tables in place.
setDT(data_frame_1)
setDT(data_frame_2)
## The number of columns to check can be large, so reshape to long format:
## one row per (DATE, variable) pair, with the cell content in `value`.
dx1 <- melt(data_frame_1, id.vars = "DATE")
dx2 <- melt(data_frame_2, id.vars = "DATE")
## Key both tables on (DATE, variable) for a fast join.
setkey(dx1, DATE, variable)
setkey(dx2, DATE, variable)
## Join using dx2 as the index (its values arrive as `i.value`), replace every
## missing value on or after the cut-off date with the corresponding dx2 value,
## drop the helper column, and finally go back to wide format with `dcast`.
dcast(
  dx1[dx2][is.na(value) & DATE >= as.Date("2016-01-08"), value := i.value][, i.value := NULL],
  DATE ~ variable,
  value.var = "value"
)

這給預期的結果:

  DATE Att1 Att2 
1: 2016-01-01 1 4 
2: 2016-01-02 3 5 
3: 2016-01-03 4 6 
4: 2016-01-04 5 7 
5: 2016-01-05 NA 3 
6: 2016-01-06 4 4 
7: 2016-01-07 5 NA 
8: 2016-01-08 11 7 
9: 2016-01-09 5 2 
10: 2016-01-10 9 12 
1

如何:

# Row indices dated strictly after 2016-01-08 that have an NA in Att1 or Att2.
n <- which(
  data_frame_1$DATE > as.Date("2016-01-08") &
    (is.na(data_frame_1$Att1) | is.na(data_frame_1$Att2))
)

n 
[1] 9 10 

# Overwrite the selected rows of data_frame_1 with the same rows of data_frame_2
# in one vectorised assignment (every column of those rows is replaced).
data_frame_1[n, ] <- data_frame_2[n, ]

#result 
> data_frame_1 
     DATE Att1 Att2 
1 2016-01-01 1 4 
2 2016-01-02 3 5 
3 2016-01-03 4 6 
4 2016-01-04 5 7 
5 2016-01-05 NA 3 
6 2016-01-06 4 4 
7 2016-01-07 5 NA 
8 2016-01-08 NA 7 
9 2016-01-09 5 2 
10 2016-01-10 9 12 
>