以下是另一種解決方案,適用於鍵可以按任意順序排列、甚至可能缺失的情況:
# Demo: write several small CSV files, read them back, and compute the mean
# of `value` for every combination of the keys A, B and C that occurs.

n <- 10 # number of CSV files to create
obs <- 10 # number of observations (rows) per file

# Create the test files in a dedicated scratch directory, so that unrelated
# CSV files already sitting in tempdir() (e.g. from an earlier run in the
# same session) are not picked up when the data is read back in.
csv_dir <- tempfile("csv_input_")
dir.create(csv_dir)
for (i in seq_len(n)) { # seq_len() iterates zero times when n == 0
  df <- data.frame(
    A = sample(1:3, obs, TRUE),
    B = sample(1:3, obs, TRUE),
    C = sample(1:3, obs, TRUE),
    value = runif(obs)
  )
  write.csv(
    df,
    file = tempfile(tmpdir = csv_dir, fileext = ".csv"),
    row.names = FALSE
  )
}

# Read in the data. `pattern` is a regular expression, not a shell glob, so
# anchor on a literal ".csv" suffix.
csv_files <- list.files(csv_dir, pattern = "\\.csv$", full.names = TRUE)
if (length(csv_files) == 0L) {
  stop("no CSV files found in ", csv_dir, call. = FALSE)
}
input <- lapply(csv_files, read.csv)

# Stack the data frames into one. rbind() on data frames matches columns by
# name, so A, B and C may appear in a different column order in each file.
input <- do.call(rbind, input)

# Compute the mean of `value` for each unique combination of A, B and C.
# drop = TRUE discards key combinations that never occur in the data.
result <- lapply(
  split(input, list(input$A, input$B, input$C), drop = TRUE),
  function(sect) {
    # Keep the first row of the group as its representative, with `value`
    # overwritten by the group mean.
    sect$value[1L] <- mean(sect$value)
    sect[1L, ]
  }
)

# Create the output data frame: one row per key combination.
result <- do.call(rbind, result)
result