2017-10-08 59 views
2

我收到的如何替換一個公式很大answer。我需要它來匹配data.frames列表。如何處理MatchIt中未被識別的替代配方(例如)?

當與MatchIt::matchit()匹配時,首先,我必須將結果保存爲matchit.full/matchit類。其次,match.data() data.frames瓦特/只有匹配的意見將被創建。

當我像往常一樣使用公式時,該問題出現在正常工作的第二步。對於替代現在看來,match.data()需要以某種方式識別公式,但它不會。

考慮此作爲一個例子(該警告可以忽略):

# example list 
library(car) 
WeightLoss1 <- WeightLoss 
WeightLoss1$group <- as.integer(ifelse(WeightLoss1$group == "Control", 0, 1)) 

WL = list(WeightLoss1, WeightLoss1, WeightLoss1) # doesn't make much sense, but suffices for example 

# substitute formula 
wl.cov <- c("wl1", "se1") 
WL.FM <- reformulate(wl.cov, response = "group") 

# matching w/o substitution 
m.match.0 <- lapply(1:length(WL), function(mark) { 
    require(MatchIt) 
    matchit(group ~ wl1 + se1, data = WL[[mark]]) 
}) 

# matching w/ substitution 
m.match.1 <- lapply(1:length(WL), function(mark) { 
    require(MatchIt) 
    matchit(WL.FM, data = WL[[mark]]) 
}) 

# now compare both attempts to create list of data.frames 
# w/o 
match <- lapply(1:length(m.match.0), function(i){ 
    require(MatchIt) 
    match.data(m.match.0[[i]]) 
}) 

# w/ 
match <- lapply(1:length(m.match.1), function(i){ 
    require(MatchIt) 
    match.data(m.match.1[[i]]) 
}) 

如可以看到的嘗試的w/o取代工作正常,嘗試瓦特/置換產生錯誤Error in eval(object$call$data, envir = env) : object 'mark' not found

這怎麼修補?

-

注:

> match.data 
function (object, group = "all", distance = "distance", weights = "weights", 
    subclass = "subclass") 
{ 
    if (!is.null(object$model)) { 
     env <- attributes(terms(object$model))$.Environment 
    } 
    else { 
     env <- parent.frame() 
    } 
    data <- eval(object$call$data, envir = env) 
    treat <- object$treat 
    wt <- object$weights 
    vars <- names(data) 
    if (distance %in% vars) 
     stop("invalid input for distance. choose a different name.") 
    else if (!is.null(object$distance)) { 
     dta <- data.frame(cbind(data, object$distance)) 
     names(dta) <- c(names(data), distance) 
     data <- dta 
    } 
    if (weights %in% vars) 
     stop("invalid input for weights. choose a different name.") 
    else if (!is.null(object$weights)) { 
     dta <- data.frame(cbind(data, object$weights)) 
     names(dta) <- c(names(data), weights) 
     data <- dta 
    } 
    if (subclass %in% vars) 
     stop("invalid input for subclass. choose a different name.") 
    else if (!is.null(object$subclass)) { 
     dta <- data.frame(cbind(data, object$subclass)) 
     names(dta) <- c(names(data), subclass) 
     data <- dta 
    } 
    if (group == "all") 
     return(data[wt > 0, ]) 
    else if (group == "treat") 
     return(data[wt > 0 & treat == 1, ]) 
    else if (group == "control") 
     return(data[wt > 0 & treat == 0, ]) 
    else stop("error: invalid input for group.") 
} 
<bytecode: 0x00000000866125e0> 
<environment: namespace:MatchIt> 

 

> matchit 
function (formula, data, method = "nearest", distance = "logit", 
    distance.options = list(), discard = "none", reestimate = FALSE, 
    ...) 
{ 
    mcall <- match.call() 
    if (is.null(data)) 
     stop("Dataframe must be specified", call. = FALSE) 
    if (!is.data.frame(data)) { 
     stop("Data must be a dataframe", call. = FALSE) 
    } 
    if (sum(is.na(data)) > 0) 
     stop("Missing values exist in the data") 
    ischar <- rep(0, dim(data)[2]) 
    for (i in 1:dim(data)[2]) if (is.character(data[, i])) 
     data[, i] <- as.factor(data[, i]) 
    if (!is.numeric(distance)) { 
     fn1 <- paste("distance2", distance, sep = "") 
     if (!exists(fn1)) 
      stop(distance, "not supported.") 
    } 
    if (is.numeric(distance)) { 
     fn1 <- "distance2user" 
    } 
    fn2 <- paste("matchit2", method, sep = "") 
    if (!exists(fn2)) 
     stop(method, "not supported.") 
    tryerror <- try(model.frame(formula), TRUE) 
    if (distance %in% c("GAMlogit", "GAMprobit", "GAMcloglog", 
     "GAMlog", "GAMcauchit")) { 
     requireNamespace("mgcv") 
     tt <- terms(mgcv::interpret.gam(formula)$fake.formula) 
    } 
    else { 
     tt <- terms(formula) 
    } 
    attr(tt, "intercept") <- 0 
    mf <- model.frame(tt, data) 
    treat <- model.response(mf) 
    X <- model.matrix(tt, data = mf) 
    if (method == "exact") { 
     distance <- out1 <- discarded <- NULL 
     if (!is.null(distance)) 
      warning("distance is set to `NULL' when exact matching is used.") 
    } 
    else if (is.numeric(distance)) { 
     out1 <- NULL 
     discarded <- discard(treat, distance, discard, X) 
    } 
    else { 
     if (is.null(distance.options$formula)) 
      distance.options$formula <- formula 
     if (is.null(distance.options$data)) 
      distance.options$data <- data 
     out1 <- do.call(fn1, distance.options) 
     discarded <- discard(treat, out1$distance, discard, X) 
     if (reestimate) { 
      distance.options$data <- data[!discarded, ] 
      distance.options$weights <- distance.options$weights[!discarded] 
      tmp <- out1 
      out1 <- do.call(fn1, distance.options) 
      tmp$distance[!discarded] <- out1$distance 
      out1$distance <- tmp$distance 
     } 
     distance <- out1$distance 
    } 
    if (fn1 == "distance2mahalanobis") { 
     is.full.mahalanobis <- TRUE 
    } 
    else { 
     is.full.mahalanobis <- FALSE 
    } 
    out2 <- do.call(fn2, list(treat, X, data, distance = distance, 
     discarded, is.full.mahalanobis = is.full.mahalanobis, 
     ...)) 
    if (fn1 == "distance2mahalanobis") { 
     distance[1:length(distance)] <- NA 
     class(out2) <- c("matchit.mahalanobis", "matchit") 
    } 
    out2$call <- mcall 
    out2$model <- out1$model 
    out2$formula <- formula 
    out2$treat <- treat 
    if (is.null(out2$X)) { 
     out2$X <- X 
    } 
    out2$distance <- distance 
    out2$discarded <- discarded 
    nn <- matrix(0, ncol = 2, nrow = 4) 
    nn[1, ] <- c(sum(out2$treat == 0), sum(out2$treat == 1)) 
    nn[2, ] <- c(sum(out2$treat == 0 & out2$weights > 0), sum(out2$treat == 
     1 & out2$weights > 0)) 
    nn[3, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 
     0), sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 
     0)) 
    nn[4, ] <- c(sum(out2$treat == 0 & out2$weights == 0 & out2$discarded == 
     1), sum(out2$treat == 1 & out2$weights == 0 & out2$discarded == 
     1)) 
    dimnames(nn) <- list(c("All", "Matched", "Unmatched", "Discarded"), 
     c("Control", "Treated")) 
    out2$nn <- nn 
    return(out2) 
} 
<bytecode: 0x0000000086d6e158> 
<environment: namespace:MatchIt> 

回答

3

首先注意它是不是就是這兩種情況,但事實之間的關鍵區別subsittution在非替代情況下,代碼定義了在函數中調用matchit而在替換中的公式它會在該函數外定義公式。在這兩種情況下,如果公式是在函數外部定義的,那麼它將會失敗,並且在兩種情況下,如果公式是在函數內部定義的,它將會起作用。

的問題是,因爲這個公式在示例中的功能之外定義公式的環境是全球環境

environment(WL.FM) 
## <environment: R_GlobalEnv> 

,而我們希望它是在匿名函數中的局部環境中,它用來。

1)試試這個:

m.match.1 <- lapply(WL, function(x) { 
    WL.FM <- reformulate(wl.cov, response = "group") 
    matchit(WL.FM, data = x) 
}) 
match <- lapply(m.match.1, match.data) 

2),或者如果你不想來定義函數的公式嘗試這種另類:

WL.FM <- reformulate(wl.cov, response = "group") 
m.match.1 <- lapply(WL, function(x) { 
    environment(WL.FM) <- environment() 
    matchit(WL.FM, data = x) 
}) 
match <- lapply(m.match.1, match.data) 

2A)重置環境的另一種方法是將公式轉換爲字符,然後返回到公式:

WL.FM <- reformulate(wl.cov, response = "group") 
m.match.1 <- lapply(WL, function(x) { 
    WL.FM <- formula(format(WL.FM)) 
    matchit(WL.FM, data = x) 
}) 
match <- lapply(m.match.1, match.data) 

3)另一種說法是將WL.FM定義爲字符串而不是公式對象。那麼它沒有環境。將其轉換爲一個公式中的匿名函數在這種情況下,它的enviornment將默認有:

WL.FM <- format(reformulate(wl.cov, response = "group")) # character 
m.match.1 <- lapply(WL, function(x) matchit(formula(WL.FM), data = x)) 
match <- lapply(m.match.1, match.data) 

注:雖然沒有涉及到的關鍵問題,從風格來看,在上面我們已經刪除require聲明。在代碼頂部使用一個library聲明,並且不要使用require,除非它在if聲明中。 - if (require(...)) ...如果要加載的程序包不可用,您希望代碼儘早失敗。

此外,我們已將lapply代碼更改爲遍歷WLm.match.1,而不是遍歷每種情況下的下標。