2015-03-02 54 views
2

我想在d中使用dglm包來適應雙glm。這與statmod包一起使用,以使用tweedie模型。問題的再現是:R包中的dglm

library(dglm) 
library(statmod) 

p <- 1.5 
y <- runif(10) 
x <- runif(10) 

dglm(y~x,~x,family=tweedie(link.power=0, var.power=p)) 
#doesnt work 

dglm(y~x,~x,family=tweedie(link.power=0, var.power=1.5)) 
#works 

var.power需要一個變量來定義,因爲我想用一個循環,dglm在它的每個條目運行

+0

Thi s是包中的一個明確的錯誤。您應該通過此報告聯繫作者。它與作者使用'match.call'評估參數的方式有關。如果我能拿出一個解決方案,我會發布解決方法。 – nograpes 2015-03-02 14:10:05

+0

我已經在dglm.constant中隔離了這個問題: 匹配(tweedie.p,c(0,1,2,3),nomatch = 0) – Acarbalacar 2015-03-02 14:24:43

+0

確實,但這是問題,因爲你傳遞給'tweedie'函數的'power'參數從來沒有在'dglm'函數中被評估過。我在下面解釋。 – nograpes 2015-03-02 14:31:49

回答

2

所以,你可以修復通過迫使dglm評估您輸入p時的呼叫。在dglm功能,在約行73:

if (family$family == "Tweedie") { 
    tweedie.p <- call$family$var.power 
} 

應該是:

if (family$family == "Tweedie") { 
    tweedie.p <- eval(call$family$var.power) 
} 

你可以讓自己的功能與修補程序是這樣的:

dglm.nograpes <- function (formula = formula(data), dformula = ~1, family = gaussian, 
    dlink = "log", data = sys.parent(), subset = NULL, weights = NULL, 
    contrasts = NULL, method = "ml", mustart = NULL, betastart = NULL, 
    etastart = NULL, phistart = NULL, control = dglm.control(...), 
    ykeep = TRUE, xkeep = FALSE, zkeep = FALSE, ...) 
{ 
    call <- match.call() 
    if (is.character(family)) 
     family <- get(family, mode = "function", envir = parent.frame()) 
    if (is.function(family)) 
     family <- family() 
    if (is.null(family$family)) { 
     print(family) 
     stop("'family' not recognized") 
    } 
    mnames <- c("", "formula", "data", "weights", "subset") 
    cnames <- names(call) 
    cnames <- cnames[match(mnames, cnames, 0)] 
    mcall <- call[cnames] 
    mcall[[1]] <- as.name("model.frame") 
    mframe <<- eval(mcall, sys.parent()) 
    mf <- match.call(expand.dots = FALSE) 
    y <- model.response(mframe, "numeric") 
    if (is.null(dim(y))) { 
     N <- length(y) 
    } 
    else { 
     N <- dim(y)[1] 
    } 
    nobs <- N 
    mterms <- attr(mframe, "terms") 
    X <- model.matrix(mterms, mframe, contrasts) 
    weights <- model.weights(mframe) 
    if (is.null(weights)) 
     weights <- rep(1, N) 
    if (is.null(weights)) 
     weights <- rep(1, N) 
    if (!is.null(weights) && any(weights < 0)) { 
     stop("negative weights not allowed") 
    } 
    offset <- model.offset(mframe) 
    if (is.null(offset)) 
     offset <- rep(0, N) 
    if (!is.null(offset) && length(offset) != NROW(y)) { 
     stop(gettextf("number of offsets is %d should equal %d (number of observations)", 
      length(offset), NROW(y)), domain = NA) 
    } 
    mcall$formula <- formula 
    mcall$formula[3] <- switch(match(length(dformula), c(0, 2, 
     3)), 1, dformula[2], dformula[3]) 
    mframe <- eval(mcall, sys.parent()) 
    dterms <- attr(mframe, "terms") 
    Z <- model.matrix(dterms, mframe, contrasts) 
    doffset <- model.extract(mframe, offset) 
    if (is.null(doffset)) 
     doffset <- rep(0, N) 
    name.dlink <- substitute(dlink) 
    if (is.name(name.dlink)) { 
     if (is.character(dlink)) { 
      name.dlink <- dlink 
     } 
     else { 
      dlink <- name.dlink <- as.character(name.dlink) 
     } 
    } 
    else { 
     if (is.call(name.dlink)) 
      name.dlink <- deparse(name.dlink) 
    } 
    if (!is.null(name.dlink)) 
     name.dlink <- name.dlink 
    if (family$family == "Tweedie") { 
     tweedie.p <- eval(call$family$var.power) 
    } 
    Digamma <- family$family == "Gamma" || (family$family == 
     "Tweedie" && tweedie.p == 2) 
    if (Digamma) { 
     linkinv <- make.link(name.dlink)$linkinv 
     linkfun <- make.link(name.dlink)$linkfun 
     mu.eta <- make.link(name.dlink)$mu.eta 
     valid.eta <- make.link(name.dlink)$valid.eta 
     init <- expression({ 
      if (any(y <= 0)) { 
       print(y) 
       print(any(y <= 0)) 
       stop("non-positive values not allowed for the DM gamma family") 
      } 
      n <- rep.int(1, nobs) 
      mustart <- y 
     }) 
     dfamily <- structure(list(family = "Digamma", variance = varfun.digamma, 
      dev.resids = function(y, mu, wt) { 
       wt * unitdeviance.digamma(y, mu) 
      }, aic = function(y, n, mu, wt, dev) NA, link = name.dlink, 
      linkfun = linkfun, linkinv = linkinv, mu.eta = mu.eta, 
      initialize = init, validmu = function(mu) { 
       all(mu > 0) 
      }, valideta = valid.eta)) 
    } 
    else { 
     eval(substitute(dfamily <- Gamma(link = lk), list(lk = name.dlink))) 
    } 
    dlink <- as.character(dfamily$link) 
    logdlink <- dlink == "log" 
    if (!is.null(call$method)) { 
     name.method <- substitute(method) 
     if (!is.character(name.method)) 
      name.method <- deparse(name.method) 
     list.methods <- c("ml", "reml", "ML", "REML", "Ml", "Reml") 
     i.method <- pmatch(method, list.methods, nomatch = 0) 
     if (!i.method) 
      stop("Method must be ml or reml") 
     method <- switch(i.method, "ml", "reml", "ml", "reml", 
      "ml", "reml") 
    } 
    reml <- method == "reml" 
    if (is.null(mustart)) { 
     etastart <- NULL 
     eval(family$initialize) 
     mu <- mustart 
     mustart <- NULL 
    } 
    if (!is.null(betastart)) { 
     eta <- X %*% betastart 
     mu <- family$linkinv(eta + offset) 
    } 
    else { 
     if (!is.null(mustart)) { 
      mu <- mustart 
      eta <- family$linkfun(mu) - offset 
     } 
     else { 
      eta <- lm.fit(X, family$linkfun(mu) - offset, singular.ok = TRUE)$fitted.values 
      mu <- family$linkinv(eta + offset) 
     } 
    } 
    d <- family$dev.resids(y, mu, weights) 
    if (!is.null(phistart)) { 
     phi <- phistart 
     deta <- dfamily$linkfun(phi) - doffset 
    } 
    else { 
     deta <- lm.fit(Z, dfamily$linkfun(d + (d == 0)/6) - doffset, 
      singular.ok = TRUE)$fitted.values 
     if (logdlink) 
      deta <- deta + 1.27036 
     phi <- dfamily$linkinv(deta + offset) 
    } 
    if (any(phi <= 0)) { 
     cat("Some values for phi are non-positive, suggesting an inappropriate model", 
      "Try a different link function.\n") 
    } 
    zm <- as.vector(eta + (y - mu)/family$mu.eta(eta)) 
    wm <- as.vector(eval(family$variance(mu)) * weights/phi) 
    mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE) 
    eta <- mfit$fitted.values 
    mu <- family$linkinv(eta + offset) 
    cat("family:", family$family, "\n") 
    if (family$family == "Tweedie") { 
     cat("p:", tweedie.p, "\n") 
     if ((tweedie.p > 0) & (any(mu < 0))) { 
      cat("Some values for mu are negative, suggesting an inappropriate model.", 
       "Try a different link function.\n") 
     } 
    } 
    d <- family$dev.resids(y, mu, weights) 
    const <- dglm.constant(y, family, weights) 
    if (Digamma) { 
     h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
      weights/phi) 
    } 
    else { 
     h <- log(phi/weights) 
    } 
    m2loglik <- const + sum(h + d/phi) 
    if (reml) 
     m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R)))) 
    m2loglikold <- m2loglik + 1 
    epsilon <- control$epsilon 
    maxit <- control$maxit 
    trace <- control$trace 
    iter <- 0 
    while (abs(m2loglikold - m2loglik)/(abs(m2loglikold) + 1) > 
     epsilon && iter < maxit) { 
     hdot <- 1/dfamily$mu.eta(deta) 
     if (Digamma) { 
      delta <- 2 * weights * (log(weights/phi) - digamma(weights/phi)) 
      u <- 2 * weights^2 * (trigamma(weights/phi) - phi/weights) 
      fdot <- phi^2/u * hdot 
     } 
     else { 
      delta <- phi 
      u <- phi^2 
      fdot <- hdot 
     } 
     wd <- 1/(fdot^2 * u) 
     if (reml) { 
      h <- hat(mfit$qr) 
      delta <- delta - phi * h 
      wd <- wd - 2 * (h/hdot^2/phi^2) + h^2 
     } 
     if (any(wd < 0)) { 
      cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n") 
      wd[wd < 0] <- 0 
     } 
     if (any(is.infinite(wd))) { 
      cat(" Some weights are negative; temporarily fixing. This may be a sign of an inappropriate model.\n") 
      wd[is.infinite(wd)] <- 100 
     } 
     zd <- deta + (d - delta) * fdot 
     dfit <- lm.wfit(Z, zd, wd, method = "qr", singular.ok = TRUE) 
     deta <- dfit$fitted.values 
     phi <- dfamily$linkinv(deta + doffset) 
     if (any(is.infinite(phi))) { 
      cat("*** Some values for phi are infinite, suggesting an inappropriate model", 
       "Try a different link function. Making an attempt to continue...\n") 
      phi[is.infinite(phi)] <- 10 
     } 
     zm <- eta + (y - mu)/family$mu.eta(eta) 
     fam.wt <- expression(weights * family$variance(mu)) 
     wm <- eval(fam.wt)/phi 
     mfit <- lm.wfit(X, zm, wm, method = "qr", singular.ok = TRUE) 
     eta <- mfit$fitted.values 
     mu <- family$linkinv(eta + offset) 
     if (family$family == "Tweedie") { 
      if ((tweedie.p > 0) & (any(mu < 0))) { 
       cat("*** Some values for mu are negative, suggesting an inappropriate model.", 
        "Try a different link function. Making an attempt to continue...\n") 
       mu[mu <= 0] <- 1 
      } 
     } 
     d <- family$dev.resids(y, mu, weights) 
     m2loglikold <- m2loglik 
     if (Digamma) { 
      h <- 2 * (lgamma(weights/phi) + (1 + log(phi/weights)) * 
       weights/phi) 
     } 
     else { 
      h <- log(phi/weights) 
     } 
     m2loglik <- const + sum(h + d/phi) 
     if (reml) { 
      m2loglik <- m2loglik + 2 * log(abs(prod(diag(mfit$R)))) 
     } 
     iter <- iter + 1 
     if (trace) 
      cat("DGLM iteration ", iter, ": -2*log-likelihood = ", 
       format(round(m2loglik, 4)), " \n", sep = "") 
    } 
    mfit$formula <- call$formula 
    mfit$call <- call 
    mfit$family <- family 
    mfit$linear.predictors <- mfit$fitted.values + offset 
    mfit$fitted.values <- mu 
    mfit$prior.weights <- weights 
    mfit$terms <- mterms 
    mfit$contrasts <- attr(X, "contrasts") 
    intercept <- attr(mterms, "intercept") 
    mfit$df.null <- N - sum(weights == 0) - as.integer(intercept) 
    mfit$call <- call 
    mfit$deviance <- sum(d/phi) 
    mfit$aic <- NA 
    mfit$null.deviance <- glm.fit(x = X, y = y, weights = weights/phi, 
     offset = offset, family = family) 
    if (length(mfit$null.deviance) > 1) 
     mfit$null.deviance <- mfit$null.deviance$null.deviance 
    if (ykeep) 
     mfit$y <- y 
    if (xkeep) 
     mfit$x <- X 
    class(mfit) <- c("glm", "lm") 
    dfit$family <- dfamily 
    dfit$prior.weights <- rep(1, N) 
    dfit$linear.predictors <- dfit$fitted.values + doffset 
    dfit$fitted.values <- phi 
    dfit$terms <- dterms 
    dfit$aic <- NA 
    call$formula <- call$dformula 
    call$dformula <- NULL 
    call$family <- call(dfamily$family, link = name.dlink) 
    dfit$call <- call 
    dfit$residuals <- dfamily$dev.resid(d, phi, wt = rep(1/2, 
     N)) 
    dfit$deviance <- sum(dfit$residuals) 
    dfit$null.deviance <- glm.fit(x = Z, y = d, weights = rep(1/2, 
     N), offset = doffset, family = dfamily) 
    if (length(dfit$null.deviance) > 1) 
     dfit$null.deviance <- dfit$null.deviance$null.deviance 
    if (ykeep) 
     dfit$y <- d 
    if (zkeep) 
     dfit$z <- Z 
    dfit$formula <- as.vector(attr(dterms, "formula")) 
    dfit$iter <- iter 
    class(dfit) <- c("glm", "lm") 
    out <- c(mfit, list(dispersion.fit = dfit, iter = iter, method = method, 
     m2loglik = m2loglik)) 
    class(out) <- c("dglm", "glm", "lm") 
    out 
} 

,然後運行它像這樣:

dglm.nograpes(y~x,~x,family=tweedie(link.power=0, var.power=p)) 
+0

非常感謝。我永遠無法自己解決這個問題 – Acarbalacar 2015-03-02 14:35:12

+0

在我看來,真正的問題是'tweedie'不存儲'var.power'的值。如果是這樣,那麼你就不必依賴'dglm'中的函數調用。將'var.power'存儲到'tweedie'調用的輸出中會更加一致,然後直接引用該值。既然同一個作者寫了兩個函數,你應該真的聯繫他(戈登史密斯),以便他能解決這個問題。已經有了 – nograpes 2015-03-02 14:37:08

+0

。再次感謝你。 – Acarbalacar 2015-03-02 14:40:22