2014-12-19 23 views
2

我偶爾處理的數據有一個「完美擬合」的線性模型。對於我運行的每個迴歸,我需要提取我一直在用summary(mymodel)$r.squared進行的r.squared值,但是在完全擬合模型(參見下文)中失敗。如何處理完美契合的線性模型

df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1)) 
     mymodel <- lm(y ~ x, data = df) 
     summary(mymodel)$r.squared #This raises a warning 
     0.5294 

我該如何處理這些情況?基本上,我覺得我要像做

If(mymodel is a perfect fit) 
    rsquared = 1 
else 
    rsquared = summary(mymodel)$r.squared 
+1

如果你只是不希望看到的警告,你可以隨時將它包裝在'suppressWarnings()'。 – joran

+2

完美擬合將有0個殘差:'if(sum(resid(mymodel))<0.0001){rsq = 1}' – koekenbakker

+0

'rsquared < - tryCatch(summary(mymodel)$ r.squared,warning = function w)return(1))' – rawr

回答

2

您可以使用tryCatch

df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1)) 
     mymodel <- lm(y ~ x, data = df) 
     summary(mymodel)$r.squared #This raises a warning 

tryCatch(summary(mymodel)$r.squared, warning = function(w) return(1)) 
# [1] 1 

而且具有附加條件捕獲特定的警告

df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1)) 
mymodel <- lm(y ~ x, data = df) 
summary(mymodel)$r.squared #This raises a warning 

f <- function(expr) { 
    tryCatch(expr, 
      warning = function(w) { 
      if (grepl('perfect fit', w)) 
       return(1) 
      else return(w) 
      }) 
} 

f(TRUE) 
# [1] TRUE 

f(sum(1:5)) 
# [1] 15 

f(summary(mymodel)$r.squared) 
# [1] 1 

f(warning('this is not a fit warning')) 
# <simpleWarning in doTryCatch(return(expr), name, parentenv, handler): this is not a fit warning> 
0

一種選擇搭上完美擬合是確定殘差:如果它是一個完美擬合,殘差總和將爲零。

x = 1:5 

# generate 3 sets of y values, last set is random values 
y = matrix(data = c(rep(1,5),1:5,rnorm(5)), nrow = 5) 
tolerance = 0.0001 
r.sq = array(NA,ncol(y)) 

# check fit for three sets 
for (i in 1:ncol(y)){ 
    fit = lm(y[,i]~x) 

    # determine sum of residuals 
    if (sum(abs(resid(fit))) < tolerance) { 

    # perfect fit case 
    r.sq[i] = 1 } else { 

     # non-perfect fit case 
     r.sq[i] = summary(fit)$r.squared 
    } 
} 

print(r.sq) 
# [1] 1.0000000 1.0000000 0.7638879 
1

如果你想確保一切都將是完美的工作,那麼你可以稍稍修改源代碼(類型summary.lm看到原碼):

df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1)) 
mymodel <- lm(y ~ x, data = df) 

這是怎麼了我修改了它。除了函數底部的位之外,其他都與原始summary函數相同。

summary2 <- function (object, correlation = FALSE, symbolic.cor = FALSE, 
         ...) 
{ 
    z <- object 
    p <- z$rank 
    rdf <- z$df.residual 
    if (p == 0) { 
    r <- z$residuals 
    n <- length(r) 
    w <- z$weights 
    if (is.null(w)) { 
     rss <- sum(r^2) 
    } 
    else { 
     rss <- sum(w * r^2) 
     r <- sqrt(w) * r 
    } 
    resvar <- rss/rdf 
    ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")] 
    class(ans) <- "summary.lm" 
    ans$aliased <- is.na(coef(object)) 
    ans$residuals <- r 
    ans$df <- c(0L, n, length(ans$aliased)) 
    ans$coefficients <- matrix(NA, 0L, 4L) 
    dimnames(ans$coefficients) <- list(NULL, c("Estimate", 
               "Std. Error", "t value", "Pr(>|t|)")) 
    ans$sigma <- sqrt(resvar) 
    ans$r.squared <- ans$adj.r.squared <- 0 
    return(ans) 
    } 
    if (is.null(z$terms)) 
    stop("invalid 'lm' object: no 'terms' component") 
    if (!inherits(object, "lm")) 
    warning("calling summary.lm(<fake-lm-object>) ...") 
    Qr <- qr(object) 
    n <- NROW(Qr$qr) 
    if (is.na(z$df.residual) || n - p != z$df.residual) 
    warning("residual degrees of freedom in object suggest this is not an \"lm\" fit") 
    r <- z$residuals 
    f <- z$fitted.values 
    w <- z$weights 
    if (is.null(w)) { 
    mss <- if (attr(z$terms, "intercept")) 
     sum((f - mean(f))^2) 
    else sum(f^2) 
    rss <- sum(r^2) 
    } 
    else { 
    mss <- if (attr(z$terms, "intercept")) { 
     m <- sum(w * f/sum(w)) 
     sum(w * (f - m)^2) 
    } 
    else sum(w * f^2) 
    rss <- sum(w * r^2) 
    r <- sqrt(w) * r 
    } 
    resvar <- rss/rdf 
    p1 <- 1L:p 
    R <- chol2inv(Qr$qr[p1, p1, drop = FALSE]) 
    se <- sqrt(diag(R) * resvar) 
    est <- z$coefficients[Qr$pivot[p1]] 
    tval <- est/se 
    ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")] 
    ans$residuals <- r 
    ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval), 
                rdf, lower.tail = FALSE)) 
    dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]], 
            c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 
    ans$aliased <- is.na(coef(object)) 
    ans$sigma <- sqrt(resvar) 
    ans$df <- c(p, rdf, NCOL(Qr$qr)) 
    if (p != attr(z$terms, "intercept")) { 
    df.int <- if (attr(z$terms, "intercept")) 
     1L 
    else 0L 
    ans$r.squared <- mss/(mss + rss) 
    ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - 
                 df.int)/rdf) 
    ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, 
         numdf = p - df.int, dendf = rdf) 
    } 
    else ans$r.squared <- ans$adj.r.squared <- 0 
    ans$cov.unscaled <- R 
    dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1, 
                  1)] 

    #below is the only change to the code 
    #instead of ans$r.squared <- 1 the original code had a warning 
    if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) * 
     1e-30) { 
    ans$r.squared <- 1 #this is practically the only change in the source code. Originally it had the warning here 
    } 
    #moved the above lower in the order of the code so as not to affect the original code 
    #checked it and seems to be working properly 

    if (correlation) { 
    ans$correlation <- (R * resvar)/outer(se, se) 
    dimnames(ans$correlation) <- dimnames(ans$cov.unscaled) 
    ans$symbolic.cor <- symbolic.cor 
    } 
    if (!is.null(z$na.action)) 
    ans$na.action <- z$na.action 
    class(ans) <- "summary.lm" 
    ans 

} 

運行新公式,看到它現在沒有任何警告。沒有其他ifelse if條件是必需的。

> summary2(mymodel)$r.squared 
[1] 1