R 如何处理完全拟合线性模型

R 如何处理完全拟合线性模型,r,R,我处理的数据偶尔会有一个“完美拟合”的线性模型。对于我运行的每个回归,我都需要提取r.squared值,我一直在使用summary(mymodel)$r.squared,但如果是完美拟合的模型,这将失败(见下文) 您可以使用tryCatch df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1)) mymodel <- lm(y ~ x, data = df) summary(mymodel)$r.squared

我处理的数据偶尔会有一个“完美拟合”的线性模型。对于我运行的每个回归,我都需要提取r.squared值,我一直在使用
summary(mymodel)$r.squared
,但如果是完美拟合的模型,这将失败(见下文)


您可以使用
tryCatch

df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
      mymodel <- lm(y ~ x, data = df)
      summary(mymodel)$r.squared #This raises a warning

tryCatch(summary(mymodel)$r.squared, warning = function(w) return(1))
# [1] 1

df捕捉完美拟合的一个选项是确定残差:如果是完美拟合,残差之和将为零

x = 1:5

# generate 3 sets of y values, last set is random values
y = matrix(data = c(rep(1,5),1:5,rnorm(5)), nrow = 5)
tolerance = 0.0001
r.sq = array(NA,ncol(y))

# check fit for three sets
for (i in 1:ncol(y)){
  fit = lm(y[,i]~x)

  # determine sum of residuals
  if (sum(abs(resid(fit))) < tolerance) {

    # perfect fit case
    r.sq[i] = 1 } else { 

      # non-perfect fit case
      r.sq[i] = summary(fit)$r.squared
  }
}

print(r.sq)
# [1] 1.0000000 1.0000000 0.7638879
x=1:5
#生成3组y值,最后一组为随机值
y=矩阵(数据=c(代表(1,5),1:5,形式(5)),nrow=5)
公差=0.0001
r、 sq=阵列(NA,ncol(y))
#检查是否适合三套
适用于(i/1:ncol(y)){
拟合=lm(y[,i]~x)
#确定残差之和
if(总和(绝对值(剩余(拟合))<公差){
#合身的箱子
r、 sq[i]=1}else{
#非完美合身箱
r、 sq[i]=汇总(拟合)$r.平方
}
}
印刷品(r.sq)
# [1] 1.0000000 1.0000000 0.7638879

如果您想确保一切正常运行,只需稍微修改源代码(键入summary.lm以查看原始代码):

运行新公式,查看它现在是否可以正常工作,并且没有任何警告。如果需要
或条件,则无其他

> summary2(mymodel)$r.squared 
[1] 1

如果你只是不想看到警告,你可以用
suppressWarnings()
来包装它。完美的匹配将有0个残差:
If(sum(resid(mymodel))怎么样
rsquared@rawr你的答案更符合我的要求。唯一的问题是,你的方法会假设
lm
引发的任何警告都是“完美匹配”吗警告?换句话说,lm提出的任何警告是否会导致rsquared=1?撇开将r.sq用于任何类型的诊断测试都是一个非常糟糕的想法这一事实不谈,您的示例并不完美(警告具有误导性).在该示例中,x没有影响;警告告诉您,基本上,x的估计影响应该正好为0,但这不是因为机器精度错误。在这种情况下,R.sq肯定不是1。
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary(mymodel)$r.squared #This raises a warning

f <- function(expr) {
  tryCatch(expr, 
           warning = function(w) {
             if (grepl('perfect fit', w))
               return(1) 
             else return(w)
           })  
}

f(TRUE)
# [1] TRUE

f(sum(1:5))
# [1] 15

f(summary(mymodel)$r.squared)
# [1] 1

f(warning('this is not a fit warning'))
# <simpleWarning in doTryCatch(return(expr), name, parentenv, handler): this is not a fit warning>
x = 1:5

# generate 3 sets of y values, last set is random values
y = matrix(data = c(rep(1,5),1:5,rnorm(5)), nrow = 5)
tolerance = 0.0001
r.sq = array(NA,ncol(y))

# check fit for three sets
for (i in 1:ncol(y)){
  fit = lm(y[,i]~x)

  # determine sum of residuals
  if (sum(abs(resid(fit))) < tolerance) {

    # perfect fit case
    r.sq[i] = 1 } else { 

      # non-perfect fit case
      r.sq[i] = summary(fit)$r.squared
  }
}

print(r.sq)
# [1] 1.0000000 1.0000000 0.7638879
df <- data.frame(x = c(1,2,3,4,5), y = c(1,1,1,1,1))
mymodel <- lm(y ~ x, data = df)
summary2 <- function (object, correlation = FALSE, symbolic.cor = FALSE, 
                      ...) 
{
  z <- object
  p <- z$rank
  rdf <- z$df.residual
  if (p == 0) {
    r <- z$residuals
    n <- length(r)
    w <- z$weights
    if (is.null(w)) {
      rss <- sum(r^2)
    }
    else {
      rss <- sum(w * r^2)
      r <- sqrt(w) * r
    }
    resvar <- rss/rdf
    ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
    class(ans) <- "summary.lm"
    ans$aliased <- is.na(coef(object))
    ans$residuals <- r
    ans$df <- c(0L, n, length(ans$aliased))
    ans$coefficients <- matrix(NA, 0L, 4L)
    dimnames(ans$coefficients) <- list(NULL, c("Estimate", 
                                               "Std. Error", "t value", "Pr(>|t|)"))
    ans$sigma <- sqrt(resvar)
    ans$r.squared <- ans$adj.r.squared <- 0
    return(ans)
  }
  if (is.null(z$terms)) 
    stop("invalid 'lm' object:  no 'terms' component")
  if (!inherits(object, "lm")) 
    warning("calling summary.lm(<fake-lm-object>) ...")
  Qr <- qr(object)
  n <- NROW(Qr$qr)
  if (is.na(z$df.residual) || n - p != z$df.residual) 
    warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
  r <- z$residuals
  f <- z$fitted.values
  w <- z$weights
  if (is.null(w)) {
    mss <- if (attr(z$terms, "intercept")) 
      sum((f - mean(f))^2)
    else sum(f^2)
    rss <- sum(r^2)
  }
  else {
    mss <- if (attr(z$terms, "intercept")) {
      m <- sum(w * f/sum(w))
      sum(w * (f - m)^2)
    }
    else sum(w * f^2)
    rss <- sum(w * r^2)
    r <- sqrt(w) * r
  }
  resvar <- rss/rdf
  p1 <- 1L:p
  R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
  se <- sqrt(diag(R) * resvar)
  est <- z$coefficients[Qr$pivot[p1]]
  tval <- est/se
  ans <- z[c("call", "terms", if (!is.null(z$weights)) "weights")]
  ans$residuals <- r
  ans$coefficients <- cbind(est, se, tval, 2 * pt(abs(tval), 
                                                  rdf, lower.tail = FALSE))
  dimnames(ans$coefficients) <- list(names(z$coefficients)[Qr$pivot[p1]], 
                                     c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
  ans$aliased <- is.na(coef(object))
  ans$sigma <- sqrt(resvar)
  ans$df <- c(p, rdf, NCOL(Qr$qr))
  if (p != attr(z$terms, "intercept")) {
    df.int <- if (attr(z$terms, "intercept")) 
      1L
    else 0L
    ans$r.squared <- mss/(mss + rss)
    ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - 
                                                       df.int)/rdf)
    ans$fstatistic <- c(value = (mss/(p - df.int))/resvar, 
                        numdf = p - df.int, dendf = rdf)
  }
  else ans$r.squared <- ans$adj.r.squared <- 0
  ans$cov.unscaled <- R
  dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1, 
                                                             1)]

  #below is the only change to the code
  #instead of ans$r.squared <- 1 the original code had a warning
  if (is.finite(resvar) && resvar < (mean(f)^2 + var(f)) * 
        1e-30) {
    ans$r.squared <- 1 #this is practically the only change in the source code. Originally it had the warning here
  }
  #moved the above lower in the order of the code so as not to affect the original code
  #checked it and seems to be working properly

  if (correlation) {
    ans$correlation <- (R * resvar)/outer(se, se)
    dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
    ans$symbolic.cor <- symbolic.cor
  }
  if (!is.null(z$na.action)) 
    ans$na.action <- z$na.action
  class(ans) <- "summary.lm"
  ans

}
> summary2(mymodel)$r.squared 
[1] 1