R 将省略号用作输入变量时向函数添加描述性统计信息

R 将省略号用作输入变量时向函数添加描述性统计信息,r,function,regression,R,Function,Regression,对于分配,我在R中创建了一个函数,用于计算多元线性回归有用的回归系数、预测值和数据残差。它这样做如下: MLR <- function(y_var, ...){ y <- y_var X <- as.matrix(cbind(...)) intercept <- rep(1, length(y)) X <- cbind(intercept, X) regression_coef <- solve(t(

作为一项作业,我在 R 中编写了一个函数,用于计算多元线性回归中常用的回归系数、预测值和残差。其实现如下:

#' Fit a multiple linear regression by ordinary least squares.
#'
#' Column-binds the predictors passed through `...` into a design matrix,
#' prepends an intercept column, solves the normal equations, and draws a
#' residuals-versus-predicted scatterplot on the active graphics device.
#'
#' @param y_var Numeric response vector.
#' @param ... Predictors (vectors or matrices) forwarded to `cbind()`; together
#'   they must supply one row per element of `y_var`.
#' @return A list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, and `Scatterplot`. `Scatterplot` is always `NULL`: base
#'   `plot()` draws as a side effect and returns nothing; the element is kept
#'   only so the shape of the result does not change.
MLR <- function(y_var, ...) {
  y <- y_var
  X <- as.matrix(cbind(...))

  # Fail early with a readable message instead of a non-conformable-arguments
  # error from the matrix products further down.
  if (nrow(X) != length(y)) {
    stop("predictors must supply one row per element of `y_var`",
         call. = FALSE)
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, X)

  # Solve (X'X) b = X'y directly rather than forming the explicit inverse of
  # X'X and multiplying it out.
  regression_coef <- solve(crossprod(X), crossprod(X, y))

  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(predicted_val, residual_val,
       ylab = "Residuals", xlab = "Predicted values",
       main = "Predicted values against the residuals")
  # Horizontal reference line at zero residual. Drawn as its own statement:
  # passing abline() as an unnamed argument to plot() binds it to `type`.
  abline(h = 0)

  list(
    "y" = y,
    "X" = X,
    "Regression coefficients" = regression_coef,
    "Predicted values" = predicted_val,
    "Residuals" = residual_val,
    "Scatterplot" = NULL
  )
}
不起作用


谢谢你的回复

请尝试对你的函数做如下小改动。我用 `iris` 数据集中的几个变量做了演示。你可以先在 `X`(添加截距列之前的自变量矩阵)上计算所需的描述性统计量,再把结果作为输出列表中的一个附加元素返回。代码如下:

#Function
#' Fit a multiple linear regression and summarise the predictors.
#'
#' Column-binds the predictors passed through `...` into a design matrix,
#' prepends an intercept column, solves the normal equations, draws a
#' residuals-versus-predicted scatterplot on the active graphics device, and
#' computes per-predictor descriptive statistics.
#'
#' @param y_var Numeric response vector.
#' @param ... Predictors (vectors or matrices) forwarded to `cbind()`; together
#'   they must supply one row per element of `y_var`.
#' @return A list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `Summary`. `Scatterplot` is always `NULL`
#'   because base `plot()` returns nothing; it is kept so the shape of the
#'   result does not change. `Summary` is a matrix with rows `DMeans`, `DSD`
#'   and `DVar` and one column per predictor (intercept excluded).
MLR <- function(y_var, ...) {
  y <- y_var
  predictors <- as.matrix(cbind(...))

  # Fail early with a readable message instead of a non-conformable-arguments
  # error from the matrix products further down.
  if (nrow(predictors) != length(y)) {
    stop("predictors must supply one row per element of `y_var`",
         call. = FALSE)
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, predictors)

  # Solve (X'X) b = X'y directly rather than forming the explicit inverse of
  # X'X and multiplying it out.
  regression_coef <- solve(crossprod(X), crossprod(X, y))

  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(predicted_val, residual_val,
       ylab = "Residuals", xlab = "Predicted values",
       main = "Predicted values against the residuals")
  # Horizontal reference line at zero residual. Drawn as its own statement:
  # passing abline() as an unnamed argument to plot() binds it to `type`.
  abline(h = 0)

  # Descriptive statistics of the predictors only (no intercept column).
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  DMeans <- apply(predictors, 2, mean, na.rm = TRUE)
  DVar <- apply(predictors, 2, var, na.rm = TRUE)
  # sd() is defined as sqrt(var()), so reuse the variances.
  DSD <- sqrt(DVar)
  DSummary <- rbind(DMeans, DSD, DVar)

  list(
    "y" = y,
    "X" = X,
    "Regression coefficients" = regression_coef,
    "Predicted values" = predicted_val,
    "Residuals" = residual_val,
    "Scatterplot" = NULL,
    "Summary" = DSummary
  )
}
#Apply
# Example: regress sepal length on sepal width and petal length (iris data).
MLR(
  y_var = iris$Sepal.Length,
  iris$Sepal.Width,
  iris$Petal.Length
)

我想我弄明白了。不过,省略号的用法似乎有些奇怪。请检查 `cbind(...)` 在函数内部是否正确运行(我检查输出时,它只有 1 列宽,而我传入了 2 个变量,这看起来不对)。

我的解决方案不读取变量名-它使用占位符名(Var_1,Var_2,…,Var_n)


MLR
#Function
#' Fit a multiple linear regression and summarise the predictors.
#'
#' Column-binds the predictors passed through `...` into a design matrix,
#' prepends an intercept column, solves the normal equations, draws a
#' residuals-versus-predicted scatterplot on the active graphics device, and
#' computes per-predictor descriptive statistics.
#'
#' @param y_var Numeric response vector.
#' @param ... Predictors (vectors or matrices) forwarded to `cbind()`; together
#'   they must supply one row per element of `y_var`.
#' @return A list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `Summary`. `Scatterplot` is always `NULL`
#'   because base `plot()` returns nothing; it is kept so the shape of the
#'   result does not change. `Summary` is a matrix with rows `DMeans`, `DSD`
#'   and `DVar` and one column per predictor (intercept excluded).
MLR <- function(y_var, ...) {
  y <- y_var
  predictors <- as.matrix(cbind(...))

  # Fail early with a readable message instead of a non-conformable-arguments
  # error from the matrix products further down.
  if (nrow(predictors) != length(y)) {
    stop("predictors must supply one row per element of `y_var`",
         call. = FALSE)
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, predictors)

  # Solve (X'X) b = X'y directly rather than forming the explicit inverse of
  # X'X and multiplying it out.
  regression_coef <- solve(crossprod(X), crossprod(X, y))

  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(predicted_val, residual_val,
       ylab = "Residuals", xlab = "Predicted values",
       main = "Predicted values against the residuals")
  # Horizontal reference line at zero residual. Drawn as its own statement:
  # passing abline() as an unnamed argument to plot() binds it to `type`.
  abline(h = 0)

  # Descriptive statistics of the predictors only (no intercept column).
  # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
  DMeans <- apply(predictors, 2, mean, na.rm = TRUE)
  DVar <- apply(predictors, 2, var, na.rm = TRUE)
  # sd() is defined as sqrt(var()), so reuse the variances.
  DSD <- sqrt(DVar)
  DSummary <- rbind(DMeans, DSD, DVar)

  list(
    "y" = y,
    "X" = X,
    "Regression coefficients" = regression_coef,
    "Predicted values" = predicted_val,
    "Residuals" = residual_val,
    "Scatterplot" = NULL,
    "Summary" = DSummary
  )
}
#Apply
# Example: regress sepal length on sepal width and petal length (iris data).
MLR(
  y_var = iris$Sepal.Length,
  iris$Sepal.Width,
  iris$Petal.Length
)
$Scatterplot
NULL

$Summary
            [,1]     [,2]
DMeans 3.0573333 3.758000
DSD    0.4358663 1.765298
DVar   0.1899794 3.116278

#' Fit a multiple linear regression and describe the predictors (tidyverse).
#'
#' Column-binds the predictors passed through `...` into a design matrix,
#' prepends an intercept column, solves the normal equations, draws a
#' residuals-versus-predicted scatterplot on the active graphics device, and
#' returns mean, variance and standard deviation for every predictor.
#'
#' Needs the dplyr and tidyr packages to be installed; they are called through
#' `::` and are not attached to the search path.
#'
#' @param y_var Numeric response vector.
#' @param ... Predictors, each a vector with one value per element of `y_var`.
#'   Argument names are not used: the predictors are labelled `Var_1`,
#'   `Var_2`, ... in the order given.
#' @return A list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `descriptives`. `Scatterplot` is always
#'   `NULL` because base `plot()` returns nothing; it is kept so the shape of
#'   the result does not change. `descriptives` is a tibble grouped by
#'   `Variable`, with one row per predictor and columns `Variable`, `mean`,
#'   `var` and `sd`.
MLR <- function(y_var, ...) {
  y <- y_var
  X <- as.matrix(cbind(...))

  # Fail early with a readable message instead of a non-conformable-arguments
  # error from the matrix products further down.
  if (nrow(X) != length(y)) {
    stop("predictors must supply one row per element of `y_var`",
         call. = FALSE)
  }

  # Build the predictor table in one step under placeholder names. No dummy
  # first column is created, so nothing has to be removed by row position
  # afterwards (which row a dummy lands on depends on the sort order of the
  # group keys).
  predictors <- list(...)
  names(predictors) <- paste0("Var_", seq_along(predictors))
  predictor_tbl <- dplyr::as_tibble(predictors)

  # Long format: one row per (variable, observation), then one summary row
  # per variable.
  long_tbl <- tidyr::pivot_longer(
    predictor_tbl,
    cols = dplyr::everything(),
    names_to = "Variable",
    values_to = "Value"
  )
  # `.groups = "keep"` leaves the result grouped by `Variable`.
  descriptives <- dplyr::summarise(
    dplyr::group_by(long_tbl, Variable),
    mean = mean(Value),
    var = var(Value),
    sd = sd(Value),
    .groups = "keep"
  )

  intercept <- rep(1, length(y))
  X <- cbind(intercept, X)

  # Solve (X'X) b = X'y directly rather than forming the explicit inverse of
  # X'X and multiplying it out.
  regression_coef <- solve(crossprod(X), crossprod(X, y))

  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(predicted_val, residual_val,
       ylab = "Residuals", xlab = "Predicted values",
       main = "Predicted values against the residuals")
  # Horizontal reference line at zero residual. Drawn as its own statement:
  # passing abline() as an unnamed argument to plot() binds it to `type`.
  abline(h = 0)

  list(
    "y" = y,
    "X" = X,
    "Regression coefficients" = regression_coef,
    "Predicted values" = predicted_val,
    "Residuals" = residual_val,
    "Scatterplot" = NULL,
    "descriptives" = descriptives
  )
}