R 将省略号用作输入变量时向函数添加描述性统计信息
作为一项作业,我在 R 中编写了一个函数,用于计算多元线性回归的回归系数、预测值和残差(标签:r、function、regression)。函数如下:
#' Fit a multiple linear regression by ordinary least squares
#'
#' Column-binds the predictors passed through `...`, prepends an intercept
#' column, solves the normal equations and draws a residuals-vs-fitted plot on
#' the active graphics device.
#'
#' @param y_var Response vector; its length must equal the number of rows of
#'   the bound predictors.
#' @param ... One or more predictors, combined with `cbind()`.
#' @return A named list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals` and `Scatterplot`. `Scatterplot` is always `NULL`: `plot()`
#'   draws as a side effect and returns `NULL`; the element is kept so the
#'   list layout seen by existing callers does not change.
MLR <- function(y_var, ...) {
  y <- y_var

  predictors <- cbind(...)
  if (is.null(predictors)) {
    stop("MLR() needs at least one predictor in `...`.", call. = FALSE)
  }
  X <- as.matrix(predictors)
  if (nrow(X) != length(y)) {
    stop(
      "Predictors have ", nrow(X), " rows but `y_var` has length ",
      length(y), ".",
      call. = FALSE
    )
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, X)
  if (!is.numeric(X)) {
    stop("All predictors must be numeric or logical.", call. = FALSE)
  }

  # Solve (X'X) b = X'y directly instead of forming the explicit inverse:
  # fewer operations and better numerical behaviour.
  regression_coef <- solve(crossprod(X), crossprod(X, y))
  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(
    predicted_val, residual_val,
    ylab = 'Residuals', xlab = 'Predicted values',
    main = 'Predicted values against the residuals'
  )
  # Reference line at zero, drawn as its own call. Passing abline() as an
  # unnamed argument to plot() binds it to `type` and only works by accident.
  abline(h = 0)

  list(
    'y' = y,
    'X' = X,
    'Regression coefficients' = regression_coef,
    'Predicted values' = predicted_val,
    'Residuals' = residual_val,
    'Scatterplot' = NULL
  )
}
不起作用
谢谢你的回复。
请尝试对您的函数做如下小改动。我用 iris 数据集中的几个变量做了演示。
您可以先在 X(添加截距列之前的自变量矩阵)上计算所需的统计量,
然后把结果作为输出列表中的一个附加元素返回。代码如下:
#Function
#' Fit a multiple linear regression and summarise the predictors
#'
#' Column-binds the predictors passed through `...`, prepends an intercept
#' column, solves the normal equations, draws a residuals-vs-fitted plot on the
#' active graphics device and computes per-predictor descriptive statistics.
#'
#' @param y_var Response vector; its length must equal the number of rows of
#'   the bound predictors.
#' @param ... One or more predictors, combined with `cbind()`.
#' @return A named list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `Summary`. `Summary` is a matrix with rows
#'   `DMeans`, `DSD`, `DVar` and one column per predictor column (intercept
#'   excluded). `Scatterplot` is always `NULL`: `plot()` draws as a side effect
#'   and returns `NULL`; the element is kept so the list layout does not change.
MLR <- function(y_var, ...) {
  y <- y_var

  predictors <- cbind(...)
  if (is.null(predictors)) {
    stop("MLR() needs at least one predictor in `...`.", call. = FALSE)
  }
  # RX keeps the raw predictors (no intercept) for the descriptive statistics.
  RX <- as.matrix(predictors)
  if (nrow(RX) != length(y)) {
    stop(
      "Predictors have ", nrow(RX), " rows but `y_var` has length ",
      length(y), ".",
      call. = FALSE
    )
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, RX)
  if (!is.numeric(X)) {
    stop("All predictors must be numeric or logical.", call. = FALSE)
  }

  # Solve (X'X) b = X'y directly instead of forming the explicit inverse:
  # fewer operations and better numerical behaviour.
  regression_coef <- solve(crossprod(X), crossprod(X, y))
  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(
    predicted_val, residual_val,
    ylab = 'Residuals', xlab = 'Predicted values',
    main = 'Predicted values against the residuals'
  )
  # Reference line at zero, drawn as its own call. Passing abline() as an
  # unnamed argument to plot() binds it to `type` and only works by accident.
  abline(h = 0)

  # Column-wise descriptive statistics of the predictors; TRUE rather than T,
  # because T is an ordinary variable that can be reassigned.
  DMeans <- apply(RX, 2, mean, na.rm = TRUE)
  DSD <- apply(RX, 2, sd, na.rm = TRUE)
  DVar <- apply(RX, 2, var, na.rm = TRUE)
  DSummary <- rbind(DMeans, DSD, DVar)

  list(
    'y' = y,
    'X' = X,
    'Regression coefficients' = regression_coef,
    'Predicted values' = predicted_val,
    'Residuals' = residual_val,
    'Scatterplot' = NULL,
    'Summary' = DSummary
  )
}
# Example: regress sepal length on sepal width and petal length (iris data).
MLR(
  y_var = iris[["Sepal.Length"]],
  iris[["Sepal.Width"]],
  iris[["Petal.Length"]]
)
我想我明白了。不过,这里省略号的用法似乎有些问题:请检查 cbind(...) 在函数内部是否正确运行(我查看输出时它只有 1 列,而我传入了 2 个变量,这看起来不对)。我的解决方案不读取变量名,而是使用占位符名称(Var_1、Var_2、…、Var_n)。
MLR
#Function
#' Fit a multiple linear regression and summarise the predictors
#'
#' Column-binds the predictors passed through `...`, prepends an intercept
#' column, solves the normal equations, draws a residuals-vs-fitted plot on the
#' active graphics device and computes per-predictor descriptive statistics.
#'
#' @param y_var Response vector; its length must equal the number of rows of
#'   the bound predictors.
#' @param ... One or more predictors, combined with `cbind()`.
#' @return A named list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `Summary`. `Summary` is a matrix with rows
#'   `DMeans`, `DSD`, `DVar` and one column per predictor column (intercept
#'   excluded). `Scatterplot` is always `NULL`: `plot()` draws as a side effect
#'   and returns `NULL`; the element is kept so the list layout does not change.
MLR <- function(y_var, ...) {
  y <- y_var

  predictors <- cbind(...)
  if (is.null(predictors)) {
    stop("MLR() needs at least one predictor in `...`.", call. = FALSE)
  }
  # RX keeps the raw predictors (no intercept) for the descriptive statistics.
  RX <- as.matrix(predictors)
  if (nrow(RX) != length(y)) {
    stop(
      "Predictors have ", nrow(RX), " rows but `y_var` has length ",
      length(y), ".",
      call. = FALSE
    )
  }

  intercept <- rep(1, length(y))
  X <- cbind(intercept, RX)
  if (!is.numeric(X)) {
    stop("All predictors must be numeric or logical.", call. = FALSE)
  }

  # Solve (X'X) b = X'y directly instead of forming the explicit inverse:
  # fewer operations and better numerical behaviour.
  regression_coef <- solve(crossprod(X), crossprod(X, y))
  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(
    predicted_val, residual_val,
    ylab = 'Residuals', xlab = 'Predicted values',
    main = 'Predicted values against the residuals'
  )
  # Reference line at zero, drawn as its own call. Passing abline() as an
  # unnamed argument to plot() binds it to `type` and only works by accident.
  abline(h = 0)

  # Column-wise descriptive statistics of the predictors; TRUE rather than T,
  # because T is an ordinary variable that can be reassigned.
  DMeans <- apply(RX, 2, mean, na.rm = TRUE)
  DSD <- apply(RX, 2, sd, na.rm = TRUE)
  DVar <- apply(RX, 2, var, na.rm = TRUE)
  DSummary <- rbind(DMeans, DSD, DVar)

  list(
    'y' = y,
    'X' = X,
    'Regression coefficients' = regression_coef,
    'Predicted values' = predicted_val,
    'Residuals' = residual_val,
    'Scatterplot' = NULL,
    'Summary' = DSummary
  )
}
# Example: regress sepal length on sepal width and petal length (iris data).
MLR(
  y_var = iris[["Sepal.Length"]],
  iris[["Sepal.Width"]],
  iris[["Petal.Length"]]
)
$Scatterplot
NULL
$Summary
[,1] [,2]
DMeans 3.0573333 3.758000
DSD 0.4358663 1.765298
DVar 0.1899794 3.116278
#' Fit a multiple linear regression and summarise the predictors (tidyverse)
#'
#' Column-binds the predictors passed through `...`, prepends an intercept
#' column, solves the normal equations, draws a residuals-vs-fitted plot on the
#' active graphics device and builds a per-predictor summary table with dplyr
#' and tidyr.
#'
#' Predictor names are not read from the call; the summary uses the placeholder
#' names `Var_1`, `Var_2`, ... in the order the predictor columns were bound.
#'
#' @param y_var Response vector; its length must equal the number of rows of
#'   the bound predictors.
#' @param ... One or more predictors, combined with `cbind()`.
#' @return A named list with elements `y`, `X` (design matrix including the
#'   `intercept` column), `Regression coefficients`, `Predicted values`,
#'   `Residuals`, `Scatterplot` and `descriptives`. `descriptives` is a tibble
#'   grouped by `Variable` with columns `Variable`, `mean`, `var`, `sd`, one
#'   row per predictor column. `Scatterplot` is always `NULL`: `plot()` draws
#'   as a side effect and returns `NULL`; the element is kept so the list
#'   layout does not change.
MLR <- function(y_var, ...) {
  # Fail loudly if a dependency is missing. require() only returns FALSE and
  # lets the function crash later with an unrelated message.
  for (pkg in c("dplyr", "tidyr")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("MLR() needs the '", pkg, "' package to be installed.", call. = FALSE)
    }
  }

  y <- y_var

  predictors <- cbind(...)
  if (is.null(predictors)) {
    stop("MLR() needs at least one predictor in `...`.", call. = FALSE)
  }
  X <- as.matrix(predictors)
  if (nrow(X) != length(y)) {
    stop(
      "Predictors have ", nrow(X), " rows but `y_var` has length ",
      length(y), ".",
      call. = FALSE
    )
  }

  # Build the descriptives table straight from the predictor columns. No dummy
  # "n" column is created, so nothing has to be removed afterwards by row
  # position (which depended on how the group labels happened to sort).
  named_predictors <- X
  colnames(named_predictors) <- paste0("Var_", seq_len(ncol(named_predictors)))
  long <- tidyr::pivot_longer(
    dplyr::as_tibble(named_predictors),
    cols = dplyr::everything(),
    names_to = "Variable",
    values_to = "Value"
  )
  descriptives <- dplyr::summarize(
    dplyr::group_by(long, Variable),
    mean = mean(Value),
    var = stats::var(Value),
    sd = stats::sd(Value),
    .groups = "keep"
  )

  intercept <- rep(1, length(y))
  X <- cbind(intercept, X)
  if (!is.numeric(X)) {
    stop("All predictors must be numeric or logical.", call. = FALSE)
  }

  # Solve (X'X) b = X'y directly instead of forming the explicit inverse:
  # fewer operations and better numerical behaviour.
  regression_coef <- solve(crossprod(X), crossprod(X, y))
  predicted_val <- X %*% regression_coef
  residual_val <- y - predicted_val

  plot(
    predicted_val, residual_val,
    ylab = 'Residuals', xlab = 'Predicted values',
    main = 'Predicted values against the residuals'
  )
  # Reference line at zero, drawn as its own call. Passing abline() as an
  # unnamed argument to plot() binds it to `type` and only works by accident.
  abline(h = 0)

  list(
    'y' = y,
    'X' = X,
    'Regression coefficients' = regression_coef,
    'Predicted values' = predicted_val,
    'Residuals' = residual_val,
    'Scatterplot' = NULL,
    'descriptives' = descriptives
  )
}