R 尝试为包创建函数,当存在分类变量时,该函数会自动绘制给定模型的变量响应
我试图创建一个函数,当至少一个变量是分类变量时,该函数将绘制所选变量的响应 当所有变量都是数值变量时,我通常会保持所有其他变量的均值,然后改变目标变量,以下是mtcars的一个示例:R 尝试为包创建函数,当存在分类变量时,该函数会自动绘制给定模型的变量响应,r,function,ggplot2,tidyverse,R,Function,Ggplot2,Tidyverse,我试图创建一个函数,当至少一个变量是分类变量时,该函数将绘制所选变量的响应 当所有变量都是数值变量时,我通常会保持所有其他变量的均值,然后改变目标变量,以下是mtcars的一个示例: library(tidyverse) data("mtcars") 首先,我将修改am变量,使其成为分类变量 mt2 <- mtcars %>% mutate(am = case_when(am == 0 ~ "Automatic", am == 1 ~ "Manual")) %>% sele
library(tidyverse)
data("mtcars")
首先,我将修改am变量,使其成为分类变量
mt2 <- mtcars %>% mutate(am = case_when(am == 0 ~ "Automatic", am == 1 ~ "Manual")) %>% select(mpg, am, wt, hp)
这就是我遇到麻烦的时候
当然,如果我在存在分类变量时尝试此操作,我会遇到问题,因为如果它尝试获取数据帧的分类值的平均值,则会失败:
model2 <- lm(mpg ~ wt + hp + am, data = mt2)
我会得到:
Error: variable 'am' was fitted with type "character" but type "numeric" was supplied
因此,我尝试了以下方法:
Plot_Response2 <- function(Model, variable){
# First I get the names of all categorical variables
Categoricals <- Model$model %>% mutate_if(is.factor, as.character) %>% select_if(is.character) %>% colnames()
# generate a data.frame with all the means copied 20 times for each level
Means <- Model$model %>% mutate_if(is.factor, as.character) %>% mutate_if(is.numeric, mean) %>% group_by_if(is.character) %>% summarise_if(is.numeric, mean) %>% ungroup()
Means <- Means[rep(seq_len(nrow(Means)), each = 20),] %>% arrange_if(is.character) %>% group_split(substitute(variable))
return(Means)
}
我会得到:
Error: variable 'am' was fitted with type "character" but type "numeric" was supplied
我为此编写了以下代码,但无法将其添加到函数中:
Means <- model2$model %>% mutate_if(is.factor, as.character) %>% mutate_if(is.numeric, mean) %>% group_by_if(is.character) %>% summarise_if(is.numeric, mean) %>% ungroup()
Means <- Means[rep(seq_len(nrow(Means)), each = 20),] %>% arrange_if(is.character) %>% group_split(am)
MinMax <- model2$model %>% select(wt) %>% pull(wt) %>% range()
MinMax <- seq(from = MinMax[1], to = MinMax[2], length.out = 20)
for(i in 1:length(Means)){
Means[[i]]$wt <- MinMax
}
Means <- bind_rows(Means)
Means$Predicted <- predict(model2, Means)
Means$SE <- predict(model2, Means, se.fit = T)$se.fit
ggplot(Means, aes(x = wt, y = Predicted)) + geom_ribbon(aes(ymax = Predicted + SE, ymin = Predicted - SE, fill = am), alpha = 0.5) + geom_line(aes(color = am)) + theme_classic()
我会得到:
Error: variable 'am' was fitted with type "character" but type "numeric" was supplied
同样,我使用了这段代码,这段代码似乎无法与函数2结合在一起
Means <- model2$model %>% mutate_if(is.factor, as.character) %>% mutate_if(is.numeric, mean) %>% group_by_if(is.character) %>% summarise_if(is.numeric, mean) %>% ungroup()
Means <- Means[rep(seq_len(nrow(Means)), each = 20),] %>% arrange_if(is.character) %>% group_split(am)
Means <- bind_rows(Means)
Means$Predicted <- predict(model2, Means)
Means$SE <- predict(model2, Means, se.fit = T)$se.fit
ggplot(Means, aes(x = am, y = Predicted)) + geom_errorbar(aes(ymin = Predicted - SE, ymax = Predicted + SE)) + geom_point() + theme_classic()
表示%mutate\u if(is.factor,as.character)%%>%mutate\u if(is.numeric,mean)%%>%groupby\u if(is.character)%%>%summary\u if(is.numeric,mean)%%>%ungroup()
表示%arrange\u如果(是.字符)%>%group\u拆分(am)
意思是这里有一个版本,它使用了更多的tidyverse函数,只是为了让事情变得更简单
Plot_Response <- function(Model, variable, N=20) {
model_data <- model.frame(Model)
stopifnot(variable %in% names(model_data))
# get all variables we need to dummy values for
all_vars <- model_data %>% select(-one_of(variable))
num_vars <- all_vars %>% select_if(is.numeric) %>% summarize_all(mean)
cat_vars <- all_vars %>% select_if(Negate(is.numeric)) %>% purrr::map(unique)
resp_var <- model_data %>% pull(variable)
if(is.numeric(resp_var)) {
resp_vals <- seq(min(resp_var), max(resp_var), length.out=N)
} else {
resp_vals <- unique(resp_var)
}
new_data <- tidyr::crossing(num_vars, !!!cat_vars, !!variable:=resp_vals)
pred <- broom::augment(Model, newdata = new_data, se_fit=TRUE)
## Plot the response
my_aes <- aes(x= !!sym(variable), y = .fitted)
if (length(cat_vars)==1) {
my_aes[["fill"]] <- sym(names(cat_vars))
} else if (length(cat_vars)>1) {
my_aes[["fill"]] <- quo(interaction(!!!syms(names(cat_vars))))
}
range_aes <- aes(ymax= .fitted + .se.fit, ymin = .fitted - .se.fit)
result <- ggplot(pred, my_aes) + theme_classic() + ylab("Predicted")
if(is.numeric(resp_var)) {
result +
(if (length(cat_vars)>0) {
geom_ribbon(range_aes)
} else {
geom_ribbon(range_aes, fill="grey")
}) +
geom_line()
} else {
result +
geom_errorbar(range_aes) +
geom_point()
}
}
Plot\u响应%purrr::map(唯一)
响应变量%pull(变量)
如果(是数字(对应变量)){
相应的我相信你所描述的正是该软件包解决的那种问题。软件包描述的第一行很好地说明了这一点:
DescTools是各种基本统计数据的广泛集合
R基本系统中不提供的功能和舒适性包装
数据的有效描述
我不喜欢加载一串包来完成R中的工作。但是,我对这一个例外。我认为Andri Signorell的大量工具集确实非常出色。在DescTools
中定义的函数与tidyverse
中定义的函数之间可能存在冲突,因此我用你的话来概括我的答案t求助于tidyverse
# DescTools needs to be available
if (!require(DescTools)) {
install.packages("DescTools")
}
library(DescTools)
# Create factors in mtcars
mt3 <- mtcars
mt3$am <- factor(mt3$am, labels = c("man", "auto"))
mt3$vs <- factor(mt3$vs, labels = c("v", "str"))
还有两个例子说明了对于相互作用的因素,甚至对于单个连续变量,这是多么简单
Desc(mpg ~ am:vs, mt3)
Desc(mt3$qsec)
也许结帐已经做了很多。谢谢@MrFlick,我甚至还学习了一门我不知道的交叉。你有我所需要的一切
Means <- model2$model %>% mutate_if(is.factor, as.character) %>% mutate_if(is.numeric, mean) %>% group_by_if(is.character) %>% summarise_if(is.numeric, mean) %>% ungroup()
Means <- Means[rep(seq_len(nrow(Means)), each = 20),] %>% arrange_if(is.character) %>% group_split(am)
Means <- bind_rows(Means)
Means$Predicted <- predict(model2, Means)
Means$SE <- predict(model2, Means, se.fit = T)$se.fit
ggplot(Means, aes(x = am, y = Predicted)) + geom_errorbar(aes(ymin = Predicted - SE, ymax = Predicted + SE)) + geom_point() + theme_classic()
Plot_Response <- function(Model, variable, N=20) {
model_data <- model.frame(Model)
stopifnot(variable %in% names(model_data))
# get all variables we need to dummy values for
all_vars <- model_data %>% select(-one_of(variable))
num_vars <- all_vars %>% select_if(is.numeric) %>% summarize_all(mean)
cat_vars <- all_vars %>% select_if(Negate(is.numeric)) %>% purrr::map(unique)
resp_var <- model_data %>% pull(variable)
if(is.numeric(resp_var)) {
resp_vals <- seq(min(resp_var), max(resp_var), length.out=N)
} else {
resp_vals <- unique(resp_var)
}
new_data <- tidyr::crossing(num_vars, !!!cat_vars, !!variable:=resp_vals)
pred <- broom::augment(Model, newdata = new_data, se_fit=TRUE)
## Plot the response
my_aes <- aes(x= !!sym(variable), y = .fitted)
if (length(cat_vars)==1) {
my_aes[["fill"]] <- sym(names(cat_vars))
} else if (length(cat_vars)>1) {
my_aes[["fill"]] <- quo(interaction(!!!syms(names(cat_vars))))
}
range_aes <- aes(ymax= .fitted + .se.fit, ymin = .fitted - .se.fit)
result <- ggplot(pred, my_aes) + theme_classic() + ylab("Predicted")
if(is.numeric(resp_var)) {
result +
(if (length(cat_vars)>0) {
geom_ribbon(range_aes)
} else {
geom_ribbon(range_aes, fill="grey")
}) +
geom_line()
} else {
result +
geom_errorbar(range_aes) +
geom_point()
}
}
model1 <- lm(mpg ~ wt + hp + am, data = mt2)
Plot_Response(model1, "wt")
Plot_Response(model1, "am")
# DescTools needs to be available
if (!require(DescTools)) {
install.packages("DescTools")
}
library(DescTools)
# Create factors in mtcars
mt3 <- mtcars
mt3$am <- factor(mt3$am, labels = c("man", "auto"))
mt3$vs <- factor(mt3$vs, labels = c("v", "str"))
# mpg as a function of weight
dev.new(width = 6, height = 4.5)
opar <- par(mfrow = c(1, 2))
Desc(mpg ~ wt, mt3, main = "Manual", subset = am == "man")
Desc(mpg ~ wt, mt3, main = "Automatic", subset = am == "auto")
par(opar)
# mpg as a function of transmission
Desc(mpg ~ am, mt3)
Desc(mpg ~ am:vs, mt3)
Desc(mt3$qsec)