使用 broom 从分组线性模型中提取斜率和 R 平方
我有一个数据框,我想按组运行线性模型,然后使用 broom 包提取每个模型的斜率和 R 平方。到目前为止,我正在尝试:使用 broom 从分组线性模型中提取斜率和 R 平方,r,tidyverse,broom,R,Tidyverse,Broom,我有一个数据框,我想按组运行线性模型,然后使用 broom 包提取每个模型的斜率和 R 平方。到目前为止,我正在尝试: library(tidyverse) library(broom) #read in the dataset data(mtcars) #add a group variable mtcars <- mtcars %>% as_tibble() %>% mutate(LC = 1) #create a second group mtcars2 <- mtcar
library(tidyverse)
library(broom)

# Read in the dataset
data(mtcars)

# Add a group variable
mtcars <- mtcars %>%
  as_tibble() %>%
  mutate(LC = 1)

# Create a second group
mtcars2 <- mtcars %>% mutate(LC = 2)

# Bind together
mtcars <- bind_rows(mtcars, mtcars2)

# Group by LC and run both regressions within every group.
# do() with named arguments on grouped data hands back a row-wise result;
# ungroup() drops that, so the mutate() below sees mod1 as an ordinary
# list-column that map() can iterate over (one model per row).
all_regress <- mtcars %>%
  group_by(LC) %>%
  do(
    mod1 = lm(mpg ~ disp, data = .),
    mod2 = lm(mpg ~ wt, data = .)
  ) %>%
  ungroup()

# Use broom to extract the slope and R-squared per group (from mod1)
glance <- all_regress %>%
  mutate(
    tidy = map(mod1, broom::tidy),
    glance = map(mod1, broom::glance),
    augment = map(mod1, broom::augment),
    # R-squared is read from each one-row glance() result
    rsq = map_dbl(glance, "r.squared"),
    # row 2 of the tidy() output is the predictor term (row 1 is the intercept)
    slope = map_dbl(tidy, function(x) x$estimate[2])
  )
如果我不分组而执行同样的操作(如下所示):
# Reload the dataset; no grouping column is added this time
data(mtcars)
mtcars <- as_tibble(mtcars)

# Fit both regressions on the full table
all_regress <- mtcars %>%
  do(
    mod1 = lm(mpg ~ disp, data = .),
    mod2 = lm(mpg ~ wt, data = .)
  )

# Summarise mod1 with broom, then pull out its R-squared and slope
glance <- mutate(
  all_regress,
  tidy = map(mod1, broom::tidy),
  glance = map(mod1, broom::glance),
  augment = map(mod1, broom::augment),
  rsq = map_dbl(glance, "r.squared"),
  # second coefficient row = the predictor term
  slope = map_dbl(tidy, ~ .x$estimate[2])
)
# Read in the dataset
data(mtcars)
mtcars <- mtcars %>% as_tibble()

# Run regressions on the whole (ungrouped) table
all_regress <- mtcars %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .))

# Use broom to extract the slope and R-squared from mod1
glance <- all_regress %>%
  mutate(tidy = map(mod1, broom::tidy),
         glance = map(mod1, broom::glance),
         augment = map(mod1, broom::augment),
         rsq = glance %>% map_dbl("r.squared"),
         slope = tidy %>% map_dbl(function(x) x$estimate[2]))
这样就没有错误。
我认为只需添加 `ungroup()` 即可满足您的需要:
# Fit the models per group, then ungroup() so the result is no longer
# processed row by row in the mutate() that follows
all_regress <- mtcars %>%
  group_by(LC) %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .)) %>%
  ungroup()

# Use broom to extract the slope and R-squared per group (from mod1)
glance <- all_regress %>%
  mutate(tidy = map(mod1, broom::tidy),
         glance = map(mod1, broom::glance),
         augment = map(mod1, broom::augment),
         rsq = glance %>% map_dbl("r.squared"),
         slope = tidy %>% map_dbl(function(x) x$estimate[2]))
我使用了下面这种方法,代码更长,但我认为这样可以对各个步骤有更多的控制。最后,我创建了一个 tibble,其中的列表列(list-column)包含每个模型。
library(tidyverse)
library(broom)

# Read in the dataset
data(mtcars)

# Drop the columns that are not needed and add a group variable
mtcars <- mtcars %>%
  as_tibble() %>%
  dplyr::select(-c(vs, am, gear, carb, cyl)) %>%
  mutate(LC = 1)

# Create a second group
mtcars2 <- mtcars %>% mutate(LC = 2)

# Bind together (the LC = 2 rows come first)
mtcars <- bind_rows(mtcars2, mtcars)

# Split once by group and record each piece's own LC value, so the labels
# attached further down are guaranteed to line up with the split order.
# unique(mtcars$LC) follows row order (2, then 1 here), which need not be
# the order in which group_split() returns the pieces.
lc_groups <- mtcars %>% group_split(LC)
lc_keys <- map(lc_groups, function(piece) piece$LC[[1]])

# Run both regressions within every group:
# a list with one element per group, each holding mod1 and mod2
all_regress <- map(lc_groups, function(piece) {
  list(
    mod1 = lm(mpg ~ disp, data = piece),
    mod2 = lm(mpg ~ wt, data = piece)
  )
})
# example <- all_regress[[2]][[1]] %>% glance()

# Build a one-row tibble per model. List-columns are needed because
# tidy(), glance() and augment() each return a different kind of object.
data <- map(all_regress, function(group_models) {
  map(group_models, function(model) {
    tibble(
      mod = list(model),
      tidy = list(broom::tidy(model)),
      glance = list(broom::glance(model)),
      augment = list(broom::augment(model))
    ) %>%
      mutate(
        rsq = list(glance[[1]]$r.squared),
        # second coefficient row = the predictor term
        slope = list(tidy[[1]]$estimate[2])
      )
  })
})

# Label every model row with the LC value of the group it was fitted on
data_final <- map2(data, lc_keys, function(group_models, lc) {
  map(group_models, function(each_model) mutate(each_model, LC = lc))
})

# Stack the model rows within each group, then stack the groups
final_format <- data_final %>%
  map(bind_rows) %>%
  bind_rows()

# Access the data
final_format[1, 1][[1]]
library(tidyverse)
library(broom)

# Read in the dataset
data(mtcars)

# Drop the columns that are not needed and add a group variable
mtcars <- mtcars %>%
  as_tibble() %>%
  dplyr::select(-c(vs, am, gear, carb, cyl)) %>%
  mutate(LC = 1)

# Create a second group
mtcars2 <- mtcars %>% mutate(LC = 2)

# Bind together (the LC = 2 rows come first)
mtcars <- bind_rows(mtcars2, mtcars)

# Split once by group and record each piece's own LC value, so the labels
# attached further down are guaranteed to line up with the split order.
# unique(mtcars$LC) follows row order (2, then 1 here), which need not be
# the order in which group_split() returns the pieces.
lc_groups <- mtcars %>% group_split(LC)
lc_keys <- map(lc_groups, function(piece) piece$LC[[1]])

# Run both regressions within every group:
# a list with one element per group, each holding mod1 and mod2
all_regress <- map(lc_groups, function(piece) {
  list(
    mod1 = lm(mpg ~ disp, data = piece),
    mod2 = lm(mpg ~ wt, data = piece)
  )
})

# Build a one-row tibble per model. List-columns are needed because
# tidy(), glance() and augment() each return a different kind of object.
data <- map(all_regress, function(group_models) {
  map(group_models, function(model) {
    tibble(
      mod = list(model),
      tidy = list(broom::tidy(model)),
      glance = list(broom::glance(model)),
      augment = list(broom::augment(model))
    ) %>%
      mutate(
        rsq = list(glance[[1]]$r.squared),
        # second coefficient row = the predictor term
        slope = list(tidy[[1]]$estimate[2])
      )
  })
})

# Label every model row with the LC value of the group it was fitted on
data_final <- map2(data, lc_keys, function(group_models, lc) {
  map(group_models, function(each_model) mutate(each_model, LC = lc))
})

# Stack the model rows within each group, then stack the groups
final_format <- data_final %>%
  map(bind_rows) %>%
  bind_rows()

# Access the data
final_format[1, 1][[1]]
我可以理解为什么在定义 `all_regress` 时需要分组,但在定义 `glance` 时再分组似乎是多余的。既然 `all_regress` 中的每一行都代表一个不同的组,那么分组在 `glance` 中起到了什么作用?
你说得可能很有道理,我对 broom 以及把列表用作行值还相当陌生。也许有更好的方法来实现我的目标?我绝不局限于我在这里展示的方法。
library(tidyverse)
library(broom)

# Read in the dataset
data(mtcars)

# Drop the columns that are not needed and add a group variable
mtcars <- mtcars %>%
  as_tibble() %>%
  dplyr::select(-c(vs, am, gear, carb, cyl)) %>%
  mutate(LC = 1)

# Create a second group
mtcars2 <- mtcars %>% mutate(LC = 2)

# Bind together (the LC = 2 rows come first)
mtcars <- bind_rows(mtcars2, mtcars)

# Split once by group and record each piece's own LC value, so the labels
# attached further down are guaranteed to line up with the split order.
# unique(mtcars$LC) follows row order (2, then 1 here), which need not be
# the order in which group_split() returns the pieces.
lc_groups <- mtcars %>% group_split(LC)
lc_keys <- map(lc_groups, function(piece) piece$LC[[1]])

# Run both regressions within every group:
# a list with one element per group, each holding mod1 and mod2
all_regress <- map(lc_groups, function(piece) {
  list(
    mod1 = lm(mpg ~ disp, data = piece),
    mod2 = lm(mpg ~ wt, data = piece)
  )
})
# example <- all_regress[[2]][[1]] %>% glance()

# Build a one-row tibble per model. List-columns are needed because
# tidy(), glance() and augment() each return a different kind of object.
data <- map(all_regress, function(group_models) {
  map(group_models, function(model) {
    tibble(
      mod = list(model),
      tidy = list(broom::tidy(model)),
      glance = list(broom::glance(model)),
      augment = list(broom::augment(model))
    ) %>%
      mutate(
        rsq = list(glance[[1]]$r.squared),
        # second coefficient row = the predictor term
        slope = list(tidy[[1]]$estimate[2])
      )
  })
})

# Label every model row with the LC value of the group it was fitted on
data_final <- map2(data, lc_keys, function(group_models, lc) {
  map(group_models, function(each_model) mutate(each_model, LC = lc))
})

# Stack the model rows within each group, then stack the groups
final_format <- data_final %>%
  map(bind_rows) %>%
  bind_rows()

# Access the data
final_format[1, 1][[1]]