使用 broom 从分组线性模型中提取斜率和 R 平方_R_Tidyverse_Broom

使用 broom 从分组线性模型中提取斜率和 R 平方

使用 broom 从分组线性模型中提取斜率和 R 平方,r,tidyverse,broom,R,Tidyverse,Broom,我有一个数据框，我想按组运行线性模型，然后使用 broom 包提取每个模型的斜率和 R 平方。到目前为止，我正在尝试： library(tidyverse) library(broom) #read in the dataset data(mtcars) #add a group variable mtcars <- mtcars %>% as_tibble() %>% mutate(LC = 1) #create a second group mtcars2 <- mtcar

我有一个数据框，我想按组运行线性模型，然后使用 broom 包提取每个模型的斜率和 R 平方。到目前为止，我正在尝试：

library(tidyverse)
library(broom)

# Reproducible example from the question: build a two-group copy of mtcars,
# fit two linear models per LC group, then try to extract the slope and
# R-squared of mod1 with broom.

#read in the dataset
data(mtcars) 

#add a group variable
mtcars <- mtcars %>% as_tibble() %>% mutate(LC = 1)

#create a second group (same rows, relabelled LC = 2)
mtcars2 <- mtcars 
mtcars2 <- mtcars2 %>% mutate(LC = 2)

#bind together
mtcars <- rbind(mtcars, mtcars2)

#groupby and run regressions; each named argument to do() becomes a column
#(mod1, mod2) that holds the fitted lm object for the group
all_regress <-  mtcars %>% group_by(LC) %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .))

#use broom to extract the slope and rsq per group
# NOTE(review): all_regress is used here exactly as group_by() %>% do() left
# it, with no ungroup(). The answer in this thread reports that this mutate()
# only works once `%>% ungroup()` is appended to the do() call -- presumably
# because the result is still processed row by row, so map() walks the pieces
# of a single lm object instead of the list of models. Confirm against the
# dplyr version in use.
# rsq reads 'r.squared' from each glance result; slope takes the second
# row of the tidy estimates (the first row is the intercept).
glance <-all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                   glance = map(mod1, broom::glance),
                                   augment = map(mod1, broom::augment),
                                   rsq = glance %>% map_dbl('r.squared'),
                                   slope = tidy %>% map_dbl(function(x) x$estimate[2]))

如果我不进行分组，按如下方式执行同样的操作：

    # Load the example data and turn it into a tibble.
    data(mtcars)
    mtcars <- as_tibble(mtcars)

    # Fit both models on the whole, ungrouped data set. Each named argument
    # to do() becomes a column holding the fitted lm object.
    all_regress <- do(
      mtcars,
      mod1 = lm(mpg ~ disp, data = .),
      mod2 = lm(mpg ~ wt, data = .)
    )

    # Summarise mod1 with broom, then pull R-squared out of the glance
    # result and the slope (second coefficient row) out of the tidy result.
    glance <- mutate(
      all_regress,
      tidy = map(mod1, broom::tidy),
      glance = map(mod1, broom::glance),
      augment = map(mod1, broom::augment),
      rsq = map_dbl(glance, 'r.squared'),
      slope = map_dbl(tidy, function(coefs) coefs$estimate[2])
    )

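#读入数据集
data(mtcars)

mtcars <- mtcars %>% as_tibble()

#运行回归
all_regress <- mtcars %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .))

#使用 broom 提取各组的斜率和 rsq
glance <- all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                 glance = map(mod1, broom::glance),
                                 augment = map(mod1, broom::augment),
                                 rsq = glance %>% map_dbl('r.squared'),
                                 slope = tidy %>% map_dbl(function(x) x$estimate[2]))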

则不会报错。

我认为只需在 do() 之后添加 `ungroup()` 即可满足您的需要：

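all_regress <- mtcars %>% group_by(LC) %>%
  do(mod1 = lm(mpg ~ disp, data = .),
     mod2 = lm(mpg ~ wt, data = .)) %>% ungroup()

#使用 broom 提取各组的斜率和 rsq
glance <- all_regress %>% mutate(tidy = map(mod1, broom::tidy),
                                 glance = map(mod1, broom::glance),
                                 augment = map(mod1, broom::augment),
                                 rsq = glance %>% map_dbl('r.squared'),
                                 slope = tidy %>% map_dbl(function(x) x$estimate[2]))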

我使用了下面这种方法，代码更长，但我认为它能让我对每个步骤有更多的控制。最后，我创建了一个 tibble，其中每个模型的结果都保存在列表列中。

library(tidyverse)
library(broom)

# Fit two linear models per LC group and collect, for every model, its broom
# summaries plus the slope and R-squared in one tibble with list-columns.

#read in the dataset
data(mtcars)

#add a group variable (the dropped columns are not used by either model)
mtcars <- mtcars %>%
  as_tibble() %>%
  dplyr::select(-c(vs, am, gear, carb, cyl)) %>%
  mutate(LC = 1)

#create a second group: the same rows labelled LC = 2
mtcars2 <- mtcars %>% mutate(LC = 2)

#bind together (the LC = 2 rows come first)
mtcars <- bind_rows(mtcars2, mtcars)

#group_split and run regressions
# group_split() returns the groups ordered by LC value, not by first
# appearance in the rows. Each group's label is therefore read from its own
# data; unique(mtcars$LC) would give 2, 1 here and swap the labels.
lc_groups <- mtcars %>% group_split(LC)
lc_values <- map_dbl(lc_groups, function(group_data) group_data$LC[1])

all_regress <- lc_groups %>%
  map(function(group_data) {
    list(mod1 = lm(mpg ~ disp, data = group_data),
         mod2 = lm(mpg ~ wt, data = group_data))
  })


# example <- all_regress[[2]][[1]] %>% glance()
#the list has 2 levels with 2 models each
data <- all_regress %>%
  map(function(group_models) {
    map(group_models, function(model) {
      #list columns are needed because each broom function returns a
      #different kind of object
      tibble(mod = list(model),
             tidy = list(broom::tidy(model)),
             glance = list(broom::glance(model)),
             augment = list(broom::augment(model))) %>%
        mutate(
          rsq = list(glance[[1]]$r.squared),
          #second coefficient row is the slope (the first is the intercept)
          slope = list(tidy[[1]]$estimate[2]))
    })
  })

#tag every one-row model tibble with the LC value of the group it came from
data_final <- map2(data, lc_values, function(group_models, lc) {
  map(group_models, function(each_model) {
    mutate(each_model, LC = lc)
  })
})

#bind the model tibbles within each group, then bind the groups together
final_format <- data_final %>%
  map(bind_rows) %>%
  bind_rows()


#access the data: the fitted model stored in the first row
final_format[1, 1][[1]]

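library(tidyverse)
library(broom)

#读入数据集
data(mtcars)

#添加组变量
mtcars <- mtcars %>% as_tibble() %>% dplyr::select(-c(vs, am, gear, carb, cyl)) %>% mutate(LC = 1)

#创建第二个组
mtcars2 <- mtcars
mtcars2 <- mtcars2 %>% mutate(LC = 2)

#合并
mtcars <- bind_rows(mtcars2, mtcars)

#group_split 并运行回归
all_regress <- mtcars %>% group_split(LC) %>%
  map(~ list(mod1 = lm(mpg ~ disp, data = .),
             mod2 = lm(mpg ~ wt, data = .)))

#该列表有 2 层，每层包含 2 个模型
data <- all_regress %>%
  map(~
        map(.x, function(model){
          #由于每个函数输出的对象不同，因此需要使用列表列
          tibble(mod = list(model),
                 tidy = list(broom::tidy(model)),
                 glance = list(broom::glance(model)),
                 augment = list(broom::augment(model))) %>%
            mutate(
              rsq = list(glance[[1]]$r.squared),
              slope = list(tidy[[1]]$estimate[2]))
        } ))

data_final <-
  data %>% map2(unique(mtcars$LC), ~
                  map2(.x, .y, function(each_model, lc){
                    mutate(each_model, LC = lc)
                  }))

#由于是嵌套列表结构，需要先合并每一层中的两个数据集，然后再整体合并
final_format <-
  map(data_final, ~reduce(.x, rbind)) %>% reduce(rbind)

#访问数据
final_format[1, 1][[1]]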

我可以理解为什么在定义 `all_regress` 时需要分组，但在定义 `glance` 时仍然保持分组似乎是多余的。既然 `all_regress` 中的每一行都代表一个不同的组，那么分组在 `glance` 中起到了什么作用？

你说得可能很有道理，我对 broom 以及把列表用作行值还相当陌生。也许有更好的方法来实现我的目标？我绝不局限于我在这里展示的方法。

library(tidyverse)
library(broom)

# Fit two linear models per LC group and collect, for every model, its broom
# summaries plus the slope and R-squared in one tibble with list-columns.

#read in the dataset
data(mtcars)

#add a group variable (the dropped columns are not used by either model)
mtcars <- mtcars %>%
  as_tibble() %>%
  dplyr::select(-c(vs, am, gear, carb, cyl)) %>%
  mutate(LC = 1)

#create a second group: the same rows labelled LC = 2
mtcars2 <- mtcars %>% mutate(LC = 2)

#bind together (the LC = 2 rows come first)
mtcars <- bind_rows(mtcars2, mtcars)

#group_split and run regressions
# group_split() returns the groups ordered by LC value, not by first
# appearance in the rows. Each group's label is therefore read from its own
# data; unique(mtcars$LC) would give 2, 1 here and swap the labels.
lc_groups <- mtcars %>% group_split(LC)
lc_values <- map_dbl(lc_groups, function(group_data) group_data$LC[1])

all_regress <- lc_groups %>%
  map(function(group_data) {
    list(mod1 = lm(mpg ~ disp, data = group_data),
         mod2 = lm(mpg ~ wt, data = group_data))
  })


# example <- all_regress[[2]][[1]] %>% glance()
#the list has 2 levels with 2 models each
data <- all_regress %>%
  map(function(group_models) {
    map(group_models, function(model) {
      #list columns are needed because each broom function returns a
      #different kind of object
      tibble(mod = list(model),
             tidy = list(broom::tidy(model)),
             glance = list(broom::glance(model)),
             augment = list(broom::augment(model))) %>%
        mutate(
          rsq = list(glance[[1]]$r.squared),
          #second coefficient row is the slope (the first is the intercept)
          slope = list(tidy[[1]]$estimate[2]))
    })
  })

#tag every one-row model tibble with the LC value of the group it came from
data_final <- map2(data, lc_values, function(group_models, lc) {
  map(group_models, function(each_model) {
    mutate(each_model, LC = lc)
  })
})

#bind the model tibbles within each group, then bind the groups together
final_format <- data_final %>%
  map(bind_rows) %>%
  bind_rows()


#access the data: the fitted model stored in the first row
final_format[1, 1][[1]]