Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/79.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在GLM拟合R之前组织因子变量_R_Function - Fatal编程技术网

在R中拟合GLM之前整理因子变量

在GLM拟合R之前组织因子变量,r,function,R,Function,我在R中使用GLMs做了大量工作,处理相当大的数据集(通常在模型拟合中包含几十个变量)。为了能够在拟合模型后生成某种图形输出,我发现在拟合模型之前“准备”任何我打算作为因子拟合的变量(名称以f_u开头的变量)是很有用的。我的意思是: (i) 由于我在拟合GLM之前对每个因子进行了重新排序,使参考级别等于权重最大的级别,因此我希望在执行relevel()命令之前保留级别顺序 (ii)为了在后面的图表中突出显示参考水平,我喜欢在单独的变量中记录它 我使用内置的mtcarsdataset将这种方法的一

我在R中使用GLMs做了大量工作,处理相当大的数据集(通常在模型拟合中包含几十个变量)。为了能够在拟合模型后生成某种图形输出,我发现在拟合模型之前“准备”任何我打算作为因子拟合的变量(名称以
f_
开头的变量)是很有用的。我的意思是:

(i) 由于我在拟合GLM之前对每个因子进行了重新排序,使参考级别等于权重最大的级别,因此我希望在执行
relevel()
命令之前保留级别顺序

(ii)为了在后面的图表中突出显示参考水平,我喜欢在单独的变量中记录它

我使用内置的
mtcars
数据集整理了一个演示这种方法的示例。

目前我有以下代码:

library(dplyr)

data(mtcars)

# Work on a renamed copy of the built-in mtcars data. Columns intended to be
# fitted as factors carry the f_ prefix.
df <- select(
  mtcars,
  mpg,
  f_cylinders = cyl,
  c_displacement = disp,
  c_hp = hp,
  c_final_drive_ratio = drat,
  c_weight = wt,
  c_qtr_mile_time = qsec,
  f_v_or_straight = vs,
  f_transmission = am,
  f_gears = gear,
  f_num_carbs = carb
)

# Replace the 0/1 codes with readable labels.
df[["f_v_or_straight"]] <- ifelse(
  df[["f_v_or_straight"]] == 0, "V", "Straight"
)
df[["f_transmission"]] <- ifelse(
  df[["f_transmission"]] == 0, "Automatic", "Manual"
)

# Every row gets the same GLM weight in this example.
df[["glm_weight"]] <- 1

# organise factors - levels, reference level, weights
#
# For every factor column this stores three entries in `my_list`:
#   <col>_levels  - the level order before releveling
#   <col>_weights - total glm_weight per level, in that same level order
#   <col>_ref     - the level used as the reference level
# and then moves the reference level to the front of the factor.

# Reference level for each factor column (chosen by hand; the intent is the
# level carrying the largest total weight).
factor_refs <- c(
  f_cylinders = "8",
  f_v_or_straight = "V",
  f_transmission = "Automatic",
  f_gears = "3",
  f_num_carbs = "4"
)

my_list <- list()

for (col in names(factor_refs)) {
  df[[col]] <- as.factor(df[[col]])

  # Captured before relevel() so the original level order is retained.
  my_list[[paste0(col, "_levels")]] <- levels(df[[col]])

  # split() yields one group per factor level (an empty level sums to 0), so
  # the weights always line up one-to-one with the levels recorded above.
  my_list[[paste0(col, "_weights")]] <- unname(
    vapply(split(df[["glm_weight"]], df[[col]]), sum, numeric(1))
  )

  my_list[[paste0(col, "_ref")]] <- factor_refs[[col]]
  df[[col]] <- relevel(df[[col]], ref = factor_refs[[col]])
}
library(dplyr)
data(mtcars)
# tidy up and make easier to read

df <- mtcars # built in data set

考虑用一个自定义函数来概括您的
relevel
流程,然后使用
purrr::map_df
映射调用流程的
f_
列(与tidy一致):

数据

library(dplyr)
library(purrr)

# Build the example data from mtcars: rename the columns (f_ marks the
# variables to be fitted as factors), label the 0/1 codes and add a constant
# GLM weight.
df <- mtcars %>%
  rename(
    f_cylinders = cyl,
    c_displacement = disp,
    c_hp = hp,
    c_final_drive_ratio = drat,
    c_weight = wt,
    c_qtr_mile_time = qsec,
    f_v_or_straight = vs,
    f_transmission = am,
    f_gears = gear,
    f_num_carbs = carb
  ) %>%
  mutate(
    f_v_or_straight = ifelse(f_v_or_straight == 0, "V", "Straight"),
    f_transmission = ifelse(f_transmission == 0, "Automatic", "Manual"),
    glm_weight = 1
  )
查看更改

# ORIGINAL LEVELS
# Treat every f_ column as a factor and list its levels, one list entry per
# column. The printed result is reproduced in the comments below.
df %>%
  select(starts_with("f_")) %>%
  map(function(column) levels(as.factor(column)))
# $f_cylinders
# [1] "4" "6" "8"
#
# $f_v_or_straight
# [1] "Straight" "V"
#
# $f_transmission
# [1] "Automatic" "Manual"
#
# $f_gears
# [1] "3" "4" "5"
#
# $f_num_carbs
# [1] "1" "2" "3" "4" "6" "8"


# ADJUSTED LEVELS
# Same computation as for the original levels. The different output below
# implies the f_ columns of `df` were already releveled factors at this point
# (as.factor() leaves the level order of an existing factor untouched).
# NOTE(review): the step that relevels `df` is not visible in this snippet.
df %>%
  select(starts_with("f_")) %>%
  map(function(column) levels(as.factor(column)))
# $f_cylinders
# [1] "8" "4" "6"
#
# $f_v_or_straight
# [1] "V"        "Straight"
#
# $f_transmission
# [1] "Automatic" "Manual"
#
# $f_gears
# [1] "3" "4" "5"
#
# $f_num_carbs
# [1] "2" "1" "3" "4" "6" "8"

考虑以用户定义的方法概括您的
relevel
流程,然后使用
purrr::map_df
映射调用流程的
f_
列(与tidy一致):

数据

library(dplyr)
library(purrr)

# Build the example data from mtcars: rename the columns (f_ marks the
# variables to be fitted as factors), label the 0/1 codes and add a constant
# GLM weight.
df <- mtcars %>%
  rename(
    f_cylinders = cyl,
    c_displacement = disp,
    c_hp = hp,
    c_final_drive_ratio = drat,
    c_weight = wt,
    c_qtr_mile_time = qsec,
    f_v_or_straight = vs,
    f_transmission = am,
    f_gears = gear,
    f_num_carbs = carb
  ) %>%
  mutate(
    f_v_or_straight = ifelse(f_v_or_straight == 0, "V", "Straight"),
    f_transmission = ifelse(f_transmission == 0, "Automatic", "Manual"),
    glm_weight = 1
  )
查看更改

# ORIGINAL LEVELS
# Treat every f_ column as a factor and list its levels, one list entry per
# column. The printed result is reproduced in the comments below.
df %>%
  select(starts_with("f_")) %>%
  map(function(column) levels(as.factor(column)))
# $f_cylinders
# [1] "4" "6" "8"
#
# $f_v_or_straight
# [1] "Straight" "V"
#
# $f_transmission
# [1] "Automatic" "Manual"
#
# $f_gears
# [1] "3" "4" "5"
#
# $f_num_carbs
# [1] "1" "2" "3" "4" "6" "8"


# ADJUSTED LEVELS
# Same computation as for the original levels. The different output below
# implies the f_ columns of `df` were already releveled factors at this point
# (as.factor() leaves the level order of an existing factor untouched).
# NOTE(review): the step that relevels `df` is not visible in this snippet.
df %>%
  select(starts_with("f_")) %>%
  map(function(column) levels(as.factor(column)))
# $f_cylinders
# [1] "8" "4" "6"
#
# $f_v_or_straight
# [1] "V"        "Straight"
#
# $f_transmission
# [1] "Automatic" "Manual"
#
# $f_gears
# [1] "3" "4" "5"
#
# $f_num_carbs
# [1] "2" "1" "3" "4" "6" "8"

不,您计算权重了吗?还是说这与最高频率相同?如果是这样,为什么不按频率降序重新排列因子级别?

在这个例子中我使用的是相等的权重,所以可以用 summarise(glm_weight = n()) 代替 summarise(glm_weight = sum(glm_weight))——但通常我的权重并不相等。

非常感谢。我会在接下来的一两天内研究一下。