R:以分类变量的每个水平分别作为因变量,运行一系列回归
我想检验性别(是否为女性)如何影响出院日。为此,我想运行一系列回归:第一个模型的因变量在出院日为周一时取 1,否则取 0;第二个模型的因变量在出院日为周二时取 1,否则取 0;依此类推。目前,星期几存储在名为 `wkday` 的分类变量中。
标签:r, for-loop, tidymodels
例如,如何在 for 循环中或使用 tidymodels 快速实现这一点?这是我到目前为止所写的代码:
# libraries:
library(tidyr)
library(dplyr)
# create dataset:
set.seed(2021) # fix the RNG so the simulated data is reproducible
id <- seq_len(1000)
wkdays <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)
wkday <- sample(wkdays, 1000, replace = TRUE)
female <- sample(0:1, 1000, replace = TRUE)
dta <- data.frame(id = id, wkday = wkday, female = female)

# One 0/1 indicator column per weekday: 1 if the discharge fell on that day.
# Names are the indicator column names, values are the matching `wkday` labels.
day_cols <- c(
  mon = "Monday", tues = "Tuesday", wed = "Wednesday", thurs = "Thursday",
  fri = "Friday", sat = "Saturday", sun = "Sunday"
)
for (day_col in names(day_cols)) {
  dta[[day_col]] <- as.numeric(dta$wkday == day_cols[[day_col]])
}

# Model: one logistic regression of each weekday indicator on `female`.
# The result is a named list with one fitted glm per indicator column.
models <- lapply(
  setNames(names(day_cols), names(day_cols)),
  function(day_col) {
    glm(
      reformulate("female", response = day_col),
      data = dta,
      family = "binomial"
    )
  }
)

# Keep the individually named fits that the original script exposed.
mon <- models[["mon"]]
tues <- models[["tues"]]

# Summaries for all seven models (replaces the hand-written, elided calls).
lapply(models, summary)
#库:
library(tidyr)
library(dplyr)
#创建数据集:
也许可以参考下面的回答。
首先,没有必要逐个手工创建虚拟变量(dummy variables),`model.matrix`
正是为此而设计的。
library(tidyr)
library(dplyr)
library(purrr)
library(broom)
# create dataset:
set.seed(2021)
id <- 1:1000
wkdays <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)
wkday <- sample(wkdays, 1000, replace = TRUE)
female <- sample(0:1, 1000, replace = TRUE)
dta <- data.frame(id = id, wkday = wkday, female = female)

# Build all weekday indicator columns at once; `~ 0 + wkday` drops the
# intercept so that every level of `wkday` gets its own 0/1 column.
tmp <- model.matrix(~ 0 + wkday, dta)
# Strip only the leading "wkday" prefix that model.matrix() puts on each
# column name, leaving the plain weekday labels.
colnames(tmp) <- sub("^wkday", "", colnames(tmp))
# Reshape to one row per (id, weekday) pair, then fit a separate linear
# model of the 0/1 indicator on `female` for each weekday and return the
# tidied coefficient table.
cbind(dta, tmp) %>%
  select(-wkday) %>%
  pivot_longer(
    cols = -c(id, female),
    names_to = "wkday",
    values_to = "dummy"
  ) %>%
  group_by(wkday) %>%
  # group_modify() replaces the superseded do(); the result is still
  # grouped by wkday, with the group key as the first column.
  group_modify(function(day_data, day_key) {
    tidy(lm(dummy ~ female, data = day_data))
  })
## A tibble: 14 x 6
## Groups: wkday [7]
# wkday term estimate std.error statistic p.value
# <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 Friday (Intercept) 0.128 0.0155 8.26 4.55e-16
# 2 Friday female 0.0226 0.0219 1.03 3.03e- 1
# 3 Monday (Intercept) 0.152 0.0163 9.30 8.52e-20
# 4 Monday female 0.0126 0.0231 0.547 5.84e- 1
# 5 Saturday (Intercept) 0.170 0.0163 10.4 3.78e-24
# 6 Saturday female -0.0234 0.0231 -1.01 3.12e- 1
# 7 Sunday (Intercept) 0.138 0.0156 8.82 4.88e-18
# 8 Sunday female 0.00857 0.0221 0.388 6.98e- 1
# 9 Thursday (Intercept) 0.128 0.0157 8.14 1.13e-15
#10 Thursday female 0.0326 0.0222 1.47 1.43e- 1
#11 Tuesday (Intercept) 0.138 0.0145 9.49 1.63e-20
#12 Tuesday female -0.0355 0.0205 -1.73 8.41e- 2
#13 Wednesday (Intercept) 0.148 0.0155 9.55 9.66e-21
#14 Wednesday female -0.0174 0.0219 -0.797 4.26e- 1
# Same per-weekday linear models as before, but keep only the `female`
# coefficient row of each fit by dropping the intercept rows.
cbind(dta, tmp) %>%
  select(-wkday) %>%
  pivot_longer(
    cols = -c(id, female),
    names_to = "wkday",
    values_to = "dummy"
  ) %>%
  group_by(wkday) %>%
  # group_modify() replaces the superseded do(); the result is still
  # grouped by wkday, with the group key as the first column.
  group_modify(function(day_data, day_key) {
    tidy(lm(dummy ~ female, data = day_data))
  }) %>%
  filter(term != "(Intercept)")
## A tibble: 7 x 6
## Groups: wkday [7]
# wkday term estimate std.error statistic p.value
# <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 Friday female 0.0226 0.0219 1.03 0.303
#2 Monday female 0.0126 0.0231 0.547 0.584
#3 Saturday female -0.0234 0.0231 -1.01 0.312
#4 Sunday female 0.00857 0.0221 0.388 0.698
#5 Thursday female 0.0326 0.0222 1.47 0.143
#6 Tuesday female -0.0355 0.0205 -1.73 0.0841
#7 Wednesday female -0.0174 0.0219 -0.797 0.426
# Single-pipeline variant: the indicator matrix is built inline and bound to
# `dta`, so no intermediate `tmp` object is needed.
dta %>%
  bind_cols(
    model.matrix(~ 0 + wkday, dta) %>% as.data.frame()
  ) %>%
  select(-wkday) %>%
  pivot_longer(
    cols = -c(id, female),
    names_to = "wkday",
    values_to = "dummy"
  ) %>%
  # The indicator columns still carry the "wkday" prefix added by
  # model.matrix(); strip it to recover the plain weekday labels.
  mutate(wkday = sub("^wkday", "", wkday)) %>%
  group_by(wkday) %>%
  # group_modify() replaces the superseded do(); the result is still
  # grouped by wkday, with the group key as the first column.
  group_modify(function(day_data, day_key) {
    tidy(lm(dummy ~ female, data = day_data))
  })