R:在多列上运行 t.test() 并输出 tibble
我有一个如下的数据框R 在多列上运行t.test()以输出TIBLE,r,statistics,dplyr,tidy,R,Statistics,Dplyr,Tidy,我有一个如下的数据框 record_id group enzyme1 enzyme2 ... ... <factor> <dbl> <dbl> ... ... 1 control 34.5 32.3 ... ... 2 control 32.1 34.1 ... ... 3
record_id group enzyme1 enzyme2 ... ...
<factor> <dbl> <dbl> ... ...
1 control 34.5 32.3 ... ...
2 control 32.1 34.1 ... ...
3 treatment 123.1 12.1 ... ...
我想基本上把所有的t测试输出叠加在一起,看起来像这样
estimate statistic p.value parameter conf.low conf.high
enzyme 1 197.7424 0.3706244 0.7119 75.3982 -865.0291 1260.514
enzyme 2 XXX.XX X.xxx 0.XXXX XX.XXXX -XX.XXX XX.XXX
等等
有什么想法吗?
我们可以利用 library(tidyverse) 中的 purrr::map_df(),如下所示:
library(broom)
library(tidyverse) # attaches purrr, dplyr and the %>% pipe

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` can serve as a two-level grouping factor.
data(mtcars)
mtcars2 <- mtcars %>%
  filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the grouping column (`cyl`, column 2) gets a t-test.
cols_not_cyl <- names(mtcars2)[-2]

# Build one `<column> ~ cyl` formula per column. The list is named by column
# so that map_df() can write those names into its id column.
formulas <- set_names(
  map(paste(cols_not_cyl, "~ cyl"), as.formula),
  cols_not_cyl
)

# Run each test, tidy it into a one-row data frame, and stack the rows.
map_df(
  formulas,
  function(f) tidy(t.test(formula = f, data = mtcars2)),
  .id = "column_id"
)
# Run a two-sample t-test of every `enzyme*` column against `group` and
# stack the tidied results, one row per enzyme.
# `funs()` has been deprecated since dplyr 0.8.0 and the `summarise_at()`
# family is superseded, so the per-column function is passed to `across()`.
# Assumes `df` has a two-level `group` column -- TODO confirm against your data.
df %>%
  summarise(
    across(
      starts_with("enzyme"),
      # list() lets each one-row tidy result sit in a single summary cell
      function(enzyme) list(tidy(t.test(enzyme ~ group)))
    )
  ) %>%
  map(1) %>% # unwrap the single list element held in each column
  bind_rows(.id = "enzymes") # column names become the `enzymes` id column
# enzymes estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
#1 enzyme1 -104.3 33.3 137.6 -7.168597 0.08610502 1.013697 -283.37000 74.77000 Welch Two Sample t-test two.sided
#2 enzyme2 19.6 33.2 13.6 11.204574 0.01532388 1.637394 10.22717 28.97283 Welch Two Sample t-test two.sided
library(broom)
library(tidyverse) # purrr is in here
data(mtcars)
# reproducible data to simulate your case
mtcars2 <- ...
我们可以利用 library(tidyverse) 中的 purrr::map_df(),如下所示:
library(broom)
library(tidyverse) # attaches purrr, dplyr and the %>% pipe

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` can serve as a two-level grouping factor.
data(mtcars)
mtcars2 <- mtcars %>%
  filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the grouping column (`cyl`, column 2) gets a t-test.
cols_not_cyl <- names(mtcars2)[-2]

# Build one `<column> ~ cyl` formula per column. The list is named by column
# so that map_df() can write those names into its id column.
formulas <- set_names(
  map(paste(cols_not_cyl, "~ cyl"), as.formula),
  cols_not_cyl
)

# Run each test, tidy it into a one-row data frame, and stack the rows.
map_df(
  formulas,
  function(f) tidy(t.test(formula = f, data = mtcars2)),
  .id = "column_id"
)
# Run a two-sample t-test of every `enzyme*` column against `group` and
# stack the tidied results, one row per enzyme.
# `funs()` has been deprecated since dplyr 0.8.0 and the `summarise_at()`
# family is superseded, so the per-column function is passed to `across()`.
# Assumes `df` has a two-level `group` column -- TODO confirm against your data.
df %>%
  summarise(
    across(
      starts_with("enzyme"),
      # list() lets each one-row tidy result sit in a single summary cell
      function(enzyme) list(tidy(t.test(enzyme ~ group)))
    )
  ) %>%
  map(1) %>% # unwrap the single list element held in each column
  bind_rows(.id = "enzymes") # column names become the `enzymes` id column
# enzymes estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
#1 enzyme1 -104.3 33.3 137.6 -7.168597 0.08610502 1.013697 -283.37000 74.77000 Welch Two Sample t-test two.sided
#2 enzyme2 19.6 33.2 13.6 11.204574 0.01532388 1.637394 10.22717 28.97283 Welch Two Sample t-test two.sided
library(broom)
library(tidyverse) # purrr is in here
data(mtcars)
# reproducible data to simulate your case
mtcars2 <- ...
使用 map() 计算所有检验,然后用 reduce() 把结果按行绑定在一起:
library(broom)
library(dplyr)
library(purrr)

# Simulated data: 50 records randomly assigned to two groups, plus two
# enzyme measurements. Seeded so the example prints the same numbers each run.
set.seed(42)
df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

# Test each enzyme column against `group`. Naming the input vector by itself
# lets bind_rows() label every result row with the column it came from;
# bind_rows() takes the whole list, so no reduce() step is needed.
paste0("enzyme", 1:2) %>%
  set_names() %>%
  map(~ tidy(t.test(as.formula(paste0(.x, "~ group")), data = df))) %>%
  bind_rows(.id = "enzyme")
df%
reduce(bind_rows)
使用 map() 计算所有检验,然后用 reduce() 把结果按行绑定在一起:
library(broom)
library(dplyr)
library(purrr)

# Simulated data: 50 records randomly assigned to two groups, plus two
# enzyme measurements. Seeded so the example prints the same numbers each run.
set.seed(42)
df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

# Test each enzyme column against `group`. Naming the input vector by itself
# lets bind_rows() label every result row with the column it came from;
# bind_rows() takes the whole list, so no reduce() step is needed.
paste0("enzyme", 1:2) %>%
  set_names() %>%
  map(~ tidy(t.test(as.formula(paste0(.x, "~ group")), data = df))) %>%
  bind_rows(.id = "enzyme")
df%
reduce(bind_rows)
您可以创建一个空的 data.frame,然后在循环中使用 rbind() 向其中追加结果。
以下是使用 iris 数据集的示例:
library(broom) # provides tidy()

# t.test() needs exactly two groups: rows 1-99 of iris contain only the
# setosa and versicolor species.
two_species <- iris[1:99, ]

# Every measurement column, i.e. everything except the grouping column.
## change this to whatever column names you have
variable_names <- setdiff(colnames(iris), "Species")

# One tidied test per measurement column. The formula is built from the
# column name so each row reports its own variable (the earlier loop tested
# Petal.Width on every iteration and only changed the label).
test_rows <- lapply(variable_names, function(variableName) {
  test_formula <- reformulate("Species", response = variableName)
  cbind(
    variableName = variableName,
    tidy(t.test(test_formula, data = two_species))
  )
})

# Bind once at the end instead of growing the data frame inside a loop.
df <- do.call(rbind, test_rows)
df=data.frame()
for(i in 1:(length(colnames(iris))-1)){ ##change your length to whatever colnames you have
variableName = colnames(iris)[i] ##loop through the desired colnames
df <- ...
您可以创建一个空的 data.frame,然后在循环中使用 rbind() 向其中追加结果。
以下是使用 iris 数据集的示例:
library(broom) # provides tidy()

# t.test() needs exactly two groups: rows 1-99 of iris contain only the
# setosa and versicolor species.
two_species <- iris[1:99, ]

# Every measurement column, i.e. everything except the grouping column.
## change this to whatever column names you have
variable_names <- setdiff(colnames(iris), "Species")

# One tidied test per measurement column. The formula is built from the
# column name so each row reports its own variable (the earlier loop tested
# Petal.Width on every iteration and only changed the label).
test_rows <- lapply(variable_names, function(variableName) {
  test_formula <- reformulate("Species", response = variableName)
  cbind(
    variableName = variableName,
    tidy(t.test(test_formula, data = two_species))
  )
})

# Bind once at the end instead of growing the data frame inside a loop.
df <- do.call(rbind, test_rows)
df=data.frame()
for(i in 1:(length(colnames(iris))-1)){ ##change your length to whatever colnames you have
variableName = colnames(iris)[i] ##loop through the desired colnames
df <- ...
也可以尝试这样的 tidyverse 方法:
library(broom)
library(tidyverse) # attaches purrr, dplyr and the %>% pipe

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` can serve as a two-level grouping factor.
data(mtcars)
mtcars2 <- mtcars %>%
  filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the grouping column (`cyl`, column 2) gets a t-test.
cols_not_cyl <- names(mtcars2)[-2]

# Build one `<column> ~ cyl` formula per column. The list is named by column
# so that map_df() can write those names into its id column.
formulas <- set_names(
  map(paste(cols_not_cyl, "~ cyl"), as.formula),
  cols_not_cyl
)

# Run each test, tidy it into a one-row data frame, and stack the rows.
map_df(
  formulas,
  function(f) tidy(t.test(formula = f, data = mtcars2)),
  .id = "column_id"
)
# Run a two-sample t-test of every `enzyme*` column against `group` and
# stack the tidied results, one row per enzyme.
# `funs()` has been deprecated since dplyr 0.8.0 and the `summarise_at()`
# family is superseded, so the per-column function is passed to `across()`.
# Assumes `df` has a two-level `group` column -- TODO confirm against your data.
df %>%
  summarise(
    across(
      starts_with("enzyme"),
      # list() lets each one-row tidy result sit in a single summary cell
      function(enzyme) list(tidy(t.test(enzyme ~ group)))
    )
  ) %>%
  map(1) %>% # unwrap the single list element held in each column
  bind_rows(.id = "enzymes") # column names become the `enzymes` id column
# enzymes estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
#1 enzyme1 -104.3 33.3 137.6 -7.168597 0.08610502 1.013697 -283.37000 74.77000 Welch Two Sample t-test two.sided
#2 enzyme2 19.6 33.2 13.6 11.204574 0.01532388 1.637394 10.22717 28.97283 Welch Two Sample t-test two.sided
数据:
df <- ...
也可以尝试像这样的 tidyverse 方法:
library(broom)
library(tidyverse) # attaches purrr, dplyr and the %>% pipe

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` can serve as a two-level grouping factor.
data(mtcars)
mtcars2 <- mtcars %>%
  filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the grouping column (`cyl`, column 2) gets a t-test.
cols_not_cyl <- names(mtcars2)[-2]

# Build one `<column> ~ cyl` formula per column. The list is named by column
# so that map_df() can write those names into its id column.
formulas <- set_names(
  map(paste(cols_not_cyl, "~ cyl"), as.formula),
  cols_not_cyl
)

# Run each test, tidy it into a one-row data frame, and stack the rows.
map_df(
  formulas,
  function(f) tidy(t.test(formula = f, data = mtcars2)),
  .id = "column_id"
)
# Run a two-sample t-test of every `enzyme*` column against `group` and
# stack the tidied results, one row per enzyme.
# `funs()` has been deprecated since dplyr 0.8.0 and the `summarise_at()`
# family is superseded, so the per-column function is passed to `across()`.
# Assumes `df` has a two-level `group` column -- TODO confirm against your data.
df %>%
  summarise(
    across(
      starts_with("enzyme"),
      # list() lets each one-row tidy result sit in a single summary cell
      function(enzyme) list(tidy(t.test(enzyme ~ group)))
    )
  ) %>%
  map(1) %>% # unwrap the single list element held in each column
  bind_rows(.id = "enzymes") # column names become the `enzymes` id column
# enzymes estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
#1 enzyme1 -104.3 33.3 137.6 -7.168597 0.08610502 1.013697 -283.37000 74.77000 Welch Two Sample t-test two.sided
#2 enzyme2 19.6 33.2 13.6 11.204574 0.01532388 1.637394 10.22717 28.97283 Welch Two Sample t-test two.sided
数据:
df <- ...
map() 不是由 dplyr 导出的,您可以改为加载 tidyverse 或 purrr。
尽管 map() 不是由 dplyr 导出的,但是您可以使用 tidyverse 或 purrr 来代替。