R 在多列上运行 t.test() 并输出 tibble_R_Statistics_Dplyr_Tidy

R 在多列上运行 t.test() 并输出 tibble

r statistics

R 在多列上运行t.test（）以输出TIBLE,r,statistics,dplyr,tidy,R,Statistics,Dplyr,Tidy,我有一个如下的数据框 record_id group enzyme1 enzyme2 ... ... <factor> <dbl> <dbl> ... ... 1 control 34.5 32.3 ... ... 2 control 32.1 34.1 ... ... 3

我有一个如下的数据框

record_id   group      enzyme1     enzyme2  ... ... 
            <factor>   <dbl>       <dbl>    ... ... 
1           control    34.5        32.3     ... ...
2           control    32.1        34.1     ... ...
3           treatment  123.1       12.1     ... ...

我想把所有 t 检验的输出逐行叠放在一起，看起来像这样

              estimate   statistic  p.value  parameter  conf.low   conf.high
enzyme 1      197.7424   0.3706244  0.7119  75.3982  -865.0291  1260.514
enzyme 2      XXX.XX     X.xxx      0.XXXX  XX.XXXX  -XX.XXX    XX.XXX

等等

有什么想法吗

我们可以利用 library(tidyverse) 中的 purrr::map_df()，如下所示：

# broom supplies tidy(); the tidyverse supplies filter(), map(), set_names()
# and map_df()
library(broom)
library(tidyverse)
data(mtcars)

# Simulated two-group data: keep only the 4- and 6-cylinder cars and turn
# `cyl` into a factor so it can serve as the grouping variable
mtcars2 <- mtcars %>% filter(cyl %in% c(4, 6))
mtcars2[["cyl"]] <- as.factor(mtcars2[["cyl"]])

# Columns to test: every column except the second one (`cyl`)
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per tested column, named after that column so
# map_df() can label the output rows
formulas <- cols_not_cyl %>%
  paste("~ cyl") %>%
  map(as.formula) %>%
  set_names(cols_not_cyl)

# Run every t-test, tidy each result into a one-row data frame and stack them;
# the list names are written to the `column_id` column
map_df(
  formulas,
  function(test_formula) tidy(t.test(formula = test_formula, data = mtcars2)),
  .id = "column_id"
)

# Run a t-test of every `enzyme*` column against `group` and stack the tidied
# results, one row per enzyme column.
# NOTE(review): assumes `df` has a two-level `group` column and that dplyr,
# purrr and broom are attached — confirm against the data definition.
df %>%
  # across() replaces the deprecated funs() / superseded summarise_at(); each
  # tidied result is wrapped in list() so it fits into a single cell
  summarise(
    across(starts_with("enzyme"), ~ list(tidy(t.test(.x ~ group))))
  ) %>%
  # pull the one-row data frame out of each list-column ...
  map(1) %>%
  # ... and bind them, storing the source column name in `enzymes`
  bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided

library(broom)
library(tidyverse) # purrr 包含在其中
data(mtcars)
# 用可复现的数据来模拟您的情况
mtcars2 <- …（代码在此处被截断）

我们可以利用 library(tidyverse) 中的 purrr::map_df()，如下所示：
# broom supplies tidy(); the tidyverse supplies filter(), map(), set_names()
# and map_df()
library(broom)
library(tidyverse)
data(mtcars)

# Simulated two-group data: keep only the 4- and 6-cylinder cars and turn
# `cyl` into a factor so it can serve as the grouping variable
mtcars2 <- mtcars %>% filter(cyl %in% c(4, 6))
mtcars2[["cyl"]] <- as.factor(mtcars2[["cyl"]])

# Columns to test: every column except the second one (`cyl`)
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per tested column, named after that column so
# map_df() can label the output rows
formulas <- cols_not_cyl %>%
  paste("~ cyl") %>%
  map(as.formula) %>%
  set_names(cols_not_cyl)

# Run every t-test, tidy each result into a one-row data frame and stack them;
# the list names are written to the `column_id` column
map_df(
  formulas,
  function(test_formula) tidy(t.test(formula = test_formula, data = mtcars2)),
  .id = "column_id"
)

# Run a t-test of every `enzyme*` column against `group` and stack the tidied
# results, one row per enzyme column.
# NOTE(review): assumes `df` has a two-level `group` column and that dplyr,
# purrr and broom are attached — confirm against the data definition.
df %>%
  # across() replaces the deprecated funs() / superseded summarise_at(); each
  # tidied result is wrapped in list() so it fits into a single cell
  summarise(
    across(starts_with("enzyme"), ~ list(tidy(t.test(.x ~ group))))
  ) %>%
  # pull the one-row data frame out of each list-column ...
  map(1) %>%
  # ... and bind them, storing the source column name in `enzymes`
  bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided

library(broom)
library(tidyverse) # purrr 包含在其中
data(mtcars)
# 用可复现的数据来模拟您的情况
mtcars2 <- …（代码在此处被截断）

使用 map() 计算所有 t 检验，然后用 reduce(bind_rows) 把结果合并起来：
 # Example data: 50 records randomly assigned to "control" or "treatment",
# with two normally distributed enzyme measurements
df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

library(broom)
library(dplyr)
library(purrr)

# Columns to test against `group`
enzyme_cols <- paste0("enzyme", 1:2)

# Run one t-test per enzyme column, tidy each result into a one-row data
# frame, and tag it with the column it came from so the stacked rows can be
# told apart
t_tests <- map(enzyme_cols, function(col) {
  test_formula <- as.formula(paste0(col, " ~ group"))
  tidy(t.test(test_formula, data = df)) %>%
    mutate(enzyme = col) %>%
    select(enzyme, everything())
}) %>%
  reduce(bind_rows)

t_tests

df <- …（代码在此处被截断）
reduce(bind_rows)

使用 map() 计算所有 t 检验，然后用 reduce(bind_rows) 把结果合并起来：
 # Example data: 50 records randomly assigned to "control" or "treatment",
# with two normally distributed enzyme measurements
df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

library(broom)
library(dplyr)
library(purrr)

# Columns to test against `group`
enzyme_cols <- paste0("enzyme", 1:2)

# Run one t-test per enzyme column, tidy each result into a one-row data
# frame, and tag it with the column it came from so the stacked rows can be
# told apart
t_tests <- map(enzyme_cols, function(col) {
  test_formula <- as.formula(paste0(col, " ~ group"))
  tidy(t.test(test_formula, data = df)) %>%
    mutate(enzyme = col) %>%
    select(enzyme, everything())
}) %>%
  reduce(bind_rows)

t_tests

df <- …（代码在此处被截断）
reduce(bind_rows)

您可以创建一个空的 data.frame，然后在循环中使用 rbind() 把结果逐行添加进去。
以下是使用 iris 数据集的示例：
library(broom) # provides tidy()

# Rows 1:99 contain only two species (setosa, versicolor); t.test() needs
# exactly two groups
iris_two <- iris[1:99, ]

# Test every column except the grouping column; adapt to your own columns
measure_cols <- setdiff(colnames(iris_two), "Species")

# Start from an empty data frame and append one tidied test per column
df <- data.frame()
for (variableName in measure_cols) {
  # Build `<column> ~ Species` from the current column name so that each
  # iteration tests its own column rather than a fixed one
  test_formula <- reformulate("Species", response = variableName)

  df <- rbind(
    df,
    cbind(
      variableName = variableName,
      tidy(t.test(test_formula, data = iris_two))
    )
  )
}

df=data.frame()
for(i in 1:(length(colnames(iris))-1)){ ## 将长度改为您实际拥有的列数
variableName = colnames(iris)[i] ## 循环遍历所需的列名
df <- …（代码在此处被截断）

您可以创建一个空的 data.frame，然后在循环中使用 rbind() 把结果逐行添加进去。
以下是使用 iris 数据集的示例：
library(broom) # provides tidy()

# Rows 1:99 contain only two species (setosa, versicolor); t.test() needs
# exactly two groups
iris_two <- iris[1:99, ]

# Test every column except the grouping column; adapt to your own columns
measure_cols <- setdiff(colnames(iris_two), "Species")

# Start from an empty data frame and append one tidied test per column
df <- data.frame()
for (variableName in measure_cols) {
  # Build `<column> ~ Species` from the current column name so that each
  # iteration tests its own column rather than a fixed one
  test_formula <- reformulate("Species", response = variableName)

  df <- rbind(
    df,
    cbind(
      variableName = variableName,
      tidy(t.test(test_formula, data = iris_two))
    )
  )
}

df=data.frame()
for(i in 1:(length(colnames(iris))-1)){ ## 将长度改为您实际拥有的列数
variableName = colnames(iris)[i] ## 循环遍历所需的列名
df <- …（代码在此处被截断）

也可以尝试这样的 tidyverse 方法：
# broom supplies tidy(); the tidyverse supplies filter(), map(), set_names()
# and map_df()
library(broom)
library(tidyverse)
data(mtcars)

# Simulated two-group data: keep only the 4- and 6-cylinder cars and turn
# `cyl` into a factor so it can serve as the grouping variable
mtcars2 <- mtcars %>% filter(cyl %in% c(4, 6))
mtcars2[["cyl"]] <- as.factor(mtcars2[["cyl"]])

# Columns to test: every column except the second one (`cyl`)
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per tested column, named after that column so
# map_df() can label the output rows
formulas <- cols_not_cyl %>%
  paste("~ cyl") %>%
  map(as.formula) %>%
  set_names(cols_not_cyl)

# Run every t-test, tidy each result into a one-row data frame and stack them;
# the list names are written to the `column_id` column
map_df(
  formulas,
  function(test_formula) tidy(t.test(formula = test_formula, data = mtcars2)),
  .id = "column_id"
)

# Run a t-test of every `enzyme*` column against `group` and stack the tidied
# results, one row per enzyme column.
# NOTE(review): assumes `df` has a two-level `group` column and that dplyr,
# purrr and broom are attached — confirm against the data definition.
df %>%
  # across() replaces the deprecated funs() / superseded summarise_at(); each
  # tidied result is wrapped in list() so it fits into a single cell
  summarise(
    across(starts_with("enzyme"), ~ list(tidy(t.test(.x ~ group))))
  ) %>%
  # pull the one-row data frame out of each list-column ...
  map(1) %>%
  # ... and bind them, storing the source column name in `enzymes`
  bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided


数据：
df <- …（数据定义在此处被截断）

也可以尝试像这样的 tidyverse 方法：
# broom supplies tidy(); the tidyverse supplies filter(), map(), set_names()
# and map_df()
library(broom)
library(tidyverse)
data(mtcars)

# Simulated two-group data: keep only the 4- and 6-cylinder cars and turn
# `cyl` into a factor so it can serve as the grouping variable
mtcars2 <- mtcars %>% filter(cyl %in% c(4, 6))
mtcars2[["cyl"]] <- as.factor(mtcars2[["cyl"]])

# Columns to test: every column except the second one (`cyl`)
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per tested column, named after that column so
# map_df() can label the output rows
formulas <- cols_not_cyl %>%
  paste("~ cyl") %>%
  map(as.formula) %>%
  set_names(cols_not_cyl)

# Run every t-test, tidy each result into a one-row data frame and stack them;
# the list names are written to the `column_id` column
map_df(
  formulas,
  function(test_formula) tidy(t.test(formula = test_formula, data = mtcars2)),
  .id = "column_id"
)

# Run a t-test of every `enzyme*` column against `group` and stack the tidied
# results, one row per enzyme column.
# NOTE(review): assumes `df` has a two-level `group` column and that dplyr,
# purrr and broom are attached — confirm against the data definition.
df %>%
  # across() replaces the deprecated funs() / superseded summarise_at(); each
  # tidied result is wrapped in list() so it fits into a single cell
  summarise(
    across(starts_with("enzyme"), ~ list(tidy(t.test(.x ~ group))))
  ) %>%
  # pull the one-row data frame out of each list-column ...
  map(1) %>%
  # ... and bind them, storing the source column name in `enzymes`
  bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided


数据：
df <- …（数据定义在此处被截断）

map() 不是由 dplyr 导出的，不过您可以改为加载 tidyverse 或 purrr。
map() 不是由 dplyr 导出的，但是您可以改为加载 tidyverse 或 purrr。