Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/80.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 在多列上运行t.test()以输出TIBLE_R_Statistics_Dplyr_Tidy - Fatal编程技术网

R 在多列上运行t.test()以输出tibble

R 在多列上运行t.test()以输出TIBLE,r,statistics,dplyr,tidy,R,Statistics,Dplyr,Tidy,我有一个如下的数据框 record_id group enzyme1 enzyme2 ... ... <factor> <dbl> <dbl> ... ... 1 control 34.5 32.3 ... ... 2 control 32.1 34.1 ... ... 3

我有一个如下的数据框

record_id   group      enzyme1     enzyme2  ... ... 
            <factor>   <dbl>       <dbl>    ... ... 
1           control    34.5        32.3     ... ...
2           control    32.1        34.1     ... ...
3           treatment  123.1       12.1     ... ... 
我想把所有t检验的输出叠加在一起,看起来像这样

              estimate   statistic  p.value  parameter  conf.low   conf.high
enzyme 1      197.7424   0.3706244  0.7119  75.3982  -865.0291  1260.514
enzyme 2      XXX.XX     X.xxx      0.XXXX  XX.XXXX  -XX.XXX    XX.XXX 
等等


有什么想法吗

我们可以利用
library(tidyverse)
中的
purrr::map_df()
,如下所示:

library(broom)
library(tidyverse) # attaches purrr and dplyr
data(mtcars)

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` ends up as a two-level grouping factor.
mtcars2 <- mtcars %>%
    filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the second one (`cyl`) is a response to test.
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per response column, named after the column
# so that map_df() can label each result row.
formulas <- set_names(
    map(paste(cols_not_cyl, "~ cyl"), as.formula),
    cols_not_cyl
)

# Run each t-test, tidy it into a one-row data frame, and stack the rows;
# `.id` stores the name of each formula in the `column_id` column.
map_df(
    formulas,
    function(f) tidy(t.test(formula = f, data = mtcars2)),
    .id = "column_id"
)
# For every column whose name starts with "enzyme", run a t-test of that
# column against `group`, then stack the tidied results with the source
# column name stored in `enzymes`.
df %>%
    summarise_at(
        vars(starts_with("enzyme")),
        # Wrap each tidied test in a list so it fits in one summary cell.
        funs(list(tidy(t.test(. ~ group))))
    ) %>%
    # Each summary column is a length-one list; pull out its only element.
    map(pluck, 1) %>%
    bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided
library(broom)
library(tidyverse) # purrr is in here
data(mtcars)
#可复制的数据来模拟您的案例

mtcars2我们可以利用
library(tidyverse)
中的
purrr::map_df()
,如下所示:

library(broom)
library(tidyverse) # attaches purrr and dplyr
data(mtcars)

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` ends up as a two-level grouping factor.
mtcars2 <- mtcars %>%
    filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the second one (`cyl`) is a response to test.
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per response column, named after the column
# so that map_df() can label each result row.
formulas <- set_names(
    map(paste(cols_not_cyl, "~ cyl"), as.formula),
    cols_not_cyl
)

# Run each t-test, tidy it into a one-row data frame, and stack the rows;
# `.id` stores the name of each formula in the `column_id` column.
map_df(
    formulas,
    function(f) tidy(t.test(formula = f, data = mtcars2)),
    .id = "column_id"
)
# For every column whose name starts with "enzyme", run a t-test of that
# column against `group`, then stack the tidied results with the source
# column name stored in `enzymes`.
df %>%
    summarise_at(
        vars(starts_with("enzyme")),
        # Wrap each tidied test in a list so it fits in one summary cell.
        funs(list(tidy(t.test(. ~ group))))
    ) %>%
    # Each summary column is a length-one list; pull out its only element.
    map(pluck, 1) %>%
    bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided
library(broom)
library(tidyverse) # purrr is in here
data(mtcars)
#可复制的数据来模拟您的案例

mtcars2使用map计算所有测试,然后减少绑定测试:

 df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

library(broom)
library(dplyr)
library(purrr)

# Fit one t-test of each enzyme column against `group`, tidy each result
# into a one-row data frame, then fold the list into a single data frame.
paste0("enzyme", 1:2) %>%
  map(function(enzyme) {
    test_formula <- as.formula(paste0(enzyme, "~ group"))
    tidy(t.test(test_formula, data = df))
  }) %>%
  reduce(bind_rows)
df%
减少(绑定行)

使用map计算所有测试,然后减少绑定测试:

 df <- data.frame(
  record_id = 1:50,
  group = sample(c("control", "treatment"), 50, replace = TRUE),
  enzyme1 = rnorm(50),
  enzyme2 = rnorm(50)
)

library(broom)
library(dplyr)
library(purrr)

# Fit one t-test of each enzyme column against `group`, tidy each result
# into a one-row data frame, then fold the list into a single data frame.
paste0("enzyme", 1:2) %>%
  map(function(enzyme) {
    test_formula <- as.formula(paste0(enzyme, "~ group"))
    tidy(t.test(test_formula, data = df))
  }) %>%
  reduce(bind_rows)
df%
减少(绑定行)

您可以创建一个空的
data.frame
,然后使用
rbind()
在循环中将信息添加到其中

以下是使用iris数据集的示例:

library(broom) # provides tidy()

# Run a t-test of every measurement column of iris against Species and
# stack the tidied results, one row per column, in `df`.

# t.test() needs exactly two groups: rows 1-99 of iris hold only setosa and
# versicolor (row 100, the last versicolor flower, stays excluded as in the
# original example).
two_species <- iris[1:99, ]

# Test every column except the grouping variable itself, selected by name
# rather than by assuming the grouping column comes last.
measure_cols <- setdiff(colnames(iris), "Species")

# Build each one-row result separately and bind once at the end instead of
# growing `df` with rbind() inside a loop.
results <- lapply(measure_cols, function(variableName) {
  # The response must follow the current column; a hard-coded response
  # repeats the same test under different labels.
  test_formula <- reformulate("Species", response = variableName)
  cbind(
    variableName = variableName,
    tidy(t.test(test_formula, data = two_species))
  )
})
df <- do.call(rbind, results)
df=data.frame()
对于(i in 1:(length(colnames(iris))-1)){##将您的长度更改为您拥有的任何colnames
variableName=colnames(iris)[i]##循环遍历所需的colnames

df您可以创建一个空的
data.frame
,然后使用
rbind()
将您的信息添加到循环中

以下是使用iris数据集的示例:

library(broom) # provides tidy()

# Run a t-test of every measurement column of iris against Species and
# stack the tidied results, one row per column, in `df`.

# t.test() needs exactly two groups: rows 1-99 of iris hold only setosa and
# versicolor (row 100, the last versicolor flower, stays excluded as in the
# original example).
two_species <- iris[1:99, ]

# Test every column except the grouping variable itself, selected by name
# rather than by assuming the grouping column comes last.
measure_cols <- setdiff(colnames(iris), "Species")

# Build each one-row result separately and bind once at the end instead of
# growing `df` with rbind() inside a loop.
results <- lapply(measure_cols, function(variableName) {
  # The response must follow the current column; a hard-coded response
  # repeats the same test under different labels.
  test_formula <- reformulate("Species", response = variableName)
  cbind(
    variableName = variableName,
    tidy(t.test(test_formula, data = two_species))
  )
})
df <- do.call(rbind, results)
df=data.frame()
对于(i in 1:(length(colnames(iris))-1)){##将您的长度更改为您拥有的任何colnames
variableName=colnames(iris)[i]##循环遍历所需的colnames
df也可以尝试这样的tidyverse方法:

library(broom)
library(tidyverse) # attaches purrr and dplyr
data(mtcars)

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` ends up as a two-level grouping factor.
mtcars2 <- mtcars %>%
    filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the second one (`cyl`) is a response to test.
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per response column, named after the column
# so that map_df() can label each result row.
formulas <- set_names(
    map(paste(cols_not_cyl, "~ cyl"), as.formula),
    cols_not_cyl
)

# Run each t-test, tidy it into a one-row data frame, and stack the rows;
# `.id` stores the name of each formula in the `column_id` column.
map_df(
    formulas,
    function(f) tidy(t.test(formula = f, data = mtcars2)),
    .id = "column_id"
)
# For every column whose name starts with "enzyme", run a t-test of that
# column against `group`, then stack the tidied results with the source
# column name stored in `enzymes`.
df %>%
    summarise_at(
        vars(starts_with("enzyme")),
        # Wrap each tidied test in a list so it fits in one summary cell.
        funs(list(tidy(t.test(. ~ group))))
    ) %>%
    # Each summary column is a length-one list; pull out its only element.
    map(pluck, 1) %>%
    bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided

数据:

df也可以尝试像这样的tidyverse方法:

library(broom)
library(tidyverse) # attaches purrr and dplyr
data(mtcars)

# Reproducible stand-in for the real data: keep only the 4- and 6-cylinder
# cars so that `cyl` ends up as a two-level grouping factor.
mtcars2 <- mtcars %>%
    filter(cyl %in% c(4, 6))
mtcars2$cyl <- as.factor(mtcars2$cyl)

# Every column except the second one (`cyl`) is a response to test.
cols_not_cyl <- names(mtcars2)[-2]

# One `<column> ~ cyl` formula per response column, named after the column
# so that map_df() can label each result row.
formulas <- set_names(
    map(paste(cols_not_cyl, "~ cyl"), as.formula),
    cols_not_cyl
)

# Run each t-test, tidy it into a one-row data frame, and stack the rows;
# `.id` stores the name of each formula in the `column_id` column.
map_df(
    formulas,
    function(f) tidy(t.test(formula = f, data = mtcars2)),
    .id = "column_id"
)
# For every column whose name starts with "enzyme", run a t-test of that
# column against `group`, then stack the tidied results with the source
# column name stored in `enzymes`.
df %>%
    summarise_at(
        vars(starts_with("enzyme")),
        # Wrap each tidied test in a list so it fits in one summary cell.
        funs(list(tidy(t.test(. ~ group))))
    ) %>%
    # Each summary column is a length-one list; pull out its only element.
    map(pluck, 1) %>%
    bind_rows(.id = "enzymes")

#  enzymes estimate estimate1 estimate2 statistic    p.value parameter   conf.low conf.high                  method alternative
#1 enzyme1   -104.3      33.3     137.6 -7.168597 0.08610502  1.013697 -283.37000  74.77000 Welch Two Sample t-test   two.sided
#2 enzyme2     19.6      33.2      13.6 11.204574 0.01532388  1.637394   10.22717  28.97283 Welch Two Sample t-test   two.sided

数据:

df
map()
不是由
dplyr
导出的,您可以使用
tidyverse
purrr
来代替,尽管
map()
不是由
dplyr
导出的,但是您可以使用
tidyverse
purrr
来代替