R 按功能汇总后的信息

R 按功能汇总后的信息,r,statistics,R,Statistics,如何在by函数中总结成对测试中的所有pvalue? 因此,我想将HW1、HW2、HW3和PG的p.value从pairwise.t.test提取到data.frame中(参见下面的输出示例)。 事实上,我有超过HW1,HW2,HW3和PG这只是一个较短的数据 dflong <- structure(list(moda = structure(c(4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,

如何在by函数中总结成对测试中的所有pvalue? 因此,我想将HW1、HW2、HW3和PG的p.value从
pairwise.t.test
提取到data.frame中(参见下面的输出示例)。 事实上,我有超过HW1,HW2,HW3和PG这只是一个较短的数据

dflong <- structure(list(moda = structure(c(4L, 4L, 4L, 4L, 4L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
3L, 3L, 3L, 3L, 3L), .Label = c("HW1", "HW2", "HW3", "PG"), class = "factor"), 
    replicates = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
    12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 
    4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 
    17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
    11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), time = c("t0", 
    "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", 
    "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t0", "t14", 
    "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", 
    "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", "t14", 
    "t14", "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", 
    "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", "t29", 
    "t29", "t29", "t29"), unified = c(1096.6, 1304, 1205.2, 1278.9, 
    1221.3, 1090.7, 1022.7, 1071.3, 808.8, 1093.9, 1710.5, 1541.3, 
    1352.3, 1550.8, 1482.8, 1733.1, 1652.2, 1736.2, 1730.2, 1554, 
    263.7, 283, 302.3, 305.3, 288, 314.4, 369.3, 408.5, 408.5, 
    376.9, 295.9, 299.7, 304, 306.9, 309.3, 300.7, 300.6, 298.3, 
    302, 306, 6.68, 6.58, 6.63, 6.44, 6.55, 5.53, 4.56, 3.58, 
    3.84, 4.67, 6.57, 6.62, 6.62, 6.6, 6.62, 6.69, 6.86, 6.99, 
    6.95, 6.81)), row.names = c(NA, -60L), class = "data.frame")

yy <-dflong$unified
xx <-dflong$time
pair <- function(x) {hhg <- pairwise.t.test(yy,xx,data=x, p.adj = "bonf") }
o <-by(dflong$unified, dflong$moda, FUN=pair )
以下是使用
tidyverse
软件包的一种(冗长的)可能性:

library(tidyverse)
dflong %>% 
  as_tibble %>% 
  select(-replicates) %>% 
  group_by(moda, time) %>% 
  nest(.key = data) %>% 
  {left_join(., ., by = "moda", suffix = c("_1", "_2"))} %>%
  filter(as.numeric(gsub("t", "", time_1)) <= as.numeric(gsub("t", "", time_2))) %>% 
  mutate(time = paste(time_1, time_2, sep = "-")) %>% 
  mutate(pval = map2_dbl(data_1, data_2, ~t.test(.x[[1]], .y[[1]])$p.value)) %>% 
  select(moda, time, pval) %>% 
  spread(moda, pval)

 A tibble: 6 x 5
     time          HW1          HW2          HW3           PG
*   <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1   t0-t0 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2  t0-t14 1.154305e-04 2.874392e-05 2.544053e-06 6.916064e-06
3  t0-t29 4.647212e-05 1.243158e-05 1.202839e-06 4.619454e-06
4 t14-t14 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
5 t14-t29 2.746895e-05 2.655776e-08 1.945818e-09 2.955209e-06
6 t29-t29 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
库(tidyverse)
dflong%>%
可存储%>%
选择(-replicates)%>%
分组依据(moda,时间)%>%
嵌套(.key=data)%>%
{left_join(,,by=“moda”,后缀=c(“_1”,“_2”))}%>%
过滤器(如.numeric(gsub(“t”),time_1))%
突变(时间=粘贴(时间1,时间2,sep=“-”))%>%
变异(pval=map2_dbl(data_1,data_2,~t.test(.x[[1]],.y[[1]])$p.value))%>%
选择(moda、时间、pval)%>%
排列(moda、pval)
一个tibble:6x5
时间HW1 HW2 HW3 PG
*                                   
1 t0-t0 100000E+001000000E+001000000E+001000000E+001000000E+001000000E+00
2 t0-t14 1.154305e-04 2.874392e-05 2.544053e-06 6.916064e-06
3 t0-t29 4.647212e-05 1.243158e-05 1.202839e-06 4.619454e-06
4 t14-t14 1.000000e+011000000e+011000000e+011000000e+011000000e+011000000e+00
5 t14-t29 2.746895e-05 2.655776e-08 1.945818e-09 2.955209e-06
6T29-T291000000E+001000000E+001000000E+001000000E+001000000E+001000000E+00
这里有一个使用
tidyverse
软件包的(冗长的)可能性:

library(tidyverse)
dflong %>% 
  as_tibble %>% 
  select(-replicates) %>% 
  group_by(moda, time) %>% 
  nest(.key = data) %>% 
  {left_join(., ., by = "moda", suffix = c("_1", "_2"))} %>%
  filter(as.numeric(gsub("t", "", time_1)) <= as.numeric(gsub("t", "", time_2))) %>% 
  mutate(time = paste(time_1, time_2, sep = "-")) %>% 
  mutate(pval = map2_dbl(data_1, data_2, ~t.test(.x[[1]], .y[[1]])$p.value)) %>% 
  select(moda, time, pval) %>% 
  spread(moda, pval)

 A tibble: 6 x 5
     time          HW1          HW2          HW3           PG
*   <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1   t0-t0 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
2  t0-t14 1.154305e-04 2.874392e-05 2.544053e-06 6.916064e-06
3  t0-t29 4.647212e-05 1.243158e-05 1.202839e-06 4.619454e-06
4 t14-t14 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
5 t14-t29 2.746895e-05 2.655776e-08 1.945818e-09 2.955209e-06
6 t29-t29 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
库(tidyverse)
dflong%>%
可存储%>%
选择(-replicates)%>%
分组依据(moda,时间)%>%
嵌套(.key=data)%>%
{left_join(,,by=“moda”,后缀=c(“_1”,“_2”))}%>%
过滤器(如.numeric(gsub(“t”),time_1))%
突变(时间=粘贴(时间1,时间2,sep=“-”))%>%
变异(pval=map2_dbl(data_1,data_2,~t.test(.x[[1]],.y[[1]])$p.value))%>%
选择(moda、时间、pval)%>%
排列(moda、pval)
一个tibble:6x5
时间HW1 HW2 HW3 PG
*                                   
1 t0-t0 100000E+001000000E+001000000E+001000000E+001000000E+001000000E+00
2 t0-t14 1.154305e-04 2.874392e-05 2.544053e-06 6.916064e-06
3 t0-t29 4.647212e-05 1.243158e-05 1.202839e-06 4.619454e-06
4 t14-t14 1.000000e+011000000e+011000000e+011000000e+011000000e+011000000e+00
5 t14-t29 2.746895e-05 2.655776e-08 1.945818e-09 2.955209e-06
6T29-T291000000E+001000000E+001000000E+001000000E+001000000E+001000000E+00

我不明白你是如何提取pairwise.t.test的所有pvalue矩阵的,这是我发布的一个问题,你可能知道怎么做?我不明白你是如何提取pairwise.t.test的所有pvalue矩阵的,这是我发布的一个问题,你可能知道怎么做吗?