R 按组复制控制行

R 按组复制控制行,r,dataframe,dplyr,R,Dataframe,Dplyr,我有一个包含许多分组治疗的数据集。同时,在两个独立的小组中,我收集了阳性和阴性对照。为了绘图和进一步分析,我想复制每个单独治疗组的对照组。因此,我的情节从这里开始: 为此: 在dplyr中,我已经找到了如何识别和生成具有正确控制值的列,但挑战在于如何复制数据集的完整行并附加它们,而不仅仅是为每个相关组添加“积极控制”和“消极控制”列。这种方法是可行的,但这意味着您实际上只能存储在每个治疗过程中复制的摘要值(例如平均值),而不是维护单个读数 librar(ggplot) before <

我有一个包含许多分组治疗的数据集。同时,在两个独立的小组中,我收集了阳性和阴性对照。为了绘图和进一步分析,我想复制每个单独治疗组的对照组。因此,我的情节从这里开始:

为此:

在dplyr中,我已经找到了如何识别和生成具有正确控制值的列,但挑战在于如何复制数据集的完整行并附加它们,而不仅仅是为每个相关组添加“积极控制”和“消极控制”列。这种方法是可行的,但这意味着您实际上只能存储在每个治疗过程中复制的摘要值(例如平均值),而不是维护单个读数

# Reproducible example for the question: the raw layout (`before`), where the
# controls sit in their own "neg"/"pos" groups, and the desired layout
# (`after`), where every treatment group carries its own copy of the control
# readings. Each data frame is drawn as box plots faceted by group.
library(ggplot2)

# Raw data: three treatment groups (treatments A-D) plus two control groups
# ("neg", "pos") whose treatment is recorded as "none".
before <- structure(
  list(
    group = c(
      "grp1", "grp1", "grp1", "grp1",
      "grp2", "grp2", "grp2", "grp2",
      "grp3", "grp3", "grp3", "grp3",
      "neg", "neg", "pos", "pos"
    ),
    treatment = c(
      "A", "B", "C", "D",
      "A", "B", "C", "D",
      "A", "B", "C", "D",
      "none", "none", "none", "none"
    ),
    value = c(
      3L, 5L, 7L, 9L,
      2L, 4L, 6L, 8L,
      3L, 4L, 6L, 9L,
      12L, 10L, 1L, 2L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -16L)
)

ggplot(data = before, aes(x = treatment, y = value)) +
  geom_boxplot() +
  facet_wrap(~group)

# Target data: the individual control readings are repeated inside each
# treatment group, with the control type stored in `treatment`.
after <- structure(
  list(
    group = c(
      "grp1", "grp1", "grp1", "grp1", "grp1", "grp1", "grp1", "grp1",
      "grp2", "grp2", "grp2", "grp2", "grp2", "grp2", "grp2", "grp2",
      "grp3", "grp3", "grp3", "grp3", "grp3", "grp3", "grp3", "grp3"
    ),
    treatment = c(
      "A", "B", "C", "D", "neg", "neg", "pos", "pos",
      "A", "B", "C", "D", "neg", "neg", "pos", "pos",
      "A", "B", "C", "D", "neg", "neg", "pos", "pos"
    ),
    value = c(
      3L, 5L, 7L, 9L, 12L, 10L, 1L, 2L,
      2L, 4L, 6L, 8L, 12L, 10L, 1L, 2L,
      3L, 4L, 6L, 9L, 12L, 10L, 1L, 2L
    )
  ),
  class = "data.frame",
  row.names = c(NA, -24L)
)

ggplot(data = after, aes(x = treatment, y = value)) +
  geom_boxplot() +
  facet_wrap(~group)

一个选项是:先用 `filter` 取出 `group` 列为 'neg'、'pos' 的行,然后把 `group` 列中不含 'neg'、'pos' 的原始数据按组拆分(`group_split`),再把这些对照行绑定到每个拆分出的组上。

library(dplyr)
library(tidyr)
library(purrr)

# Control readings, relabelled so that `treatment` holds the control type
# ("neg" / "pos") instead of "none".
tmp <- before %>%
  # keep only the rows that belong to the control groups
  filter(group %in% c("neg", "pos")) %>%
  # the control type becomes the treatment label
  mutate(treatment = group) %>%
  # drop `group`; it is filled in per treatment group below
  select(-group)

# Append a full copy of the control readings to every treatment group.
out <- before %>%
  # keep only the treatment groups
  filter(!group %in% c("neg", "pos")) %>%
  # one data frame per treatment group
  group_split(group) %>%
  # add the control rows to each piece; their `group` is NA after binding,
  # so fill it downwards from the treatment rows, then row-bind all pieces
  map_dfr(~ .x %>%
    bind_rows(tmp) %>%
    fill(group))
-输出

out
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows


它也可以在单个管道中完成

# Single-pipe variant: appends the 'neg'/'pos' control readings to every
# treatment group without building an intermediate data frame.
# NOTE(review): the result is only printed here, not assigned to `out`.
before %>% 
    # Relabel the control rows: 'none' becomes NA via na_if(), and coalesce()
    # then fills that NA with the row's group value ('neg' / 'pos').
    mutate(treatment = coalesce(na_if(treatment, 'none'), group)) %>% 
    # Process one group at a time.
    group_by( group) %>% 
    # Rebuild `treatment` and `value` for each group.
    # NOTE(review): every group returns several rows from summarise(); newer
    # dplyr releases may warn about this and suggest reframe() - confirm
    # against the installed version.
    summarise(across(c(treatment, value), 
      # Append to the group's own values (`.`) the values of the same column
      # taken from the rows whose group is 'neg' or 'pos'.
      # NOTE(review): `dplyr:::peek_mask()` is an unexported dplyr internal;
      # full_data() presumably returns all rows of the piped (already mutated)
      # data regardless of the current group - confirm, and expect this to be
      # fragile across dplyr versions.
      # The row mask comes from `before$group`; mutate() above changes only
      # `treatment`, so the mask lines up with the piped data's rows.
      ~ c(., dplyr:::peek_mask()$full_data()[[cur_column()]][
          before$group %in% c("neg", "pos")])),
       .groups = 'drop') %>%
    # Drop the stand-alone control groups, leaving only the treatment groups.
    filter(!group %in% c('pos', 'neg'))
-输出

out
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows

一个选项是:先用 `filter` 取出 `group` 列为 'neg'、'pos' 的行,然后把 `group` 列中不含 'neg'、'pos' 的原始数据按组拆分(`group_split`),再把这些对照行绑定到每个拆分出的组上。

library(dplyr)
library(tidyr)
library(purrr)

# Control readings, relabelled so that `treatment` holds the control type
# ("neg" / "pos") instead of "none".
tmp <- before %>%
  # keep only the rows that belong to the control groups
  filter(group %in% c("neg", "pos")) %>%
  # the control type becomes the treatment label
  mutate(treatment = group) %>%
  # drop `group`; it is filled in per treatment group below
  select(-group)

# Append a full copy of the control readings to every treatment group.
out <- before %>%
  # keep only the treatment groups
  filter(!group %in% c("neg", "pos")) %>%
  # one data frame per treatment group
  group_split(group) %>%
  # add the control rows to each piece; their `group` is NA after binding,
  # so fill it downwards from the treatment rows, then row-bind all pieces
  map_dfr(~ .x %>%
    bind_rows(tmp) %>%
    fill(group))
-输出

out
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows


它也可以在单个管道中完成

# Single-pipe variant: appends the 'neg'/'pos' control readings to every
# treatment group without building an intermediate data frame.
# NOTE(review): the result is only printed here, not assigned to `out`.
before %>% 
    # Relabel the control rows: 'none' becomes NA via na_if(), and coalesce()
    # then fills that NA with the row's group value ('neg' / 'pos').
    mutate(treatment = coalesce(na_if(treatment, 'none'), group)) %>% 
    # Process one group at a time.
    group_by( group) %>% 
    # Rebuild `treatment` and `value` for each group.
    # NOTE(review): every group returns several rows from summarise(); newer
    # dplyr releases may warn about this and suggest reframe() - confirm
    # against the installed version.
    summarise(across(c(treatment, value), 
      # Append to the group's own values (`.`) the values of the same column
      # taken from the rows whose group is 'neg' or 'pos'.
      # NOTE(review): `dplyr:::peek_mask()` is an unexported dplyr internal;
      # full_data() presumably returns all rows of the piped (already mutated)
      # data regardless of the current group - confirm, and expect this to be
      # fragile across dplyr versions.
      # The row mask comes from `before$group`; mutate() above changes only
      # `treatment`, so the mask lines up with the piped data's rows.
      ~ c(., dplyr:::peek_mask()$full_data()[[cur_column()]][
          before$group %in% c("neg", "pos")])),
       .groups = 'drop') %>%
    # Drop the stand-alone control groups, leaving only the treatment groups.
    filter(!group %in% c('pos', 'neg'))
-输出

out
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows
# A tibble: 24 x 3
#   group treatment value
#   <chr> <chr>     <int>
# 1 grp1  A             3
# 2 grp1  B             5
# 3 grp1  C             7
# 4 grp1  D             9
# 5 grp1  neg          12
# 6 grp1  neg          10
# 7 grp1  pos           1
# 8 grp1  pos           2
# 9 grp2  A             2
#10 grp2  B             4
# … with 14 more rows

谢谢,@akrun。因此,要走完这一步,tmp df是一个拆分,用于从治疗组中移除对照组,并方便地将其附加。我们过滤所有不是pos/neg控件的内容,这就是事情变得有点模糊的地方:“group_split”将每个组转换为单独的列表?然后map_dfr命令使用类似rbind的命令将tmp df附加到每个分割组(
.x
)?然后
fill
重新组装所有东西?假设这是大致正确的,如果涉及到额外的分组级别,这会起作用吗?@MarioNiepel是的,它只是
filter
i使用'neg','pos'对这些行进行筛选,并在拆分筛选数据后使用'neg','pos'行和'tmp@MarioNiepel我在每行代码中添加了一些描述。希望itt能帮你摇滚。这(以及一些明智的谷歌搜索+试错)应该会让我明白。谢谢大家!@MarioNiepel由于嵌套条件与您的不同,您能否将其作为新问题发布谢谢@akrun。因此,要走完这一步,tmp df是一个拆分,用于从治疗组中移除对照组,并方便地将其附加。我们过滤所有不是pos/neg控件的内容,这就是事情变得有点模糊的地方:“group_split”将每个组转换为单独的列表?然后map_dfr命令使用类似rbind的命令将tmp df附加到每个分割组(
.x
)?然后
fill
重新组装所有东西?假设这是大致正确的,如果涉及到额外的分组级别,这会起作用吗?@MarioNiepel是的,它只是
filter
i使用'neg','pos'对这些行进行筛选,并在拆分筛选数据后使用'neg','pos'行和'tmp@MarioNiepel我在每行代码中添加了一些描述。希望itt能帮你摇滚。这(以及一些明智的谷歌搜索+试错)应该会让我明白。谢谢大家!@MarioNiepel由于嵌套条件与您的不同,您可以将其作为新问题发布吗