在r中拆分分组的二项数据_R_Data Manipulation

在r中拆分分组的二项数据

在r中拆分分组的二项数据,r,data-manipulation,R,Data Manipulation,我有这样的数据 samplesize <- 6 group <- c(1,2,3) total <- rep(samplesize,length(group)) outcomeTrue <- c(2,1,3) df <- data.frame(group,total,outcomeTrue) group2 <- c(rep(1,6),rep(2,6),rep(3,6)) outcomeTrue2 <- c(rep(1,2),rep(0,6-2),r

我有这样的数据

# Number of trials observed in every group.
samplesize <- 6

# Aggregated (binomial) form: one row per group, holding the number of
# trials and the number of successes.
group <- c(1, 2, 3)
total <- rep(samplesize, times = length(group))
outcomeTrue <- c(2, 1, 3)

df <- data.frame(group = group, total = total, outcomeTrue = outcomeTrue)

# Desired long form: one row per trial, successes (1) listed before
# failures (0) inside each group.
group2 <- rep(group, each = samplesize)
# Interleave success/failure counts per group (2, 4, 1, 5, 3, 3) and use
# them as repetition counts for the alternating 1/0 pattern.
outcomeTrue2 <- rep(
  rep(c(1, 0), times = length(group)),
  times = c(rbind(outcomeTrue, total - outcomeTrue))
)

df2 <- data.frame(group2 = group2, outcomeTrue2 = outcomeTrue2)

你其实已经差不多做到了。只需把 group2 变量放在 “[” 函数的行索引位置（i）：
# Row-index df with group2: each value of group2 picks the row at that
# position, so a row is repeated once for every time its index occurs.
df[group2, ]
    group total outcomeTrue
1       1     6           2
1.1     1     6           2
1.2     1     6           2
1.3     1     6           2
1.4     1     6           2
1.5     1     6           2
2       2     6           1
2.1     2     6           1
2.2     2     6           1
2.3     2     6           1
2.4     2     6           1
2.5     2     6           1
3       3     6           3
3.1     3     6           3
3.2     3     6           3
3.3     3     6           3
3.4     3     6           3
3.5     3     6           3

当与行名匹配的数字或字符值放在 “[” 的行索引位置时，它会复制整行。

这是一个基础 R（base R）的解决方案：
# Base R: turn each group's summary row into one 0/1 row per trial.
# split() breaks df into one piece per group (a single row each for the
# example data); rep() then emits `outcomeTrue` ones followed by
# `total - outcomeTrue` zeros, and do.call(rbind, ...) stacks the pieces.
# Columns are referenced by their full names: `x$outcome` only resolved to
# `outcomeTrue` through `$` partial matching, which tibbles do not support.
do.call(rbind, lapply(split(df, df$group), function(x) {
  data.frame(
    group2 = x$group,
    outcome2 = rep(c(1, 0), times = c(x$outcomeTrue, x$total - x$outcomeTrue))
  )
}))

#     group2 outcome2
# 1.1      1        1
# 1.2      1        1
# 1.3      1        0
# 1.4      1        0
# 1.5      1        0
# 1.6      1        0
# 2.1      2        1
# 2.2      2        0
# 2.3      2        0
# 2.4      2        0
# 2.5      2        0
# 2.6      2        0
# 3.1      3        1
# 3.2      3        1
# 3.3      3        1
# 3.4      3        0
# 3.5      3        0
# 3.6      3        0

这里有一个使用 tidyverse 的方案。我们用 uncount 按 “total” 列展开行，按 “group” 分组，然后根据 row_number() 和 “outcomeTrue” 的值，通过一个逻辑条件创建二进制指示变量：
library(tidyverse)

# Repeat every row `total` times (uncount() drops the `total` column), then
# within each group flag the first `outcomeTrue` rows as 1 and the rest as 0.
# The result stays grouped by `group`.
df %>%
  uncount(total) %>%
  group_by(group) %>%
  mutate(
    outcomeTrue = as.integer(row_number() <= first(outcomeTrue))
  )
# A tibble: 18 x 2
# Groups:   group [3]
#   group outcomeTrue
#   <dbl>       <int>
# 1     1           1
# 2     1           1
# 3     1           0
# 4     1           0
# 5     1           0
# 6     1           0
# 7     2           1
# 8     2           0
# 9     2           0
#10     2           0
#11     2           0
#12     2           0
#13     3           1
#14     3           1
#15     3           1
#16     3           0
#17     3           0
#18     3           0

library(tidyverse)
df %>%
   uncount(total) %>%
   group_by(group) %>%
   mutate(outcomeTrue = as.integer(row_number() <= outcomeTrue[1]))

outcomeTrue 和 outcomeTrue2 应该分别是 outcomeTrue 和 outcomeTrue2。很抱歉，您是对的，我改了一个，但忘了改另一个。没问题，不客气。