Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/webpack/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
在r中拆分分组的二项数据_R_Data Manipulation - Fatal编程技术网

在r中拆分分组的二项数据

在r中拆分分组的二项数据,r,data-manipulation,R,Data Manipulation,我有这样的数据 samplesize <- 6 group <- c(1,2,3) total <- rep(samplesize,length(group)) outcomeTrue <- c(2,1,3) df <- data.frame(group,total,outcomeTrue) group2 <- c(rep(1,6),rep(2,6),rep(3,6)) outcomeTrue2 <- c(rep(1,2),rep(0,6-2),r

我有这样的数据

# Aggregated binomial data: one row per group, recording how many of the
# `total` trials in that group had a true outcome.
samplesize <- 6

group <- c(1, 2, 3)
total <- rep(samplesize, length(group))
outcomeTrue <- c(2, 1, 3)

df <- data.frame(group, total, outcomeTrue)

# Desired long format: one row per trial. Both vectors are derived from the
# aggregated inputs above rather than repeating the literal sample size and
# success counts, so they stay in sync if those inputs change.
group2 <- rep(group, each = samplesize)
# Within each group the first `outcomeTrue` trials are 1, the rest are 0.
# sequence(total) numbers the trials 1..total inside every group.
outcomeTrue2 <- as.numeric(sequence(total) <= rep(outcomeTrue, times = total))

df2 <- data.frame(group2, outcomeTrue2)

你已经完成一大半了。只需把 group2 变量放在“[”函数的行索引(第一个参数)位置即可:

# Index the rows of df with group2: every value in group2 selects one row, so
# repeated values repeat that row in the result.
df[group2, ]
    group total outcomeTrue
1       1     6           2
1.1     1     6           2
1.2     1     6           2
1.3     1     6           2
1.4     1     6           2
1.5     1     6           2
2       2     6           1
2.1     2     6           1
2.2     2     6           1
2.3     2     6           1
2.4     2     6           1
2.5     2     6           1
3       3     6           3
3.1     3     6           3
3.2     3     6           3
3.3     3     6           3
3.4     3     6           3
3.5     3     6           3

当把与行名匹配的数字或字符值放在“[”的行索引位置时,它会复制整行。

这是一个基础 R(base R)的解决方案:

# Expand the aggregated counts into one row per trial, group by group.
# split() yields a one-row data frame per group; for each of them we emit
# `outcomeTrue` ones followed by `total - outcomeTrue` zeros, and rbind()
# stacks the per-group pieces back together.
do.call(
  rbind,
  lapply(split(df, df$group), function(x) {
    # Use the full column name: `x$outcome` only worked through `$` partial
    # matching, which breaks silently if another `outcome*` column is added.
    n_true <- x$outcomeTrue
    n_false <- x$total - n_true
    data.frame(
      group2 = x$group,
      outcome2 = rep(c(1, 0), times = c(n_true, n_false))
    )
  })
)

#     group2 outcome2
# 1.1      1        1
# 1.2      1        1
# 1.3      1        0
# 1.4      1        0
# 1.5      1        0
# 1.6      1        0
# 2.1      2        1
# 2.2      2        0
# 2.3      2        0
# 2.4      2        0
# 2.5      2        0
# 2.6      2        0
# 3.1      3        1
# 3.2      3        1
# 3.3      3        1
# 3.4      3        0
# 3.5      3        0
# 3.6      3        0

这里有一个使用 tidyverse 的方案。我们用 uncount 按“total”列展开行,按“group”分组,然后根据 row_number() 与“outcomeTrue”的值构造逻辑条件,生成一个二进制指示变量:

library(tidyverse)

# Expand to one row per trial, then flag the leading trials of each group.
df %>%
  # Repeat every row `total` times; uncount() drops the `total` column.
  uncount(total) %>%
  group_by(group) %>%
  # Within a group, the first `outcomeTrue` rows become 1 and the rest 0.
  mutate(outcomeTrue = as.integer(row_number() <= first(outcomeTrue)))
# A tibble: 18 x 2
# Groups:   group [3]
#   group outcomeTrue
#   <dbl>       <int>
# 1     1           1
# 2     1           1
# 3     1           0
# 4     1           0
# 5     1           0
# 6     1           0
# 7     2           1
# 8     2           0
# 9     2           0
#10     2           0
#11     2           0
#12     2           0
#13     3           1
#14     3           1
#15     3           1
#16     3           0
#17     3           0
#18     3           0
library(tidyverse)
df %>%
   uncount(total) %>%
   group_by(group) %>%
   mutate(outcomeTrue = as.integer(row_number() <= outcomeTrue[1]))
outcomeTrue
outcomeTrue2
应该分别是
outcomeTrue
outcomeTrue2
。很抱歉,您是对的,我改了一处,却忘了改另一处。没问题,不客气。