使用dplyr计算每次就诊两个因素的比率

使用dplyr计算每次就诊两个因素的比率,r,dplyr,R,Dplyr,我想使用dplyr计算受试者每次就诊的两个因素的比率向量。可以在下面创建模拟数据: subj = c(rep("A", 10), rep("B", 4), rep("C", 6)) vist = c(rep(c("C0", "C1", "C2", "C3", "C4"), each=2), rep(c("C0", "C1"), each=2), rep(c("C0", "C1", "C2"), each=2)) factor = c(rep(c("L", "N

我想使用
dplyr
计算受试者每次就诊的两个因素的比率向量。可以在下面创建模拟数据:

# Mock data: three subjects (A, B, C) with 5, 2 and 3 visits respectively,
# and two rows per visit -- one for factor "L" followed by one for factor "N".
subj <- rep(c("A", "B", "C"), times = c(10, 4, 6))
vist <- rep(paste0("C", c(0:4, 0:1, 0:2)), each = 2)
factor <- rep(c("L", "N"), times = 10)

# Fix the seed so the simulated measurements are reproducible.
set.seed(111)
aval <- round(rnorm(20, mean = 0, sd = 1), digits = 2)

dat <- data.frame(subj, vist, factor, aval, stringsAsFactors = FALSE)
dat
这看起来像:

   subj vist factor  aval
1     A   C0      L  0.24
2     A   C0      N -0.33
3     A   C1      L -0.31
4     A   C1      N -2.30
5     A   C2      L -0.17
6     A   C2      N  0.14
7     A   C3      L -1.50
8     A   C3      N -1.01
9     A   C4      L -0.95
10    A   C4      N -0.49
11    B   C0      L -0.17
12    B   C0      N -0.41
13    B   C1      L  1.85
14    B   C1      N  0.39
15    C   C0      L  0.80
16    C   C0      N -1.57
17    C   C1      L -0.09
18    C   C1      N -0.36
19    C   C2      L -1.19
20    C   C2      N  0.36

需要的是每次就诊(
vist
)中每个受试者(
subj
)的因子(
factor
)“N”与“L”的比值(
aval
)。例如,第一个比率值为
-1.375
,来自
-0.33/0.24
。谢谢

您可以使用tidyr软件包中的
spread
重塑数据,然后很容易计算新列:

library(tidyr)
library(dplyr)
# Reshape to one row per (subj, vist) with the "L" and "N" values side by
# side, then divide the two columns.
# Note: spread() is superseded in current tidyr; pivot_wider() is the
# recommended replacement for new code.
dat %>%
  spread(key = factor, value = aval) %>%
  mutate(ratio = N / L)


   subj vist     L     N      ratio
1     A   C0  0.24 -0.33 -1.3750000
2     A   C1 -0.31 -2.30  7.4193548
3     A   C2 -0.17  0.14 -0.8235294
4     A   C3 -1.50 -1.01  0.6733333
5     A   C4 -0.95 -0.49  0.5157895
6     B   C0 -0.17 -0.41  2.4117647
7     B   C1  1.85  0.39  0.2108108
8     C   C0  0.80 -1.57 -1.9625000
9     C   C1 -0.09 -0.36  4.0000000
10    C   C2 -1.19  0.36 -0.3025210

如果每组只有一个
N
和一个
L
,您可以执行以下操作:

# For every subject/visit pair, divide the "N" measurement by the "L" one.
# This relies on each (subj, vist) group containing exactly one "N" row and
# exactly one "L" row.
dat %>%
  group_by(subj, vist) %>%
  summarise(
    ratio = aval[factor == "N"] / aval[factor == "L"]
  )

#Source: local data frame [10 x 3]
#Groups: subj [?]

#    subj  vist      ratio
#   <chr> <chr>      <dbl>
#1      A    C0 -1.3750000
#2      A    C1  7.4193548
#3      A    C2 -0.8235294
#4      A    C3  0.6733333
#5      A    C4  0.5157895
#6      B    C0  2.4117647
#7      B    C1  0.2108108
#8      C    C0 -1.9625000
#9      C    C1  4.0000000
#10     C    C2 -0.3025210
dat %>%
  group_by(subj, vist) %>%
  summarise(ratio = aval[factor == "N"]/aval[factor == "L"])
#Source: local data frame [10 x 3]
#Groups: subj [?]
#    subj  vist      ratio
#   <chr> <chr>      <dbl>
#1      A    C0 -1.3750000
#2      A    C1  7.4193548
#3      A    C2 -0.8235294
#4      A    C3  0.6733333
#5      A    C4  0.5157895
#6      B    C0  2.4117647
#7      B    C1  0.2108108
#8      C    C0 -1.9625000
#9      C    C1  4.0000000
#10     C    C2 -0.3025210

在base R中,您可以使用
aggregate
构建比率摘要,或使用
ave
将这些比率填入原始data.frame中。这假设data.frame是规则的(每组恰好有一对L、N),并且行的顺序正确。

# Base R summary: within each (subj, vist) group, divide the second value by
# the first. This is the N / L ratio only when every group's rows are ordered
# "L" then "N".
aggregate(
  x = dat$aval,
  by = dat[c("subj", "vist")],
  FUN = function(pair) pair[2] / pair[1]
)
   subj vist          x
1     A   C0 -1.3750000
2     B   C0  2.4117647
3     C   C0 -1.9625000
4     A   C1  7.4193548
5     B   C1  0.2108108
6     C   C1  4.0000000
7     A   C2 -0.8235294
8     C   C2 -0.3025210
9     A   C3  0.6733333
10    A   C4  0.5157895


如果它们的顺序相同,并且每个“subj”、“vist”组合正好有一对(L、N)值,可以这样做:

# Rows alternate L, N: the recycled mask c(FALSE, TRUE) picks the even ("N")
# rows and c(TRUE, FALSE) picks the odd ("L") rows. Each pair's ratio is then
# repeated twice so it lines up with both rows of that pair.
dat$ratio <- with(
  dat,
  rep(aval[c(FALSE, TRUE)] / aval[c(TRUE, FALSE)], each = 2)
)
dat$ratio
#[1] -1.3750000 -1.3750000  7.4193548  7.4193548 -0.8235294 -0.8235294
#[7]  0.6733333  0.6733333  0.5157895  0.5157895  2.4117647  2.4117647
#[13]  0.2108108  0.2108108 -1.9625000 -1.9625000  4.0000000  4.0000000
#[19] -0.3025210 -0.3025210
dat$ratio
# Rows alternate L, N: the recycled mask c(FALSE, TRUE) picks the even ("N")
# rows and c(TRUE, FALSE) picks the odd ("L") rows. Each pair's ratio is then
# repeated twice so it lines up with both rows of that pair.
dat$ratio <- with(
  dat,
  rep(aval[c(FALSE, TRUE)] / aval[c(TRUE, FALSE)], each = 2)
)
dat$ratio
#[1] -1.3750000 -1.3750000  7.4193548  7.4193548 -0.8235294 -0.8235294
#[7]  0.6733333  0.6733333  0.5157895  0.5157895  2.4117647  2.4117647
#[13]  0.2108108  0.2108108 -1.9625000 -1.9625000  4.0000000  4.0000000
#[19] -0.3025210 -0.3025210