R 计算数据子集的百分比
我想针对每个 rep(重复),计算数据框中每个 DB 所占的百分比。这是我的 df:
# Example data: 12 replicates ("rep"), each with five measurements ("num")
# tagged by a DB level; "class" is constant. The object carries the tibble
# class vector, exactly as dput() would emit it.
rep_labels <- c(
  "early1", "early2", "early3",
  "mid1", "mid2", "mid3",
  "late1", "late2", "late3",
  "stat1", "stat2", "stat3"
)

# One line per replicate, in the same order as rep_labels.
num_values <- c(
  0.009523686, 0.043189398, 0.420979104, 0.246671197, 3.451885409,
  0.007613802, 0.046278008, 0.392051405, 0.25502036, 3.159879284,
  0.011478203, 0.054305349, 0.464326108, 0.307066853, 3.6602462,
  0.010537286, 0.04003825, 0.433807129, 0.265128974, 3.793074386,
  0.007334728, 0.050505078, 0.380642914, 0.297303594, 3.223705784,
  0.006611086, 0.030062788, 0.368471191, 0.196684816, 3.41357708,
  0.01118598, 0.079753642, 0.42517786, 0.475724091, 3.136961558,
  0.011543492, 0.074750731, 0.436643281, 0.41869653, 3.16178206,
  0.014526945, 0.083030295, 0.421018391, 0.453286503, 3.034308389,
  0.018963327, 0.134905674, 0.324578481, 0.675510653, 2.075633975,
  0.020817582, 0.138788879, 0.322903267, 0.686792019, 2.160872891,
  0.02334196, 0.141525911, 0.326552113, 0.705404966, 2.160032852
)

df <- structure(
  list(
    class = rep("CL", 60L),
    # The DB levels follow the same five-value cycle inside every replicate.
    DB = rep(c(0L, 4L, 2L, 4L, 2L), times = 12L),
    rep = rep(rep_labels, each = 5L),
    num = num_values
  ),
  row.names = c(NA, -60L),
  class = c("tbl_df", "tbl", "data.frame")
)
df %>% summarise(funs(sum()/sum(class==CL)*100))
然而,这段代码并不起作用……
能否指出我哪里做错了?
祝好,
Magda
使用 group_by 两次:
library(dplyr)
# Share of each DB level within its replicate, as a fraction (0-1).
# The dplyr verbs are namespace-qualified so that plyr, if it is attached
# after dplyr, cannot mask mutate()/summarise() and collapse the result
# into a single cell.
df_sum <- df %>%
  dplyr::group_by(rep) %>%                 # one group per replicate
  dplyr::mutate(sum_rep = sum(num)) %>%    # replicate total, repeated per row
  dplyr::group_by(rep, class, DB) %>%      # regroup by DB level
  # sum_rep is constant inside a replicate, so unique() yields one value.
  dplyr::summarise(desired = sum(num) / unique(sum_rep)) %>%
  dplyr::ungroup()                         # do not leak grouping downstream
或者先计算每行所占的比例,再按组求和:
# Alternative: turn each row into its share of the replicate total first,
# then add the shares up per DB level and express them as a percentage.
# Reads from `df`, the data frame defined in this file.
df %>%
  dplyr::group_by(rep) %>%
  dplyr::mutate(sumper = num / sum(num)) %>%  # row share within its replicate
  dplyr::group_by(rep, class, DB) %>%
  dplyr::summarise(Ret = sum(sumper) * 100)   # percentage per DB level
使用
data.table
可以执行以下操作:
# library() fails loudly if data.table is missing; require() only returns FALSE.
library(data.table)

# Convert df to a data.table in place (by reference, no copy).
setDT(df)

# Add the per-replicate total as a new column.
df[, sum_rep := sum(num), by = .(rep)]

# Share of each DB level within its replicate, as a fraction (0-1).
# sum_rep is constant inside a replicate, so unique() yields one value.
df[, .(desired = sum(num) / unique(sum_rep)), by = .(rep, class, DB)]
谢谢你的回答,你太棒了!然而,当我复制你的代码时,我得到的数据框 df_sum 只有一列 “desired”,并且只有一个值为 1 的单元格。
你使用的是你发布的同一个数据框吗?或许可以尝试更新 dplyr?
@magruc 这是同时加载 dplyr 和 plyr 时的常见错误:这两个包有一些同名函数,因此在调用 dplyr 的函数时,可以加上 dplyr:: 前缀,以得到所需的输出(另外,更新 dplyr 无法解决这个问题)。
@Wen,很可能是这样。OP:这就是为什么推荐使用 tidyverse,或者始终确保在 dplyr 之前加载 plyr。
rep class DB desired
1 early1 CL 0 0.002282627
2 early1 CL 2 0.928243905
3 early1 CL 4 0.069473468
4 early2 CL 0 0.001972057
5 early2 CL 2 0.919988412
6 early2 CL 4 0.078039532
7 early3 CL 0 0.002552173
8 early3 CL 2 0.917096873
9 early3 CL 4 0.080350953
10 late1 CL 0 0.002709255
# Convert each row into its share of the replicate total, then add the
# shares up per DB level and express them as a percentage.
# Reads from `df`, the data frame defined in this file.
df %>%
  dplyr::group_by(rep) %>%
  dplyr::mutate(sumper = num / sum(num)) %>%  # row share within its replicate
  dplyr::group_by(rep, class, DB) %>%
  dplyr::summarise(Ret = sum(sumper) * 100)   # percentage per DB level