在dplyr分析中组合多个摘要统计信息
标签:r、dplyr、plyr
对于示例数据帧:
# Example data set `df1`, in dput() form: 37 rows with classes
# tbl_df / tbl / data.frame and four columns:
#   practice - integer, values 1 to 3
#   drug     - character codes such as "123A456" or "998A125"
#   items    - numeric
#   quantity - numeric
# Note: `items` and `quantity` are written without the `L` suffix, so they are
# doubles here, although the attached `spec` attribute (class "col_spec",
# presumably left over from a readr import) lists integer collectors for them.
df1 <- structure(list(practice = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), drug = c("123A456",
"123A567", "123A123", "123A567", "123A456", "123A123", "123A567",
"123A567", "998A125", "123A456", "998A125", "123A567", "123A456",
"998A125", "123A567", "123A567", "123A567", "998A125", "123A123",
"998A125", "123A123", "123A456", "998A125", "123A567", "998A125",
"123A456", "123A123", "998A125", "123A567", "123A567", "998A125",
"123A456", "123A123", "123A567", "123A567", "998A125", "123A456"
), items = c(1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 5, 6, 7, 8, 9, 4,
5, 6, 3, 2, 3, 4, 5, 6, 7, 4, 3, 2, 3, 4, 5, 4, 3, 4, 5, 6, 4
), quantity = c(1, 2, 4, 5, 3, 2, 3, 5, 4, 5, 7, 9, 5, 3, 4,
6, 1, 2, 4, 5, 3, 2, 3, 5, 4, 5, 7, 9, 5, 3, 4, 6, 1, 2, 4, 5,
3)), .Names = c("practice", "drug", "items", "quantity"), row.names = c(NA,
-37L), spec = structure(list(cols = structure(list(practice = structure(list(), class = c("collector_integer",
"collector")), drug = structure(list(), class = c("collector_character",
"collector")), items = structure(list(), class = c("collector_integer",
"collector")), quantity = structure(list(), class = c("collector_integer",
"collector"))), .Names = c("practice", "drug", "items", "quantity"
)), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df",
"tbl", "data.frame"))
df1
我想这就是你想要的:
# Per practice: totals for drugs whose code starts with "123", next to the
# totals over all drugs.
#
# The "123" subtotals use logical subsetting instead of
# `if_else(cond, items, 0)`. `if_else()` requires `true` and `false` to have
# the same type, so the literal double `0` makes it fail with
# "`false` must be type integer, not double" when `items` / `quantity` are
# integer columns. Subsetting works for integer and double columns alike.
df1 %>%
  group_by(practice) %>%
  summarize(
    items_123 = sum(items[stringr::str_detect(drug, "^123")]),
    quantity_123 = sum(quantity[stringr::str_detect(drug, "^123")]),
    all_items = sum(items),
    all_quantity = sum(quantity)
  )
# A tibble: 3 x 5
practice items_123 quantity_123 all_items all_quantity
<int> <dbl> <dbl> <dbl> <dbl>
1 1 54 44 75 58
2 2 44 42 66 65
3 3 24 19 35 28
# Restored from a machine-translated copy in which function and column names
# had been translated, so the snippet was no longer valid R.
#
# Per practice: totals for drugs whose code starts with "123", next to the
# totals over all drugs. Logical subsetting is used rather than
# `if_else(cond, items, 0)`, because `if_else()` is type-strict and fails on
# integer columns when the fallback is the double `0`.
df1 %>%
  group_by(practice) %>%
  summarize(
    items_123 = sum(items[stringr::str_detect(drug, "^123")]),
    quantity_123 = sum(quantity[stringr::str_detect(drug, "^123")]),
    all_items = sum(items),
    all_quantity = sum(quantity)
  )
# A tibble: 3 x 5
practice items_123 quantity_123 all_items all_quantity
1 1 54 44 75 58
2 2 44 42 66 65
3 3 24 19 35 28
# Alternative approach: compute the per-practice total of `items` first, then
# keep only the rows whose drug code starts with "123" and summarise those.
# Fixes relative to the garbled one-liner: `group_by` (not `groupby`), the
# `%>%` pipe (not `%%>%`), `summarise()` (not `summary()`), and the prefix is
# compared with the string "123" rather than the number 123.
# Note: a practice with no "123" rows is dropped by the filter step.
df1 %>%
  group_by(practice) %>%
  mutate(all.items = sum(items)) %>%
  filter(stringr::str_sub(drug, 1, 3) == "123") %>%
  summarise(all.items = first(all.items), quantSum = sum(quantity))
Hi @akrun - 我想是的……我更愿意对每种药物(如 123)分别进行分析,并将所有药物的总数量和总项目数作为额外的列,这样我就可以看到发生了什么……
谢谢 @zack - 看起来很棒。我能问个问题吗?当我将代码应用于我的真实数据时,遇到了一个错误……唯一的区别是我的药物代码(BNFCode)更长,例如 '^0407020A0' - 这样可以吗?我得到的错误是:Error in summarise_impl(.data, dots): Evaluation error: `false` must be type integer, not double.(即 `false` 必须是整数类型,而不是 double。)
嗯,根据我的经验,这很可能是 if_else 函数引起的,它对 true 和 false 分支的类型要求很严格。先试试 ifelse,看看能否解决问题。话虽如此,在我的代码(以及你提供的 dput 输出)中,items 和 quantity 的类型看起来是 double - 你的 items 列或 quantity 列有没有可能是整型?
我使用了 as.numeric,这样就可以了。你的建议很有效,非常感谢。
# Keep only the rows whose drug code starts with "123".
# `startsWith()` does a plain string-prefix test. The earlier
# `substr(drug, 1, 3) == 123` compared text with a number and only worked
# through implicit coercion; it fails for prefixes with leading zeros, because
# a numeric such as 040 is coerced to "40".
drug123 <- df1 %>%
  filter(startsWith(drug, "123"))

# Total quantity of the "123" drugs per practice. The result has the columns
# `Category` (the practice) and `x` (the summed quantity).
ALL.drug123 <- aggregate(
  drug123$quantity,
  by = list(Category = drug123$practice),
  FUN = sum
)
# Per practice: totals for drugs whose code starts with "123", next to the
# totals over all drugs.
#
# The "123" subtotals are taken by logical subsetting rather than
# `if_else(cond, items, 0)`: `if_else()` insists that `true` and `false` share
# a type, so the double literal `0` raises
# "`false` must be type integer, not double" on integer columns. Subsetting
# keeps the column's own type and works for both integer and double input.
df1 %>%
  group_by(practice) %>%
  summarize(
    items_123 = sum(items[stringr::str_detect(drug, "^123")]),
    quantity_123 = sum(quantity[stringr::str_detect(drug, "^123")]),
    all_items = sum(items),
    all_quantity = sum(quantity)
  )
# A tibble: 3 x 5
practice items_123 quantity_123 all_items all_quantity
<int> <dbl> <dbl> <dbl> <dbl>
1 1 54 44 75 58
2 2 44 42 66 65
3 3 24 19 35 28