在dplyr分析中组合多个摘要统计信息_R_Dplyr_Plyr

在dplyr分析中组合多个摘要统计信息

在dplyr分析中组合多个摘要统计信息,r,dplyr,plyr,R,Dplyr,Plyr,对于示例数据帧： df1 <- structure(list(practice = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), drug = c("123A456", "123A567", "123A123", "123A567"

对于示例数据帧：

# Example data for the question: a tibble (classes tbl_df / tbl / data.frame)
# with 37 rows and four columns -- `practice` (integer), `drug` (character
# code), `items` and `quantity` (written as double literals here).
# NOTE(review): the attached `spec` attribute (class "col_spec", which looks
# like a readr column specification) declares `items` and `quantity` as
# collector_integer even though the values below are doubles -- presumably the
# real data were read as integers; confirm, because type-strict helpers such
# as if_else() behave differently for integer and double columns.
df1 <- structure(list(practice = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), drug = c("123A456", 
"123A567", "123A123", "123A567", "123A456", "123A123", "123A567", 
"123A567", "998A125", "123A456", "998A125", "123A567", "123A456", 
"998A125", "123A567", "123A567", "123A567", "998A125", "123A123", 
"998A125", "123A123", "123A456", "998A125", "123A567", "998A125", 
"123A456", "123A123", "998A125", "123A567", "123A567", "998A125", 
"123A456", "123A123", "123A567", "123A567", "998A125", "123A456"
), items = c(1, 2, 3, 4, 5, 4, 6, 7, 8, 9, 5, 6, 7, 8, 9, 4, 
5, 6, 3, 2, 3, 4, 5, 6, 7, 4, 3, 2, 3, 4, 5, 4, 3, 4, 5, 6, 4
), quantity = c(1, 2, 4, 5, 3, 2, 3, 5, 4, 5, 7, 9, 5, 3, 4, 
6, 1, 2, 4, 5, 3, 2, 3, 5, 4, 5, 7, 9, 5, 3, 4, 6, 1, 2, 4, 5, 
3)), .Names = c("practice", "drug", "items", "quantity"), row.names = c(NA, 
-37L), spec = structure(list(cols = structure(list(practice = structure(list(), class = c("collector_integer", 
"collector")), drug = structure(list(), class = c("collector_character", 
"collector")), items = structure(list(), class = c("collector_integer", 
"collector")), quantity = structure(list(), class = c("collector_integer", 
"collector"))), .Names = c("practice", "drug", "items", "quantity"
)), default = structure(list(), class = c("collector_guess", 
"collector"))), .Names = c("cols", "default"), class = "col_spec"), class = c("tbl_df", 
"tbl", "data.frame"))

df1

我想这就是你想要的：
# Per-practice totals for drugs whose code starts with "123", next to the
# totals over every drug in that practice.
#
# The "123" sums use logical subsetting rather than if_else(cond, items, 0):
# a type-strict if_else() rejects a double `0` when `items`/`quantity` are
# integer columns ("`false` must be type integer, not double"). Subsetting
# works for integer and double columns alike and yields the same sums.
df1 %>%
  group_by(practice) %>%
  summarize(
    items_123 = sum(items[stringr::str_detect(drug, "^123")]),
    quantity_123 = sum(quantity[stringr::str_detect(drug, "^123")]),
    all_items = sum(items),
    all_quantity = sum(quantity)
  )

# A tibble: 3 x 5
  practice items_123 quantity_123 all_items all_quantity
     <int>     <dbl>        <dbl>     <dbl>        <dbl>
1        1        54           44        75           58
2        2        44           42        66           65
3        3        24           19        35           28

df1 %>%
  group_by(practice) %>%
  summarize(items_123 = sum(if_else(stringr::str_detect(drug, '^123'), items, 0)),
            quantity_123 = sum(if_else(stringr::str_detect(drug, '^123'), quantity, 0)),
            all_items = sum(items),
            all_quantity = sum(quantity))
# A tibble: 3 x 5
  practice items_123 quantity_123 all_items all_quantity
1        1        54           44        75           58
2        2        44           42        66           65
3        3        24           19        35           28
df1 %>% group_by(practice) %>% mutate(all.items = sum(items)) %>% filter(str_sub(drug, 1, 3) == 123) %>% summarise(all.items = first(all.items), quantSum = sum(quantity))

Hi @akrun——我想我明白了……我更希望对每种药物（例如 123）分别进行分析，并把所有药物的总数量和总项目数作为额外的列，这样我就能看清楚发生了什么……

谢谢 @zack——看起来很棒。我能问个问题吗？当我把代码应用到我的真实数据时，遇到了一个错误……唯一的区别是我的药物代码更长（BNFCode，'^0407020A0'）——这样可以吗？我得到的错误是：Error in summarise_impl(.data, dots) : Evaluation error: `false` must be type integer, not double.（求值错误：`false` 必须是整数类型，而不是 double。）

嗯，根据我的经验，这可能是 `if_else` 函数造成的，它对 true 和 false 两个分支的类型要求很严格。先试试 `ifelse`，看看能否解决问题。另外，在我的代码（以及你提供的 `dput` 输出）中，`items` 和 `quantity` 的类型看起来都是 `double`——你真实数据中的 items 列或 quantity 列是否可能是整型？

我使用了 as.numeric，这样就可以了。你的建议很有效，非常感谢。
# Rows of df1 whose drug code starts with "123".
# The prefix is compared with the character literal "123" rather than the
# number 123: a numeric literal is coerced to text for the comparison, which
# silently fails for prefixes with leading zeros (0407 would become "407").
drug123 <- df1 %>%
  filter(substr(drug, 1, 3) == "123")

# Total quantity of those rows per practice. The result is a data frame with
# the columns `Category` (the practice) and `x` (the summed quantity).
ALL.drug123 <- aggregate(
  drug123$quantity,
  by = list(Category = drug123$practice),
  FUN = sum
)

# Per-practice totals for drugs whose code starts with "123", next to the
# totals over every drug in that practice.
#
# The "123" sums use logical subsetting rather than if_else(cond, items, 0):
# a type-strict if_else() rejects a double `0` when `items`/`quantity` are
# integer columns ("`false` must be type integer, not double"). Subsetting
# works for integer and double columns alike and yields the same sums.
df1 %>%
  group_by(practice) %>%
  summarize(
    items_123 = sum(items[stringr::str_detect(drug, "^123")]),
    quantity_123 = sum(quantity[stringr::str_detect(drug, "^123")]),
    all_items = sum(items),
    all_quantity = sum(quantity)
  )

# A tibble: 3 x 5
  practice items_123 quantity_123 all_items all_quantity
     <int>     <dbl>        <dbl>     <dbl>        <dbl>
1        1        54           44        75           58
2        2        44           42        66           65
3        3        24           19        35           28