在 R 的数据框中按组(group_by)选取第一行并进行聚合
我有以下数据选择第一行并在R中的数据框中的group_中聚合,r,dplyr,tidyverse,R,Dplyr,Tidyverse,我有以下数据 df <- tibble::tribble( ~V1, ~V2, ~V3, ~V4, ~V5, "CTV10016020", "PoP", "2020-06-08 01:50:07", 220L, "Music", "CTV10016020", "PoP", "2020-06-08 01:53:45", 8L, "Music", "CTV10016020",
# Example data, built column by column: two (V1, V2) groups with 5 and 6
# rows respectively. V3 holds timestamps as character strings, V4 is an
# integer value column, and V5 is a category label.
df <- tibble::tibble(
  V1 = rep(c("CTV10016020", "CTVM11011420"), times = c(5L, 6L)),
  V2 = rep(c("PoP", "Game"), times = c(5L, 6L)),
  V3 = c(
    "2020-06-08 01:50:07", "2020-06-08 01:53:45", "2020-06-08 01:53:53",
    "2020-06-08 01:56:05", "2020-06-08 01:59:57",
    "2020-06-08 02:03:00", "2020-06-08 02:03:00", "2020-06-08 02:03:07",
    "2020-06-08 02:05:01", "2020-06-08 02:05:32", "2020-06-08 02:08:36"
  ),
  V4 = c(220L, 8L, 133L, 234L, 0L, 0L, 10L, 116L, 32L, 208L, 42L),
  V5 = c(
    "Music", "Music", "Music", "Music", "Control",
    "Control", "Music", "Music", "Audio", "Music", "Audio"
  )
)
使用
dplyr
# For each (V1, V2) group: attach the group's V4 total to every row, drop
# the per-row columns V4 and V5, collapse duplicate rows, and keep the
# row(s) whose V3 equals the first V3 of the group.
# The result is still grouped by V1 and V2.
df %>%
  group_by(V1, V2) %>%
  mutate(total = sum(V4)) %>%
  select(-V4, -V5) %>%
  distinct() %>%
  filter(V3 == first(V3))
给你
# A tibble: 2 x 4
# Groups: V1, V2 [2]
  V1 V2 V3 total
1 CTV10016020 PoP 2020-06-08 01:50:07 595
2 CTVM11011420 Game 2020-06-08 02:03:00 408
使用dplyr
# For each (V1, V2) group: attach the group's V4 total to every row, drop
# the per-row columns V4 and V5, collapse duplicate rows, and keep the
# row(s) whose V3 equals the first V3 of the group.
# The result is still grouped by V1 and V2.
df %>%
  group_by(V1, V2) %>%
  mutate(total = sum(V4)) %>%
  select(-V4, -V5) %>%
  distinct() %>%
  filter(V3 == first(V3))
给你
# A tibble: 2 x 4
# Groups: V1, V2 [2]
  V1 V2 V3 total
1 CTV10016020 PoP 2020-06-08 01:50:07 595
2 CTVM11011420 Game 2020-06-08 02:03:00 408
如果去掉 group_by 之后的 mutate 步骤,改为在 summarise 中完成同样的计算,OP 的方法就可以正常工作。
原因是:summarise 之后,结果中只保留 summarise 里用到的列以及分组列;
也就是说,仅在 mutate 中计算的 first(V3) 不会进入输出。
library(dplyr)
# Convert V3 from character to POSIXct, then collapse each (V1, V2) group
# to one row holding the group's first V3 and the sum of its V4 values.
df %>%
  mutate(
    # `format` is named explicitly: as.POSIXct()'s second formal is `tz`, so
    # a positional format string only works when `tz` is also named.
    V3 = as.POSIXct(V3, format = "%Y-%m-%d %H:%M:%OS", tz = "Europe/Helsinki")
  ) %>%
  group_by(V1, V2) %>%
  summarise(V3 = first(V3), total = sum(V4))
# summarise() drops the last grouping level, so the result is grouped by V1.
# A tibble: 2 x 4
# Groups: V1 [2]
# V1 V2 V3 total
# <chr> <chr> <dttm> <int>
#1 CTV10016020 PoP 2020-06-08 01:50:07 595
#2 CTVM11011420 Game 2020-06-08 02:03:00 408
library(dplyr)
df %>%
mutate(V3= as.POSIXct(V3, "%Y-%m-%d %H:%M:%OS", tz = "Europe/Helsinki")) %>%
group_by(V1, V2) %>%
summarise(V3 = first(V3), total = sum(V4))
# A tibble: 2 x 4
# Groups: V1 [2]
# V1 V2 V3 total
# <chr> <chr> <dttm> <int>
#1 CTV10016020 PoP 2020-06-08 01:50:07 595
#2 CTVM11011420 Game 2020-06-08 02:03:00 408
如果去掉 group_by 之后的 mutate 步骤,改为在 summarise 中完成同样的计算,OP 的方法就可以正常工作。
原因是:summarise 之后,结果中只保留 summarise 里用到的列以及分组列;
也就是说,仅在 mutate 中计算的 first(V3) 不会进入输出。
library(dplyr)
# Convert V3 from character to POSIXct, then collapse each (V1, V2) group
# to one row holding the group's first V3 and the sum of its V4 values.
df %>%
  mutate(
    # `format` is named explicitly: as.POSIXct()'s second formal is `tz`, so
    # a positional format string only works when `tz` is also named.
    V3 = as.POSIXct(V3, format = "%Y-%m-%d %H:%M:%OS", tz = "Europe/Helsinki")
  ) %>%
  group_by(V1, V2) %>%
  summarise(V3 = first(V3), total = sum(V4))
# summarise() drops the last grouping level, so the result is grouped by V1.
# A tibble: 2 x 4
# Groups: V1 [2]
# V1 V2 V3 total
# <chr> <chr> <dttm> <int>
#1 CTV10016020 PoP 2020-06-08 01:50:07 595
#2 CTVM11011420 Game 2020-06-08 02:03:00 408
library(dplyr)
df %>%
mutate(V3= as.POSIXct(V3, "%Y-%m-%d %H:%M:%OS", tz = "Europe/Helsinki")) %>%
group_by(V1, V2) %>%
summarise(V3 = first(V3), total = sum(V4))
# A tibble: 2 x 4
# Groups: V1 [2]
# V1 V2 V3 total
# <chr> <chr> <dttm> <int>
#1 CTV10016020 PoP 2020-06-08 01:50:07 595
#2 CTVM11011420 Game 2020-06-08 02:03:00 408