Partial group by语句以查找变量的多个实例的计数
标签:r, dplyr
我正在努力完成一项相当简单的任务。我有以下数据,希望统计每次访问的事件列表(event_list)中的项目数。数据大致如下:
Visit_high visit event_list
101 1 3
101 2 5
102 1 2
103 1 6
103 2 8
103 3 5
...
Visit_high 是用户 id,visit 是该用户的第几次访问,event_list 是该次访问中执行的操作次数。因此,用户 101 访问了该网站两次,第一次访问时执行了 3 次操作,第二次访问时执行了 5 次操作。实际数据中的 event_list 是以逗号分隔的事件代码字符串,例如:
> dput(tail(mydf[1:50,c(5,10)], 10))
structure(list(event_list = structure(c(2L, 2L, 2L, 2L, 76L,
36L, 64L, 37L, 14L, 25L), .Label = c("", "100,101,102,115,116",
"100,101,102,115,116,146", "100,101,102,116", "100,101,102,116,146",
"100,101,115,116", "100,101,117,118", "100,102,115,116", "100,102,115,116,146",
"100,102,116", "100,102,116,146", "100,107,115,116", "100,107,116",
"100,115,116", "100,115,116,146", "100,116", "100,116,146", "100,117",
"102,115,116", "102,115,116,146", "102,116", "102,116,146", "107,115,116",
"108,117,118", "115,116", "115,116,146", "116", "116,146", "202",
"202,120", "205,100,101,109,117,118", "206,115,116", "206,115,116,146",
"206,116", "206,116,146", "206,214,115,116", "206,214,115,116,146",
"206,214,116", "206,214,116,146", "206,215,115,116", "206,215,115,116,146",
"207,102,115,116", "207,102,115,116,146", "207,102,116", "207,102,116,146",
"207,115,116", "208,100,101,102,115,116", "208,100,101,102,116",
"208,100,102,115,116", "208,100,115,116", "208,102,109,115,116",
"208,102,109,116", "208,102,115,116", "208,102,116", "208,109,115,116",
"208,109,115,116,146", "208,109,116", "208,115,116", "208,116",
"210,102,108,115,116", "210,102,108,116", "212,102,109,115,116",
"212,102,109,116", "212,109,115,116", "212,109,116", "212,115,116",
"214,100,101,102,115,116", "214,100,101,102,115,116,146", "214,100,115,116",
"214,100,115,116,146", "214,100,116", "214,100,116,146", "214,102,115,116",
"214,102,115,116,146", "214,102,116", "214,115,116", "214,115,116,146",
"214,116", "214,116,146", "214,207,102,115,116", "214,221,102,115,116",
"214,221,102,115,116,146", "215,100,101,102,115,116", "215,100,101,102,115,116,146",
"215,100,101,102,116", "215,100,101,115,116", "215,100,102,115,116",
"215,100,102,116", "215,100,115,116", "215,100,115,116,146",
"215,100,116", "215,102,115,116", "215,102,115,116,146", "215,102,116",
"215,115,116", "215,115,116,146", "215,116", "215,207,102,115,116",
"215,207,102,116", "215,221,100,102,115,116", "215,221,100,102,116",
"215,221,102,115,116", "215,221,102,116", "220,102,115,116",
"221,100,102,115,116", "221,100,102,115,116,146", "221,100,102,116",
"221,102,115,116", "221,102,115,116,146", "221,102,116", "226,100,117,119,120",
"227,102,115,116", "227,102,116", "228,102,115,116", "232,102,115,116",
"234,102,115,116", "235"), class = "factor"), visid_high = c(2710815361820866560,
2710815518587167232, 2710815707565725184, 2710815726893081600,
2710815857889578496, 2710815857889578496, 2710815857889578496,
2710815883659387904, 2710815902986739712, 2710815950231374336
)), .Names = c("event_list", "visid_high"), row.names = 41:50, class = "data.frame")
我已经有了每个访问者 id 的访问次数,但不太清楚如何把每一次访问区分开来分别统计。我目前的尝试如下:
# Per-row event count: split each comma-separated event_list string and count
# the pieces. event_list is a factor in the dput() sample shown in the
# question, and strsplit() rejects non-character input, so it is converted to
# character first. The conversion also keeps the event strings (rather than
# the factor's integer codes) in the cbind() result.
# NOTE(review): cbind() mixes numeric ids with strings, so the result is a
# character matrix and the ids are stored as text.
event_sum <- cbind(
  mmf$visid_high,
  as.character(mmf$event_list),
  lengths(strsplit(as.character(mmf$event_list), ",", fixed = TRUE))
)
这个怎么样
# Toy data mirroring the example table in the question: one row per visit,
# with the user id (Visit_high), the visit number, and the number of events
# recorded for that visit.
du <- data.frame(
  Visit_high = rep(c(101, 102, 103), times = c(2, 1, 3)),
  visit      = c(1, 2, 1, 1, 2, 3),
  event_list = c(3, 5, 2, 6, 8, 5)
)
# Sum the event counts over all visits of a single user.
#
# Args:
#   vh:   a single Visit_high (user id) value.
#   data: data frame with columns Visit_high and event_list. Defaults to the
#         global `du`, so existing one-argument calls keep working.
#
# Returns:
#   A named numeric vector c(Visit_high = vh, event_sum = <total events>).
#   The total is 0 when no row matches `vh`.
fu <- function(vh, data = du) {
  # which() drops NA comparisons, so rows with a missing id are ignored
  user_rows <- which(data$Visit_high == vh)
  c(Visit_high = vh, event_sum = sum(data$event_list[user_rows]))
}
# One row per distinct user: apply fu() to every id, then transpose the
# 2 x n result so that users become rows. vapply() fixes the shape and type of
# each per-user result, and the named FUN.VALUE supplies the column names, so
# the outcome does not depend on sapply()'s input-dependent simplification.
data.frame(t(vapply(
  unique(du$Visit_high),
  fu,
  FUN.VALUE = c(Visit_high = 0, event_sum = 0)
)))
希望我正确理解了你的问题(这里把你的数据称为 DF):
myfun ……(原回答此处的代码在转载时被截断)
如果您需要按组(group)求和(sum),有很多方法可以做到:
library(data.table)
# Total events per user: setDT() turns du into a data.table, and the j
# expression is then evaluated within each Visit_high group. `.()` is
# data.table's alias for list().
setDT(du)[, .(event_sum = sum(event_list)), by = Visit_high]
# Expected output:
#    Visit_high event_sum
# 1:        101         8
# 2:        102         2
# 3:        103        19
或者使用基础 R 的方法:
# Base R equivalent: sum event_list within each Visit_high. The cbind()
# wrapper only serves to name the result column event_sum.
aggregate(cbind(event_sum = event_list) ~ Visit_high, data = du, FUN = sum)
数据
du(定义见下文)
你到底想实现什么?是每次访问的 event_list 总和吗?例如:访客 101 共 8 个操作?另外,如果您的示例拼写保持一致(例如 visid 与 visit),也会更好。但我们真正需要的是与您提供的这段很短的样本输入相对应的样本输出。
library(data.table)
# Group du by Visit_high and add up event_list within each group. setDT()
# converts du to a data.table first; `.()` is data.table's alias for list().
setDT(du)[, .(event_sum = sum(event_list)), by = Visit_high]
# Expected output:
#    Visit_high event_sum
# 1:        101         8
# 2:        102         2
# 3:        103        19
# Base R version of the same per-user total; cbind() is used only to give the
# summed column the name event_sum.
aggregate(cbind(event_sum = event_list) ~ Visit_high, data = du, FUN = sum)
# Sample data used by the snippets above: one row per visit, holding the user
# id (Visit_high), the visit number, and the event count for that visit.
du <- data.frame(
  Visit_high = rep(c(101, 102, 103), times = c(2, 1, 3)),
  visit      = c(1, 2, 1, 1, 2, 3),
  event_list = c(3, 5, 2, 6, 8, 5)
)