Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/81.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Partial group by语句以查找变量的多个实例的计数_R_Dplyr - Fatal编程技术网

Partial group by语句以查找变量的多个实例的计数

Partial group by语句以查找变量的多个实例的计数,r,dplyr,R,Dplyr,我正在努力完成一项相当简单的任务。我有以下数据,希望在每次访问的事件列表中找到项目的计数。所以它可能看起来像下面这样 Visit_high visit event_list 101 1 3 101 2 5 102 1 2 103 1 6 103 2 8 103 3

我正在努力完成一项相当简单的任务。我有以下数据,希望在每次访问的事件列表中找到项目的计数。所以它可能看起来像下面这样

Visit_high   visit    event_list
101            1          3
101            2          5
102            1          2
103            1          6
103            2          8
103            3          5
...
Visit_high 是用户 id,visit 表示这是该用户的第几次访问,event_list 是该次访问中执行的操作次数。因此,用户 101 访问了该网站两次,第一次访问时进行了 3 次操作,第二次访问时进行了 5 次操作。

> dput(tail(mydf[1:50,c(5,10)], 10))
structure(list(event_list = structure(c(2L, 2L, 2L, 2L, 76L, 
36L, 64L, 37L, 14L, 25L), .Label = c("", "100,101,102,115,116", 
"100,101,102,115,116,146", "100,101,102,116", "100,101,102,116,146", 
"100,101,115,116", "100,101,117,118", "100,102,115,116", "100,102,115,116,146", 
"100,102,116", "100,102,116,146", "100,107,115,116", "100,107,116", 
"100,115,116", "100,115,116,146", "100,116", "100,116,146", "100,117", 
"102,115,116", "102,115,116,146", "102,116", "102,116,146", "107,115,116", 
"108,117,118", "115,116", "115,116,146", "116", "116,146", "202", 
"202,120", "205,100,101,109,117,118", "206,115,116", "206,115,116,146", 
"206,116", "206,116,146", "206,214,115,116", "206,214,115,116,146", 
"206,214,116", "206,214,116,146", "206,215,115,116", "206,215,115,116,146", 
"207,102,115,116", "207,102,115,116,146", "207,102,116", "207,102,116,146", 
"207,115,116", "208,100,101,102,115,116", "208,100,101,102,116", 
"208,100,102,115,116", "208,100,115,116", "208,102,109,115,116", 
"208,102,109,116", "208,102,115,116", "208,102,116", "208,109,115,116", 
"208,109,115,116,146", "208,109,116", "208,115,116", "208,116", 
"210,102,108,115,116", "210,102,108,116", "212,102,109,115,116", 
"212,102,109,116", "212,109,115,116", "212,109,116", "212,115,116", 
"214,100,101,102,115,116", "214,100,101,102,115,116,146", "214,100,115,116", 
"214,100,115,116,146", "214,100,116", "214,100,116,146", "214,102,115,116", 
"214,102,115,116,146", "214,102,116", "214,115,116", "214,115,116,146", 
"214,116", "214,116,146", "214,207,102,115,116", "214,221,102,115,116", 
"214,221,102,115,116,146", "215,100,101,102,115,116", "215,100,101,102,115,116,146", 
"215,100,101,102,116", "215,100,101,115,116", "215,100,102,115,116", 
"215,100,102,116", "215,100,115,116", "215,100,115,116,146", 
"215,100,116", "215,102,115,116", "215,102,115,116,146", "215,102,116", 
"215,115,116", "215,115,116,146", "215,116", "215,207,102,115,116", 
"215,207,102,116", "215,221,100,102,115,116", "215,221,100,102,116", 
"215,221,102,115,116", "215,221,102,116", "220,102,115,116", 
"221,100,102,115,116", "221,100,102,115,116,146", "221,100,102,116", 
"221,102,115,116", "221,102,115,116,146", "221,102,116", "226,100,117,119,120", 
"227,102,115,116", "227,102,116", "228,102,115,116", "232,102,115,116", 
"234,102,115,116", "235"), class = "factor"), visid_high = c(2710815361820866560, 
2710815518587167232, 2710815707565725184, 2710815726893081600, 
2710815857889578496, 2710815857889578496, 2710815857889578496, 
2710815883659387904, 2710815902986739712, 2710815950231374336
)), .Names = c("event_list", "visid_high"), row.names = 41:50, class = "data.frame")
我有每个访问者id的访问次数,但我有点不知道如何区分每个访问次数

# Build a three-column matrix: visitor id, raw event list, and the number of
# comma-separated events in that list.
# strsplit() only accepts character input; the dput() output shows event_list
# stored as a factor, so it is converted explicitly (a no-op if it is already
# character). NOTE(review): cbind() of mixed types yields a character matrix,
# same as the original call would for character input.
event_sum <- cbind(
  mmf$visid_high,
  as.character(mmf$event_list),
  lengths(strsplit(as.character(mmf$event_list), ",", fixed = TRUE))
)
这个怎么样

# Dummy data based on the example in the question: one row per visit.
du <- data.frame(
  Visit_high = c(101, 101, 102, 103, 103, 103),
  visit = c(1, 2, 1, 1, 2, 3),
  event_list = c(3, 5, 2, 6, 8, 5)
)

# Sum the events over all visits that belong to one Visit_high value.
#
# vh:   a single Visit_high value to total up.
# data: data frame with `Visit_high` and `event_list` columns. Defaults to
#       the global `du`, looked up when the function is called, so existing
#       one-argument calls keep working.
# Returns a named numeric vector c(Visit_high = vh, event_sum = <total>);
# the total is 0 when no row matches `vh`.
fu <- function(vh, data = du) {
  # which() drops NA comparisons, so rows with a missing id are never counted.
  rows_for_this_user <- which(data$Visit_high == vh)
  events_for_this_user <- sum(data$event_list[rows_for_this_user])
  c(Visit_high = vh, event_sum = events_for_this_user)
}

# One row per distinct Visit_high. vapply() with a named template keeps the
# result a two-column numeric table even when `du` has no rows, where
# sapply() would silently return an empty list instead.
data.frame(t(vapply(
  unique(du$Visit_high),
  fu,
  FUN.VALUE = c(Visit_high = 0, event_sum = 0)
)))

希望我正确理解了你的问题(假设你的数据框名为 DF):


myfun
如果您需要按组求和(group sum),有很多方法可以做到:

library(data.table)
# setDT() converts `du` to a data.table by reference; the j expression then
# totals event_list within each Visit_high group.
setDT(du)[, .(event_sum = sum(event_list)), by = "Visit_high"]
#   Visit_high event_sum
#1:        101         8
#2:        102         2
#3:        103        19
基础 R 方法:

 # Base R: total event_list per Visit_high; cbind() names the output column.
 aggregate(
   cbind(event_sum = event_list) ~ Visit_high,
   data = du,
   FUN = sum
 )
数据
du
你到底想实现什么?是每次访问的事件列表总和吗?例如:访客 101 共 8 个操作?如果您的示例使用一致的拼写(例如 visid 与 visit),也会很好。但我们真正需要的是与您提供的那个非常短的样本输入相对应的样本输出。
library(data.table)

# Sample data: one row per visit, with the number of events in that visit.
# Defined first so that the two summaries below can actually run.
du <- data.frame(
  Visit_high = c(101, 101, 102, 103, 103, 103),
  visit = c(1, 2, 1, 1, 2, 3),
  event_list = c(3, 5, 2, 6, 8, 5)
)

# data.table: setDT() converts `du` by reference, then event_list is summed
# within each Visit_high group.
setDT(du)[, list(event_sum = sum(event_list)), by = Visit_high]
#   Visit_high event_sum
#1:        101         8
#2:        102         2
#3:        103        19

# Base R equivalent; cbind() names the aggregated column.
aggregate(cbind(event_sum = event_list) ~ Visit_high, data = du, FUN = sum)