R 按行计数列，避免熔化/聚集_R_Data.table_Dplyr

R 按行统计各列取值的频数，避免使用 melt/gather

R 按行计数列，避免熔化/聚集,r,data.table,dplyr,R,Data.table,Dplyr,我正在使用这样的数据帧： idno 08:00 08:05 08:10 08:15 08:20 08:25 1 1 Domestic Domestic Domestic Domestic Domestic Domestic 2 2 Leisure Leisure Leisure Leisure Leisure Leisure 3 3 Eat Eat

我正在使用这样的数据帧：

   idno      08:00      08:05      08:10    08:15    08:20    08:25
1     1   Domestic   Domestic   Domestic Domestic Domestic Domestic
2     2    Leisure    Leisure    Leisure  Leisure  Leisure  Leisure
3     3        Eat        Eat        Eat      Eat      Eat      Eat
4     4       Paid       Paid       Paid     Paid     Paid     Paid
5     5      Sleep      Sleep      Sleep    Sleep    Sleep    Sleep
6     6        Eat        Eat        Eat  Missing  Missing  Missing
7     7      Sleep      Sleep      Sleep    Sleep    Sleep    Sleep
8     8       Paid       Paid       Paid     Paid     Paid     Paid
9     9      Sleep      Sleep      Sleep    Sleep    Sleep    Sleep
10   10 Child Care Child Care Child Care   Travel   Travel   Travel

我想把这个数据框汇总成如下形式：

（需要输出）
然而，我想知道是否有更直接的方法来做到这一点。我的问题是，我正在处理一个非常大的数据库，先使用 `melt`，然后 `count`，最后再 `spread`，可能会有点慢。
是否有一种直接的方法来按行统计各列取值的个数（即变量的分布），最好使用 `data.table`？

setDT(df)[,.N,by=] # 类似于对每一行按列进行 by 分组

# Example data: one row per idno and one character column per 5-minute
# time slot (08:00 to 08:25) holding the activity recorded for that slot.
# Column names are not syntactic R names, hence the backticks.
df <- structure(
  list(
    idno = 1:10,
    `08:00` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:05` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:10` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:15` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    ),
    `08:20` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    ),
    `08:25` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    )
  ),
  # The list above already carries the column names, so no separate
  # `.Names` argument is needed.
  row.names = c(NA, 10L),
  class = "data.frame"
)

您可以试试 `qdapTools` 包中的 `mtabulate`：

library(qdapTools)

# Drop the idno column, split the remaining time-slot columns into one
# single-row piece per observation, and tabulate the values of each piece.
# seq_len() is used instead of seq(nrow(df)) to avoid the 1:0 pitfall when
# df has zero rows.
mtabulate(split(df[-1], seq_len(nrow(df))))
#    Child Care Domestic Eat Leisure Missing Paid Sleep Travel
# 1           0        6   0       0       0    0     0      0
# 2           0        0   0       6       0    0     0      0
# 3           0        0   6       0       0    0     0      0
# 4           0        0   0       0       0    6     0      0
# 5           0        0   0       0       0    0     6      0
# 6           0        0   3       0       3    0     0      0
# 7           0        0   0       0       0    0     6      0
# 8           0        0   0       0       0    6     0      0
# 9           0        0   0       0       0    0     6      0
# 10          3        0   0       0       0    0     0      3

library(data.table)

# Reshape to long form (one row per idno and time slot), then cast back to
# wide form with one column per distinct value, counting occurrences per idno.
# NOTE: setDT() converts df to a data.table by reference (in place).
dcast(
  melt(setDT(df), id.vars = "idno"),
  idno ~ value,
  # Stated explicitly: several long rows map to each cell, so the cast must
  # aggregate, and the count is what we want.
  fun.aggregate = length
)
这仍然是一个有趣的解决方案，感谢 @Jaap。备选方案（基于被标记为重复的目标问题）：
# Cross-tabulate idno against the melted values: one row per idno, one
# column per distinct value, cells holding the counts.
# df is coerced to a data.table first so that data.table's melt method is
# the one dispatched, whether or not df was converted earlier.
xtabs(
  ~ idno + value,
  data = data.table::melt(data.table::as.data.table(df), id.vars = "idno")
)
备选方案二：
library(dplyr)
library(tidyr)

# Long form -> count per (idno, value) -> wide form, one column per value;
# combinations that never occur are filled with 0.
# gather()/spread() are kept as in the original answer; current tidyr
# supersedes them with pivot_longer()/pivot_wider().
df %>%
  gather(key, value, -1) %>%
  group_by(idno, value) %>%
  tally() %>%
  spread(key = value, value = n, fill = 0)
感谢您提供的所有选择。让我保持警觉！非常有趣谢谢
# Example data: one row per idno and one character column per 5-minute
# time slot (08:00 to 08:25) holding the activity recorded for that slot.
# Column names are not syntactic R names, hence the backticks.
df <- structure(
  list(
    idno = 1:10,
    `08:00` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:05` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:10` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Eat", "Sleep", "Paid", "Sleep", "Child Care"
    ),
    `08:15` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    ),
    `08:20` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    ),
    `08:25` = c(
      "Domestic", "Leisure", "Eat", "Paid", "Sleep",
      "Missing", "Sleep", "Paid", "Sleep", "Travel"
    )
  ),
  # The list above already carries the column names, so no separate
  # `.Names` argument is needed.
  row.names = c(NA, 10L),
  class = "data.frame"
)

library(qdapTools)

# Drop the idno column, split the remaining time-slot columns into one
# single-row piece per observation, and tabulate the values of each piece.
# seq_len() is used instead of seq(nrow(df)) to avoid the 1:0 pitfall when
# df has zero rows.
mtabulate(split(df[-1], seq_len(nrow(df))))
#    Child Care Domestic Eat Leisure Missing Paid Sleep Travel
# 1           0        6   0       0       0    0     0      0
# 2           0        0   0       6       0    0     0      0
# 3           0        0   6       0       0    0     0      0
# 4           0        0   0       0       0    6     0      0
# 5           0        0   0       0       0    0     6      0
# 6           0        0   3       0       3    0     0      0
# 7           0        0   0       0       0    0     6      0
# 8           0        0   0       0       0    6     0      0
# 9           0        0   0       0       0    0     6      0
# 10          3        0   0       0       0    0     0      3