R:找出数据表(data.table)中出现最频繁的值
请参考下面的样本数据。我想找出名为 `Start/End` 的列以及 `Duration` 列中出现最频繁的值。欢迎任何帮助。(标签:r、dataframe、data.table)
# Sample data reproduced from `dput()` output: 20 rows with the columns
# `serial`, `Start/End`, `Duration` and `Frequency`.
#
# The `.internal.selfref = <pointer: ...>` attribute that dput() prints for a
# data.table is not parseable R, so it is left out here. The object is bound
# to `df1`, the name used by the snippets that operate on this data.
# NOTE(review): without the self-reference data.table may warn on the first
# `:=` update; calling data.table::setDT(df1) should restore it - confirm.
df1 <- structure(
  list(
    serial = c(
      19050112, 19050112, 12201018, 17221212, 19300613, 19050112, 13260115,
      16151202, 16310311, 14291209, 12190516, 15160311, 12201018, 34080603,
      17221212, 19300613, 19050112, 15040801, 13260115, 16151202
    ),
    `Start/End` = c(
      "t0730_0745 - t0730_0745", "t0745_0800 - t0745_0800",
      "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
      "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
      "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
      "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
      "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
      "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830",
      "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830",
      "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830",
      "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830"
    ),
    Duration = c(
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
      2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
    ),
    Frequency = c(
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
    )
  ),
  row.names = c(NA, -20L),
  class = c("data.table", "data.frame")
)
structure(list(serial = c(19050112, 19050112, 12201018, 17221212,
19300613, 19050112, 13260115, 16151202, 16310311, 14291209, 12190516,
15160311, 12201018, 34080603, 17221212, 19300613, 19050112, 15040801,
13260115, 16151202), `Start/End` = c("t0730_0745 - t0730_0745",
"t0745_0800 - t0745_0800", "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
"t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
"t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815",
"t0800_0815 - t0800_0815", "t0800_0815 - t0800_0815", "t0815_0830 - t0815_0830",
"t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830",
"t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830", "t0815_0830 - t0815_0830",
"t0815_0830 - t0815_0830"), Duration = c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
    Frequency = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), row.names = c(NA, -20L
), class = c("data.table", "data.frame"))
您可以按 " - " 拆分字符串,用 `table()` 统计每个值出现的次数,然后取出现次数最多的那个值:
# Split each `Start/End` label on " - ", tally every resulting slot name, and
# print the entry that comes first when the tally is ordered by decreasing
# count. local() keeps the helper variables out of the global environment.
local({
  slots <- strsplit(df1$`Start/End`, ' - ')
  counts <- table(unlist(slots))
  counts[order(counts, decreasing = TRUE)][1]
})
# Printed result:
# t0800_0815
#         20
如果有多个值并列出现次数最多,可以用下面的方法把它们全部取出:
# Tally every slot name found in `Start/End` (labels are split on " - ").
tab <- table(unlist(strsplit(df1[["Start/End"]], ' - ')))
# Keep every entry whose count equals the maximum, so ties are all returned.
tab[which(tab == max(tab))]
我们也可以使用 data.table 的方法:
library(data.table)
# Split every `Start/End` label on " - " into one long column `new`, count the
# rows per distinct value (.N), and keep the value(s) with the highest count.
df1[, list(new = unlist(tstrsplit(`Start/End`, " - ")))][
  , .N, by = new
][N == max(N)]
# Printed result:
#           new  N
# 1: t0800_0815 20