R 获取按小时分组的计数
我想获得每种类型的每小时计数(R 获取按小时分组的计数,r,dplyr,time-series,aggregate,tidyverse,R,Dplyr,Time Series,Aggregate,Tidyverse,我想获得每种类型的每小时计数(version1和version2) 样本数据: type <- c('version1','version1','version1','version2','version2') startdate <- as.POSIXct(c('2017-11-1 02:11:02.000','2018-3-25 02:13:02.000','2019-3-14 03:45:02.000', '2017
version1
和version2
)
样本数据:
type <- c('version1','version1','version1','version2','version2')
startdate <- as.POSIXct(c('2017-11-1 02:11:02.000','2018-3-25 02:13:02.000','2019-3-14 03:45:02.000',
'2017-3-14 02:55:02.000','2018-3-14 03:45:02.000'))
df <- data.frame(type, startdate)
df
type startdate
1 version1 2017-11-01 02:11:02
2 version1 2018-03-25 02:13:02
3 version1 2019-03-14 03:45:02
4 version2 2017-03-14 02:55:02
5 version2 2018-03-14 03:45:02
我们可以首先从
startdate
获取小时数,count
每小时的行数和类型
<编码>完成缺失的小时数,并用0填充其计数,然后使用pivot\u wide
获取宽幅格式的数据
library(dplyr)
library(tidyr)
df %>%
mutate(hr = lubridate::hour(startdate)) %>%
count(hr, type) %>%
complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
pivot_wider(names_from = type, values_from = n)
# A tibble: 4 x 3
# hr version1 version2
# <int> <dbl> <dbl>
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
库(dplyr)
图书馆(tidyr)
df%>%
突变(hr=lubridate::hour(startdate))%>%
计数(小时,类型)%>%
完成(类型,hr=seq(0,max(hr)),填充=list(n=0))%>%
枢轴(名称从=类型,值从=n)
#一个tibble:4x3
#人力资源版本1版本2
#
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
我们可以首先从开始日期
获取小时数,计数
每小时的行数和类型
<编码>完成缺失的小时数,并用0填充其计数,然后使用pivot\u wide
获取宽幅格式的数据
library(dplyr)
library(tidyr)
df %>%
mutate(hr = lubridate::hour(startdate)) %>%
count(hr, type) %>%
complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
pivot_wider(names_from = type, values_from = n)
# A tibble: 4 x 3
# hr version1 version2
# <int> <dbl> <dbl>
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
库(dplyr)
图书馆(tidyr)
df%>%
突变(hr=lubridate::hour(startdate))%>%
计数(小时,类型)%>%
完成(类型,hr=seq(0,max(hr)),填充=list(n=0))%>%
枢轴(名称从=类型,值从=n)
#一个tibble:4x3
#人力资源版本1版本2
#
#1 0 0 0
#2 1 0 0
#3 2 2 1
#4 3 1 1
开始日期变量有问题。因此,我使用包lubridate
library(dplyr)
library(tidyr)
type = c('version1','version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
'2019-3-14T03:45:02.000','2017-3-14T02:55:02.000',
'2018-3-14T03:45:02.000'))
tibble(type = type, startdate = startdate) %>%
count(type, hour = lubridate::hour(startdate)) %>%
spread(type, n)
# A tibble: 2 x 3
hour version1 version2
<int> <int> <int>
1 2 2 1
2 3 1 1
库(dplyr)
图书馆(tidyr)
类型=c('version1'、'version1'、'version1'、'version2'、'version2')
startdate=lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
‘2019-3-14T03:45:02.000’、‘2017-3-14T02:55:02.000’,
‘2018-3-14T03:45:02.000’)
TIBLE(类型=类型,起始日期=起始日期)%>%
计数(类型,小时=润滑脂::小时(起始日期))%>%
排列(类型,n)
#一个tibble:2x3
小时版本1版本2
1 2 2 1
2 3 1 1
开始日期变量有问题。因此,我使用包lubridate
library(dplyr)
library(tidyr)
type = c('version1','version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
'2019-3-14T03:45:02.000','2017-3-14T02:55:02.000',
'2018-3-14T03:45:02.000'))
tibble(type = type, startdate = startdate) %>%
count(type, hour = lubridate::hour(startdate)) %>%
spread(type, n)
# A tibble: 2 x 3
hour version1 version2
<int> <int> <int>
1 2 2 1
2 3 1 1
库(dplyr)
图书馆(tidyr)
类型=c('version1'、'version1'、'version1'、'version2'、'version2')
startdate=lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
‘2019-3-14T03:45:02.000’、‘2017-3-14T02:55:02.000’,
‘2018-3-14T03:45:02.000’)
TIBLE(类型=类型,起始日期=起始日期)%>%
计数(类型,小时=润滑脂::小时(起始日期))%>%
排列(类型,n)
#一个tibble:2x3
小时版本1版本2
1 2 2 1
2 3 1 1
基本R解决方案:
# Extract the hour and store it as a vector:
df$hour <- gsub(".* ", "", trunc(df$startdate, units = "hours"))
# Count the number of observations of each type in each hour:
df$type_hour_cnt <- with(df,
ave(paste(type, hour, sep = " - "),
paste(type, hour, sep = " - "), FUN = seq_along))
# Reshape dataframe:
df <- as.data.frame(as.matrix(xtabs(type_hour_cnt ~ hour + type, df, sparse = T)))
# Extract rownames and store them as "hour" vector and then delete row.names:
df <- data.frame(cbind(hour = row.names(df), df), row.names = NULL)
#提取小时并将其存储为向量:
df$hour基本R解决方案:
# Extract the hour and store it as a vector:
df$hour <- gsub(".* ", "", trunc(df$startdate, units = "hours"))
# Count the number of observations of each type in each hour:
df$type_hour_cnt <- with(df,
ave(paste(type, hour, sep = " - "),
paste(type, hour, sep = " - "), FUN = seq_along))
# Reshape dataframe:
df <- as.data.frame(as.matrix(xtabs(type_hour_cnt ~ hour + type, df, sparse = T)))
# Extract rownames and store them as "hour" vector and then delete row.names:
df <- data.frame(cbind(hour = row.names(df), df), row.names = NULL)
#提取小时并将其存储为向量:
df$hour谢谢你的帮助!面对一个小问题。。。获取有关pivot\u-wider()
的错误:找不到我安装的tidyr
函数“pivot\u-wider”,不确定这里有什么问题。再次感谢@苹果橙子它有较新版本的tidyr
,您需要使用install.packages(“tidyr”)
安装它。如果您使用的是旧版本,请改用spread
<代码>df%>%变异(hr=lubridate::hour(startdate))%%>%计数(hr,type)%%>%完成(type,hr=seq(0,max(hr)),填充=列表(n=0))%%>%排列(type,n)
感谢您的帮助!面对一个小问题。。。获取有关pivot\u-wider()
的错误:找不到我安装的tidyr
函数“pivot\u-wider”,不确定这里有什么问题。再次感谢@苹果橙子它有较新版本的tidyr
,您需要使用install.packages(“tidyr”)
安装它。如果您使用的是旧版本,请改用spread
<代码>df%>%突变(hr=lubridate::hour(startdate))%%>%计数(hr,类型)%%>%完成(类型,hr=seq(0,最大(hr)),填充=列表(n=0))%%>%排列(类型,n)