Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/67.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 获取按小时分组的计数_R_Dplyr_Time Series_Aggregate_Tidyverse - Fatal编程技术网

R 获取按小时分组的计数

R 获取按小时分组的计数,r,dplyr,time-series,aggregate,tidyverse,R,Dplyr,Time Series,Aggregate,Tidyverse,我想获得每种类型的每小时计数(version1和version2) 样本数据: type <- c('version1','version1','version1','version2','version2') startdate <- as.POSIXct(c('2017-11-1 02:11:02.000','2018-3-25 02:13:02.000','2019-3-14 03:45:02.000', '2017

我想获得每种类型(`version1` 和 `version2`)的每小时计数。

样本数据:

# Sample data: one row per event, labelled with the version it belongs to.
type <- c("version1", "version1", "version1", "version2", "version2")

# Parse the timestamps in an explicit time zone. Without `tz`, as.POSIXct()
# uses the session's local zone, where "2018-3-25 02:13:02" may fall into a
# daylight-saving gap (e.g. in most of Europe) and the parsed hours may then
# no longer match the ones written here.
startdate <- as.POSIXct(
  c(
    "2017-11-1 02:11:02.000", "2018-3-25 02:13:02.000",
    "2019-3-14 03:45:02.000", "2017-3-14 02:55:02.000",
    "2018-3-14 03:45:02.000"
  ),
  tz = "UTC"
)

# One row per event: `type` plus its start time.
df <- data.frame(type, startdate)

df

      type           startdate
1 version1 2017-11-01 02:11:02
2 version1 2018-03-25 02:13:02
3 version1 2019-03-14 03:45:02
4 version2 2017-03-14 02:55:02
5 version2 2018-03-14 03:45:02

我们可以首先从 `startdate` 中提取小时数,用 `count` 统计每个小时和每种 `type` 的行数,再用 `complete` 补全缺失的小时并将其计数填充为 0,最后使用 `pivot_wider` 将数据转换为宽格式。

library(dplyr)
library(tidyr)

# Tally rows per (hour of day, type), add every hour from 0 up to the latest
# observed hour with a count of 0 where no rows exist, then give each type
# its own column.
df %>%
  count(hr = lubridate::hour(startdate), type) %>%
  complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
  pivot_wider(names_from = type, values_from = n)

# A tibble: 4 x 3
#     hr version1 version2
#  <int>    <dbl>    <dbl>
#1     0        0        0
#2     1        0        0
#3     2        2        1
#4     3        1        1
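library(dplyr)
library(tidyr)
df %>%
  mutate(hr = lubridate::hour(startdate)) %>%
  count(hr, type) %>%
  complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
  pivot_wider(names_from = type, values_from = n)
# A tibble: 4 x 3
#     hr version1 version2
#  <int>    <dbl>    <dbl>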
#1     0        0        0
#2     1        0        0
#3     2        2        1
#4     3        1        1

我们可以首先从 `startdate` 中提取小时数,用 `count` 统计每个小时和每种 `type` 的行数,再用 `complete` 补全缺失的小时并将其计数填充为 0,最后使用 `pivot_wider` 将数据转换为宽格式。

library(dplyr)
library(tidyr)

# Tally rows per (hour of day, type), add every hour from 0 up to the latest
# observed hour with a count of 0 where no rows exist, then give each type
# its own column.
df %>%
  count(hr = lubridate::hour(startdate), type) %>%
  complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
  pivot_wider(names_from = type, values_from = n)

# A tibble: 4 x 3
#     hr version1 version2
#  <int>    <dbl>    <dbl>
#1     0        0        0
#2     1        0        0
#3     2        2        1
#4     3        1        1
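library(dplyr)
library(tidyr)
df %>%
  mutate(hr = lubridate::hour(startdate)) %>%
  count(hr, type) %>%
  complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>%
  pivot_wider(names_from = type, values_from = n)
# A tibble: 4 x 3
#     hr version1 version2
#  <int>    <dbl>    <dbl>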
#1     0        0        0
#2     1        0        0
#3     2        2        1
#4     3        1        1

`startdate` 变量有问题,因此我使用 `lubridate` 包来解析它:

library(dplyr)
library(tidyr)

# Version label of each event.
type <- c("version1", "version1", "version1", "version2", "version2")

# ymd_hms() parses the ISO-style strings (UTC unless `tz` is given), so the
# result does not depend on the session's local time zone.
startdate <- lubridate::ymd_hms(c(
  "2017-11-1T02:11:02.000", "2018-3-25T02:13:02.000",
  "2019-3-14T03:45:02.000", "2017-3-14T02:55:02.000",
  "2018-3-14T03:45:02.000"
))

# Count rows per (type, hour of day), then give each type its own column.
# pivot_wider() replaces the superseded spread(); hours that a type never
# hits come out as NA, exactly as they did with spread().
tibble(type = type, startdate = startdate) %>%
  count(type, hour = lubridate::hour(startdate)) %>%
  pivot_wider(names_from = type, values_from = n)

# A tibble: 2 x 3
   hour version1 version2
  <int>    <int>    <int>
1     2        2        1
2     3        1        1
library(dplyr)
library(tidyr)
type = c('version1','version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
                                 '2019-3-14T03:45:02.000','2017-3-14T02:55:02.000',
                                 '2018-3-14T03:45:02.000'))
tibble(type = type, startdate = startdate) %>%
  count(type, hour = lubridate::hour(startdate)) %>%
  spread(type, n)
# A tibble: 2 x 3
   hour version1 version2
1     2        2        1
2     3        1        1

`startdate` 变量有问题,因此我使用 `lubridate` 包来解析它:

library(dplyr)
library(tidyr)

# Version label of each event.
type <- c("version1", "version1", "version1", "version2", "version2")

# ymd_hms() parses the ISO-style strings (UTC unless `tz` is given), so the
# result does not depend on the session's local time zone.
startdate <- lubridate::ymd_hms(c(
  "2017-11-1T02:11:02.000", "2018-3-25T02:13:02.000",
  "2019-3-14T03:45:02.000", "2017-3-14T02:55:02.000",
  "2018-3-14T03:45:02.000"
))

# Count rows per (type, hour of day), then give each type its own column.
# pivot_wider() replaces the superseded spread(); hours that a type never
# hits come out as NA, exactly as they did with spread().
tibble(type = type, startdate = startdate) %>%
  count(type, hour = lubridate::hour(startdate)) %>%
  pivot_wider(names_from = type, values_from = n)

# A tibble: 2 x 3
   hour version1 version2
  <int>    <int>    <int>
1     2        2        1
2     3        1        1
library(dplyr)
library(tidyr)
type = c('version1','version1','version1','version2','version2')
startdate = lubridate::ymd_hms(c('2017-11-1T02:11:02.000','2018-3-25T02:13:02.000',
                                 '2019-3-14T03:45:02.000','2017-3-14T02:55:02.000',
                                 '2018-3-14T03:45:02.000'))
tibble(type = type, startdate = startdate) %>%
  count(type, hour = lubridate::hour(startdate)) %>%
  spread(type, n)
# A tibble: 2 x 3
   hour version1 version2
1     2        2        1
2     3        1        1
基本R解决方案:

# Base R: count the rows of each type within each hour of the day and return
# the result in wide form (one row per hour, one column per type).

# Label every row with its hour of day, e.g. "02:00:00".
df$hour <- format(df$startdate, "%H:00:00")

# Cross-tabulate hour by type. With an empty left-hand side xtabs() counts
# rows per cell. The earlier approach summed a running index per group
# (1 + 2 = 3 for a group of two rows) and therefore over-counted.
hour_type_counts <- xtabs(~ hour + type, data = df)

# Turn the contingency table into a plain data frame: the hour labels become
# a regular column and the row names are reset.
df <- data.frame(
  hour = rownames(hour_type_counts),
  as.data.frame.matrix(hour_type_counts),
  row.names = NULL
)
#提取小时并将其存储为向量:
基本 R 解决方案:

# Base R: count the rows of each type within each hour of the day and return
# the result in wide form (one row per hour, one column per type).

# Label every row with its hour of day, e.g. "02:00:00".
df$hour <- format(df$startdate, "%H:00:00")

# Cross-tabulate hour by type. With an empty left-hand side xtabs() counts
# rows per cell. The earlier approach summed a running index per group
# (1 + 2 = 3 for a group of two rows) and therefore over-counted.
hour_type_counts <- xtabs(~ hour + type, data = df)

# Turn the contingency table into a plain data frame: the hour labels become
# a regular column and the row names are reset.
df <- data.frame(
  hour = rownames(hour_type_counts),
  as.data.frame.matrix(hour_type_counts),
  row.names = NULL
)
#提取小时并将其存储为向量:

谢谢你的帮助!不过我遇到了一个小问题:调用 `pivot_wider()` 时报错“找不到函数 "pivot_wider"”。我已经安装了 `tidyr`,不确定哪里出了问题。再次感谢!
@苹果橙子 该函数只存在于较新版本的 `tidyr` 中,您需要使用 `install.packages("tidyr")` 重新安装。如果您使用的是旧版本,请改用 `spread`:
`df %>% mutate(hr = lubridate::hour(startdate)) %>% count(hr, type) %>% complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>% spread(type, n)`
感谢您的帮助!不过我遇到了一个小问题:调用 `pivot_wider()` 时报错“找不到函数 "pivot_wider"”。我已经安装了 `tidyr`,不确定哪里出了问题。再次感谢!
@苹果橙子 该函数只存在于较新版本的 `tidyr` 中,您需要使用 `install.packages("tidyr")` 重新安装。如果您使用的是旧版本,请改用 `spread`:
`df %>% mutate(hr = lubridate::hour(startdate)) %>% count(hr, type) %>% complete(type, hr = seq(0, max(hr)), fill = list(n = 0)) %>% spread(type, n)`