R:在时间间隔内计算日期
假设我们有如下输入数据:
# Sample input: five events, each described by a start date and an end date.
# The date strings are parsed while the data frame is built, so `start` and
# `end` are Date columns from the beginning.
df.in <- data.frame(
  event = as.numeric(1:5),
  start = as.Date(
    c("2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "2015-01-03"),
    format = "%Y-%m-%d"
  ),
  end = as.Date(
    c("2015-01-03", "2015-01-04", "2015-01-03", "2015-01-05", "2015-01-05"),
    format = "%Y-%m-%d"
  )
)
> df.in
event start end
1 1 2015-01-01 2015-01-03
2 2 2015-01-01 2015-01-04
3 3 2015-01-02 2015-01-03
4 4 2015-01-02 2015-01-05
5 5 2015-01-03 2015-01-05
因此,我目前的想法是一个循环:
# For every date in df.out, count the events in df.in that have already
# started (start <= date) and have not yet ended (end > date).
for (idx in seq_along(df.out$date)) {
  day <- df.out$date[idx]
  # One logical mask per date; comparisons that evaluate to NA are not counted.
  is_active <- df.in$start <= day & df.in$end > day
  df.out$count[idx] <- sum(is_active, na.rm = TRUE)
}
> df.out
date count
1 2015-01-01 2
2 2015-01-02 4
3 2015-01-03 3
4 2015-01-04 2
5 2015-01-05 0
for(i in seq_along(df.out$date)){
temp.df <- …(此处的代码片段在原文中被截断)
所以我对 data.table::foverlaps() 做了研究。我将把我的发现留给任何可能会发现它有用的人,因为老实说,我在搜索类似帖子时并没有真正找到这些细节。
考虑到我们比较的是区间,而区间只存在于 y 参数(即本例中的 df.in)上,我们必须人为地为日期创建一个区间,例如 df.out$date2。
啊…所以我在搜索时又用错了关键字。显然,我要找的是“重叠”(overlaps)。谢谢!考虑到我接触 data.table 包才整整一个小时,以我目前的水平来看,这些内容似乎有些复杂。不过,我会拿你的例子做一点实验,看看结果如何,非常感谢!我已经对你的答案进行了编辑(如果你不介意的话),以说明我的评论。
#1 **
#2 ****
#3 ***
#4 **
#5
# Walk over the dates in df.out and store, for each one, the number of events
# in df.in whose start is on or before the date and whose end is after it.
for (pos in seq_along(df.out$date)) {
  cutoff <- df.out$date[pos]
  # which() keeps only positions where the condition is TRUE (NA is dropped).
  open_events <- which(df.in$start <= cutoff & df.in$end > cutoff)
  df.out$count[pos] <- length(open_events)
}
> df.out
date count
1 2015-01-01 2
2 2015-01-02 4
3 2015-01-03 3
4 2015-01-04 2
5 2015-01-05 0
# Count, for every date in df.out, the events in df.in that are active on that
# date, using data.table's interval overlap join instead of an explicit loop.
#
# library() stops with an error when data.table is not installed; require()
# would only return FALSE and let the script fail later at data.table().
library(data.table)

df.out <- data.table(
  date = as.Date(
    c("2015-01-01", "2015-01-02", "2015-01-03", "2015-01-04", "2015-01-05"),
    format = "%Y-%m-%d"
  ),
  count = 0
)

df.in <- data.table(
  event = c(1, 2, 3, 4, 5),
  start = as.Date(
    c("2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "2015-01-03"),
    format = "%Y-%m-%d"
  ),
  # The loop version does not count an event on its end date (it subtracts the
  # rows with end <= date). foverlaps() compares interval bounds inclusively,
  # so the end is moved back one day to get the same result.
  end = as.Date(
    c("2015-01-03", "2015-01-04", "2015-01-03", "2015-01-05", "2015-01-05"),
    format = "%Y-%m-%d"
  ) - 1
)

# The y table of foverlaps() must be keyed on its interval columns.
setkey(df.in, start, end)

# A single date is turned into the one-day interval [date, date2] so it can be
# matched against the event intervals.
df.out[, date2 := date]

df.test <- foverlaps(
  x = df.out, y = df.in, type = "within",
  by.x = c("date", "date2"), by.y = c("start", "end")
)

# Rows where a date matched an event carry a non-NA `event`; flag each of
# them with 1. Dates without any match keep the initial count of 0.
df.test[!is.na(event), count := 1]
aggregate(count ~ date, data = df.test, sum)
date count
1 2015-01-01 2
2 2015-01-02 4
3 2015-01-03 3
4 2015-01-04 2
5 2015-01-05 0
# Dates to report on: five consecutive days.
df.out <- data.table(
  date = seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by = "day")
)

# Events with their start and end dates. The end is moved back one day during
# construction so that an event is not counted on its end date.
df.in <- data.table(
  event = seq_len(5L),
  start = as.Date(
    c("2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "2015-01-03")
  ),
  end = as.Date(
    c("2015-01-03", "2015-01-04", "2015-01-03", "2015-01-05", "2015-01-05")
  ) - 1L
)

# Give every date a one-day interval [start, end] and key df.out on it, which
# is what foverlaps() needs for its y table.
df.out[, c("start", "end") := .(date, date)]
setkeyv(df.out, c("start", "end"))

# Join events to the dates they cover and count rows per date. Dates that no
# event covers do not appear in this result.
foverlaps(x = df.in, y = df.out)[, .(count = .N), by = .(date)]
# date count
# 1: 2015-01-01 2
# 2: 2015-01-02 4
# 3: 2015-01-03 3
# 4: 2015-01-04 2
# Number of matching events per row of df.out (yid is the row index in df.out).
res <- foverlaps(df.in, df.out, which = TRUE)[, .N, by = yid]
# Start every date at zero, then overwrite the rows that had matches, so dates
# without any event keep a Count of 0.
df.out[, Count := 0L][res$yid, Count := res$N]