在另一个data.frame(data.table)中查找一个data.frame的对应日期
由于这是我在 Stack Overflow 上提出的第一个问题,希望它符合所有要求。下面是一个可复现的示例:两个 data.table(包含月度数据)和一个 data.table(提供相应日期)。首先,我想检查 dt_list1 和 dt_list2 中的日期是否落在 dt_Dates_ 中相应日期值的时间范围内(变量:in.period)。标签:r, date, data.table, comparison。问题在于,比较必须满足以下几个条件:
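(下文中 >…< 表示列名;"[…]" 表示原文在文本提取过程中丢失、无法恢复的取值。)
- 如果>Store<为1或2,则应检查>Date<是否在>Store<1或2的>Date<的时间范围内
- 如果>Store<为3,则应检查>Date<是否在>Region<C、>City<1的>Date<的时间范围内
- 如果>Region<为[…]且>Interviewer<为1,则应检查>Date<是否在>Region<[…]的>Date<的时间范围内
- 如果>Region<为[…]且>Interviewer<为2或3,则应检查>Date<是否在>Region<[…]、>City<0的>Date<的时间范围内
- 对于>Region<[…]中>City<为1的其余记录,应检查>Date<是否在对应>Date<的时间范围内
- 对于剩余的>Region<,应检查>Date<是否在该>Region<对应>Date<的时间范围内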
所需的包:data.table
示例数据 list1:
library(data.table)
# Simulated survey records for month 1: 500 rows with randomly drawn region,
# store, interviewer, sale flag and date (1-13 Jan 2020). No seed is set, so
# the sampled values differ between runs.
list1 <- data.table(
  Month = rep(1, 500),
  Region = sample(
    c("A", "B", "C"), 500,
    replace = TRUE, prob = c(0.25, 0.25, 0.5)
  ),
  Store = sample(1:7, 500, replace = TRUE),
  Interviewer = sample(1:9, 500, replace = TRUE),
  Sale = sample(
    c(TRUE, FALSE), 500,
    replace = TRUE, prob = c(0.25, 0.75)
  ),
  Date = sample(
    seq(as.Date("2020/01/01"), as.Date("2020/01/13"), by = "day"),
    500,
    replace = TRUE
  ),
  # Placeholder columns, all NA at construction time.
  Day = NA,
  # No trailing comma here: an empty final argument makes data.table() fail
  # with "argument ... is empty".
  Within.Period = NA
)
# Simulated survey records for month 2: 600 rows with randomly drawn region,
# store, interviewer, sale flag and date (2-14 Feb 2020). No seed is set, so
# the sampled values differ between runs.
list2 <- data.table(
  Month = rep(2, 600),
  Region = sample(
    c("A", "B", "C"), 600,
    replace = TRUE, prob = c(0.25, 0.25, 0.5)
  ),
  Store = sample(1:8, 600, replace = TRUE),
  Interviewer = sample(1:10, 600, replace = TRUE),
  # TRUE/FALSE spelled out: T and F are ordinary variables and can be
  # reassigned.
  Sale = sample(
    c(TRUE, FALSE), 600,
    replace = TRUE, prob = c(0.25, 0.75)
  ),
  Date = sample(
    seq(as.Date("2020/02/02"), as.Date("2020/02/14"), by = "day"),
    600,
    replace = TRUE
  ),
  # Placeholder columns, all NA at construction time.
  Day = NA,
  Within.Period = NA
)
# Sort each monthly table by region, store, interviewer, sale flag and date.
# Each table is reordered from its own rows. Indexing list1 with the ordering
# of list2 would replace list2 with month-1 data (plus NA rows, since list2 is
# longer than list1).
list1 <- list1[order(Region, Store, Interviewer, Sale, Date)]
list2 <- list2[order(Region, Store, Interviewer, Sale, Date)]
# Parse "dd.mm" strings into Date values for a fixed year. Without an explicit
# year, as.Date()/strptime() fill in the *current* year, so the reference dates
# would not line up with the 2020 dates used in the sampled tables.
# NA inputs stay NA.
parse_day_month <- function(day_month, year = 2020L) {
  as.Date(
    ifelse(is.na(day_month), NA_character_, paste0(day_month, ".", year)),
    format = "%d.%m.%Y"
  )
}

# Reference dates, one row per (Store | Region [+ City]) and Month.
# Rows 1-4 are keyed by Store (1, 2); rows 5-12 by Region (A, B, C), with
# Region C further split by City (0, 1). Sale.Date is the date used for sale
# records; Day.1 ... Day.5 are the other reference days (NA where unused).
dates <- data.table(
  Region = c(rep(NA, 4), "A", "A", "B", "B", "C", "C", "C", "C"),
  City = c(rep(NA, 8), 0, 0, 1, 1),
  Store = c(1, 1, 2, 2, rep(NA, 8)),
  Month = rep(c(1, 2), 6),
  Sale.Date = parse_day_month(c(
    "02.01", "03.02", "03.01", "04.02", "03.01", "04.02",
    "06.01", "07.02", "09.01", "10.02", "09.01", "10.02"
  )),
  Day.1 = parse_day_month(c(
    "01.01", "02.02", "02.01", "03.02", "02.01", "03.02",
    "05.01", "06.02", "08.01", "09.02", "08.01", "09.02"
  )),
  Day.2 = parse_day_month(c(
    rep(NA, 2), "03.01", "04.02", "03.01", "04.02",
    "06.01", "07.02", "09.01", "10.02", "09.01", "10.02"
  )),
  Day.3 = parse_day_month(c(
    rep(NA, 4), "04.01", "05.02",
    "07.01", "08.02", "10.01", "11.02", "10.01", "11.02"
  )),
  Day.4 = parse_day_month(c(
    rep(NA, 6),
    "08.01", "09.02", "11.01", "12.02", "11.01", "12.02"
  )),
  Day.5 = parse_day_month(c(rep(NA, 10), "12.01", "13.02"))
)
# Reshape `dates` from wide to long. The key columns are kept as identifiers;
# every remaining column is stacked into a name/value pair, with the
# originating column name in `Day` and its value in `Date`. Rows whose value
# is NA are dropped.
dates_melted <- melt(
  dates,
  id.vars = c("Region", "City", "Store", "Month", "Sale.Date"),
  variable.name = "Day",
  value.name = "Date",
  na.rm = TRUE
)
´´´
# Store 3 is checked against the reference dates of Region "C", City 1.
# Rows with a sale must match one of the Sale.Date values; rows without a sale
# must match one of the reference days (column `Date` of dates_melted).
# Uses the objects the script defines (list1, dates_melted) and their real
# column names (Date, Sale.Date); a non-existent column returns NULL from `$`,
# which makes every %in% test FALSE.
# Only Store-3 rows are updated (by reference), so values written for other
# stores are left untouched. %in% is used for the filters because `==`
# returns NA for the rows where Region/City is NA.
ref_region_c_city_1 <- dates_melted[Region %in% "C" & City %in% 1]
list1[
  Store == 3L,
  Within.Period := ifelse(
    Sale,
    Date %in% ref_region_c_city_1$Sale.Date,
    Date %in% ref_region_c_city_1$Date
  )
]
´´´
# Stores that have their own rows in the reference table (non-NA Store) are
# checked against those store-level dates. Rows with a sale must match one of
# the Sale.Date values; rows without a sale must match one of the reference
# days (column `Date` of dates_melted).
# Uses the objects the script defines (list1, dates_melted) and their real
# column names (Date, Sale.Date); a non-existent column returns NULL from `$`,
# which makes every %in% test FALSE.
# Only the matching stores' rows are updated (by reference), so values written
# for other stores are left untouched.
# NOTE(review): dates are pooled across all stores in the reference table, as
# in the original expression; confirm whether matching should be per store.
store_level_dates <- dates_melted[!is.na(Store)]
list1[
  Store %in% store_level_dates$Store,
  Within.Period := ifelse(
    Sale,
    Date %in% store_level_dates$Sale.Date,
    Date %in% store_level_dates$Date
  )
]
´´´