将“日期”向量合并到R中的数据帧_R_Date_Join_Merge

将“日期”向量合并到R中的数据帧

r date join merge

将“日期”向量合并到R中的数据帧,r,date,join,merge,R,Date,Join,Merge,我创建了2015-11-29至2020-09-05的日期向量，如下所示： newdates_startweek <- seq(as.Date('2015-11-29'),as.Date('2020-09-05'),by = 7) 原始数据集的代码： region id name date appointment A 1 clinic1 2015-11-29 1 A 1 cl

我创建了2015-11-29至2020-09-05的日期向量，如下所示：

# Weekly sequence of week-start dates, beginning on 2015-11-29 and stepping
# seven days at a time up to (at most) 2020-09-05.
newdates_startweek <- seq(
  from = as.Date("2015-11-29"),
  to = as.Date("2020-09-05"),
  by = "week"
)

原始数据集的代码：

region   id      name        date       appointment
   A      1    clinic1    2015-11-29        1      
   A      1    clinic1    2015-12-08        1      
   A      1    clinic1    2020-08-17        1      
   A      1    clinic1    2020-08-19        1      
   A      1    clinic1    2020-09-03        1

# Sample data: five appointments, all for the same clinic in region "A".
region <- rep("A", times = 5)
id <- rep(1, times = 5)
name <- rep("clinic1", times = 5)
date <- as.Date(
  c("2015-11-29", "2015-12-08", "2020-08-17", "2020-08-19", "2020-09-03")
)
appointment <- rep(1, times = 5)

# One row per appointment; column names are spelled out explicitly.
df <- data.frame(
  region = region,
  id = id,
  name = name,
  date = date,
  appointment = appointment
)

你知道一种快速的方法吗？

这个答案是：

> # Join on the character form of the dates; `keep` must be a logical
> # (TRUE), not a number, to retain both key columns in the result.
> df %>%
+   mutate(date = as.character(date)) %>%
+   right_join(
+     data.frame(newdates_startweek = as.character(newdates_startweek)),
+     by = c("date" = "newdates_startweek"),
+     keep = TRUE
+   )
    region id    name       date appointment newdates_startweek
1        A  1 clinic1 2015-11-29           1         2015-11-29
2     <NA> NA    <NA>       <NA>          NA         2015-12-06
3     <NA> NA    <NA>       <NA>          NA         2015-12-13
4     <NA> NA    <NA>       <NA>          NA         2015-12-20
5     <NA> NA    <NA>       <NA>          NA         2015-12-27
6     <NA> NA    <NA>       <NA>          NA         2016-01-03
..
..

> df %>% mutate(date = as.character(date)) %>% right_join(as.data.frame(as.character(newdates_startweek)) %>% setNames('newdates_startweek'), by = c('date' ='newdates_startweek' ), keep = 1) 
    region id    name       date appointment newdates_startweek
1        A  1 clinic1 2015-11-29           1         2015-11-29
2     <NA> NA    <NA>       <NA>          NA         2015-12-06
3     <NA> NA    <NA>       <NA>          NA         2015-12-13
4     <NA> NA    <NA>       <NA>          NA         2015-12-20
5     <NA> NA    <NA>       <NA>          NA         2015-12-27
6     <NA> NA    <NA>       <NA>          NA         2016-01-03
..
..

也许可以试试

library(dplyr)
library(tidyr)
library(lubridate)

# Floor every appointment date to the Sunday that starts its week, then add a
# row for each missing week in the observed range. `nesting()` restricts the
# expansion to (region, id, name) combinations that actually occur in the data,
# so several clinics do not get crossed into combinations that never existed.
# Weeks without an appointment get `appointment = 0`; their `date` stays NA.
df %>%
  mutate(
    newdates_startweek = floor_date(date, unit = "week", week_start = 7)
  ) %>%
  complete(
    nesting(region, id, name),
    newdates_startweek = full_seq(newdates_startweek, period = 7),
    fill = list(appointment = 0)
  )

输出

# A tibble: 250 x 6
   region    id name    newdates_startweek date       appointment
   <chr>  <dbl> <chr>   <date>             <date>           <dbl>
 1 A          1 clinic1 2015-11-29         2015-11-29           1
 2 A          1 clinic1 2015-12-06         2015-12-08           1
 3 A          1 clinic1 2015-12-13         NA                   0
 4 A          1 clinic1 2015-12-20         NA                   0
 5 A          1 clinic1 2015-12-27         NA                   0
 6 A          1 clinic1 2016-01-03         NA                   0
 7 A          1 clinic1 2016-01-10         NA                   0
 8 A          1 clinic1 2016-01-17         NA                   0
 9 A          1 clinic1 2016-01-24         NA                   0
10 A          1 clinic1 2016-01-31         NA                   0
# ... with 240 more rows

# A tibble: 250 x 6
   region    id name    newdates_startweek date       appointment
 1 A          1 clinic1 2015-11-29         2015-11-29           1
 2 A          1 clinic1 2015-12-06         2015-12-08           1
 3 A          1 clinic1 2015-12-13         NA                   0
 4 A          1 clinic1 2015-12-20         NA                   0
 5 A          1 clinic1 2015-12-27         NA                   0
 6 A          1 clinic1 2016-01-03         NA                   0
 7 A          1 clinic1 2016-01-10         NA                   0
 8 A          1 clinic1 2016-01-17         NA                   0
 9 A          1 clinic1 2016-01-24         NA                   0
10 A          1 clinic1 2016-01-31         NA                   0
# ... 还有240行

好的，我的解决方案有点冗长。我复用了一些已有的代码，并考虑了如果您想对多个组合（region、id、name）执行此操作该如何处理。这是一个 data.table 解决方案，但我已将输出转换回数据帧（df）。

library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)

#' Floor dates to the first day of their week
#'
#' @param dates A vector of dates to floor.
#' @param weekday_start A single full weekday name on which weeks start,
#'   spelled as `lubridate::wday(label = TRUE, abbr = FALSE)` spells it
#'   (e.g. "Sunday"). Defaults to "Sunday".
#' @return `dates` floored to the most recent `weekday_start`.
getWeek <- function(dates, weekday_start = "Sunday") {
  # Full weekday labels for day numbers 1..7 (1 = Sunday ... 7 = Saturday).
  day_names <- as.character(
    lubridate::wday(1:7, label = TRUE, abbr = FALSE, week_start = 7)
  )
  day_index <- match(weekday_start, day_names)
  if (length(weekday_start) != 1L || is.na(day_index)) {
    stop(
      "`weekday_start` must be one of: ",
      paste(day_names, collapse = ", "),
      call. = FALSE
    )
  }
  # floor_date() numbers week starts 1 = Monday ... 7 = Sunday, so shift the
  # Sunday-based index by one day and wrap Sunday to 7 (a plain `- 1` would
  # produce 0, which is outside the documented 1-7 range).
  iso_week_start <- (day_index - 2L) %% 7L + 1L
  lubridate::floor_date(dates, "week", week_start = iso_week_start)
}

# Sanity check: the weekly series starts on a Sunday, so the Monday
# 2015-11-30 should be floored back to the preceding Sunday.
week_start <- weekdays(newdates_startweek[1])
getWeek(as.Date("2015-11-30"), week_start)

# Tag every appointment with the first day of the week it falls in.
dt <- as.data.table(df)
dt[, week := getWeek(date, week_start)]

# Distinct (region, id, name) sets found in the data.
id_sets <- as.data.table(unique(df[, c("region", "id", "name")]))

# Cross every week of the series with every id set (optiRum::CJ.dt), then keep
# only the week/id combinations that have no appointment in `dt` yet.
expand_dt <- setDT(
  anti_join(
    optiRum::CJ.dt(data.table(week = newdates_startweek), id_sets),
    dt,
    by = c("region", "id", "name", "week")
  )
)

# Stack real and placeholder rows (columns missing from the placeholders are
# filled with NA) and sort by week, then by appointment date.
out <- setorder(rbindlist(list(dt, expand_dt), fill = TRUE), week, date)

# Hand the result back as a plain data frame and print it.
out_df <- as.data.frame(out)
out_df

library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)
# 我的工具包中的一个函数 :)
getWeek ……（代码在此处被截断，完整版本见下文）

感谢您提出的方法。但我得到了以下错误消息：错误：向量内存耗尽（达到限制？）

这很奇怪。我无法在我的笔记本电脑上复现该错误。也许您限制了 R 的可用内存？请检查 Sys.getenv("R_MAX_VSIZE")

@DanielaRodrigues 请查看一些诊断信息。

谢谢 @ekoam，我会去检查一下。感谢您提出这个方法——它与第一个答案类似。

library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)

#' Floor dates to the first day of their week
#'
#' @param dates A vector of dates to floor.
#' @param weekday_start A single full weekday name on which weeks start,
#'   spelled as `lubridate::wday(label = TRUE, abbr = FALSE)` spells it
#'   (e.g. "Sunday"). Defaults to "Sunday".
#' @return `dates` floored to the most recent `weekday_start`.
getWeek <- function(dates, weekday_start = "Sunday") {
  # Full weekday labels for day numbers 1..7 (1 = Sunday ... 7 = Saturday).
  day_names <- as.character(
    lubridate::wday(1:7, label = TRUE, abbr = FALSE, week_start = 7)
  )
  day_index <- match(weekday_start, day_names)
  if (length(weekday_start) != 1L || is.na(day_index)) {
    stop(
      "`weekday_start` must be one of: ",
      paste(day_names, collapse = ", "),
      call. = FALSE
    )
  }
  # floor_date() numbers week starts 1 = Monday ... 7 = Sunday, so shift the
  # Sunday-based index by one day and wrap Sunday to 7 (a plain `- 1` would
  # produce 0, which is outside the documented 1-7 range).
  iso_week_start <- (day_index - 2L) %% 7L + 1L
  lubridate::floor_date(dates, "week", week_start = iso_week_start)
}

# Sanity check: the weekly series starts on a Sunday, so the Monday
# 2015-11-30 should be floored back to the preceding Sunday.
week_start <- weekdays(newdates_startweek[1])
getWeek(as.Date("2015-11-30"), week_start)

# Tag every appointment with the first day of the week it falls in.
dt <- as.data.table(df)
dt[, week := getWeek(date, week_start)]

# Distinct (region, id, name) sets found in the data.
id_sets <- as.data.table(unique(df[, c("region", "id", "name")]))

# Cross every week of the series with every id set (optiRum::CJ.dt), then keep
# only the week/id combinations that have no appointment in `dt` yet.
expand_dt <- setDT(
  anti_join(
    optiRum::CJ.dt(data.table(week = newdates_startweek), id_sets),
    dt,
    by = c("region", "id", "name", "week")
  )
)

# Stack real and placeholder rows (columns missing from the placeholders are
# filled with NA) and sort by week, then by appointment date.
out <- setorder(rbindlist(list(dt, expand_dt), fill = TRUE), week, date)

# Hand the result back as a plain data frame and print it.
out_df <- as.data.frame(out)
out_df