Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/65.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/date/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
将向量“日期”合并到R的数据框中_R_Date_Join_Merge - Fatal编程技术网

将向量“日期”合并到R的数据框中

合并向量';日期';进入R中的数据帧,r,date,join,merge,R,Date,Join,Merge,我创建了2015-11-29至2020-09-05的日期向量,如下所示: newdates_startweek <- seq(as.Date('2015-11-29'),as.Date('2020-09-05'),by = 7) 原始数据集的代码: region id name date appointment A 1 clinic1 2015-11-29 1 A 1 cl

我创建了2015-11-29至2020-09-05的日期向量,如下所示:

# Weekly sequence of week-start dates, stepping 7 days at a time from
# 2015-11-29 and stopping at the last step that does not pass 2020-09-05.
newdates_startweek <- seq(
  from = as.Date("2015-11-29"),
  to = as.Date("2020-09-05"),
  by = 7
)
原始数据集的代码:

region   id      name        date       appointment
   A      1    clinic1    2015-11-29        1      
   A      1    clinic1    2015-12-08        1      
   A      1    clinic1    2020-08-17        1      
   A      1    clinic1    2020-08-19        1      
   A      1    clinic1    2020-09-03        1      
# Reproducible copy of the example data: five appointments, all belonging to
# the same region / id / clinic.
region <- rep("A", times = 5)
id <- rep(1, times = 5)
name <- rep("clinic1", times = 5)
date <- as.Date(
  c("2015-11-29", "2015-12-08", "2020-08-17", "2020-08-19", "2020-09-03")
)
appointment <- rep(1, times = 5)

# Column names are taken from the variable names.
df <- data.frame(region, id, name, date, appointment)
你知道一种快速的方法吗?

这个答案是:

> # Join on the character form of the dates; keep = TRUE (a logical, as the join documents) retains both key columns.
> df %>% mutate(date = as.character(date)) %>% right_join(as.data.frame(as.character(newdates_startweek)) %>% setNames('newdates_startweek'), by = c('date' ='newdates_startweek' ), keep = TRUE) 
    region id    name       date appointment newdates_startweek
1        A  1 clinic1 2015-11-29           1         2015-11-29
2     <NA> NA    <NA>       <NA>          NA         2015-12-06
3     <NA> NA    <NA>       <NA>          NA         2015-12-13
4     <NA> NA    <NA>       <NA>          NA         2015-12-20
5     <NA> NA    <NA>       <NA>          NA         2015-12-27
6     <NA> NA    <NA>       <NA>          NA         2016-01-03
..
..
> df %>% mutate(date = as.character(date)) %>% right_join(as.data.frame(as.character(newdates_startweek)) %>% setNames('newdates_startweek'), by = c('date' = 'newdates_startweek'), keep = TRUE)
    region id    name       date appointment newdates_startweek
1        A  1 clinic1 2015-11-29           1         2015-11-29
2     <NA> NA    <NA>       <NA>          NA         2015-12-06
3     <NA> NA    <NA>       <NA>          NA         2015-12-13
4     <NA> NA    <NA>       <NA>          NA         2015-12-20
5     <NA> NA    <NA>       <NA>          NA         2015-12-27
6     <NA> NA    <NA>       <NA>          NA         2016-01-03
..
..
也许可以试试

library(dplyr)
library(tidyr)
library(lubridate)

# Snap every appointment date back to the start of its week (week_start = 7
# is Sunday in lubridate), then add a zero-appointment row for each week in
# the observed range that has no record.
df %>%
  mutate(
    newdates_startweek = floor_date(date, unit = "week", week_start = 7)
  ) %>%
  complete(
    # nesting() expands only the region/id/name combinations that actually
    # occur in the data. Listing the three columns separately would cross every
    # region with every id and every name, which grows multiplicatively and can
    # exhaust memory on data with many clinics.
    nesting(region, id, name),
    newdates_startweek = full_seq(newdates_startweek, period = 7),
    fill = list(appointment = 0)
  )
输出

# A tibble: 250 x 6
   region    id name    newdates_startweek date       appointment
   <chr>  <dbl> <chr>   <date>             <date>           <dbl>
 1 A          1 clinic1 2015-11-29         2015-11-29           1
 2 A          1 clinic1 2015-12-06         2015-12-08           1
 3 A          1 clinic1 2015-12-13         NA                   0
 4 A          1 clinic1 2015-12-20         NA                   0
 5 A          1 clinic1 2015-12-27         NA                   0
 6 A          1 clinic1 2016-01-03         NA                   0
 7 A          1 clinic1 2016-01-10         NA                   0
 8 A          1 clinic1 2016-01-17         NA                   0
 9 A          1 clinic1 2016-01-24         NA                   0
10 A          1 clinic1 2016-01-31         NA                   0
# ... with 240 more rows
# A tibble: 250 x 6
   region    id name    newdates_startweek date       appointment
   <chr>  <dbl> <chr>   <date>             <date>           <dbl>
 1 A          1 clinic1 2015-11-29         2015-11-29           1
 2 A          1 clinic1 2015-12-06         2015-12-08           1
 3 A          1 clinic1 2015-12-13         NA                   0
 4 A          1 clinic1 2015-12-20         NA                   0
 5 A          1 clinic1 2015-12-27         NA                   0
 6 A          1 clinic1 2016-01-03         NA                   0
 7 A          1 clinic1 2016-01-10         NA                   0
 8 A          1 clinic1 2016-01-17         NA                   0
 9 A          1 clinic1 2016-01-24         NA                   0
10 A          1 clinic1 2016-01-31         NA                   0
# ... with 240 more rows

好的,所以我的解决方案有点冗长。我回收了一些我拥有的代码,并试图思考如果您想对多个集合(区域、id、名称)执行此操作,这将如何工作。这是一个
data.table
解决方案,但我已将输出转换回df

library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)

# a function in my tool kit :)
# Floor dates to the first day of their week, for a configurable week start.
#
# dates:         Date (or date-time) vector to floor.
# weekday_start: a single full weekday name, spelled the way
#                lubridate::wday(label = TRUE, abbr = FALSE) spells it in the
#                current locale, e.g. "Sunday" (the default).
# Returns `dates` floored to the most recent `weekday_start`.
# Stops with an error if `weekday_start` is not exactly one known weekday name.
getWeek <- function(dates, weekday_start = "Sunday") {
  # wday() labels 1..7 as Sunday..Saturday, whereas floor_date() documents
  # week_start as 1 = Monday .. 7 = Sunday; hence the shift by one below.
  day_names <- as.character(
    lubridate::wday(1:7, label = TRUE, abbr = FALSE)
  )
  day_index <- match(weekday_start, day_names)
  if (length(weekday_start) != 1L || is.na(day_index)) {
    stop(
      "`weekday_start` must be one of: ",
      paste(day_names, collapse = ", "),
      call. = FALSE
    )
  }
  # Sunday would shift to 0, which is outside the documented 1..7 range, so
  # map it explicitly to 7.
  week_start <- if (day_index == 1L) 7L else day_index - 1L
  lubridate::floor_date(dates, "week", week_start = week_start)
}

# Sanity check: flooring 2015-11-30 with the weekday of the first target week
# should step back to the Sunday before it.
week_start <- weekdays(newdates_startweek[1])
getWeek(as.Date("2015-11-30"), weekday_start = week_start)

# Tag every observation with the start of the week it falls in.
dt <- as.data.table(df)
dt[, week := getWeek(date, weekday_start = week_start)]

# Distinct region/id/name combinations present in the data.
id_sets <- unique(dt[, .(region, id, name)])

# Cross every target week with every id set (optiRum::CJ.dt is a cross join of
# two data.tables), then drop the week/id pairs that dt already covers so only
# the missing weeks remain.
expand_dt <- data.table(week = newdates_startweek) %>%
  optiRum::CJ.dt(id_sets) %>%
  anti_join(dt, by = c("region", "id", "name", "week")) %>%
  setDT()

# Stack observed and missing rows; fill = TRUE pads absent columns with NA.
out <- rbindlist(list(dt, expand_dt), fill = TRUE)

# Sort by week, then by date within the week, and hand back a data.frame.
setorderv(out, c("week", "date"))
out_df <- as.data.frame(out)
out_df
library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)
#我的工具包中的一个函数:)

getWeek
感谢您提出的方法。但我收到了以下错误消息:“错误:向量内存耗尽(达到限制?)”
这很奇怪,我无法在我的笔记本电脑上复现这个错误。也许您限制了R的可用内存?请检查 Sys.getenv("R_MAX_VSIZE")。
请查看一些诊断信息 @DanielaRodrigues
谢谢 @ekoam,我会检查一下。感谢您提出这个方法——它与第一个答案类似。
library(data.table)
library(optiRum)
library(lubridate)
library(dplyr)

# a function in my tool kit :)
# Floor dates to the first day of their week, for a configurable week start.
#
# dates:         Date (or date-time) vector to floor.
# weekday_start: a single full weekday name, spelled the way
#                lubridate::wday(label = TRUE, abbr = FALSE) spells it in the
#                current locale, e.g. "Sunday" (the default).
# Returns `dates` floored to the most recent `weekday_start`.
# Stops with an error if `weekday_start` is not exactly one known weekday name.
getWeek <- function(dates, weekday_start = "Sunday") {
  # wday() labels 1..7 as Sunday..Saturday, whereas floor_date() documents
  # week_start as 1 = Monday .. 7 = Sunday; hence the shift by one below.
  day_names <- as.character(
    lubridate::wday(1:7, label = TRUE, abbr = FALSE)
  )
  day_index <- match(weekday_start, day_names)
  if (length(weekday_start) != 1L || is.na(day_index)) {
    stop(
      "`weekday_start` must be one of: ",
      paste(day_names, collapse = ", "),
      call. = FALSE
    )
  }
  # Sunday would shift to 0, which is outside the documented 1..7 range, so
  # map it explicitly to 7.
  week_start <- if (day_index == 1L) 7L else day_index - 1L
  lubridate::floor_date(dates, "week", week_start = week_start)
}

# Sanity check: flooring 2015-11-30 with the weekday of the first target week
# should step back to the Sunday before it.
week_start <- weekdays(newdates_startweek[1])
getWeek(as.Date("2015-11-30"), weekday_start = week_start)

# Tag every observation with the start of the week it falls in.
dt <- as.data.table(df)
dt[, week := getWeek(date, weekday_start = week_start)]

# Distinct region/id/name combinations present in the data.
id_sets <- unique(dt[, .(region, id, name)])

# Cross every target week with every id set (optiRum::CJ.dt is a cross join of
# two data.tables), then drop the week/id pairs that dt already covers so only
# the missing weeks remain.
expand_dt <- data.table(week = newdates_startweek) %>%
  optiRum::CJ.dt(id_sets) %>%
  anti_join(dt, by = c("region", "id", "name", "week")) %>%
  setDT()

# Stack observed and missing rows; fill = TRUE pads absent columns with NA.
out <- rbindlist(list(dt, expand_dt), fill = TRUE)

# Sort by week, then by date within the week, and hand back a data.frame.
setorderv(out, c("week", "date"))
out_df <- as.data.frame(out)
out_df