Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/date/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R:在R中创建一个新列,根据两个日期确定学期_R_Date - Fatal编程技术网

R:在R中创建一个新列,根据两个日期确定学期

R:在R中创建一个新列,根据两个日期确定学期,r,date,R,Date,我有一些数据。ID和日期,我正在尝试为学期创建一个新字段 df: 我还有一个学期表格: start end season_year 20120801 20121222 Fall-2012 20121223 20130123 Winter-2013 20130124 20130523 Spring-2013 20130524 20130805 Summer-2013 20130806 20131228 Fall

我有一些数据。ID和日期,我正在尝试为学期创建一个新字段

df:

我还有一个
学期
表格:

start       end         season_year
20120801    20121222    Fall-2012
20121223    20130123    Winter-2013
20130124    20130523    Spring-2013
20130524    20130805    Summer-2013
20130806    20131228    Fall-2013
20131229    20140122    Winter-2014
20140123    20140522    Spring-2014
20140523    20140804    Summer-2014
20140805    20141227    Fall-2014
20141228    20150128    Winter-2015
20150129    20150528    Spring-2015
20150529    20150803    Summer-2015
20150804    20151226    Fall-2015
20151227    20160127    Winter-2016
20160128    20160526    Spring-2016
20160527    20160801    Summer-2016
20160802    20161224    Fall-2016
20161225    20170125    Winter-2017
20170126    20170525    Spring-2017
20170526    20170807    Summer-2017
20170808    20171230    Fall-2017
20171231    20180124    Winter-2018
20180125    20180524    Spring-2018
20180525    20180806    Summer-2018
20180807    20181222    Fall-2018
20181223    20190123    Winter-2019
20190124    20190523    Spring-2019
20190524    20190804    Summer-2019
如果 df$date 介于 semester$start 和 semester$end 之间，我想在 df 中创建一个新字段，并把 semester$season_year 中对应的值放进去。

我试着看看lubridate软件包是否有帮助,但这似乎更适合于计算


我看到了,这似乎是最接近我想要的,但是,为了让事情变得更复杂,不是所有的学期都是六个月

一种使用 data.table（以及 lubridate）包、基于非等值（non-equi）更新联接的解决方案可以是：

library(data.table)

# Turn both data frames into data.tables in place (no copy is made).
setDT(semester)
setDT(df)

# Parse the yyyymmdd values into IDate, one column per statement.
df[, date := as.IDate(as.character(date), format = "%Y%m%d")]
semester[, start := as.IDate(as.character(start), format = "%Y%m%d")]
semester[, end := as.IDate(as.character(end), format = "%Y%m%d")]

# Non-equi update join: rows of df whose date lies within a semester's
# [start, end] range receive that semester's season_year.
df[
  semester,
  season_year := i.season_year,
  on = .(date >= start, date <= end)
]

df
#    id       date season_year
# 1:  1 2016-08-22   Fall-2016
# 2:  2 2017-01-09 Winter-2017
# 3:  3 2017-08-28   Fall-2017
# 4:  4 2017-09-25   Fall-2017
# 5:  5 2018-01-08 Winter-2018
# 6:  6 2018-04-02 Spring-2018
# 7:  7 2016-07-11 Summer-2016
# 8:  8 2015-08-31   Fall-2015
# 9:  9 2016-01-11 Winter-2016
# 10: 10 2016-05-02 Spring-2016
# 11: 11 2016-08-29   Fall-2016
# 12: 12 2017-01-09 Winter-2017
# 13: 13 2017-05-01 Spring-2017
library(data.table)
setDT(df)
setDT(semester)
df[, date := as.IDate(as.character(date), format = "%Y%m%d")]
semester[, ':='(start = as.IDate(as.character(start), format = "%Y%m%d"),
                end = as.IDate(as.character(end), format = "%Y%m%d"))]
df[semester, on = .(date >= start, date <= end), season_year := i.season_year]

这样可行吗？

library(lubridate)

# Parse the yyyymmdd values into Date objects.
semester$start <- ymd(semester$start)
semester$end <- ymd(semester$end)
df$date <- ymd(df$date)

# Build a day-level lookup: one entry per calendar day of every semester,
# stored as a plain day number. Ranges whose end precedes their start are
# skipped rather than being expanded backwards by `:`.
LU <- Map(
  function(from, to) if (from <= to) from:to else integer(0),
  as.numeric(semester$start),
  as.numeric(semester$end)
)
LU <- data.frame(value = unlist(LU),
                 index = rep(seq_along(LU), lengths(LU)))

# Compare day numbers with day numbers. Passing the Date vector directly to
# match() leaves the comparison to match()'s handling of classed objects,
# which is not guaranteed to line up with the numeric lookup values.
df$semester <- semester$season_year[
  LU$index[match(as.numeric(df$date), LU$value)]
]
库(lubridate)

学期$start看一看这个问题。虽然不太一样,但应该可以让你达到90%的目标:谢谢你的建议!我大约在20分钟前就开始了,在
df
中添加一个新字段，但它还没有运行完。你有什么办法可以加快速度吗？
@Walker 唯一的办法是不使用 lubridate，而是直接使用 data.table 自带的 as.IDate。希望有帮助。
@Walker 我已经更新了答案，感谢您指出。我已将答案改为使用 IDate。请告诉我们性能上的提升，因为这也会帮助未来的用户。
使用而不是
(日期>=开始,日期)
# Sample observations: one id per row, with the date stored as a yyyymmdd
# integer.
df <- read.table(text="
id  date
1   20160822
2   20170109
3   20170828
4   20170925
5   20180108
6   20180402
7   20160711
8   20150831
9   20160111
10  20160502
11  20160829
12  20170109
13  20170501",
header = TRUE, stringsAsFactors = FALSE)


# Semester calendar: inclusive start/end dates (yyyymmdd integers) and the
# label to assign. The Summer-2019 row originally ended on 20180804, i.e.
# before its own start; the year is corrected to 2019 here (the day is kept
# as given -- confirm against the real academic calendar).
semester <- read.table(text="
start       end         season_year
20120801    20121222    Fall-2012
20121223    20130123    Winter-2013
20130124    20130523    Spring-2013
20130524    20130805    Summer-2013
20130806    20131228    Fall-2013
20131229    20140122    Winter-2014
20140123    20140522    Spring-2014
20140523    20140804    Summer-2014
20140805    20141227    Fall-2014
20141228    20150128    Winter-2015
20150129    20150528    Spring-2015
20150529    20150803    Summer-2015
20150804    20151226    Fall-2015
20151227    20160127    Winter-2016
20160128    20160526    Spring-2016
20160527    20160801    Summer-2016
20160802    20161224    Fall-2016
20161225    20170125    Winter-2017
20170126    20170525    Spring-2017
20170526    20170807    Summer-2017
20170808    20171230    Fall-2017
20171231    20180124    Winter-2018
20180125    20180524    Spring-2018
20180525    20180806    Summer-2018
20180807    20181222    Fall-2018
20181223    20190123    Winter-2019
20190124    20190523    Spring-2019
20190524    20190804    Summer-2019",
header = TRUE, stringsAsFactors = FALSE)
library(lubridate)

# Parse the yyyymmdd values into Date objects.
semester$start <- ymd(semester$start)
semester$end <- ymd(semester$end)
df$date <- ymd(df$date)

# Build a day-level lookup: one entry per calendar day of every semester,
# stored as a plain day number. Ranges whose end precedes their start are
# skipped rather than being expanded backwards by `:`.
LU <- Map(
  function(from, to) if (from <= to) from:to else integer(0),
  as.numeric(semester$start),
  as.numeric(semester$end)
)
LU <- data.frame(value = unlist(LU),
                 index = rep(seq_along(LU), lengths(LU)))

# Compare day numbers with day numbers. Passing the Date vector directly to
# match() leaves the comparison to match()'s handling of classed objects,
# which is not guaranteed to line up with the numeric lookup values.
df$semester <- semester$season_year[
  LU$index[match(as.numeric(df$date), LU$value)]
]