R 将日期之间的两个数据帧连接在一起

R 将日期之间的两个数据帧连接在一起,r,R,我正在尝试使用fuzzyjoin或类似的方法将两个数据帧连接在一起 我想通过ID和date_EOM将df2连接到df1,但要求df1中的date_EOM介于date_EOM和date_EOM+3之间 也就是说,df1很长,它有更多的时间序列数据,我想把它和较短的df2连接起来,其中df1中的数据介于date_EOM和date_EOM_plus_3之间 以下操作不起作用: library(fuzzyjoin) df2 %>% fuzzy_left_join(d

我正在尝试使用fuzzyjoin或类似的方法将两个数据帧连接在一起

我想通过 `ID` 和 `date_EOM` 将 `df2` 连接到 `df1`,但要求 `df1` 中的 `date_EOM` 介于 `df2` 的 `date_EOM` 和 `date_EOM+3` 之间。

也就是说,`df1` 很长,它有更多的时间序列数据,我想把它和较短的 `df2` 连接起来,其中 `df1` 中的数据介于 `date_EOM` 和 `date_EOM_plus_3` 之间。

以下操作不起作用:

# The asker's attempt, reported in the question as not working: join df1 onto
# df2 by ID plus a date-range condition on date_EOM.
# NOTE(review): match_fun receives quoted strings ("=", ">=", "<=") rather
# than function objects, and "=" is not R's equality operator -- likely the
# cause of the failure; confirm against the fuzzyjoin documentation.
library(fuzzyjoin)
df2 %>%
  fuzzy_left_join(df1,
                  by = c("ID" = "ID",
                         "date_EOM" = "date_EOM",
                         "date_EOM" = "date_EOM_plus_3"),
                  match_fun = list("=", ">=", "<=")
                  )
library(fuzzyjoin)
df2 %>%
  fuzzy_left_join(df1,
                  by = c("ID" = "ID",
                         "date_EOM" = "date_EOM",
                         "date_EOM" = "date_EOM_plus_3"),
                  match_fun = list("=", ">=", "<="))

我不确定我是否理解最终数据帧应该是什么样子,但我觉得 lubridate 的 interval 函数应该能帮您实现目标。

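library("tidyverse")
library("lubridate")

df2_intervals <- interval(df2$date_EOM, df2$date_EOM_plus_3)

df1 %>%
  filter(date_EOM %within% as.list(df2_intervals))

简而言之,`%within%` 检查给定日期是否落在用上一个函数(interval)创建的任意一个时间间隔内。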


这只会筛选出 `df1` 中落在 `df2` 任意一个日期范围内的行。此时,您可以继续使用 `left_join()`(如果这是您的想法),或者直接用 `date_EOM %within% as.list(df2_intervals)` 创建一个虚拟变量,然后从这里开始。

您非常接近,但您的代码有三个问题:

  • match_fun 中的函数需要用反引号(`)括起来,而不是引号(" 或 ')
  • “等于”比较函数是 `==`,而不是 `=`
  • 要将 `df2` 连接到 `df1`,您需要 `df1 %>% fuzzy_left_join(df2, ...)`,而不是 `df2 %>% fuzzy_left_join(df1, ...)`。还有其他方法可以做到这一点,但为了简单起见,我们只需交换 `df1` 和 `df2` 的位置
以下代码似乎可以做到这一点:

    df1 %>%
      fuzzy_left_join(df2,
                      by = c("ID" = "ID",
                             "date_EOM" = "date_EOM",
                             "date_EOM" = "date_EOM_plus_3"),
                      match_fun = list(`==`, `>=`, `<=`))
    
    > head(df1)
        date_EOM       ID var_1
    1 2015-04-30 09627Y10 71577
    2 2015-05-31 09627Y10 64829
    3 2015-06-30 09627Y10 79008
    4 2015-07-31 09627Y10 24319
    5 2015-08-31 09627Y10 24271
    6 2015-09-30 09627Y10 38051
    
    
    > head(df2)
    # A tibble: 6 x 4
      ID       date_EOM   date_EOM_plus_3 var_2
      <chr>    <date>     <date>          <dbl>
    1 26864810 2008-02-29 2008-05-31          1
    2 26864810 2009-03-31 2009-06-30          2
    3 26864810 2009-02-28 2009-05-31          2
    4 26864810 2010-02-28 2010-05-31          1
    5 26864810 2011-02-28 2011-05-31          1
    6 26864810 2012-02-29 2012-05-31          1
    
    # Reproducible example data (dput output) for df1: a plain data.frame with
    # 186 rows and three columns -- date_EOM (Date, stored as day counts),
    # ID (character; two distinct IDs) and var_1 (numeric).
    df1 <- structure(list(date_EOM = structure(c(16555, 16586, 16616, 16647, 
    16678, 16708, 16739, 16769, 16800, 16831, 16860, 16891, 16921, 
    16952, 16982, 17013, 17044, 17074, 17105, 17135, 17166, 17197, 
    17225, 17256, 17286, 17317, 17347, 17378, 17409, 17439, 17470, 
    17500, 17531, 17562, 17590, 17621, 17651, 17682, 17712, 17743, 
    17774, 17804, 17835, 17865, 17896, 12814, 12842, 12873, 12903, 
    12934, 12964, 12995, 13026, 13056, 13087, 13117, 13148, 13179, 
    13207, 13238, 13268, 13299, 13329, 13360, 13391, 13421, 13452, 
    13482, 13513, 13544, 13572, 13603, 13633, 13664, 13694, 13725, 
    13756, 13786, 13817, 13847, 13878, 13909, 13938, 13969, 13999, 
    14030, 14060, 14091, 14122, 14152, 14183, 14213, 14244, 14275, 
    14303, 14334, 14364, 14395, 14425, 14456, 14487, 14517, 14548, 
    14578, 14609, 14640, 14668, 14699, 14729, 14760, 14790, 14821, 
    14852, 14882, 14913, 14943, 14974, 15005, 15033, 15064, 15094, 
    15125, 15155, 15186, 15217, 15247, 15278, 15308, 15339, 15370, 
    15399, 15430, 15460, 15491, 15521, 15552, 15583, 15613, 15644, 
    15674, 15705, 15736, 15764, 15795, 15825, 15856, 15886, 15917, 
    15948, 15978, 16009, 16039, 16070, 16101, 16129, 16160, 16190, 
    16221, 16251, 16282, 16313, 16343, 16374, 16404, 16435, 16466, 
    16494, 16525, 16555, 16586, 16616, 16647, 16678, 16708, 16739, 
    16769, 16800, 16831, 16860, 16891, 16921, 16952, 16982, 17013, 
    17044, 17074), class = "Date"), ID = c("09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", "09627Y10", 
    "09627Y10", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810"), var_1 = c(71577, 
    64829, 79008, 24319, 24271, 38051, 36962, 57471, 53909, 42452, 
    30679, 38091, 28095, 32294, 51117, 24724, 42720, 51312, 53133, 
    55767, 95558, 63798, 65024, 147838, 83441, 71575, 147199, 78138, 
    80006, 96524, 73523, 80160, 148519, 66447, 64899, 78689, 83721, 
    116659, 146079, 73399, 77594, 55159, 90624, 89813, 64276, 3201253, 
    2431312, 2597968, 2812961, 2246178, 2495002, 2685559, 2231979, 
    3082188, 3210950, 2604852, 2863003, 4617400, 3317902, 3815995, 
    2988183, 3389021, 5442709, 5431740, 4743099, 3515196, 4096597, 
    6025625, 5252737, 6420185, 5342544, 6022201, 5861288, 6890111, 
    6390106, 8151154, 11150273, 7440683, 11327526, 11461364, 5595098, 
    12380073, 7310007, 6750283, 6652174, 7212304, 5581204, 9771562, 
    4738422, 7909627, 9548136, 5429511, 4897759, 5417455, 5469542, 
    6537099, 6336852, 4924378, 5408494, 5935821, 4036994, 4251811, 
    5204948, 3745676, 4145843, 6015356, 3820903, 5008049, 4845117, 
    5729854, 5149140, 5955255, 5512172, 5449250, 6016798, 4259770, 
    3022433, 5331361, 4667700, 4916282, 3993569, 3727907, 4159248, 
    3186004, 7862443, 4557679, 5054754, 4148564, 4493250, 4980311, 
    3766246, 4152900, 3763739, 4553546, 4453020, 3865450, 3444880, 
    3029692, 4606733, 3513674, 3308547, 6820762, 3784315, 4498774, 
    5237598, 5125980, 4534635, 3831884, 2759388, 3046901, 5864084, 
    3768261, 5113238, 5457462, 4306425, 4536429, 4226480, 2695787, 
    2697229, 4304343, 2516059, 3771647, 3644023, 2166936, 2776204, 
    3069746, 2472952, 3897729, 3710804, 2530741, 2794476, 3500625, 
    3806155, 3020445, 6917279, 2540017, 2363408, 3227050, 2651100, 
    2046093, 2685440, 2559308, 2642814, 2834369, 3321310, 1695951
    )), row.names = c(NA, -186L), class = "data.frame")
    
    # Reproducible example data (dput output) for df2: a 12-row tibble with
    # ID (character), date_EOM and date_EOM_plus_3 (both Date, stored as day
    # counts) and var_2 (numeric). Each row gives one ID plus the
    # [date_EOM, date_EOM_plus_3] window used for the range join.
    df2 <- structure(list(ID = c("26864810", "26864810", "26864810", "26864810", 
    "26864810", "26864810", "26864810", "26864810", "26864810", "26864810", 
    "09627Y10", "09627Y10"), date_EOM = structure(c(13938, 14334, 
    14303, 14668, 15033, 15399, 15764, 16129, 16494, 16860, 17256, 
    17590), class = "Date"), date_EOM_plus_3 = structure(c(14030, 
    14425, 14395, 14760, 15125, 15491, 15856, 16221, 16586, 16952, 
    17347, 17682), class = "Date"), var_2 = c(1, 2, 2, 1, 1, 1, 3, 
    1, 4, 2, 3, 3)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
    -12L))