R 分组并创建每月日期序列

R 分组并创建每月日期序列,r,R,我有一些数据如下所示: cusip date start_date end_date 1 00036020 2011-01-31 2011-07-29 2012-06-30 2 00036020 2011-02-28 2011-07-29 2012-06-30 3 00036020 2011-03-31 2011-07-29 2012-06-30 4 00036020 2011-04-29 2011-07-29 2012-06-30 5 00036020 2011-05

我有一些数据如下所示:

     cusip       date start_date   end_date
1 00036020 2011-01-31 2011-07-29 2012-06-30
2 00036020 2011-02-28 2011-07-29 2012-06-30
3 00036020 2011-03-31 2011-07-29 2012-06-30
4 00036020 2011-04-29 2011-07-29 2012-06-30
5 00036020 2011-05-31 2011-07-29 2012-06-30
6 00036020 2011-06-30 2011-07-29 2012-06-30
我想根据 `id` 列(此处即 `cusip`)对数据进行分组,并计算 `start_date`(开始日期)与 `end_date`(结束日期)之间的月末日期;或者在 `start_date` 与 `end_date` 之间创建一个按月的日期序列,以便将 `date`(日期)列与之匹配。


我基本上想过滤分组后的数据,只保留日期位于开始日期和结束日期之间的行,只需执行 `filter(date >= start_date & date <= end_date)`。

如果我们需要为每个 `start_date` 及其对应的 `end_date` 创建一个日期序列,可以使用 `map2` 完成。这里不需要任何分组,因为它会针对每一对 `start_date`/`end_date` 分别生成序列。

library(dplyr) # mutate() comes from dplyr; attaching purrr alone is not enough
library(purrr)

# For every row, build the daily date sequence that runs from that row's
# start_date to its end_date. map2() walks the two columns in parallel, so no
# grouping is needed; the sequences are stored in the list-column `Seq`.
df %>%
  mutate(Seq = map2(start_date, end_date, seq, by = "1 day"))
更新 根据OP的评论

# Per cusip: tag every row with its position, then drop the leading rows that
# come before the first date lying inside its start_date..end_date window.
df %>%
  group_by(cusip) %>%
  mutate(rn = row_number()) %>%
  filter(cummax(start_date <= date & end_date >= date) > 0)
# A tibble: 102 x 5
# Groups:   cusip [1]
#   cusip    date       start_date end_date      rn
#   <chr>    <date>     <date>     <date>     <int>
# 1 00036020 2011-07-29 2011-07-29 2012-06-30     7
# 2 00036020 2011-08-31 2011-07-29 2012-06-30     8
# 3 00036020 2011-09-30 2011-07-29 2012-06-30     9
# 4 00036020 2011-10-31 2011-07-29 2012-06-30    10
# 5 00036020 2011-11-30 2011-07-29 2012-06-30    11
# 6 00036020 2011-12-30 2011-07-29 2012-06-30    12
# 7 00036020 2012-01-31 2012-07-31 2013-06-30    13
# 8 00036020 2012-02-29 2012-07-31 2013-06-30    14
# 9 00036020 2012-03-30 2012-07-31 2013-06-30    15
#10 00036020 2012-04-30 2012-07-31 2013-06-30    16
# … with 92 more rows
# Within each cusip, number the rows and keep everything from the first row
# whose date falls inside its [start_date, end_date] window onward.
df %>%
  group_by(cusip) %>%
  mutate(rn = row_number()) %>%
  # cummax() over the logical flag stays at 1 once a row is inside the window
  filter(cummax(date >= start_date & date <= end_date) > 0)
# A tibble: 102 x 5
# Groups:   cusip [1]
#   cusip    date       start_date end_date      rn
#   <chr>    <date>     <date>     <date>     <int>
# 1 00036020 2011-07-29 2011-07-29 2012-06-30     7
# 2 00036020 2011-08-31 2011-07-29 2012-06-30     8
# 3 00036020 2011-09-30 2011-07-29 2012-06-30     9
# 4 00036020 2011-10-31 2011-07-29 2012-06-30    10
# 5 00036020 2011-11-30 2011-07-29 2012-06-30    11
# 6 00036020 2011-12-30 2011-07-29 2012-06-30    12
# 7 00036020 2012-01-31 2012-07-31 2013-06-30    13
# 8 00036020 2012-02-29 2012-07-31 2013-06-30    14
# 9 00036020 2012-03-30 2012-07-31 2013-06-30    15
#10 00036020 2012-04-30 2012-07-31 2013-06-30    16
# … with 92 more rows

-检查前24行

您期望的输出是什么?不清楚为什么在分组时也需要 `start_date`、`end_date`。
`df %>% group_by(cusip) %>% filter(date >= start_date, date <= end_date)`
我检查了您前 24 行的日期,它给出的输出与您显示的相同。有什么我遗漏的吗?
我现在将在更大的样本上检查它。这与我想要的非常接近。是否可以在不使用嵌套的 `seq` 列的情况下创建序列?对于每个 `cusip`,我想要类似 `seq(from = df2$start_date, to = df2$end_date)` 的序列,这样我将得到 12 个月。
@user113156 它会为每一行生成序列。
我希望能够将 `date` 列与由 `start_date`、`end_date` 构建的每月序列值匹配起来。
好的,我不清楚预期的输出。在你的代码中,你使用了 `filter`,并且它是按照该逻辑工作的。
我添加了一个编辑,如果仍然不清楚,请告诉我,我将添加更多信息。
    # Reproducible sample data for the question (looks like dput() output).
    # Columns: cusip (character; the values "00036020" and "00036110"), plus
    # date, start_date and end_date, each stored as numeric day counts tagged
    # with class "Date". row.names declares 264 rows.
    df <- structure(list(cusip = c("00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036020", "00036020", "00036020", 
"00036020", "00036020", "00036020", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110", "00036110", "00036110", "00036110", 
"00036110", "00036110", "00036110"), date = structure(c(15005, 
15033, 15064, 15093, 15125, 15155, 15184, 15217, 15247, 15278, 
15308, 15338, 15370, 15399, 15429, 15460, 15491, 15520, 15552, 
15583, 15611, 15644, 15674, 15705, 15736, 15764, 15792, 15825, 
15856, 15884, 15917, 15947, 15978, 16009, 16038, 16070, 16101, 
16129, 16160, 16190, 16220, 16251, 16282, 16311, 16343, 16374, 
16402, 16435, 16465, 16493, 16525, 16555, 16584, 16616, 16647, 
16678, 16708, 16738, 16769, 16800, 16829, 16829, 16860, 16860, 
16891, 16891, 16920, 16920, 16952, 16952, 16982, 16982, 17011, 
17011, 17044, 17044, 17074, 17074, 17105, 17105, 17135, 17135, 
17165, 17165, 17197, 17225, 17256, 17284, 17317, 17347, 17378, 
17409, 17438, 17470, 17500, 17529, 17562, 17590, 17619, 17651, 
17682, 17711, 17743, 17774, 17802, 17835, 17865, 17896, 12814, 
12842, 12873, 12902, 12934, 12964, 12993, 13026, 13056, 13087, 
13117, 13147, 13179, 13207, 13238, 13266, 13299, 13329, 13360, 
13391, 13420, 13452, 13482, 13511, 13544, 13572, 13602, 13633, 
13664, 13693, 13725, 13756, 13784, 13817, 13847, 13878, 13909, 
13938, 13969, 13999, 14029, 14060, 14091, 14120, 14152, 14183, 
14211, 14244, 14274, 14302, 14334, 14364, 14393, 14425, 14456, 
14487, 14517, 14547, 14578, 14609, 14638, 14666, 14699, 14729, 
14757, 14790, 14820, 14852, 14882, 14911, 14943, 14974, 15005, 
15033, 15064, 15093, 15125, 15155, 15184, 15217, 15247, 15278, 
15308, 15338, 15370, 15399, 15429, 15460, 15491, 15520, 15552, 
15583, 15611, 15644, 15674, 15705, 15736, 15764, 15792, 15825, 
15856, 15884, 15917, 15947, 15978, 16009, 16038, 16070, 16101, 
16129, 16160, 16190, 16220, 16251, 16282, 16311, 16343, 16374, 
16402, 16435, 16465, 16493, 16525, 16555, 16584, 16616, 16647, 
16678, 16708, 16738, 16769, 16800, 16829, 16860, 16891, 16920, 
16952, 16982, 17011, 17044, 17074, 17105, 17135, 17165, 17197, 
17225, 17256, 17284, 17317, 17347, 17378, 17409, 17438, 17470, 
17500, 17529), class = "Date"), start_date = structure(c(15184, 
15184, 15184, 15184, 15184, 15184, 15184, 15184, 15184, 15184, 
15184, 15184, 15552, 15552, 15552, 15552, 15552, 15552, 15552, 
15552, 15552, 15552, 15552, 15552, 15917, 15917, 15917, 15917, 
15917, 15917, 15917, 15917, 15917, 15917, 15917, 15917, 16282, 
16282, 16282, 16282, 16282, 16282, 16282, 16282, 16282, 16282, 
16282, 16282, 16647, 16647, 16647, 16647, 16647, 16647, 16647, 
16647, 16647, 16647, 16647, 16647, 17011, 17011, 17011, 17011, 
17011, 17011, 17011, 17011, 17011, 17011, 17011, 17011, 17011, 
17011, 17011, 17011, 17011, 17011, 17011, 17011, 17011, 17011, 
17011, 17011, 17378, 17378, 17378, 17378, 17378, 17378, 17378, 
17378, 17378, 17378, 17378, 17378, 17743, 17743, 17743, 17743, 
17743, 17743, 17743, 17743, 17743, 17743, 17743, 17743, 13360, 
13360, 13360, 13360, 13360, 13360, 13360, 13360, 13360, 13360, 
13360, 13360, 13725, 13725, 13725, 13725, 13725, 13725, 13725, 
13725, 13725, 13725, 13725, 13725, 14091, 14091, 14091, 14091, 
14091, 14091, 14091, 14091, 14091, 14091, 14091, 14091, 14456, 
14456, 14456, 14456, 14456, 14456, 14456, 14456, 14456, 14456, 
14456, 14456, 14820, 14820, 14820, 14820, 14820, 14820, 14820, 
14820, 14820, 14820, 14820, 14820, 15184, 15184, 15184, 15184, 
15184, 15184, 15184, 15184, 15184, 15184, 15184, 15184, 15552, 
15552, 15552, 15552, 15552, 15552, 15552, 15552, 15552, 15552, 
15552, 15552, 15917, 15917, 15917, 15917, 15917, 15917, 15917, 
15917, 15917, 15917, 15917, 15917, 16282, 16282, 16282, 16282, 
16282, 16282, 16282, 16282, 16282, 16282, 16282, 16282, 16647, 
16647, 16647, 16647, 16647, 16647, 16647, 16647, 16647, 16647, 
16647, 16647, 17011, 17011, 17011, 17011, 17011, 17011, 17011, 
17011, 17011, 17011, 17011, 17011, 17378, 17378, 17378, 17378, 
17378, 17378, 17378, 17378, 17378, 17378, 17378, 17378, 17743, 
17743, 17743, 17743, 17743, 17743, 17743, 17743, 17743, 17743, 
17743, 17743), class = "Date"), end_date = structure(c(15521, 
15521, 15521, 15521, 15521, 15521, 15521, 15521, 15521, 15521, 
15521, 15521, 15886, 15886, 15886, 15886, 15886, 15886, 15886, 
15886, 15886, 15886, 15886, 15886, 16251, 16251, 16251, 16251, 
16251, 16251, 16251, 16251, 16251, 16251, 16251, 16251, 16616, 
16616, 16616, 16616, 16616, 16616, 16616, 16616, 16616, 16616, 
16616, 16616, 16982, 16982, 16982, 16982, 16982, 16982, 16982, 
16982, 16982, 16982, 16982, 16982, 17347, 17347, 17347, 17347, 
17347, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 
17347, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 
17347, 17347, 17712, 17712, 17712, 17712, 17712, 17712, 17712, 
17712, 17712, 17712, 17712, 17712, 18077, 18077, 18077, 18077, 
18077, 18077, 18077, 18077, 18077, 18077, 18077, 18077, 13694, 
13694, 13694, 13694, 13694, 13694, 13694, 13694, 13694, 13694, 
13694, 13694, 14060, 14060, 14060, 14060, 14060, 14060, 14060, 
14060, 14060, 14060, 14060, 14060, 14425, 14425, 14425, 14425, 
14425, 14425, 14425, 14425, 14425, 14425, 14425, 14425, 14790, 
14790, 14790, 14790, 14790, 14790, 14790, 14790, 14790, 14790, 
14790, 14790, 15155, 15155, 15155, 15155, 15155, 15155, 15155, 
15155, 15155, 15155, 15155, 15155, 15521, 15521, 15521, 15521, 
15521, 15521, 15521, 15521, 15521, 15521, 15521, 15521, 15886, 
15886, 15886, 15886, 15886, 15886, 15886, 15886, 15886, 15886, 
15886, 15886, 16251, 16251, 16251, 16251, 16251, 16251, 16251, 
16251, 16251, 16251, 16251, 16251, 16616, 16616, 16616, 16616, 
16616, 16616, 16616, 16616, 16616, 16616, 16616, 16616, 16982, 
16982, 16982, 16982, 16982, 16982, 16982, 16982, 16982, 16982, 
16982, 16982, 17347, 17347, 17347, 17347, 17347, 17347, 17347, 
17347, 17347, 17347, 17347, 17347, 17712, 17712, 17712, 17712, 
17712, 17712, 17712, 17712, 17712, 17712, 17712, 17712, 18077, 
18077, 18077, 18077, 18077, 18077, 18077, 18077, 18077, 18077, 
18077, 18077), class = "Date")), row.names = c(NA, -264L), class = "data.frame")
          cusip       date start_date   end_date
   ** 1  00036020 2011-01-31 2011-07-29 2012-06-30
   ** 2  00036020 2011-02-28 2011-07-29 2012-06-30
   ** 3  00036020 2011-03-31 2011-07-29 2012-06-30
   ** 4  00036020 2011-04-29 2011-07-29 2012-06-30
   ** 5  00036020 2011-05-31 2011-07-29 2012-06-30
   ** 6  00036020 2011-06-30 2011-07-29 2012-06-30
    7    00036020 2011-07-29 2011-07-29 2012-06-30
    8    00036020 2011-08-31 2011-07-29 2012-06-30
    9    00036020 2011-09-30 2011-07-29 2012-06-30
    10   00036020 2011-10-31 2011-07-29 2012-06-30
    11   00036020 2011-11-30 2011-07-29 2012-06-30
    12   00036020 2011-12-30 2011-07-29 2012-06-30
    13   00036020 2012-01-31 2012-07-31 2013-06-30
    14   00036020 2012-02-29 2012-07-31 2013-06-30
    15   00036020 2012-03-30 2012-07-31 2013-06-30
    16   00036020 2012-04-30 2012-07-31 2013-06-30
    17   00036020 2012-05-31 2012-07-31 2013-06-30
    18   00036020 2012-06-29 2012-07-31 2013-06-30
    19   00036020 2012-07-31 2012-07-31 2013-06-30
    20   00036020 2012-08-31 2012-07-31 2013-06-30
    21   00036020 2012-09-28 2012-07-31 2013-06-30
    22   00036020 2012-10-31 2012-07-31 2013-06-30
    23   00036020 2012-11-30 2012-07-31 2013-06-30
    24   00036020 2012-12-31 2012-07-31 2013-06-30
library(dplyr) # mutate() lives in dplyr; attaching purrr alone is not enough
library(purrr)

# One daily date sequence per row, from start_date to end_date, kept in the
# list-column `Seq`. map2() pairs the two columns element by element, so no
# group_by() is needed.
df %>%
  mutate(Seq = map2(start_date, end_date, seq, by = "1 day"))
# Number the rows within each cusip and keep, per group, every row from the
# first in-window date (start_date <= date <= end_date) to the end of the group.
df %>%
  group_by(cusip) %>%
  mutate(rn = row_number()) %>%
  filter(
    cummax(date >= start_date & date <= end_date) >= 1L
  )
# A tibble: 102 x 5
# Groups:   cusip [1]
#   cusip    date       start_date end_date      rn
#   <chr>    <date>     <date>     <date>     <int>
# 1 00036020 2011-07-29 2011-07-29 2012-06-30     7
# 2 00036020 2011-08-31 2011-07-29 2012-06-30     8
# 3 00036020 2011-09-30 2011-07-29 2012-06-30     9
# 4 00036020 2011-10-31 2011-07-29 2012-06-30    10
# 5 00036020 2011-11-30 2011-07-29 2012-06-30    11
# 6 00036020 2011-12-30 2011-07-29 2012-06-30    12
# 7 00036020 2012-01-31 2012-07-31 2013-06-30    13
# 8 00036020 2012-02-29 2012-07-31 2013-06-30    14
# 9 00036020 2012-03-30 2012-07-31 2013-06-30    15
#10 00036020 2012-04-30 2012-07-31 2013-06-30    16
# … with 92 more rows