R 根据日期在组内拆分重叠行

R 根据日期在组内拆分重叠行,r,dplyr,data.table,iranges,R,Dplyr,Data.table,Iranges,我试图根据现有行的重叠时间段创建新行。例如,我想将此转换为: Customer_Product <- data.table(Customer=c("A01","A01","A01", "A02", "A02", "A02", "A03", "A03", "A03"), Product=c("Prod1","Prod2","Prod3","Prod1","Prod2","Prod3","Prod1","Prod2","Prod3"),

我试图根据现有行的重叠时间段创建新行。例如,我想将此转换为:

# Sample input: nine rows, three customers x three products, each row carrying
# a start and an end date stored as character strings.
Customer_Product <- data.table(
  Customer   = rep(c("A01", "A02", "A03"), each = 3L),
  Product    = rep(c("Prod1", "Prod2", "Prod3"), times = 3L),
  Start_Date = rep(c("1/1/2015", "3/1/2015", "4/1/2015"), times = 3L),
  End_Date   = c(
    "2/1/2015", "5/1/2015", "5/1/2015",  # A01
    "2/1/2015", "5/1/2015", "6/1/2015",  # A02
    "2/1/2015", "6/1/2015", "5/1/2015"   # A03
  )
)
变成这样:

# Desired result: eleven rows of non-overlapping periods per customer, each
# labelled with a single product or a "/"-separated combination of products.
Customer_Product_Combo <- data.table(
  Customer = rep(c("A01", "A02", "A03"), times = c(3L, 4L, 4L)),
  Product_or_Combination = c(
    "Prod1", "Prod2", "Prod2/Prod3",           # A01
    "Prod1", "Prod2", "Prod2/Prod3", "Prod3",  # A02
    "Prod1", "Prod2", "Prod2/Prod3", "Prod2"   # A03
  ),
  Start_Date = c(
    "1/1/2015", "3/1/2015", "4/1/2015",
    "1/1/2015", "3/1/2015", "4/1/2015", "5/1/2015",
    "1/1/2015", "3/1/2015", "4/1/2015", "5/1/2015"
  ),
  End_Date = c(
    "2/1/2015", "4/1/2015", "5/1/2015",
    "2/1/2015", "4/1/2015", "5/1/2015", "6/1/2015",
    "2/1/2015", "4/1/2015", "5/1/2015", "6/1/2015"
  )
)
我一直在研究IRanges,因为disjoin()似乎是一个可能的解决方案,但我看不到任何继承/合并“Prod”数据的方法

我还尝试过在 dplyr 中使用 lead/lag,再配合聚集/合并的循环来拼凑出一个方案;但需要注意的是,我的数据中可能出现两个以上“Prod”同时重叠的情况,这时逻辑就会变得很混乱。


有没有一个合理的方法来做到这一点?非常感谢您的帮助

我使用的是您发布的数据(转换为 `data.frame`),即下文的 `Customer_Product`。请看下面的代码:

评论:非常优雅的解决方案,运行完美!非常感谢!

# Expected output table: one row per customer and disjoint period, with the
# product (or "/"-joined product combination) attached to that period.
Customer_Product_Combo <- data.table(
  Customer = rep(c("A01", "A02", "A03"), times = c(3L, 4L, 4L)),
  Product_or_Combination = c(
    "Prod1", "Prod2", "Prod2/Prod3",
    "Prod1", "Prod2", "Prod2/Prod3", "Prod3",
    "Prod1", "Prod2", "Prod2/Prod3", "Prod2"
  ),
  Start_Date = c(
    "1/1/2015", "3/1/2015", "4/1/2015",
    "1/1/2015", "3/1/2015", "4/1/2015", "5/1/2015",
    "1/1/2015", "3/1/2015", "4/1/2015", "5/1/2015"
  ),
  End_Date = c(
    "2/1/2015", "4/1/2015", "5/1/2015",
    "2/1/2015", "4/1/2015", "5/1/2015", "6/1/2015",
    "2/1/2015", "4/1/2015", "5/1/2015", "6/1/2015"
  )
)
    Customer Product_or_Combination Start_Date End_Date
 1:      A01                  Prod1   1/1/2015 2/1/2015
 2:      A01                  Prod2   3/1/2015 4/1/2015
 3:      A01            Prod2/Prod3   4/1/2015 5/1/2015
 4:      A02                  Prod1   1/1/2015 2/1/2015
 5:      A02                  Prod2   3/1/2015 4/1/2015
 6:      A02            Prod2/Prod3   4/1/2015 5/1/2015
 7:      A02                  Prod3   5/1/2015 6/1/2015
 8:      A03                  Prod1   1/1/2015 2/1/2015
 9:      A03                  Prod2   3/1/2015 4/1/2015
10:      A03            Prod2/Prod3   4/1/2015 5/1/2015
11:      A03                  Prod2   5/1/2015 6/1/2015
# Sample input as a plain data.frame: nine rows (three customers x three
# products) with start and end dates supplied as character strings.
Customer_Product <- data.frame(
  Customer   = rep(c("A01", "A02", "A03"), each = 3L),
  Product    = rep(c("Prod1", "Prod2", "Prod3"), times = 3L),
  Start_Date = rep(c("1/1/2015", "3/1/2015", "4/1/2015"), times = 3L),
  End_Date   = c(
    "2/1/2015", "5/1/2015", "5/1/2015",  # A01
    "2/1/2015", "5/1/2015", "6/1/2015",  # A02
    "2/1/2015", "6/1/2015", "5/1/2015"   # A03
  )
)
library(tidyverse)
library(data.table)
library(lubridate)

# Split each customer's (possibly overlapping) product periods into disjoint
# periods, each labelled with the product(s) present on those days.
#
# Approach: expand every row into one row per calendar day, label each
# customer/day with the products found on it, then collapse consecutive days
# that share a label back into [Start_Date, End_Date] periods.
#
# Returns a tibble with columns Customer, Product, Start_Date, End_Date,
# ordered by Customer and Start_Date.
Customer_Product %>%
  # Parse every column whose name matches "Date"; dmy() reads day/month/year.
  mutate(across(matches("Date"), dmy)) %>%
  # For each row, list every day from Start_Date to End_Date (inclusive).
  mutate(
    day = map2(
      Start_Date, End_Date,
      function(from, to) seq(from, to, by = "day")
    )
  ) %>%
  # One row per original row and day; `cols` is required by tidyr >= 1.0.
  unnest(cols = day) %>%
  group_by(Customer, day) %>%
  # Label of the day: distinct products, sorted, so the same combination always
  # yields the same label regardless of input row order or duplicated rows.
  summarise(
    Product = paste0(sort(unique(as.character(Product))), collapse = "/"),
    .groups = "drop_last"
  ) %>%
  # Still grouped by Customer, rows ordered by day. A new run starts when the
  # label changes OR when the days are not consecutive; without the second
  # condition two separate periods with the same label would be merged across
  # the gap between them.
  mutate(
    id = rleid(Product, cumsum(c(TRUE, as.integer(diff(day)) > 1L)))
  ) %>%
  group_by(Customer, Product, id) %>%
  summarise(
    Start_Date = min(day),
    End_Date = max(day),
    .groups = "drop"
  ) %>%
  # The run id was only needed to keep repeated labels apart.
  select(-id) %>%
  arrange(Customer, Start_Date)


# # A tibble: 11 x 4
#   Customer Product     Start_Date End_Date  
#   <fct>    <chr>       <date>     <date>    
# 1 A01      Prod1       2015-01-01 2015-01-02
# 2 A01      Prod2       2015-01-03 2015-01-03
# 3 A01      Prod2/Prod3 2015-01-04 2015-01-05
# 4 A02      Prod1       2015-01-01 2015-01-02
# 5 A02      Prod2       2015-01-03 2015-01-03
# 6 A02      Prod2/Prod3 2015-01-04 2015-01-05
# 7 A02      Prod3       2015-01-06 2015-01-06
# 8 A03      Prod1       2015-01-01 2015-01-02
# 9 A03      Prod2       2015-01-03 2015-01-03
#10 A03      Prod2/Prod3 2015-01-04 2015-01-05
#11 A03      Prod2       2015-01-06 2015-01-06