R 合并具有连续时间间隔的数据行_R_Merge_Gaps And Islands

R 合并具有连续时间间隔的数据行

r merge

R 合并具有连续时间间隔的数据行,r,merge,gaps-and-islands,R,Merge,Gaps And Islands,我有一个带有Start.Date和Stop.Date的患者药物数据集。每一个都表示为一行。我希望合并时间间隔连续的行，如下所示： ID = c(2, 2, 2, 2, 3, 5) Medication = c("aspirin", "aspirin", "aspirin", "tylenol", "lipitor", "advil") Start.Date = c("05/01/2017", "05/05/2017", "06/20/2017", "05/01/2017", "05/06/2

我有一个带有Start.Date和Stop.Date的患者药物数据集。每一个都表示为一行。我希望合并时间间隔连续的行，如下所示：

# Sample medication records: one row per patient ID, drug and date range.
# The dates are stored as "mm/dd/YYYY" strings.
ID <- c(rep(2, 4), 3, 5)
Medication <- c(rep("aspirin", 3), "tylenol", "lipitor", "advil")
Start.Date <- c(
  "05/01/2017", "05/05/2017", "06/20/2017",
  "05/01/2017", "05/06/2017", "05/28/2017"
)
Stop.Date <- c(
  "05/04/2017", "05/10/2017", "06/27/2017",
  "05/15/2017", "05/12/2017", "06/13/2017"
)
# Column names are spelled out; they equal the variable names used above.
df <- data.frame(
  ID = ID,
  Medication = Medication,
  Start.Date = Start.Date,
  Stop.Date = Stop.Date
)


  ID Medication Start.Date  Stop.Date
   2    aspirin 05/01/2017 05/04/2017
   2    aspirin 05/05/2017 05/10/2017
   2    aspirin 06/20/2017 06/27/2017
   2    tylenol 05/01/2017 05/15/2017
   3    lipitor 05/06/2017 05/12/2017
   5      advil 05/28/2017 06/13/2017

如果停止日期是下一个开始日期的前一天，我希望按ID和药物减少行数。它应该如下所示：

  ID Medication Start.Date  Stop.Date
   2    aspirin 05/01/2017 05/10/2017
   2    aspirin 06/20/2017 06/27/2017
   2    tylenol 05/01/2017 05/15/2017
   3    lipitor 05/06/2017 05/12/2017
   5      advil 05/28/2017 06/13/2017

使用 lubridate 的 `mdy` 将 Start.Date 和 Stop.Date 列转换为 Date 类，按 ID 和 Medication 分组，然后根据每行的 Stop.Date 与下一行 Start.Date（`lead`）之差的绝对值是否等于 1 天来判断相邻区间是否连续，并据此筛选行。

library(dplyr)
library(lubridate)
# Merge contiguous medication intervals per patient and drug (gaps and
# islands). Filtering on the lead/lag difference alone only drops rows and
# cannot carry the earliest Start.Date of a run forward, so every run
# ("island") is labelled first and then collapsed to a single row.
df %>%
  # Name the date columns instead of using positions 3:4, which silently pick
  # the wrong columns as soon as the column order changes.
  mutate_at(vars(Start.Date, Stop.Date), mdy) %>%
  # lag() below compares neighbouring rows, so they must be in date order.
  arrange(ID, Medication, Start.Date) %>%
  group_by(ID, Medication) %>%
  mutate(
    # Days between the previous Stop.Date and this Start.Date; NA on the
    # first row of each group because there is no previous row.
    gap = as.numeric(Start.Date - lag(Stop.Date)),
    # A new island starts on the first row of a group or whenever the row
    # does not begin exactly one day after the previous one ended.
    island = cumsum(is.na(gap) | gap != 1)
  ) %>%
  group_by(ID, Medication, island) %>%
  # Give every row of an island the island's full extent, then keep one row.
  mutate(Start.Date = min(Start.Date), Stop.Date = max(Stop.Date)) %>%
  slice(1) %>%
  ungroup() %>%
  select(-gap, -island)
# Expected result for the 6-row sample data (the two contiguous aspirin rows
# are merged into 2017-05-01 .. 2017-05-10):
# A tibble: 5 x 4
#     ID Medication Start.Date Stop.Date 
#  <dbl> <fct>      <date>     <date>    
#1     2 aspirin    2017-05-01 2017-05-10
#2     2 aspirin    2017-06-20 2017-06-27
#3     2 tylenol    2017-05-01 2017-05-15
#4     3 lipitor    2017-05-06 2017-05-12
#5     5 advil      2017-05-28 2017-06-13

注意：我们可以像以前一样首先将日期列转换为日期类

注2：这两种方法都是基于OP提供的示例的简单方法

# Merge rows whose date ranges are contiguous (the next range starts one day
# after the previous one stops) within each ID / Medication pair.
df %>%
    # Parse the "mm/dd/YYYY" columns into Date so they can be subtracted.
    mutate_at(vars(ends_with("Date")), function(x) as.Date(x, format = "%m/%d/%Y")) %>%
    # The lag() comparison below needs chronological order inside each group.
    arrange(ID, Medication, Start.Date) %>%
    group_by(ID, Medication) %>%
    mutate(
        # TRUE when this row begins exactly one day after the previous row
        # ended; NA on the first row of a group (there is no previous row).
        isConsecutive = as.numeric(Start.Date - lag(Stop.Date)) == 1,
        # Start a new run on every row that does not continue the previous
        # one. Counting the breaks instead of the continuations keeps two
        # neighbouring stand-alone rows in separate runs and never yields NA.
        grp = cumsum(is.na(isConsecutive) | !isConsecutive)) %>%
    group_by(ID, Medication, grp) %>%
    # Spread the run's full extent to all of its rows, then keep the first.
    mutate(Start.Date = min(Start.Date), Stop.Date = max(Stop.Date)) %>%
    slice(1) %>%
    ungroup() %>%
    select(-isConsecutive, -grp)
## A tibble: 5 x 4
#     ID Medication Start.Date Stop.Date
#  <dbl> <fct>      <date>     <date>
#1    2. aspirin    2017-05-01 2017-05-10
#2    2. aspirin    2017-06-20 2017-06-27
#3    2. tylenol    2017-05-01 2017-05-15
#4    3. lipitor    2017-05-06 2017-05-12
#5    5. advil      2017-05-28 2017-06-13

结果似乎很可靠

谢谢你，这正是我需要的！例如，如果我想处理 1 天到小于 5 天的间隔，该如何修改此脚本？@gfa2001 这是一个完全不同的问题，您应该另开一个问题提问。谢谢，这很有效，但我通常不喜欢指定列号，因为列号可能会因数据集而异。@gfa2001 只需使用 `df %>% mutate_at(vars(contains("Date")), mdy)` ……等等。即使使用列名本身也不能解决问题。您给出的解决方案不能解决问题。那么，您应该查看输出（edit）。@Onyambu 它基于 OP 的输入数据，请再次检查输出。这不是你提供的

library(tidyverse)
library(lubridate)
# Merge contiguous medication intervals. Runs are identified per patient AND
# drug (grouping by Medication alone would merge different patients), and a
# running island counter is used so that runs of any length collapse into one
# row, not just pairs.
df %>%
  # Select the date columns by name rather than by position.
  mutate_at(vars(Start.Date, Stop.Date), mdy) %>%
  # Neighbouring rows are compared with lag(), so sort them chronologically.
  arrange(ID, Medication, Start.Date) %>%
  group_by(ID, Medication) %>%
  # s counts the islands: it increases on the first row of a group (the lag is
  # NA, hence the coalesce to TRUE) and on every row that does not start
  # exactly one day after the previous Stop.Date.
  mutate(
    s = cumsum(coalesce(as.numeric(Start.Date - lag(Stop.Date)) != 1, TRUE))
  ) %>%
  group_by(ID, Medication, s) %>%
  mutate(Start.Date = min(Start.Date), Stop.Date = max(Stop.Date)) %>%
  # Keep the last row of each island; it now carries the island's full range.
  filter(row_number() == n()) %>%
  ungroup() %>%
  select(-s)

# Expected result for the 6-row sample data:
# A tibble: 5 x 4
#      ID Medication Start.Date Stop.Date 
#   <dbl> <chr>      <date>     <date>    
# 1     2 aspirin    2017-05-01 2017-05-10
# 2     2 aspirin    2017-06-20 2017-06-27
# 3     2 tylenol    2017-05-01 2017-05-15
# 4     3 lipitor    2017-05-06 2017-05-12
# 5     5 advil      2017-05-28 2017-06-13

# Extended 8-row sample: adds a fourth aspirin row and a second advil row.
# All three character-like columns are factors with explicitly listed levels.
# NOTE(review): the fourth aspirin row stops (04/30/2017) before it starts
# (06/28/2017); kept exactly as given.
df <- data.frame(
  ID = c(2, 2, 2, 2, 2, 3, 5, 5),
  Medication = factor(
    c("aspirin", "aspirin", "aspirin", "aspirin",
      "tylenol", "lipitor", "advil", "advil"),
    levels = c("advil", "aspirin", "lipitor", "tylenol")
  ),
  Start.Date = factor(
    c("05/01/2017", "05/05/2017", "06/20/2017", "06/28/2017",
      "05/01/2017", "05/06/2017", "05/28/2017", "06/14/2017"),
    levels = c("05/01/2017", "05/05/2017", "05/06/2017", "05/28/2017",
               "06/14/2017", "06/20/2017", "06/28/2017")
  ),
  Stop.Date = factor(
    c("05/04/2017", "05/10/2017", "06/27/2017", "04/30/2017",
      "05/15/2017", "05/12/2017", "06/13/2017", "06/20/2017"),
    levels = c("04/30/2017", "05/04/2017", "05/10/2017", "05/12/2017",
               "05/15/2017", "06/13/2017", "06/20/2017", "06/27/2017")
  )
)
# Print the data frame.
df
#    ID Medication Start.Date  Stop.Date
#1  2    aspirin 05/01/2017 05/04/2017
#2  2    aspirin 05/05/2017 05/10/2017
#3  2    aspirin 06/20/2017 06/27/2017
#4  2    aspirin 06/28/2017 04/30/2017
#5  2    tylenol 05/01/2017 05/15/2017
#6  3    lipitor 05/06/2017 05/12/2017
#7  5      advil 05/28/2017 06/13/2017
#8  5      advil 06/14/2017 06/20/2017

# Merge rows whose date ranges are contiguous (the next range starts one day
# after the previous one stops) within each ID / Medication pair.
df %>%
    # Parse the "mm/dd/YYYY" columns into Date so they can be subtracted.
    mutate_at(vars(ends_with("Date")), function(x) as.Date(x, format = "%m/%d/%Y")) %>%
    # The lag() comparison below needs chronological order inside each group.
    arrange(ID, Medication, Start.Date) %>%
    group_by(ID, Medication) %>%
    mutate(
        # TRUE when this row begins exactly one day after the previous row
        # ended; NA on the first row of a group (there is no previous row).
        isConsecutive = as.numeric(Start.Date - lag(Stop.Date)) == 1,
        # Start a new run on every row that does not continue the previous
        # one. Counting the breaks instead of the continuations keeps two
        # neighbouring stand-alone rows in separate runs and never yields NA.
        grp = cumsum(is.na(isConsecutive) | !isConsecutive)) %>%
    group_by(ID, Medication, grp) %>%
    # Spread the run's full extent to all of its rows, then keep the first.
    mutate(Start.Date = min(Start.Date), Stop.Date = max(Stop.Date)) %>%
    slice(1) %>%
    ungroup() %>%
    select(-isConsecutive, -grp)
## A tibble: 5 x 4
#     ID Medication Start.Date Stop.Date
#  <dbl> <fct>      <date>     <date>
#1    2. aspirin    2017-05-01 2017-05-10
#2    2. aspirin    2017-06-20 2017-06-27
#3    2. tylenol    2017-05-01 2017-05-15
#4    3. lipitor    2017-05-06 2017-05-12
#5    5. advil      2017-05-28 2017-06-20

library(tidyverse)
library(lubridate)
# Merge contiguous medication intervals. Runs are identified per patient AND
# drug (grouping by Medication alone would merge different patients), and a
# running island counter is used so that runs of any length collapse into one
# row, not just pairs.
df %>%
  # Select the date columns by name rather than by position.
  mutate_at(vars(Start.Date, Stop.Date), mdy) %>%
  # Neighbouring rows are compared with lag(), so sort them chronologically.
  arrange(ID, Medication, Start.Date) %>%
  group_by(ID, Medication) %>%
  # s counts the islands: it increases on the first row of a group (the lag is
  # NA, hence the coalesce to TRUE) and on every row that does not start
  # exactly one day after the previous Stop.Date.
  mutate(
    s = cumsum(coalesce(as.numeric(Start.Date - lag(Stop.Date)) != 1, TRUE))
  ) %>%
  group_by(ID, Medication, s) %>%
  mutate(Start.Date = min(Start.Date), Stop.Date = max(Stop.Date)) %>%
  # Keep the last row of each island; it now carries the island's full range.
  filter(row_number() == n()) %>%
  ungroup() %>%
  select(-s)

# Expected result for the 8-row sample `df` (factor columns; the two advil
# rows are contiguous and therefore merged into 2017-05-28 .. 2017-06-20):
# A tibble: 5 x 4
#      ID Medication Start.Date Stop.Date 
#   <dbl> <fct>      <date>     <date>    
# 1     2 aspirin    2017-05-01 2017-05-10
# 2     2 aspirin    2017-06-20 2017-06-27
# 3     2 tylenol    2017-05-01 2017-05-15
# 4     3 lipitor    2017-05-06 2017-05-12
# 5     5 advil      2017-05-28 2017-06-20