基于r中的先前日期值筛选变量
我有一个如下所示的数据框(标签:r、filter、dplyr、tidyverse):
# Example data: 20 rows, three character columns (id, date, flag_code).
# Dates are stored as "yyyymmdd" strings. The object carries the tibble
# class vector but is assembled with base R only.
df <- structure(
  list(
    # Six runs of identical ids, in row order.
    id = rep(
      c(
        "555900339", "555900327", "555703505",
        "555703379", "555703366", "555702668"
      ),
      times = c(5L, 3L, 1L, 5L, 1L, 5L)
    ),
    # Rows 1-3 and 6-16 fall on 20200207; rows 4-5 and 17-20 on 20200208.
    date = rep(
      c("20200207", "20200208", "20200207", "20200208"),
      times = c(3L, 2L, 11L, 4L)
    ),
    # One line per id run, matching the `times` vector used for `id`.
    flag_code = c(
      "SLEP", "NCHG", "MOTN", "CIHB", "NCON",
      "SLEP", "NCHG", "MOTN",
      "INMC",
      "SLEP", "NCHG", "MOTN", "COFF", "NCON",
      "NCHG",
      "SLEP", "NOMO", "NCON", "MOTN", "CIHB"
    )
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)
我觉得有一种更简单的方法,但有一种方法是:
library(dplyr)
# Keep NCHG/NCON rows that form an NCHG -> NCON pair on consecutive days.
# Steps:
#   1. parse the "yyyymmdd" strings into Date values;
#   2. drop every row whose flag is neither NCHG nor NCON;
#   3. among the remaining rows (in their current order), keep a row when it
#      is the NCON that follows an NCHG exactly one day later, or the NCHG
#      that precedes such an NCON.
# NOTE(review): nothing here groups by `id`, so adjacent rows belonging to
# different ids can be matched as a pair -- confirm this is intended.
df %>%
  mutate(date = as.Date(date, format = "%Y%m%d")) %>%
  filter(flag_code %in% c("NCHG", "NCON")) %>%
  # Days elapsed since the previous remaining row; 0 for the first row.
  mutate(day_gap = c(0, diff(date))) %>%
  filter(
    (flag_code == "NCON" & lag(flag_code) == "NCHG" & day_gap == 1) |
      (flag_code == "NCHG" & lead(flag_code) == "NCON" & lead(day_gap) == 1)
  ) %>%
  # Remove the helper column so the result has the original three columns.
  select(-day_gap)
输出:
# A tibble: 4 x 3
id date flag_code
<chr> <date> <chr>
1 555900339 2020-02-07 NCHG
2 555900339 2020-02-08 NCON
3 555703366 2020-02-07 NCHG
4 555702668 2020-02-08 NCON
# A tibble: 4 x 3
id date flag_code
1 555900339 2020-02-07 NCHG
2 555900339 2020-02-08 NCON
3 555703366 2020-02-07 NCHG
4 555702668 2020-02-08 NCON
# A tibble: 4 x 3
id date flag_code
<chr> <date> <chr>
1 555900339 2020-02-07 NCHG
2 555900339 2020-02-08 NCON
3 555703366 2020-02-07 NCHG
4 555702668 2020-02-08 NCON