R在行中的多个条件上使用any()-更复杂的版本

R在行中的多个条件上使用any()-更复杂的版本,r,vector,dplyr,stringr,R,Vector,Dplyr,Stringr,昨天这里解决了这个问题的简单版本: 但是,我不确定是否可以将其扩展到我的第二个需要,即我试图确定From_ID中的ID是否位于该组中的任何较早的to_ID行中 library(dplyr);library(anytime) Tag <- c('V1','V1','V1','V1','V1','V1','V2','V2','V2','V3','V3','V3','V3','V4','V4','V4') From_ID <- c('BL342','SD234','FR212','P

昨天这里解决了这个问题的简单版本:

但是,我不确定能否将其扩展到我的第二个需求:我想判断 From_ID 中的 ID 是否出现在同一组(Tag)内任何更早的 To_ID 行中。

library(dplyr);library(anytime)


# Toy input: one row per From_ID -> To_ID link, labelled by Tag.
Tag <- rep(c("V1", "V2", "V3", "V4"), times = c(6, 3, 4, 3))
From_ID <- c(
  "BL342", "SD234", "FR212", "PX123", "KJ214", "BL342",
  "FR231", "BL231", "CV231",
  "KJ875", "PX239", "TR543", "FR342",
  "DS329", "FR321", "DF321"
)
To_ID <- c(
  "FR212", "BL342", "SD234", "FG342", "BL342", "KJ214",
  "BL231", "FR231", "KJ123",
  "FG432", "KJ123", "TR998", "PX239",
  "HG734", "DF321", "FR321"
)

# 16 distinct days drawn at random (no seed is set, so the dates differ
# from run to run) from the daily sequence 2017-01-01 .. 2020-01-01.
Date <- sample(
  seq(anydate("2017-01-01"), anydate("2020-01-01"), by = "day"),
  size = 16
)

# Within each Tag, newest row first; Rank numbers the rows in that order.
# The result is left grouped by Tag.
df <- data.frame(Tag, From_ID, To_ID, Date) %>%
  group_by(Tag) %>%
  arrange(desc(Date), .by_group = TRUE) %>%
  mutate(Rank = seq_len(n()))


先用 lubridate 中的 dmy 将 Date 列转换为 Date 类,按 Tag、Date 对数据排序并按 Tag 分组;然后遍历行号创建 Flag2 列:检查该行的 From_ID 是否 %in% 从第一行到该行的 To_ID;最后取消分组(ungroup),并按 Tag、Rank 列把数据集恢复为原始顺序。

library(dplyr)
library(purrr)
library(lubridate) 
# Flag2 is TRUE when a row's From_ID occurs among the To_ID values from the
# first row of its Tag group up to and including that row, with rows taken
# in ascending Date order. The original Tag/Rank ordering is restored at the
# end.
df %>%
  # parse the Date column with lubridate::dmy()
  mutate(Date = dmy(Date)) %>%
  # ascending Date inside each Tag
  arrange(Tag, Date) %>%
  group_by(Tag) %>%
  mutate(
    Flag2 = vapply(
      seq_len(n()),
      # i is the position of the row inside its group
      function(i) From_ID[i] %in% To_ID[seq_len(i)],
      logical(1)
    )
  ) %>%
  ungroup() %>%
  # back to the presentation order
  arrange(Tag, Rank)
# A tibble: 16 x 7
#   Tag   From_ID To_ID Date        Rank Flag  Flag2
#   <chr> <chr>   <chr> <date>     <int> <lgl> <lgl>
# 1 V1    FR212   SD234 2019-08-24     1 TRUE  TRUE 
# 2 V1    BL342   KJ214 2019-05-05     2 TRUE  TRUE 
# 3 V1    PX123   FG342 2019-04-22     3 FALSE FALSE
# 4 V1    KJ214   BL342 2019-01-01     4 FALSE FALSE
# 5 V1    SD234   BL342 2018-09-13     5 FALSE FALSE
# 6 V1    BL342   FR212 2018-04-30     6 FALSE FALSE
# 7 V2    FR231   BL231 2019-09-16     1 TRUE  TRUE 
# 8 V2    CV231   KJ123 2018-01-07     2 FALSE FALSE
# 9 V2    BL231   FR231 2017-01-11     3 FALSE FALSE
#10 V3    KJ875   FG432 2019-11-14     1 FALSE FALSE
#11 V3    TR543   TR998 2019-10-26     2 FALSE FALSE
#12 V3    FR342   PX239 2019-07-02     3 FALSE FALSE
#13 V3    PX239   KJ123 2017-07-15     4 FALSE FALSE
#14 V4    DS329   HG734 2019-01-30     1 FALSE FALSE
#15 V4    DF321   FR321 2017-05-06     2 TRUE  TRUE 
#16 V4    FR321   DF321 2017-03-20     3 FALSE FALSE
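library(dplyr)
library(purrr)
library(lubridate)
df %>%
     # // convert to Date class
     mutate(Date = dmy(Date)) %>%
     # // order the dataset by Tag, Date
     arrange(Tag, Date) %>%
     # // grouped by Tag
     group_by(Tag) %>%
     # // loop over the sequence of rows
     mutate(Flag2 = map_lgl(row_number(),
           # // check whether the 'From_ID' of that row is in
           # // any of the previous row elements of 'To_ID'
           ~ From_ID[.x] %in% To_ID[1:(.x)])) %>%
     ungroup %>%
     # // order back to original
     arrange(Tag, Rank)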
# A tibble: 16 x 7
#   Tag   From_ID To_ID Date        Rank Flag  Flag2
#   <chr> <chr>   <chr> <date>     <int> <lgl> <lgl>
# 1 V1    FR212   SD234 2019-08-24     1 TRUE  TRUE
# 2 V1    BL342   KJ214 2019-05-05     2 TRUE  TRUE
# 3 V1    PX123   FG342 2019-04-22     3 FALSE FALSE
# 4 V1    KJ214   BL342 2019-01-01     4 FALSE FALSE
# 5 V1    SD234   BL342 2018-09-13     5 FALSE FALSE
# 6 V1    BL342   FR212 2018-04-30     6 FALSE FALSE
# 7 V2    FR231   BL231 2019-09-16     1 TRUE  TRUE
# 8 V2    CV231   KJ123 2018-01-07     2 FALSE FALSE
# 9 V2    BL231   FR231 2017-01-11     3 FALSE FALSE
#10 V3    KJ875   FG432 2019-11-14     1 FALSE FALSE
#11 V3    TR543   TR998 2019-10-26     2 FALSE FALSE
#12 V3    FR342   PX239 2019-07-02     3 FALSE FALSE
#13 V3    PX239   KJ123 2017-07-15     4 FALSE FALSE
#14 V4    DS329   HG734 2019-01-30     1 FALSE FALSE
#15 V4    DF321   FR321 2017-05-06     2 TRUE  TRUE
#16 V4    FR321   DF321 2017-03-20     3 FALSE FALSE
数据
df
你能给我解释一下这段代码吗?我理解它的前提,但不确定自己能否写出来:`mutate(Flag2 = map_lgl(row_number(), ~ From_ID[.x] %in% To_ID[1:(.x)])) %>%`。另外,我修改了预期输出中的日期——我手动创建输出时,Excel 把日期格式改掉了,很抱歉。
@Medwards 没关系,我已用 `dmy` 做了转换。
谢谢,我只是想确认自己的理解是否正确:如果我想跳过你来回翻转排序方向的步骤,是否可以改为 `%in% To_ID[(.x):max(Rank)]`?
@Medwards 如果不翻转,并且 Rank 是基于已经排好序的 Date 得到的,那么就不需要再 `arrange`,只需在同样的代码中把 `row_number()` 换成 `Rank`,即 `map(Rank, ~`
   Tag From_ID To_ID       Date Rank  Flag
1   V1   FR212 SD234 2019-08-24    1  TRUE
2   V1   BL342 KJ214 2019-05-05    2  TRUE
3   V1   PX123 FG342 2019-04-22    3 FALSE
4   V1   KJ214 BL342 2019-01-01    4 FALSE
5   V1   SD234 BL342 2018-09-13    5 FALSE
6   V1   BL342 FR212 2018-04-30    6 FALSE
7   V2   FR231 BL231 2019-09-16    1  TRUE
8   V2   CV231 KJ123 2018-01-07    2 FALSE
9   V2   BL231 FR231 2017-01-11    3 FALSE
10  V3   KJ875 FG432 2019-11-14    1 FALSE
11  V3   TR543 TR998 2019-10-26    2 FALSE
12  V3   FR342 PX239 2019-07-02    3 FALSE
13  V3   PX239 KJ123 2017-07-15    4 FALSE
14  V4   DS329 HG734 2019-01-30    1 FALSE
15  V4   DF321 FR321 2017-05-06    2  TRUE
16  V4   FR321 DF321 2017-03-20    3 FALSE

library(dplyr)
library(purrr)
library(lubridate) 
# For each Tag, walk the rows in ascending Date order and set Flag2 to TRUE
# when the row's From_ID has already appeared in To_ID at or before that row.
df %>%
  mutate(Date = dmy(Date)) %>%
  arrange(Tag, Date) %>%
  group_by(Tag) %>%
  mutate(
    # match() gives the first position of each From_ID within the group's
    # To_ID (NA when absent); the ID counts as seen once that position is
    # not beyond the current row. coalesce() turns the NA case into FALSE.
    Flag2 = coalesce(match(From_ID, To_ID) <= row_number(), FALSE)
  ) %>%
  ungroup() %>%
  # restore the Tag/Rank presentation order
  arrange(Tag, Rank)
# A tibble: 16 x 7
#   Tag   From_ID To_ID Date        Rank Flag  Flag2
#   <chr> <chr>   <chr> <date>     <int> <lgl> <lgl>
# 1 V1    FR212   SD234 2019-08-24     1 TRUE  TRUE 
# 2 V1    BL342   KJ214 2019-05-05     2 TRUE  TRUE 
# 3 V1    PX123   FG342 2019-04-22     3 FALSE FALSE
# 4 V1    KJ214   BL342 2019-01-01     4 FALSE FALSE
# 5 V1    SD234   BL342 2018-09-13     5 FALSE FALSE
# 6 V1    BL342   FR212 2018-04-30     6 FALSE FALSE
# 7 V2    FR231   BL231 2019-09-16     1 TRUE  TRUE 
# 8 V2    CV231   KJ123 2018-01-07     2 FALSE FALSE
# 9 V2    BL231   FR231 2017-01-11     3 FALSE FALSE
#10 V3    KJ875   FG432 2019-11-14     1 FALSE FALSE
#11 V3    TR543   TR998 2019-10-26     2 FALSE FALSE
#12 V3    FR342   PX239 2019-07-02     3 FALSE FALSE
#13 V3    PX239   KJ123 2017-07-15     4 FALSE FALSE
#14 V4    DS329   HG734 2019-01-30     1 FALSE FALSE
#15 V4    DF321   FR321 2017-05-06     2 TRUE  TRUE 
#16 V4    FR321   DF321 2017-03-20     3 FALSE FALSE
# Reproducible copy of the example data: 16 rows in 4 Tag groups, Date kept
# as day/month/year text, Rank numbering the rows inside each Tag, and Flag
# holding the expected result.
df <- structure(
  list(
    Tag = rep(c("V1", "V2", "V3", "V4"), times = c(6L, 3L, 4L, 3L)),
    From_ID = c(
      "FR212", "BL342", "PX123", "KJ214", "SD234", "BL342",
      "FR231", "CV231", "BL231",
      "KJ875", "TR543", "FR342", "PX239",
      "DS329", "DF321", "FR321"
    ),
    To_ID = c(
      "SD234", "KJ214", "FG342", "BL342", "BL342", "FR212",
      "BL231", "KJ123", "FR231",
      "FG432", "TR998", "PX239", "KJ123",
      "HG734", "FR321", "DF321"
    ),
    Date = c(
      "24/08/2019", "5/05/2019", "22/04/2019", "1/01/2019", "13/09/2018",
      "30/04/2018",
      "16/09/2019", "7/01/2018", "11/01/2017",
      "14/11/2019", "26/10/2019", "2/07/2019", "15/07/2017",
      "30/01/2019", "6/05/2017", "20/03/2017"
    ),
    # 1..k inside each Tag block (6, 3, 4 and 3 rows)
    Rank = c(1:6, 1:3, 1:4, 1:3),
    # TRUE only on rows 1, 2, 7 and 15
    Flag = seq_len(16L) %in% c(1L, 2L, 7L, 15L)
  ),
  class = "data.frame",
  row.names = as.character(1:16)
)