在 R 中根据一列中的 ID 查找另一列中的值

根据r中的ID在另一列中查找一列中的值,r,R,我有一个数据框,每个ID有多个条目。 ID有一个参考号(新参考号)和一个旧参考号(旧参考号)。我需要找到每个ID的最新参考号,即不在旧参考号列中的参考号 ID <- c(1,2,3,4,1,3,5,2,4,1,3,4) NEW_REF <- c("TS101","TS253","TS565","TS789","TD123","TS101","TD367","TH152","TD123","TF908","TD256","TS898") OLD_REF <- c("TD

我有一个数据框,每个 ID 有多个条目。每个条目有一个新参考号(NEW_REF)和一个旧参考号(OLD_REF)。我需要找到每个 ID 的最新参考号,即没有出现在旧参考号列(OLD_REF)中的那个参考号。

# Example data: several rows per ID, each row pairing a new reference number
# with the old reference number recorded on that same row.
ID <- c(1, 2, 3, 4, 1, 3, 5, 2, 4, 1, 3, 4)
NEW_REF <- c(
  "TS101", "TS253", "TS565", "TS789", "TD123", "TS101",
  "TD367", "TH152", "TD123", "TF908", "TD256", "TS898"
)
OLD_REF <- c(
  "TD123", "TH152", "TS101", "TD123", "TF908", "TD256",
  "TG232", "TR142", "TS898", "TR268", "TB496", "TD969"
)
DF <- data.frame(ID, NEW_REF, OLD_REF)

# Flag every row in one vectorised step: "N" (not active) when its NEW_REF
# appears anywhere in the OLD_REF column, "Y" (active / most recent) otherwise.
# Note: the lookup spans the whole OLD_REF column; ID is not consulted here.
DF$Active_ind <- ifelse(DF$NEW_REF %in% DF$OLD_REF, "N", "Y")

    ID NEW_REF OLD_REF Active_ind
1   1   TS101   TD123          N
2   2   TS253   TH152          Y
3   3   TS565   TS101          Y
4   4   TS789   TD123          Y
5   1   TD123   TF908          N
6   3   TS101   TD256          N
7   5   TD367   TG232          Y
8   2   TH152   TR142          N
9   4   TD123   TS898          N
10  1   TF908   TR268          N
11  3   TD256   TB496          N
12  4   TS898   TD969          N

我知道用for循环是可能的,但是我想避免这种情况,因为我的数据集有超过40000个不同的ID,并且在使用循环时变得非常耗时。

我们可以使用 `dplyr`,按 `ID` 对数据进行分组,然后检查 `NEW_REF` 中的值是否存在于同一组的 `OLD_REF` 中,并相应地给出 "N" 或 "Y"。

library(dplyr)

# Run the membership test inside each ID group, so a NEW_REF is compared only
# against the OLD_REF values that share its ID: "N" if it was superseded
# within that ID, "Y" if it is that ID's most recent reference.
DF %>%
  group_by(ID) %>%
  mutate(Active_Ind = if_else(NEW_REF %in% OLD_REF, "N", "Y")) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()


#     ID NEW_REF OLD_REF Active_Ind
#   <dbl>  <fctr>  <fctr>      <chr>
#      1   TS101   TD123          Y
#      2   TS253   TH152          Y
#      3   TS565   TS101          Y
#      4   TS789   TD123          Y
#      1   TD123   TF908          N
#      3   TS101   TD256          N
#      5   TD367   TG232          Y
#      2   TH152   TR142          N
#      4   TD123   TS898          N
#      1   TF908   TR268          N
#      3   TD256   TB496          N
#      4   TS898   TD969          N
library(dplyr)
DF %>%
   group_by(ID) %>%
   mutate(Active_Ind = ifelse(NEW_REF %in% OLD_REF, "N", "Y"))

#     ID NEW_REF OLD_REF Active_Ind
#   <dbl>  <fctr>  <fctr>      <chr>
#      1   TS101   TD123          Y
#      2   TS253   TH152          Y
#      3   TS565   TS101          Y
#      4   TS789   TD123          Y
#      1   TD123   TF908          N
#      3   TS101   TD256          N
#      5   TD367   TG232          Y
#      2   TH152   TR142          N
#      4   TD123   TS898          N
#      1   TF908   TR268          N
#      3   TD256   TB496          N
#      4   TS898   TD969          N
library(dplyr)

# Run the membership test inside each ID group, so a NEW_REF is compared only
# against the OLD_REF values that share its ID: "N" if it was superseded
# within that ID, "Y" if it is that ID's most recent reference.
DF %>%
  group_by(ID) %>%
  mutate(Active_Ind = if_else(NEW_REF %in% OLD_REF, "N", "Y")) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()


#     ID NEW_REF OLD_REF Active_Ind
#   <dbl>  <fctr>  <fctr>      <chr>
#      1   TS101   TD123          Y
#      2   TS253   TH152          Y
#      3   TS565   TS101          Y
#      4   TS789   TD123          Y
#      1   TD123   TF908          N
#      3   TS101   TD256          N
#      5   TD367   TG232          Y
#      2   TH152   TR142          N
#      4   TD123   TS898          N
#      1   TF908   TR268          N
#      3   TD256   TB496          N
#      4   TS898   TD969          N