R:将一列与另一列中作为列表对象显示的值进行比较
标签:r, data.table, tidyverse
我使用 for 循环创建了 DF1(或者是从别人那里得到的)。我想检查 DF2 中的 no 值是否出现在 DF1 对应日期的 nos 中(nos 可以是列表,也可以是用“|”拼接的字符串)。我经常遇到这种情况。这是代码:
# Build the lookup table (DF1) and the query table (DF2), join them by date,
# and split the pipe-delimited `nos` string into a list column.
library(dplyr)
library(magrittr)

# stringsAsFactors = FALSE keeps `nos` a character column on R < 4.0, where the
# default would turn it into a factor and make strsplit() fail.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4"),
  stringsAsFactors = FALSE
) %>% print
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
) %>% print

# DF1 is deliberately overwritten with the joined result: one row per DF2 row,
# carrying the matching `nos` string plus its split form in `list_column`.
DF1 <- left_join(DF2, DF1, by = c("det1" = "det")) %>%
  mutate(list_column = strsplit(nos, split = "\\|")) %>%
  print
# DF1
# det1 no nos
# 1 2013-02-02 1 1|3
# 2 2018-01-11 3 4|2|1
# Flag, for every row, whether `no` is one of the values in `list_column`.
# vapply() over seq_len(nrow(DF1)) is safe for a zero-row DF1 (1:nrow(DF1)
# would iterate over c(1, 0)) and always yields a logical vector.
DF1$present <- vapply(
  seq_len(nrow(DF1)),
  function(row_idx) {
    # list_column holds character tokens; compare numerically against `no`.
    list_vals <- as.numeric(DF1$list_column[[row_idx]])
    print(list_vals) # echo the parsed values for inspection
    DF1$no[row_idx] %in% list_vals
  },
  logical(1)
)
#R>DF1
# det1 no nos list_column present
#1 2013-02-02 1 1|3 1, 3 TRUE
#2 2018-01-11 3 4|2|1 4, 2, 1 FALSE
创建另一个逻辑列(用于表示 no 是否是 nos 中的某个值)的最佳方法是什么?如何实现我正在尝试做的事情,或者有没有更好的方式得到我最终想要的结果?
我欢迎任何解决方案库、tidyverse或data.table
编辑-1
我正在寻找消除for循环的方法
数据不变,仅在一处添加了 as.character():
# Rebuild the example data and the joined table. as.character() guards the
# strsplit() call against `nos` being stored as a factor.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
print(DF1)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
)
print(DF2)

# Overwrite DF1 with the joined rows, then attach the split tokens.
DF1 <- left_join(DF2, DF1, by = c("det1" = "det"))
DF1$list_column <- strsplit(as.character(DF1$nos), split = "\\|")
我发现grepl在这种情况下很有用
# Join each query row to its `nos` string, then flag whether `no` is one of the
# pipe-separated values. Tokens are compared exactly; a plain grepl(no, nos)
# matches substrings, so 1 would wrongly "match" "11|3".
DF3 <- left_join(DF2, DF1, by = c("det1" = "det"))

# as.character() also covers `nos` being a factor; fixed = TRUE treats "|"
# literally instead of as regex alternation.
nos_tokens <- strsplit(as.character(DF3$nos), split = "|", fixed = TRUE)

# seq_len() keeps this safe for a zero-row DF3; rows without a matching date
# (nos is NA) come out FALSE.
DF3$present <- vapply(
  seq_len(nrow(DF3)),
  function(i) DF3$no[i] %in% as.numeric(nos_tokens[[i]]),
  logical(1)
)
> DF3
det1 no nos present
1 2013-02-02 1 1|3 TRUE
2 2018-01-11 3 4|2|1 FALSE
使用 stringsAsFactors = FALSE 创建数据:
**代码的第一部分**
library(dplyr)

# Example data. stringsAsFactors = FALSE keeps `nos` a character column on
# R < 4.0 as well (it is already the default from R 4.0 on).
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4"),
  stringsAsFactors = FALSE
) %>% print
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3),
  stringsAsFactors = FALSE
) %>% print

# Overwrite DF1 with one row per DF2 row plus its matching `nos` string.
# The string is kept as-is; no strsplit() step is needed.
DF1 <- left_join(DF2, DF1, by = c("det1" = "det"))
结果:
det1 no nos present
1: 2013-02-02 1 1|3 TRUE
2: 2018-01-11 3 4|2|1 FALSE
此解决方案可移植到data.table,例如:
library(data.table)
data.table::setDT(DF1) # convert DF1 to a data.table by reference

# Flag rows where `no` occurs in `nos` as a whole number: the lookarounds
# reject hits that are part of a longer digit run (1 does not match "11").
# The columns are indexed directly instead of going through
# apply(DF1, 1, ...), which coerces every row to character and pads numeric
# values to a common width, corrupting the pattern.
DF1[, present := vapply(
  seq_len(.N),
  function(i) {
    grepl(paste0("(?<!\\d)", no[i], "(?!\\d)"), nos[i], perl = TRUE)
  },
  logical(1)
)] # `:=` adds the column in place
我们可以使用 map2 逐行遍历 list_column 和对应的 no 值,检查两者交集的元素个数是否大于 0
或者更紧凑一些,无需匿名函数调用
# Flag rows whose own `no` value appears in `list_column`. Pairing with the
# joined table's `no` column (rather than DF2$no) keeps every value aligned
# with its row even if the join changed the row count or order.
DF1 %>%
  mutate(present = lengths(map2(list_column, no, intersect)) > 0)
另一个使用data.table::tstrsplit的选项:
数据:
det1 no nos present
1: 2013-02-02 1 1|3 TRUE
2: 2018-01-11 3 4|2|1 FALSE
library(data.table)
data.table::setDT(DF1) # convert DF1 to a data.table by reference

# Flag rows where `no` occurs in `nos` as a whole number: the lookarounds
# reject hits that are part of a longer digit run (1 does not match "11").
# The columns are indexed directly instead of going through
# apply(DF1, 1, ...), which coerces every row to character and pads numeric
# values to a common width, corrupting the pattern.
DF1[, present := vapply(
  seq_len(.N),
  function(i) {
    grepl(paste0("(?<!\\d)", no[i], "(?!\\d)"), nos[i], perl = TRUE)
  },
  logical(1)
)] # `:=` adds the column in place
library(tidyverse)

# Flag rows whose own `no` value appears in `list_column`.
# map2_lgl() returns a logical vector directly, instead of comparing the list
# produced by map2() against 0. The joined table's `no` column is used so each
# value stays paired with its row.
DF1 %>%
  mutate(
    present = map2_lgl(list_column, no, ~ length(intersect(.x, .y)) > 0)
  )
# det1 no nos list_column present
#1 2013-02-02 1 1|3 1, 3 TRUE
#2 2018-01-11 3 4|2|1 4, 2, 1 FALSE
# Flag rows whose own `no` value appears in `list_column`. Pairing with the
# joined table's `no` column (rather than DF2$no) keeps every value aligned
# with its row even if the join changed the row count or order.
DF1 %>%
  mutate(present = lengths(map2(list_column, no, intersect)) > 0)
library(data.table)

# Expand DF1 to long form: one row per (date, number) pair taken from the
# pipe-delimited `nos` string.
setDT(DF1)
df1 <- DF1[
  ,
  .(no = as.integer(unlist(tstrsplit(nos, "\\|")))),
  by = .(det)
]

# Start with every DF2 row unflagged, then use an update join to mark the rows
# whose (date, number) pair exists in the long table.
setDT(DF2)
DF2[, present := FALSE]
DF2[df1, on = c("det1" = "det", "no"), present := !is.na(i.no)]
det1 no present
1: 2013-02-02 1 TRUE
2: 2018-01-11 3 FALSE
# Example input: DF1 maps each date to a pipe-delimited set of numbers; DF2
# holds the (date, number) pairs to look up.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1L, 3L)
)