R 函数来查看多个向量的组合是否与目标向量匹配
给定一组任意向量:R 函数来查看多个向量的组合是否与目标向量匹配,r,dplyr,R,Dplyr,给定一组任意向量: a<-c("giraffe", "dolphin", "pig") b<-c("elephant" , "pig") c<-c("zebra","cobra","spider","porcupine") d<-c("porcupine") e<-c(
a<-c("giraffe", "dolphin", "pig")
b<-c("elephant" , "pig")
c<-c("zebra","cobra","spider","porcupine")
d<-c("porcupine")
e<-c("spider","cobra")
f<-c("elephant","pig","porcupine")
a首先将向量转换为列表l试试以下方法:
用你的向量建立一个列表
vec_list问题中显示的解决方案由非重叠向量组成,因此我们假设这是一项要求,因此我们希望将目标划分为不相交的向量,覆盖它。如果向量可能重叠,则在涉及以下使用>=的约束中使用=或==代替
假设的问题称为集合划分问题,而具有重叠的问题称为集合覆盖问题
假设最后注释中所示的向量L和目标列表构成目标(全部)、向量关联矩阵A、动物和从目标导出的约束方程右侧rhs
,并运行所示的线性程序
如果找到了一个解决方案,那么我们将添加一个约束,该约束将在下一次迭代中通过坚持其至少一个零为1来消除它。我们迭代5次(即最多5个解决方案)或直到找不到更多的解决方案
library(CVXR)
x <- Variable(nc, boolean = TRUE)
objective <- Minimize(sum(x))
constraints <- list(A %*% x == matrix(rhs))
solns <- soln <- NULL
for(i in 1:5) {
if (!is.null(soln)) constraints <- c(constraints, sum((1 - soln) * x) >= 1)
prob <- Problem(objective, constraints)
result <- solve(prob)
if (result$status != "optimal") break
soln <- result$getValue(x)
solns <- c(solns, list(names(L)[soln == 1]))
}
solns
## [[1]]
## [1] "e" "f"
##
## [[2]]
## [1] "b" "d" "e"
我们展示了一个使用lpSolveAPI包的解决方案,然后在后面的部分中使用CVXR包重复它
LPSOVEAPI
库(lpSolveAPI)
动物使用combn的基本R选项
lst <- list(a, b, c, d, e, f)
nms <- c("a", "b", "c", "d", "e", "f")
names(
Filter(
isTRUE,
unlist(
lapply(
seq_along(lst),
function(k) {
setNames(
combn(lst, k, FUN = function(v) !(length(setdiff(unlist(v), target)) + length(setdiff(target, unlist(v))))),
combn(nms, k, toString)
)
}
)
)
)
)
给予
更新
如果您确实需要找到排他性的组合,即没有重叠,我们可以尝试下面的代码
subset(
unlist(
lapply(
seq_along(nms), function(k) combn(nms, k, toString)
)
),
unlist(
lapply(
seq_along(lst),
function(k) combn(lst, k, FUN = function(v) length(unlist(v))==length(target) & all(unlist(v)%in% target))
)
)
)
或
给
[1] "b, f" "e, f" "b, d, e"
检查函数这几乎可以工作,但我需要知道哪些向量组合正好等于目标向量,而不仅仅是哪些向量中有值。从这个意义上说,b,d,e,f的答案是错误的,因为我最终会得到两只猪,两只大象和两只豪猪。我需要一个精确的匹配。我将在问题中澄清,这与LMC的回答有相同的问题。b、 d,e,f不是一个解决方案,因为存在重叠。不过,如果这不是问题的话,这是可行的。我试图澄清问题question@Jaycee你没有提到你在文章中试图避免重叠。我已经更新了我的答案,所以你可以检查一下。谢谢。鉴于对非重叠向量的需求,这找到了合适的解决方案
a %in% target
[1] FALSE FALSE TRUE
all(a %in% target)
[1] FALSE
match_elem <- function(i, the_list, target) {
if (all( the_list[[i]] %in% target)) {
return(names(the_list)[[i]])
}
}
library(lpSolveAPI)
animals <- sort(unique(unlist(L)))
A <- +outer(animals, L, Vectorize(`%in%`))
rownames(A) <- animals
nr <- nrow(A)
nc <- ncol(A)
rhs <- rownames(A) %in% target
lp <- make.lp(nr, nc)
set.objfn(lp, rep(1, nc))
for(i in 1:nr) add.constraint(lp, A[i, ], "=", rhs[i])
for(j in 1:nc) set.type(lp, j, type = "binary")
soln <- solns <- NULL
for(s in 1:5) {
if (!is.null(soln)) add.constraint(lp, 1-soln, ">=", 1)
if (solve(lp) != 0) break
soln <- get.variables(lp)
solns <- c(solns, list(names(L)[soln == 1]))
}
solns
## [[1]]
## [1] "e" "f"
##
## [[2]]
## [1] "b" "d" "e"
library(CVXR)
x <- Variable(nc, boolean = TRUE)
objective <- Minimize(sum(x))
constraints <- list(A %*% x == matrix(rhs))
solns <- soln <- NULL
for(i in 1:5) {
if (!is.null(soln)) constraints <- c(constraints, sum((1 - soln) * x) >= 1)
prob <- Problem(objective, constraints)
result <- solve(prob)
if (result$status != "optimal") break
soln <- result$getValue(x)
solns <- c(solns, list(names(L)[soln == 1]))
}
solns
## [[1]]
## [1] "e" "f"
##
## [[2]]
## [1] "b" "d" "e"
L <- within(list(), {
a <- c("giraffe", "dolphin", "pig")
b <- c("elephant" , "pig")
c <- c("zebra","cobra","spider","porcupine")
d <- c("porcupine")
e <- c("spider","cobra")
f <- c("elephant","pig","porcupine")
})
L <- L[order(names(L))]
target<- c("elephant" , "pig","cobra","spider","porcupine")
lst <- list(a, b, c, d, e, f)
nms <- c("a", "b", "c", "d", "e", "f")
names(
Filter(
isTRUE,
unlist(
lapply(
seq_along(lst),
function(k) {
setNames(
combn(lst, k, FUN = function(v) !(length(setdiff(unlist(v), target)) + length(setdiff(target, unlist(v))))),
combn(nms, k, toString)
)
}
)
)
)
)
subset(
unlist(
lapply(
seq_along(nms), function(k) combn(nms, k, toString)
)
),
unlist(
lapply(
seq_along(lst),
function(k) combn(lst, k, FUN = function(v) !(length(setdiff(unlist(v), target)) + length(setdiff(target, unlist(v)))))
)
)
)
[1] "e, f" "b, d, e" "b, e, f" "d, e, f" "b, d, e, f"
subset(
unlist(
lapply(
seq_along(nms), function(k) combn(nms, k, toString)
)
),
unlist(
lapply(
seq_along(lst),
function(k) combn(lst, k, FUN = function(v) length(unlist(v))==length(target) & all(unlist(v)%in% target))
)
)
)
names(
Filter(
isTRUE,
unlist(
lapply(
seq_along(lst),
function(k) {
setNames(
combn(lst, k, FUN = function(v) length(unlist(v))==length(target) & all(unlist(v)%in% target)),
combn(nms, k, toString)
)
}
)
)
)
)
[1] "b, f" "e, f" "b, d, e"