如何在R中生成N个最不相似的组合
我有一套6个色码(x),一套N个个体,每个个体都需要贴上一个独特的色码标签,每个动物身上有四个位置,每个位置都可以携带不同的颜色。我有6种不同的颜色 因此,两个人的代码可能是:如何在R中生成N个最不相似的组合,r,cryptography,combinations,permutation,combinatorics,R,Cryptography,Combinations,Permutation,Combinatorics,我有一套6个色码(x),一套N个个体,每个个体都需要贴上一个独特的色码标签,每个动物身上有四个位置,每个位置都可以携带不同的颜色。我有6种不同的颜色 因此,两个人的代码可能是: 1.红、蓝、蓝、白 2.白色,黄色,粉色,黄色 但是,由于每个位置的颜色都可能脱落,我想生成一个冗余的标签方案,这样即使在一个(甚至两个?)位置失去颜色后,仍然可以将个人与其他人区分开来 尽管6种颜色和4种位置给出了1296种组合,但我发现很难选择N种最不相似的组合: 可复制示例: 库(gtools) 我不能肯定地回答你
1.红、蓝、蓝、白
2.白色,黄色,粉色,黄色 但是,由于每个位置的颜色都可能脱落,我想生成一个冗余的标签方案,这样即使在一个(甚至两个?)位置失去颜色后,仍然可以将个人与其他人区分开来 尽管6种颜色和4种位置给出了1296种组合,但我发现很难选择N种最不相似的组合: 可复制示例:
library(gtools)
我不能肯定地回答你的问题,但我有一个想法可能会对你有所帮助
使用每种颜色的第一个字母生成字符串代码:
library(gtools)

## One-letter colour codes (first letter of each colour name)
x <- c("w", "r", "g", "b", "p", "y")

## All ordered arrangements of the colours over 4 positions, repeats allowed.
## `TRUE` is spelled out because `T` is an ordinary variable that can be
## reassigned; `n` is derived from `x` so the colour set can change freely.
Perms <- permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE)

## Collapse each row (one code) into a single string such as "bbbg"
m <- apply(Perms, 1, paste, collapse = "")
> head(m)
[1] "bbbb" "bbbg" "bbbp" "bbbr" "bbbw" "bbby"
创建一个n*n矩阵:
library(vwr)
lvmat <- sapply(y, function(x) levenshtein.distance(x, y))
> lvmat[1:5, 1:5]
     grrp pgpg rprr yprw gggp
grrp    0    4    3    3    2
pgpg    4    0    4    4    3
rprr    3    4    0    2    4
yprw    3    4    2    0    4
gggp    2    3    4    4    0
现在,您可以通过自助法(bootstrap)或任何您喜欢的方式来最大化 sum(lvmat),以获得差异最大的组合样本。
下面是按上述建议、通过循环重复随机抽样实现的示例。
注意,由于依赖随机抽样,这仍然不能保证不会有仅在一个位置不同的代码对。不过,这是一个好的开始-谢谢
# install.packages(c("gtools", "vwr"))
library(gtools)
library(vwr)

## Random-search for a set of COLONY_SIZE colour codes that are as dissimilar
## from each other as possible: draw many random samples of codes, score each
## sample by its pairwise Levenshtein distances, and keep the best-ranked one.
## NOTE(review): Levenshtein distance also allows insertions/deletions; since
## the marks sit at fixed body positions, a position-wise (Hamming) distance
## may be the more appropriate metric -- confirm with the author.

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = 6, r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")

## Concatenate each colour code into one string without separators
m <- paste0(body$Head, body$Thorax, body$L_gaster, body$R_gaster)

##
set.seed(1)
COLONY_SIZE <- 50   ## How many adult workers in the colony excluding the queen
N_Attempts <- 1000  ## How many alternative solutions to generate - the more
                    ## the better, but it takes longer

## Prepare data containers. Both are pre-allocated: growing a data frame with
## rbind() inside the loop copies it on every iteration.
LvList <- vector("list", N_Attempts)
summary_rows <- vector("list", N_Attempts)

for (TRY in seq_len(N_Attempts)) {
  print(paste(TRY, "of", N_Attempts))

  ## Randomly sample COLONY_SIZE codes (without replacement)
  y <- sample(m, COLONY_SIZE)

  ## Pairwise Levenshtein distances for all code pairs
  Matrix <- vapply(
    y,
    function(code) levenshtein.distance(code, y),
    numeric(COLONY_SIZE)
  )
  dimnames(Matrix) <- list(y, y)
  diag(Matrix) <- NA  ## eliminate self-self measure (distance = 0)

  ## Count, for every code, how many other codes are at distance 1.
  ## This must be done on the FULL symmetric matrix: once the lower triangle
  ## is blanked, row i only sees partners j > i and the count is too low.
  close_per_ant <- rowSums(Matrix == 1, na.rm = TRUE)

  ## Keep each pair only once (dist i-j = dist j-i)
  Matrix[lower.tri(Matrix)] <- NA

  ## Store solution
  LvList[[TRY]] <- Matrix

  ## Summarize each solution using three metrics:
  ## (i)   the average pair distance (higher is better)
  ## (ii)  the number of 'close' code pairs, i.e. pairs at the minimum
  ##       distance of 1 (lower is better)
  ## (iii) the maximum number of 'close' pairs any single code is part of
  ##       (lower is better)
  summary_rows[[TRY]] <- data.frame(
    Mean_Distance = mean(Matrix, na.rm = TRUE),
    N_close_pairs = sum(Matrix == 1, na.rm = TRUE),
    N_close_pairs_per_ant = max(close_per_ant)
  )
}
Summary <- do.call(rbind, summary_rows)

## *** Find the solution with the fewest pairs with the lowest distance ***
## Each metric is ranked so that a larger rank is better; ranks are summed.
Summary$Mean_Distance_Rank <- rank(Summary$Mean_Distance)
Summary$N_close_pairs_Rank <- rank(-Summary$N_close_pairs)
Summary$N_close_pairs_per_ant_Rank <- rank(-Summary$N_close_pairs_per_ant)
Summary$Rank_Total <- Summary$Mean_Distance_Rank +
  Summary$N_close_pairs_Rank +
  Summary$N_close_pairs_per_ant_Rank
best <- which.max(Summary$Rank_Total)
solution <- rownames(LvList[[best]])

## Highlight the chosen solution (red) among all candidates (faint black).
## `alpha` is named explicitly; the original passed a stray fifth positional
## argument that rgb() interprets as `names`.
Colour <- rep(rgb(0, 0, 0, alpha = 0.1), nrow(Summary))
Colour[best] <- "red"
pairs(
  Summary[, c("Mean_Distance", "N_close_pairs", "N_close_pairs_per_ant")],
  col = Colour, bg = Colour, pch = 21, cex = 1.4
)

## Format into a table: one row per individual, one column per body position
SOLUTION <- data.frame(
  Code = seq_len(COLONY_SIZE),
  do.call(rbind, strsplit(solution, "")),
  row.names = solution
)
colnames(SOLUTION)[2:5] <- c("Head", "Thorax", "L_gaster", "R_gaster")
# install.packages("gtools")
library(gtools)
library(vwr)
## 可用颜色
x <- c("W", "R", "G", "B", "P", "Y")
这里有一种更好的方法:它不依赖盲目抽样,而是把每对代码之间的相似性表示为网络中的一条边,然后使用 igraph 的 largest_ivs 函数搜索彼此最不相似的代码集合:
library(gtools)
library(igraph)

## Build a graph whose vertices are colour codes and whose edges join codes
## that share colours at the same position, then look for the largest sets of
## mutually unconnected codes (independent vertex sets).
## The original script started with rm(list = ls()) and setwd(); both are
## dropped so that running it neither wipes the user's workspace nor changes
## the working directory. Output files still land in `outputfolder`.

## Output folder (placeholder path - edit before running)
outputfolder <- "XXXXXXXXXX"
dir.create(outputfolder, showWarnings = FALSE)
full_list_file <- file.path(outputfolder, "Paint_marks_full_list.txt")
edge_file <- file.path(outputfolder, "Edge_list.txt")

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = 6, r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")
write.table(body, file = full_list_file, col.names = TRUE, row.names = FALSE,
            quote = FALSE, append = FALSE)

n_codes <- nrow(body)
codes <- as.matrix(body)  ## character matrix: fast row-wise comparison

## Generate edge list. The list is checkpointed in Edge_list.txt so that an
## interrupted run can resume after the last edge that was written.
edge_list <- data.frame(comb_1 = character(), comb_2 = character(),
                        similarity = character())
if (!file.exists(edge_file)) {
  write.table(edge_list, file = edge_file, col.names = TRUE,
              row.names = FALSE, quote = FALSE, append = FALSE)
} else {
  edge_list <- read.table(edge_file, header = TRUE, stringsAsFactors = FALSE)
}

## Resume point. On a fresh run there is no previous edge, so start from the
## very first pair (1, 2); the original left these two variables undefined
## in that case and failed on first use.
last_i <- 1L
last_j <- 1L
if (nrow(edge_list) > 0) {
  last_i <- edge_list[nrow(edge_list), "comb_1"]
  last_j <- edge_list[nrow(edge_list), "comb_2"]
}

for (i in last_i:(n_codes - 1L)) {
  print(paste("Combination", i))
  ## On the resumed row skip partners already processed; otherwise start
  ## right after i so that each unordered pair is visited once.
  first_j <- if (i == last_i) max(i, last_j) + 1L else i + 1L
  if (first_j > n_codes) {
    next
  }
  js <- first_j:n_codes
  ## Number of positions at which code i and each code j carry the same colour
  simil <- rowSums(
    codes[js, , drop = FALSE] == codes[rep(i, length(js)), , drop = FALSE]
  )
  keep <- simil > 0
  if (any(keep)) {
    ## One append per code i instead of one per edge
    write.table(
      data.frame(comb_1 = i, comb_2 = js[keep], similarity = simil[keep]),
      file = edge_file, col.names = FALSE, row.names = FALSE,
      quote = FALSE, append = TRUE
    )
  }
}

## Reload the complete edge list: edges appended above exist only in the file,
## and the graphs below must be built from all of them.
edge_list <- read.table(edge_file, header = TRUE, stringsAsFactors = FALSE)

###### Let's make 3 graphs with edges representing overlap between combinations
## Every code is declared as a vertex so that codes without any edge in a
## given graph are still candidates for the independent sets.
all_codes <- data.frame(name = seq_len(n_codes))

## First graph, in which ANY overlap between two combinations is seen as an
## edge. Used to produce the list of paint combinations with no overlap.
net1 <- graph_from_data_frame(edge_list[c("comb_1", "comb_2")],
                              directed = FALSE, vertices = all_codes)
## Second graph, in which only overlaps of 2 or more spots are seen as an edge.
## Used to produce paint combinations with no more than 1 spot in common.
net2 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 2), c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)
## Third graph, in which only overlaps of 3 or more spots are seen as an edge.
## Used to produce paint combinations with no more than 2 spots in common.
net3 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 3), c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

####### Now use the ivs functions to get independent vertex sets, i.e. sets of
####### vertices with no connections between any of them.
## NOTE(review): finding largest independent sets is NP-hard; on a graph of
## this size these calls may run for a very long time -- confirm feasibility.
no_overlap_list <- largest_ivs(net1)
max_one_spot_overlap_list <- largest_ivs(net2)
max_two_spots_overlap_list <- largest_ivs(net3)
rm(list=ls())
library(gtools)
library(igraph)
##
outputfolder <- "XXXXXXXXXX"
听起来你要找的是彼此之间距离至少为 2 的组合。很好的实现!
library(vwr)
## Pairwise Levenshtein distance matrix: one column per code in `y`, each
## column holding that code's distance to every code in `y`.
lvmat <- sapply(
  X = y,
  FUN = function(code) levenshtein.distance(code, y)
)
> lvmat[1:5, 1:5]
grrp pgpg rprr yprw gggp
grrp 0 4 3 3 2
pgpg 4 0 4 4 3
rprr 3 4 0 2 4
yprw 3 4 2 0 4
gggp 2 3 4 4 0
# install.packages(c("gtools", "vwr"))
library(gtools)
library(vwr)

## Random-search for a set of COLONY_SIZE colour codes that are as dissimilar
## from each other as possible: draw many random samples of codes, score each
## sample by its pairwise Levenshtein distances, and keep the best-ranked one.
## NOTE(review): Levenshtein distance also allows insertions/deletions; since
## the marks sit at fixed body positions, a position-wise (Hamming) distance
## may be the more appropriate metric -- confirm with the author.

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = 6, r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")

## Concatenate each colour code into one string without separators
m <- paste0(body$Head, body$Thorax, body$L_gaster, body$R_gaster)

##
set.seed(1)
COLONY_SIZE <- 50   ## How many adult workers in the colony excluding the queen
N_Attempts <- 1000  ## How many alternative solutions to generate - the more
                    ## the better, but it takes longer

## Prepare data containers. Both are pre-allocated: growing a data frame with
## rbind() inside the loop copies it on every iteration.
LvList <- vector("list", N_Attempts)
summary_rows <- vector("list", N_Attempts)

for (TRY in seq_len(N_Attempts)) {
  print(paste(TRY, "of", N_Attempts))

  ## Randomly sample COLONY_SIZE codes (without replacement)
  y <- sample(m, COLONY_SIZE)

  ## Pairwise Levenshtein distances for all code pairs
  Matrix <- vapply(
    y,
    function(code) levenshtein.distance(code, y),
    numeric(COLONY_SIZE)
  )
  dimnames(Matrix) <- list(y, y)
  diag(Matrix) <- NA  ## eliminate self-self measure (distance = 0)

  ## Count, for every code, how many other codes are at distance 1.
  ## This must be done on the FULL symmetric matrix: once the lower triangle
  ## is blanked, row i only sees partners j > i and the count is too low.
  close_per_ant <- rowSums(Matrix == 1, na.rm = TRUE)

  ## Keep each pair only once (dist i-j = dist j-i)
  Matrix[lower.tri(Matrix)] <- NA

  ## Store solution
  LvList[[TRY]] <- Matrix

  ## Summarize each solution using three metrics:
  ## (i)   the average pair distance (higher is better)
  ## (ii)  the number of 'close' code pairs, i.e. pairs at the minimum
  ##       distance of 1 (lower is better)
  ## (iii) the maximum number of 'close' pairs any single code is part of
  ##       (lower is better)
  summary_rows[[TRY]] <- data.frame(
    Mean_Distance = mean(Matrix, na.rm = TRUE),
    N_close_pairs = sum(Matrix == 1, na.rm = TRUE),
    N_close_pairs_per_ant = max(close_per_ant)
  )
}
Summary <- do.call(rbind, summary_rows)

## *** Find the solution with the fewest pairs with the lowest distance ***
## Each metric is ranked so that a larger rank is better; ranks are summed.
Summary$Mean_Distance_Rank <- rank(Summary$Mean_Distance)
Summary$N_close_pairs_Rank <- rank(-Summary$N_close_pairs)
Summary$N_close_pairs_per_ant_Rank <- rank(-Summary$N_close_pairs_per_ant)
Summary$Rank_Total <- Summary$Mean_Distance_Rank +
  Summary$N_close_pairs_Rank +
  Summary$N_close_pairs_per_ant_Rank
best <- which.max(Summary$Rank_Total)
solution <- rownames(LvList[[best]])

## Highlight the chosen solution (red) among all candidates (faint black).
## `alpha` is named explicitly; the original passed a stray fifth positional
## argument that rgb() interprets as `names`.
Colour <- rep(rgb(0, 0, 0, alpha = 0.1), nrow(Summary))
Colour[best] <- "red"
pairs(
  Summary[, c("Mean_Distance", "N_close_pairs", "N_close_pairs_per_ant")],
  col = Colour, bg = Colour, pch = 21, cex = 1.4
)

## Format into a table: one row per individual, one column per body position
SOLUTION <- data.frame(
  Code = seq_len(COLONY_SIZE),
  do.call(rbind, strsplit(solution, "")),
  row.names = solution
)
colnames(SOLUTION)[2:5] <- c("Head", "Thorax", "L_gaster", "R_gaster")
library(gtools)
library(igraph)

## Build a graph whose vertices are colour codes and whose edges join codes
## that share colours at the same position, then look for the largest sets of
## mutually unconnected codes (independent vertex sets).
## The original script started with rm(list = ls()) and setwd(); both are
## dropped so that running it neither wipes the user's workspace nor changes
## the working directory. Output files still land in `outputfolder`.

## Output folder (placeholder path - edit before running)
outputfolder <- "XXXXXXXXXX"
dir.create(outputfolder, showWarnings = FALSE)
full_list_file <- file.path(outputfolder, "Paint_marks_full_list.txt")
edge_file <- file.path(outputfolder, "Edge_list.txt")

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = 6, r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")
write.table(body, file = full_list_file, col.names = TRUE, row.names = FALSE,
            quote = FALSE, append = FALSE)

n_codes <- nrow(body)
codes <- as.matrix(body)  ## character matrix: fast row-wise comparison

## Generate edge list. The list is checkpointed in Edge_list.txt so that an
## interrupted run can resume after the last edge that was written.
edge_list <- data.frame(comb_1 = character(), comb_2 = character(),
                        similarity = character())
if (!file.exists(edge_file)) {
  write.table(edge_list, file = edge_file, col.names = TRUE,
              row.names = FALSE, quote = FALSE, append = FALSE)
} else {
  edge_list <- read.table(edge_file, header = TRUE, stringsAsFactors = FALSE)
}

## Resume point. On a fresh run there is no previous edge, so start from the
## very first pair (1, 2); the original left these two variables undefined
## in that case and failed on first use.
last_i <- 1L
last_j <- 1L
if (nrow(edge_list) > 0) {
  last_i <- edge_list[nrow(edge_list), "comb_1"]
  last_j <- edge_list[nrow(edge_list), "comb_2"]
}

for (i in last_i:(n_codes - 1L)) {
  print(paste("Combination", i))
  ## On the resumed row skip partners already processed; otherwise start
  ## right after i so that each unordered pair is visited once.
  first_j <- if (i == last_i) max(i, last_j) + 1L else i + 1L
  if (first_j > n_codes) {
    next
  }
  js <- first_j:n_codes
  ## Number of positions at which code i and each code j carry the same colour
  simil <- rowSums(
    codes[js, , drop = FALSE] == codes[rep(i, length(js)), , drop = FALSE]
  )
  keep <- simil > 0
  if (any(keep)) {
    ## One append per code i instead of one per edge
    write.table(
      data.frame(comb_1 = i, comb_2 = js[keep], similarity = simil[keep]),
      file = edge_file, col.names = FALSE, row.names = FALSE,
      quote = FALSE, append = TRUE
    )
  }
}

## Reload the complete edge list: edges appended above exist only in the file,
## and the graphs below must be built from all of them.
edge_list <- read.table(edge_file, header = TRUE, stringsAsFactors = FALSE)

###### Let's make 3 graphs with edges representing overlap between combinations
## Every code is declared as a vertex so that codes without any edge in a
## given graph are still candidates for the independent sets.
all_codes <- data.frame(name = seq_len(n_codes))

## First graph, in which ANY overlap between two combinations is seen as an
## edge. Used to produce the list of paint combinations with no overlap.
net1 <- graph_from_data_frame(edge_list[c("comb_1", "comb_2")],
                              directed = FALSE, vertices = all_codes)
## Second graph, in which only overlaps of 2 or more spots are seen as an edge.
## Used to produce paint combinations with no more than 1 spot in common.
net2 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 2), c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)
## Third graph, in which only overlaps of 3 or more spots are seen as an edge.
## Used to produce paint combinations with no more than 2 spots in common.
net3 <- graph_from_data_frame(
  edge_list[which(edge_list$similarity >= 3), c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

####### Now use the ivs functions to get independent vertex sets, i.e. sets of
####### vertices with no connections between any of them.
## NOTE(review): finding largest independent sets is NP-hard; on a graph of
## this size these calls may run for a very long time -- confirm feasibility.
no_overlap_list <- largest_ivs(net1)
max_one_spot_overlap_list <- largest_ivs(net2)
max_two_spots_overlap_list <- largest_ivs(net3)