如何在R中生成N个最不相似的组合_R_Cryptography_Combinations_Permutation_Combinatorics

如何在R中生成N个最不相似的组合

r cryptography

如何在R中生成N个最不相似的组合,r,cryptography,combinations,permutation,combinatorics,R,Cryptography,Combinations,Permutation,Combinatorics,我有一套6个色码（x），一套N个个体，每个个体都需要贴上一个独特的色码标签，每个动物身上有四个位置，每个位置都可以携带不同的颜色。我有6种不同的颜色因此，两个人的代码可能是： 1.红、蓝、蓝、白 2.白色，黄色，粉色，黄色但是，由于每个位置的颜色都可能脱落，我想生成一个冗余的标签方案，这样即使在一个（甚至两个？）位置失去颜色后，仍然可以将个人与其他人区分开来尽管6种颜色和4种位置给出了1296种组合，但我发现很难选择N种最不相似的组合：可复制示例：库（gtools）我不能肯定地回答你

我有一套6个色码（x），一套N个个体，每个个体都需要贴上一个独特的色码标签，每个动物身上有四个位置，每个位置都可以携带不同的颜色。我有6种不同的颜色

因此，两个人的代码可能是：
1.红、蓝、蓝、白
2.白色，黄色，粉色，黄色

但是，由于每个位置的颜色都可能脱落，我想生成一个冗余的标签方案，这样即使在一个（甚至两个？）位置失去颜色后，仍然可以将个人与其他人区分开来
尽管6种颜色和4种位置给出了1296种组合，但我发现很难选择N种最不相似的组合：
可复制示例：
library(gtools)

我不能肯定地回答你的问题，但我有一个想法，可能会对你有所帮助。

使用每种颜色的首字母生成字符串代码：

library(gtools)
x <- c("w", "r", "g", "b", "p", "y")
Perms <- permutations(n=6,r=4,v=x,repeats.allowed=T)
m <- apply(Perms, 1, paste, collapse = "")
> head(m)
[1] "bbbb" "bbbg" "bbbp" "bbbr" "bbbw" "bbby"

然后创建一个 n*n 的 Levenshtein 距离矩阵（其中 y 应为从 m 中抽取的一组代码，原文未给出其定义）：

library(vwr)
lvmat <- sapply(y, function(x) levenshtein.distance(x, y))
> lvmat[1:5, 1:5]
     grrp pgpg rprr yprw gggp
grrp    0    4    3    3    2
pgpg    4    0    4    4    3
rprr    3    4    0    2    4
yprw    3    4    2    0    4
gggp    2    3    4    4    0

现在，您可以通过自助法（bootstrap）或任何您喜欢的方式来最大化 sum(lvmat)，以获得彼此差异最大的一组组合样本。
下面是按上述建议、通过重复多轮随机抽样实现的示例。注意，由于依赖随机抽样，这仍然不能保证不会出现仅在一个位置上不同的代码对。不过，这是一个很好的开始——谢谢！

# install.packages(c("gtools", "vwr"))
library(gtools)
library(vwr)

## Random search for a set of colour codes that are as dissimilar as possible.
## Each attempt draws COLONY_SIZE codes at random, scores the draw by its
## pairwise Levenshtein distances, and the best-scoring draw is kept.

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")

## Concatenate each colour code into one string without separators
m <- paste0(body$Head, body$Thorax, body$L_gaster, body$R_gaster)

set.seed(1)         # make the random search reproducible
COLONY_SIZE <- 50   # how many adult workers in the colony, excluding the queen
N_Attempts <- 1000  # how many alternative solutions to generate - the more
                    # the better, but it takes longer

## sample() without replacement fails if more codes are requested than exist,
## and at least two codes are needed to form a pair.
stopifnot(COLONY_SIZE >= 2, COLONY_SIZE <= length(m), N_Attempts >= 1)

## Pre-allocate the containers instead of growing them inside the loop.
LvList <- vector("list", N_Attempts)
summary_rows <- vector("list", N_Attempts)

for (TRY in seq_len(N_Attempts)) {
  if (TRY %% 100 == 0) {
    message(TRY, " of ", N_Attempts)
  }

  ## Randomly sample COLONY_SIZE codes
  y <- sample(m, COLONY_SIZE)

  ## Pairwise Levenshtein distances for all pairs (full symmetric matrix)
  full_dist <- vapply(
    y,
    function(code) as.numeric(levenshtein.distance(code, y)),
    numeric(COLONY_SIZE)
  )
  dimnames(full_dist) <- list(y, y)

  ## Keep each pair once: drop the self-self diagonal (distance 0) and the
  ## lower triangle (dist i-j = dist j-i)
  Matrix <- full_dist
  Matrix[lower.tri(Matrix, diag = TRUE)] <- NA

  ## Store solution
  LvList[[TRY]] <- Matrix

  ## Summarize each solution using three metrics:
  ## (i)   the average pair distance (higher is better)
  ## (ii)  the number of 'close' code pairs, i.e. distance 1 (lower is better)
  ## (iii) the maximum number of 'close' partners of any single code (lower
  ##       is better). This is counted on the full symmetric matrix: on the
  ##       upper triangle a code would only see partners sampled after it.
  summary_rows[[TRY]] <- data.frame(
    Mean_Distance = mean(Matrix, na.rm = TRUE),
    N_close_pairs = sum(Matrix == 1, na.rm = TRUE),
    N_close_pairs_per_ant = max(rowSums(full_dist == 1))
  )
}
Summary <- do.call(rbind, summary_rows)

## Find the solution with the fewest pairs with the lowest distance.
## Every rank is oriented so that a larger rank is better.
Summary$Mean_Distance_Rank <- rank(Summary$Mean_Distance)
Summary$N_close_pairs_Rank <- rank(-Summary$N_close_pairs)
Summary$N_close_pairs_per_ant_Rank <- rank(-Summary$N_close_pairs_per_ant)
Summary$Rank_Total <- Summary$Mean_Distance_Rank +
  Summary$N_close_pairs_Rank +
  Summary$N_close_pairs_per_ant_Rank

best <- which.max(Summary$Rank_Total)
solution <- rownames(LvList[[best]])

## Highlight the chosen solution among all candidates.
## alpha is passed by name: the fifth positional argument of rgb() is `names`.
Colour <- rep(rgb(0, 0, 0, alpha = 0.1), nrow(Summary))
Colour[best] <- "red"
pairs(
  Summary[, c("Mean_Distance", "N_close_pairs", "N_close_pairs_per_ant")],
  col = Colour, bg = Colour, pch = 21, cex = 1.4
)

## Format into a table: one row per individual, one column per body position
SOLUTION <- data.frame(
  Code = seq_len(COLONY_SIZE),
  do.call(rbind, strsplit(solution, "")),
  stringsAsFactors = FALSE
)
colnames(SOLUTION)[2:5] <- c("Head", "Thorax", "L_gaster", "R_gaster")

这里有一种更好的方法：它不依赖盲目的随机抽样，而是把每对代码之间的相似性表示为网络中的一条边，然后使用 igraph 的 largest_ivs() 函数（最大独立顶点集）来寻找彼此最不相似的一组代码：

library(gtools)
library(igraph)

## Folder that receives the output files; replace the placeholder with a path
outputfolder <- "XXXXXXXXXX"
dir.create(outputfolder, showWarnings = FALSE, recursive = TRUE)

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")
write.table(
  body,
  file = file.path(outputfolder, "Paint_marks_full_list.txt"),
  col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
)

## Generate edge list.
## similarity[i, j] = number of body positions at which codes i and j carry
## the same colour. It is built one position at a time with outer(), which
## replaces the pair-by-pair loop that appended one line per edge to the file.
codes <- as.matrix(body)
n_codes <- nrow(codes)
similarity <- matrix(0L, nrow = n_codes, ncol = n_codes)
for (position in seq_len(ncol(codes))) {
  similarity <- similarity +
    outer(codes[, position], codes[, position], "==")
}

## Keep each unordered pair once (i < j) and only pairs sharing >= 1 position
shared <- which(upper.tri(similarity) & similarity > 0, arr.ind = TRUE)
edge_list <- data.frame(
  comb_1 = shared[, "row"],
  comb_2 = shared[, "col"],
  similarity = similarity[shared]
)
edge_list <- edge_list[order(edge_list$comb_1, edge_list$comb_2), ]

## The edge list is always recomputed and the file overwritten, so the graphs
## below never depend on a partial file left behind by an interrupted run.
write.table(
  edge_list,
  file = file.path(outputfolder, "Edge_list.txt"),
  col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
)

###### Let's make 3 graphs with edges representing overlap between combinations
## Every combination is listed as a vertex (named by its row index in `body`),
## so a combination without any edge is still part of the graph.
all_codes <- data.frame(name = seq_len(n_codes))

## First graph, in which ANY overlap between two combinations is seen as an
## edge. Will be used to produce list of paint combinations with no overlap
net1 <- graph_from_data_frame(
  edge_list[, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

## Second graph, in which only overlaps of 2 or more spots is seen as an edge.
## Will be used to produce list of paint combinations with no more than 1 spot
## in common
net2 <- graph_from_data_frame(
  edge_list[edge_list$similarity >= 2, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

## Third graph, in which only overlaps of 3 or more spots is seen as an edge.
## Will be used to produce list of paint combinations with no more than 2
## spots in common
net3 <- graph_from_data_frame(
  edge_list[edge_list$similarity >= 3, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

####### Now let's use the ivs function to get independent vertex sets, i.e.,
####### sets of vertices with no connections between any of them.
## NOTE(review): searching for largest independent sets can be very slow on a
## graph of this size - confirm the run time before relying on it.
no_overlap_list <- largest_ivs(net1)
max_one_spot_overlap_list <- largest_ivs(net2)
max_two_spots_overlap_list <- largest_ivs(net3)

听起来你要找的是彼此至少在两个位置上不同的组合。很好的实现！

library(vwr)
lvmat <- sapply(y, function(x) levenshtein.distance(x, y))
> lvmat[1:5, 1:5]
     grrp pgpg rprr yprw gggp
grrp    0    4    3    3    2
pgpg    4    0    4    4    3
rprr    3    4    0    2    4
yprw    3    4    2    0    4
gggp    2    3    4    4    0

# install.packages(c("gtools", "vwr"))
library(gtools)
library(vwr)

## Random search for a set of colour codes that are as dissimilar as possible.
## Each attempt draws COLONY_SIZE codes at random, scores the draw by its
## pairwise Levenshtein distances, and the best-scoring draw is kept.

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")

## Concatenate each colour code into one string without separators
m <- paste0(body$Head, body$Thorax, body$L_gaster, body$R_gaster)

set.seed(1)         # make the random search reproducible
COLONY_SIZE <- 50   # how many adult workers in the colony, excluding the queen
N_Attempts <- 1000  # how many alternative solutions to generate - the more
                    # the better, but it takes longer

## sample() without replacement fails if more codes are requested than exist,
## and at least two codes are needed to form a pair.
stopifnot(COLONY_SIZE >= 2, COLONY_SIZE <= length(m), N_Attempts >= 1)

## Pre-allocate the containers instead of growing them inside the loop.
LvList <- vector("list", N_Attempts)
summary_rows <- vector("list", N_Attempts)

for (TRY in seq_len(N_Attempts)) {
  if (TRY %% 100 == 0) {
    message(TRY, " of ", N_Attempts)
  }

  ## Randomly sample COLONY_SIZE codes
  y <- sample(m, COLONY_SIZE)

  ## Pairwise Levenshtein distances for all pairs (full symmetric matrix)
  full_dist <- vapply(
    y,
    function(code) as.numeric(levenshtein.distance(code, y)),
    numeric(COLONY_SIZE)
  )
  dimnames(full_dist) <- list(y, y)

  ## Keep each pair once: drop the self-self diagonal (distance 0) and the
  ## lower triangle (dist i-j = dist j-i)
  Matrix <- full_dist
  Matrix[lower.tri(Matrix, diag = TRUE)] <- NA

  ## Store solution
  LvList[[TRY]] <- Matrix

  ## Summarize each solution using three metrics:
  ## (i)   the average pair distance (higher is better)
  ## (ii)  the number of 'close' code pairs, i.e. distance 1 (lower is better)
  ## (iii) the maximum number of 'close' partners of any single code (lower
  ##       is better). This is counted on the full symmetric matrix: on the
  ##       upper triangle a code would only see partners sampled after it.
  summary_rows[[TRY]] <- data.frame(
    Mean_Distance = mean(Matrix, na.rm = TRUE),
    N_close_pairs = sum(Matrix == 1, na.rm = TRUE),
    N_close_pairs_per_ant = max(rowSums(full_dist == 1))
  )
}
Summary <- do.call(rbind, summary_rows)

## Find the solution with the fewest pairs with the lowest distance.
## Every rank is oriented so that a larger rank is better.
Summary$Mean_Distance_Rank <- rank(Summary$Mean_Distance)
Summary$N_close_pairs_Rank <- rank(-Summary$N_close_pairs)
Summary$N_close_pairs_per_ant_Rank <- rank(-Summary$N_close_pairs_per_ant)
Summary$Rank_Total <- Summary$Mean_Distance_Rank +
  Summary$N_close_pairs_Rank +
  Summary$N_close_pairs_per_ant_Rank

best <- which.max(Summary$Rank_Total)
solution <- rownames(LvList[[best]])

## Highlight the chosen solution among all candidates.
## alpha is passed by name: the fifth positional argument of rgb() is `names`.
Colour <- rep(rgb(0, 0, 0, alpha = 0.1), nrow(Summary))
Colour[best] <- "red"
pairs(
  Summary[, c("Mean_Distance", "N_close_pairs", "N_close_pairs_per_ant")],
  col = Colour, bg = Colour, pch = 21, cex = 1.4
)

## Format into a table: one row per individual, one column per body position
SOLUTION <- data.frame(
  Code = seq_len(COLONY_SIZE),
  do.call(rbind, strsplit(solution, "")),
  stringsAsFactors = FALSE
)
colnames(SOLUTION)[2:5] <- c("Head", "Thorax", "L_gaster", "R_gaster")

library(gtools)
library(igraph)

## Folder that receives the output files; replace the placeholder with a path
outputfolder <- "XXXXXXXXXX"
dir.create(outputfolder, showWarnings = FALSE, recursive = TRUE)

## Available colours
x <- c("W", "R", "G", "B", "P", "Y")

## Generate all possible colour combinations, for 6 colours & 4 positions
body <- data.frame(
  permutations(n = length(x), r = 4, v = x, repeats.allowed = TRUE),
  stringsAsFactors = FALSE
)
colnames(body) <- c("Head", "Thorax", "L_gaster", "R_gaster")
write.table(
  body,
  file = file.path(outputfolder, "Paint_marks_full_list.txt"),
  col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
)

## Generate edge list.
## similarity[i, j] = number of body positions at which codes i and j carry
## the same colour. It is built one position at a time with outer(), which
## replaces the pair-by-pair loop that appended one line per edge to the file.
codes <- as.matrix(body)
n_codes <- nrow(codes)
similarity <- matrix(0L, nrow = n_codes, ncol = n_codes)
for (position in seq_len(ncol(codes))) {
  similarity <- similarity +
    outer(codes[, position], codes[, position], "==")
}

## Keep each unordered pair once (i < j) and only pairs sharing >= 1 position
shared <- which(upper.tri(similarity) & similarity > 0, arr.ind = TRUE)
edge_list <- data.frame(
  comb_1 = shared[, "row"],
  comb_2 = shared[, "col"],
  similarity = similarity[shared]
)
edge_list <- edge_list[order(edge_list$comb_1, edge_list$comb_2), ]

## The edge list is always recomputed and the file overwritten, so the graphs
## below never depend on a partial file left behind by an interrupted run.
write.table(
  edge_list,
  file = file.path(outputfolder, "Edge_list.txt"),
  col.names = TRUE, row.names = FALSE, quote = FALSE, append = FALSE
)

###### Let's make 3 graphs with edges representing overlap between combinations
## Every combination is listed as a vertex (named by its row index in `body`),
## so a combination without any edge is still part of the graph.
all_codes <- data.frame(name = seq_len(n_codes))

## First graph, in which ANY overlap between two combinations is seen as an
## edge. Will be used to produce list of paint combinations with no overlap
net1 <- graph_from_data_frame(
  edge_list[, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

## Second graph, in which only overlaps of 2 or more spots is seen as an edge.
## Will be used to produce list of paint combinations with no more than 1 spot
## in common
net2 <- graph_from_data_frame(
  edge_list[edge_list$similarity >= 2, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

## Third graph, in which only overlaps of 3 or more spots is seen as an edge.
## Will be used to produce list of paint combinations with no more than 2
## spots in common
net3 <- graph_from_data_frame(
  edge_list[edge_list$similarity >= 3, c("comb_1", "comb_2")],
  directed = FALSE, vertices = all_codes
)

####### Now let's use the ivs function to get independent vertex sets, i.e.,
####### sets of vertices with no connections between any of them.
## NOTE(review): searching for largest independent sets can be very slow on a
## graph of this size - confirm the run time before relying on it.
no_overlap_list <- largest_ivs(net1)
max_one_spot_overlap_list <- largest_ivs(net2)
max_two_spots_overlap_list <- largest_ivs(net3)