R:在 ggplot2 中绘制按因子水平重新排序(re-level)后的成对距离矩阵
加载库并创建可重现的示例。R:在 ggplot2 中绘制按因子水平重新排序(re-level)后的成对距离矩阵,r,ggplot2,tidyr,pairwise,R,Ggplot2,Tidyr,Pairwise,加载库并创建可重现的示例 #Load libraries set.seed(123) library(tidyr) library(ggplot2) #Creating a fake pairwise matrix locs <- 5 tmp <- matrix(runif(n = locs*locs),nrow = locs,ncol = locs) tmp[upper.tri(tmp,diag = T)] <- NA colnames(tmp) <- LETTERS
# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Create a fake pairwise matrix: uniform random values in the strict lower
# triangle; the diagonal and the upper triangle are set to NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Convert to a wide data frame whose first column (locA) holds the row names
tmp1 <- as.data.frame(cbind(rownames(tmp), as.data.frame(tmp)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
head(tmp1)

# Reshape to long form (one row per locA/locB pair) and drop the NA cells.
# pivot_longer() replaces the superseded gather(); as.data.frame() keeps the
# result a plain data frame, as it was before.
tmp1 <- as.data.frame(
  pivot_longer(
    tmp1,
    cols = -locA,
    names_to = "locB",
    values_to = "value",
    values_drop_na = TRUE
  )
)
tmp1

# Tiled plot using the default ordering of the axis values
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
#加载库
set.seed(123)
library(tidyr)
library(ggplot2)
#创建伪成对矩阵
我认为您需要从 tmp 出发,先创建 full.matrix(完整矩阵)。
然后,如果运行:
# Put the rows and columns of full.matrix into the order given by my.order
reordered_mat <- full.matrix[
  match(my.order, rownames(full.matrix)),
  match(my.order, colnames(full.matrix))
]

# Keep only the strict lower triangle; every other cell becomes NA
lt_reordered_mat <- reordered_mat
lt_reordered_mat[!lower.tri(reordered_mat)] <- NA

# Wide data frame with the row names carried along as the first column
tmp1 <- as.data.frame(
  cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))
)
在 Mike H. 给出他的答案的同时,我也写了一个略有不同的解决方案。我认为他的答案更好,因为它更简洁,而且不使用 for 循环。
# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Create a fake pairwise matrix: uniform random values in the strict lower
# triangle; the diagonal and the upper triangle are set to NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Convert to a wide data frame whose first column (locA) holds the row names
tmp1 <- as.data.frame(cbind(rownames(tmp), as.data.frame(tmp)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
head(tmp1)

# Reshape to long form (one row per locA/locB pair) and drop the NA cells
tmp1 <- as.data.frame(
  pivot_longer(
    tmp1,
    cols = -locA,
    names_to = "locB",
    values_to = "value",
    values_drop_na = TRUE
  )
)
tmp1

# Tiled plot using the default ordering of the axis values
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Biological order
my.order <- c("A", "C", "D", "B", "E")
my.order

# Re-level both axes to the biological order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# The re-leveled plot: some tiles now land on the other side of the diagonal
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Reorder tmp by my.order and replace NAs with zero
x <- tmp
x <- x[my.order, my.order]
x[is.na(x)] <- 0
x

# Flag (1) every cell that holds a value after reordering. The flag is taken
# from !is.na() rather than from `x != 0`, so a genuine distance of zero is
# not mistaken for an empty cell.
y <- !is.na(tmp[my.order, my.order])
storage.mode(y) <- "double"

# Decide which triangle keeps its orientation. The counts are taken ONCE,
# before y is modified: re-testing the sums after zeroing one triangle (as
# three separate `if` statements would) makes a later test fire as well and
# wipes out every flag. Ties keep the lower triangle's orientation.
n_lower <- sum(y[lower.tri(y)])
n_upper <- sum(y[upper.tri(y)])
if (n_lower >= n_upper) {
  y[lower.tri(y)] <- 0
} else {
  y[upper.tri(y)] <- 0
}

# Convert the flag matrix into a long-form data frame
fm <- as.data.frame(cbind(rownames(y), as.data.frame(y)))
names(fm)[1] <- "locA"
rownames(fm) <- NULL
fm <- as.data.frame(
  pivot_longer(fm, cols = -locA, names_to = "locB", values_to = "value")
)

# Mark the pairs that must be switched and build an identifier to merge on
fm$action <- ifelse(fm$value == 1, "switch", "keep")
fm$both <- paste0(fm$locA, fm$locB)
fm

# Create the same identifier in tmp1
tmp1$both <- paste0(tmp1$locA, tmp1$locB)
head(tmp1)

# Merge the action flags onto the distance values
tmp2 <- merge(x = fm[, c("action", "both")], y = tmp1, by = "both")

# Swap locA and locB on the flagged rows (vectorized, so an empty tmp2 is
# handled too), then re-level both columns to my.order.
to_switch <- tmp2$action == "switch"
old_a <- as.character(tmp2$locA)
old_b <- as.character(tmp2$locB)
tmp2$locA <- factor(
  ifelse(to_switch, old_b, old_a),
  levels = my.order,
  ordered = TRUE
)
tmp2$locB <- factor(
  ifelse(to_switch, old_a, old_b),
  levels = my.order,
  ordered = TRUE
)
tmp2

# Now the graphic
ggplot(
  tmp2,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
#加载库
set.seed(123)
library(tidyr)
library(ggplot2)
#创建伪成对矩阵
locs
我认为您需要在基础数据中对这两对的 locA 和 locB 进行交换。
是的,您说得对。对于这个简单的示例,我可以直接硬编码修复;但我的实际数据集有 26 个位点(loc),硬编码修复会变得非常麻烦。我正在寻找一个适用于任何成对距离矩阵的通用解决方案。
您能在更上游的位置修改基础数据吗?在您的示例中,给 colnames(tmp) 和 rownames(tmp) 赋值时,如果直接按正确的顺序(c("A","C","D","B","E"))赋值,那么在重新设定因子水平之后应该就能正常工作。
对于这个假示例,是的,这样可行;但是计算成对距离矩阵的 R 函数通常返回行和列按字母数字顺序排列的矩阵,因此在创建 tmp 的上游更改 colnames() 和 rownames() 可以解决这个问题。在本例中,tmp 的最终形式就是我的数据集的样子。这种格式的数据在群落生态学和群体遗传学中很常见。
我想你的 full.matrix
谢谢,这就是我要找的!
# Put the rows and columns of full.matrix into the order given by my.order
reordered_mat <- full.matrix[
  match(my.order, rownames(full.matrix)),
  match(my.order, colnames(full.matrix))
]

# Keep only the strict lower triangle; every other cell becomes NA
lt_reordered_mat <- reordered_mat
lt_reordered_mat[!lower.tri(reordered_mat)] <- NA

# Wide data frame with the row names carried along as the first column
tmp1 <- as.data.frame(
  cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))
)
# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Create a fake pairwise matrix: uniform random values in the strict lower
# triangle; the diagonal and the upper triangle are set to NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]

# Build the full symmetric matrix: the lower triangle plus its transpose,
# with the NA cells of each treated as zero before adding.
x <- tmp
x[is.na(x)] <- 0
y <- t(tmp)
y[is.na(y)] <- 0
full.matrix <- x + y

# Desired (biological) order of the locations
my.order <- c("A", "C", "D", "B", "E")

# match() returns NA for a name that is missing from the matrix, which would
# silently produce NA rows/columns below; fail early instead.
stopifnot(
  all(my.order %in% rownames(full.matrix)),
  all(my.order %in% colnames(full.matrix))
)

# Reorder rows and columns, then keep only the strict lower triangle
reordered_mat <- full.matrix[
  match(my.order, rownames(full.matrix)),
  match(my.order, colnames(full.matrix))
]
lt_reordered_mat <- replace(reordered_mat, !lower.tri(reordered_mat), NA)

# Wide data frame whose first column (locA) holds the row names
tmp1 <- as.data.frame(
  cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))
)
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL

# Reshape to long form (one row per locA/locB pair) and drop the NA cells.
# pivot_longer() replaces the superseded gather().
tmp1 <- as.data.frame(
  pivot_longer(
    tmp1,
    cols = -locA,
    names_to = "locB",
    values_to = "value",
    values_drop_na = TRUE
  )
)

# Re-level both axes to the biological order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)

# The re-leveled plot
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Create a fake pairwise matrix: uniform random values in the strict lower
# triangle; the diagonal and the upper triangle are set to NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Convert to a wide data frame whose first column (locA) holds the row names
tmp1 <- as.data.frame(cbind(rownames(tmp), as.data.frame(tmp)))
names(tmp1)[1] <- "locA"
rownames(tmp1) <- NULL
head(tmp1)

# Reshape to long form (one row per locA/locB pair) and drop the NA cells
tmp1 <- as.data.frame(
  pivot_longer(
    tmp1,
    cols = -locA,
    names_to = "locB",
    values_to = "value",
    values_drop_na = TRUE
  )
)
tmp1

# Tiled plot using the default ordering of the axis values
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Biological order
my.order <- c("A", "C", "D", "B", "E")
my.order

# Re-level both axes to the biological order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# The re-leveled plot: some tiles now land on the other side of the diagonal
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Reorder tmp by my.order and replace NAs with zero
x <- tmp
x <- x[my.order, my.order]
x[is.na(x)] <- 0
x

# Flag (1) every cell that holds a value after reordering. The flag is taken
# from !is.na() rather than from `x != 0`, so a genuine distance of zero is
# not mistaken for an empty cell.
y <- !is.na(tmp[my.order, my.order])
storage.mode(y) <- "double"

# Decide which triangle keeps its orientation. The counts are taken ONCE,
# before y is modified: re-testing the sums after zeroing one triangle (as
# three separate `if` statements would) makes a later test fire as well and
# wipes out every flag. Ties keep the lower triangle's orientation.
n_lower <- sum(y[lower.tri(y)])
n_upper <- sum(y[upper.tri(y)])
if (n_lower >= n_upper) {
  y[lower.tri(y)] <- 0
} else {
  y[upper.tri(y)] <- 0
}

# Convert the flag matrix into a long-form data frame
fm <- as.data.frame(cbind(rownames(y), as.data.frame(y)))
names(fm)[1] <- "locA"
rownames(fm) <- NULL
fm <- as.data.frame(
  pivot_longer(fm, cols = -locA, names_to = "locB", values_to = "value")
)

# Mark the pairs that must be switched and build an identifier to merge on
fm$action <- ifelse(fm$value == 1, "switch", "keep")
fm$both <- paste0(fm$locA, fm$locB)
fm

# Create the same identifier in tmp1
tmp1$both <- paste0(tmp1$locA, tmp1$locB)
head(tmp1)

# Merge the action flags onto the distance values
tmp2 <- merge(x = fm[, c("action", "both")], y = tmp1, by = "both")

# Swap locA and locB on the flagged rows (vectorized, so an empty tmp2 is
# handled too), then re-level both columns to my.order.
to_switch <- tmp2$action == "switch"
old_a <- as.character(tmp2$locA)
old_b <- as.character(tmp2$locB)
tmp2$locA <- factor(
  ifelse(to_switch, old_b, old_a),
  levels = my.order,
  ordered = TRUE
)
tmp2$locB <- factor(
  ifelse(to_switch, old_a, old_b),
  levels = my.order,
  ordered = TRUE
)
tmp2

# Now the graphic
ggplot(
  tmp2,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")