R:在 ggplot2 中绘制重新设定因子水平(re-leveled)后的成对距离矩阵

R 在ggplot2中绘制重新调平的成对距离矩阵,r,ggplot2,tidyr,pairwise,R,Ggplot2,Tidyr,Pairwise,加载库并创建可复制的示例 #Load libraries set.seed(123) library(tidyr) library(ggplot2) #Creating a fake pairwise matrix locs <- 5 tmp <- matrix(runif(n = locs*locs),nrow = locs,ncol = locs) tmp[upper.tri(tmp,diag = T)] <- NA colnames(tmp) <- LETTERS

加载库并创建可复制的示例

# Reproducible setup: fix the RNG seed, then attach the packages used below.
set.seed(123)
library(tidyr)
library(ggplot2)

# Simulate a pairwise matrix: locs x locs uniform draws, labelled with the
# first `locs` capital letters, keeping only the strict lower triangle.
locs <- 5
tmp <- matrix(
  runif(n = locs * locs),
  nrow = locs,
  ncol = locs,
  dimnames = list(LETTERS[1:locs], LETTERS[1:locs])
)
tmp[!lower.tri(tmp)] <- NA
tmp

# Data frame version with the row labels as an explicit leading column, locA.
tmp1 <- cbind(locA = rownames(tmp), as.data.frame(tmp))
rownames(tmp1) <- NULL
head(tmp1)

# Long form: one row per (locA, locB) pair; the blanked (NA) cells are dropped.
tmp1 <- gather(tmp1, key = "locB", value = "value", -locA)
tmp1 <- subset(tmp1, !is.na(value))
tmp1

# Tile plot using the default (alphabetical) ordering of both axes.
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)
#Creating a fake pairwise matrix

tmp
full之后开始创建LOC。如果运行:

# Positions of each my.order label among the row and column names.
row_idx <- match(my.order, rownames(full.matrix))
col_idx <- match(my.order, colnames(full.matrix))
reordered_mat <- full.matrix[row_idx, col_idx]

# Blank the diagonal and the upper triangle so only cells below the diagonal
# of the reordered matrix keep their values.
lt_reordered_mat <- reordered_mat
lt_reordered_mat[upper.tri(lt_reordered_mat, diag = TRUE)] <- NA

# Data frame with the row labels prepended as the first column.
tmp1 <- cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))

当Mike H.提供他的答案时,我创建了一个稍微不同的解决方案。我认为他的答案更好,因为它更简洁,不使用for循环

# Plot a lower-triangular pairwise matrix as tiles after re-leveling both axes
# to a custom order, swapping locA/locB for the pairs that the new order would
# otherwise place on the wrong side of the diagonal.

# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Creating a fake pairwise matrix (values only below the diagonal)
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Converting into a data frame with the row labels as column locA
tmp1 <- cbind(locA = rownames(tmp), as.data.frame(tmp))
rownames(tmp1) <- NULL
head(tmp1)

# Changing it to long form and getting rid of NAs
tmp1 <- gather(tmp1, key = "locB", value = "value", -locA)
tmp1 <- tmp1[!is.na(tmp1$value), ]
tmp1

# Making a tiled plot based on default levels
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Biological order
my.order <- c("A", "C", "D", "B", "E")
my.order

# Re-leveling
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# The releveled plot (pairs whose relative order flips under my.order now sit
# on the other side of the diagonal)
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Reordering tmp by my.order
x <- tmp[my.order, my.order]

# Indicator of filled cells in the reordered matrix. It is built from the NA
# pattern (before zero-filling) so that a genuine distance of 0 is still
# counted as a filled cell.
y <- (!is.na(x)) * 1

# Replacing NAs with zero
x[is.na(x)] <- 0
x

# Figuring out which side of the matrix needs locA and locB switched: keep the
# 1s only in the triangle holding fewer filled cells. The counts are taken once
# and the branches are mutually exclusive; re-testing y after it has been
# modified would clear both triangles and mark nothing for switching.
n_lower <- sum(y[lower.tri(y)])
n_upper <- sum(y[upper.tri(y)])
if (n_lower >= n_upper) {
  y[lower.tri(y)] <- 0
} else {
  y[upper.tri(y)] <- 0
}

# Converting y into a long form data frame
fm <- cbind(locA = rownames(y), as.data.frame(y))
rownames(fm) <- NULL
fm <- gather(fm, key = "locB", value = "value", -locA)

# Identifying which pairs need to be switched and creating an identifier to
# merge with
fm$action <- ifelse(fm$value == 1, "switch", "keep")
fm$both <- paste0(fm$locA, fm$locB)
fm

# Creating the same identifier in tmp1
tmp1$both <- paste0(tmp1$locA, tmp1$locB)
head(tmp1)

# Merging fm and tmp1 together (only pairs present in tmp1 survive)
tmp2 <- merge(x = fm[, c("action", "both")], y = tmp1, by = "both")
tmp2

# Swapping locA and locB for the rows flagged "switch" (vectorised, so it is
# also safe when tmp2 has zero rows)
to_switch <- tmp2$action == "switch"
old_a <- as.character(tmp2$locA)
old_b <- as.character(tmp2$locB)
tmp2$locA <- ifelse(to_switch, old_b, old_a)
tmp2$locB <- ifelse(to_switch, old_a, old_b)
tmp2

# Re-leveling to my order
tmp2$locA <- factor(tmp2$locA, levels = my.order, ordered = TRUE)
tmp2$locB <- factor(tmp2$locB, levels = my.order, ordered = TRUE)
tmp2

# Now the graphic
ggplot(tmp2, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)
#Creating a fake pairwise matrix

locs我认为您需要为基础数据中的这两对进行
locA
locB
切换。是的,您是对的。对于这个简单的示例,我可以在其中硬编码修复,但对于我的实际数据集,有26个loc,因此硬编码修复变得更加麻烦。我正在寻找一个通用的解决方案,可以在任何成对距离矩阵中解决这个问题。您能更上游地更改基础数据吗?在您的示例中,当您分配
colnames(tmp)
rownames(tmp)
时,如果您分配了正确的顺序(
c("A","C","D","B","E")
),它应该在您重新分配顺序后工作。对于假示例,是,它会工作,但是计算两两距离矩阵的R函数通常提供一个矩阵,其中列和行的顺序是以字母数字的方式进行的,因此在创建tmp的上游更改colnames()和rownames()可以解决这个问题。在本例中,tmp的最终形式是我的数据集的样子。这种格式化数据在社区生态学和群体遗传学中很常见。我想你的
full.matrix
谢谢,这就是我要找的!
# Positions of each my.order label among the row and column names.
row_idx <- match(my.order, rownames(full.matrix))
col_idx <- match(my.order, colnames(full.matrix))
reordered_mat <- full.matrix[row_idx, col_idx]

# Blank the diagonal and the upper triangle so only cells below the diagonal
# of the reordered matrix keep their values.
lt_reordered_mat <- reordered_mat
lt_reordered_mat[upper.tri(lt_reordered_mat, diag = TRUE)] <- NA

# Data frame with the row labels prepended as the first column.
tmp1 <- cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))
# Reproducible setup: fix the RNG seed, then attach the packages used below.
set.seed(123)
library(tidyr)
library(ggplot2)

# Simulate a pairwise matrix: locs x locs uniform draws, labelled with the
# first `locs` capital letters, keeping only the strict lower triangle.
locs <- 5
tmp <- matrix(
  runif(n = locs * locs),
  nrow = locs,
  ncol = locs,
  dimnames = list(LETTERS[1:locs], LETTERS[1:locs])
)
tmp[!lower.tri(tmp)] <- NA

# Rebuild the full matrix: zero-fill the blanked cells and add the transpose,
# so every pair is present on both sides of a zero diagonal.
x <- replace(tmp, is.na(tmp), 0)
y <- t(x)
full.matrix <- x + y

# Desired display order of the locations.
my.order <- c("A", "C", "D", "B", "E")

# Reorder rows and columns to my.order, then blank everything that is not
# strictly below the diagonal of the reordered matrix.
reordered_mat <- full.matrix[
  match(my.order, rownames(full.matrix)),
  match(my.order, colnames(full.matrix))
]
lt_reordered_mat <- reordered_mat
lt_reordered_mat[upper.tri(lt_reordered_mat, diag = TRUE)] <- NA

# Data frame with the row labels as an explicit leading column, locA.
tmp1 <- cbind(
  locA = rownames(lt_reordered_mat),
  as.data.frame(lt_reordered_mat)
)
rownames(tmp1) <- NULL

# Long form: one row per (locA, locB) pair; the blanked (NA) cells are dropped.
tmp1 <- gather(tmp1, key = "locB", value = "value", -locA)
tmp1 <- subset(tmp1, !is.na(value))

# Turn both location columns into ordered factors following my.order.
tmp1[c("locA", "locB")] <- lapply(
  tmp1[c("locA", "locB")],
  factor,
  levels = my.order,
  ordered = TRUE
)

# Tile plot with both axes in my.order.
ggplot(
  tmp1,
  aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")
# Plot a lower-triangular pairwise matrix as tiles after re-leveling both axes
# to a custom order, swapping locA/locB for the pairs that the new order would
# otherwise place on the wrong side of the diagonal.

# Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Creating a fake pairwise matrix (values only below the diagonal)
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Converting into a data frame with the row labels as column locA
tmp1 <- cbind(locA = rownames(tmp), as.data.frame(tmp))
rownames(tmp1) <- NULL
head(tmp1)

# Changing it to long form and getting rid of NAs
tmp1 <- gather(tmp1, key = "locB", value = "value", -locA)
tmp1 <- tmp1[!is.na(tmp1$value), ]
tmp1

# Making a tiled plot based on default levels
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Biological order
my.order <- c("A", "C", "D", "B", "E")
my.order

# Re-leveling
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# The releveled plot (pairs whose relative order flips under my.order now sit
# on the other side of the diagonal)
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# Reordering tmp by my.order
x <- tmp[my.order, my.order]

# Indicator of filled cells in the reordered matrix. It is built from the NA
# pattern (before zero-filling) so that a genuine distance of 0 is still
# counted as a filled cell.
y <- (!is.na(x)) * 1

# Replacing NAs with zero
x[is.na(x)] <- 0
x

# Figuring out which side of the matrix needs locA and locB switched: keep the
# 1s only in the triangle holding fewer filled cells. The counts are taken once
# and the branches are mutually exclusive; re-testing y after it has been
# modified would clear both triangles and mark nothing for switching.
n_lower <- sum(y[lower.tri(y)])
n_upper <- sum(y[upper.tri(y)])
if (n_lower >= n_upper) {
  y[lower.tri(y)] <- 0
} else {
  y[upper.tri(y)] <- 0
}

# Converting y into a long form data frame
fm <- cbind(locA = rownames(y), as.data.frame(y))
rownames(fm) <- NULL
fm <- gather(fm, key = "locB", value = "value", -locA)

# Identifying which pairs need to be switched and creating an identifier to
# merge with
fm$action <- ifelse(fm$value == 1, "switch", "keep")
fm$both <- paste0(fm$locA, fm$locB)
fm

# Creating the same identifier in tmp1
tmp1$both <- paste0(tmp1$locA, tmp1$locB)
head(tmp1)

# Merging fm and tmp1 together (only pairs present in tmp1 survive)
tmp2 <- merge(x = fm[, c("action", "both")], y = tmp1, by = "both")
tmp2

# Swapping locA and locB for the rows flagged "switch" (vectorised, so it is
# also safe when tmp2 has zero rows)
to_switch <- tmp2$action == "switch"
old_a <- as.character(tmp2$locA)
old_b <- as.character(tmp2$locB)
tmp2$locA <- ifelse(to_switch, old_b, old_a)
tmp2$locB <- ifelse(to_switch, old_a, old_b)
tmp2

# Re-leveling to my order
tmp2$locA <- factor(tmp2$locA, levels = my.order, ordered = TRUE)
tmp2$locB <- factor(tmp2$locB, levels = my.order, ordered = TRUE)
tmp2

# Now the graphic
ggplot(tmp2, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")