Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/66.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 分层聚类的堆叠条形图(树状图)_R_Bar Chart_Dendrogram_Dendextend - Fatal编程技术网

R 分层聚类的堆叠条形图(树状图)

R 分层聚类的堆叠条形图(树状图),r,bar-chart,dendrogram,dendextend,R,Bar Chart,Dendrogram,Dendextend,我正试图得到类似的结果,但不幸的是,我找不到任何软件包可以让我用树状图绘制堆叠条形图,如下图所示: 有人知道怎么做吗?这是第一次尝试答案,但要想让它真正起作用还需要更多的工作。具体来说,需要更仔细地考虑元素位置的对齐(以及它们的顺序) # library library(ggplot2) # create a dataset specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum"

我正试图得到类似的结果,但不幸的是,我找不到任何软件包可以让我用树状图绘制堆叠条形图,如下图所示:


有人知道怎么做吗?

这是第一次尝试答案,但要想让它真正起作用还需要更多的工作。具体来说,需要更仔细地考虑元素位置的对齐(以及它们的顺序)

# First-attempt example: a dendrogram of species drawn next to a stacked
# percent bar chart of the same species.
#
# Packages are attached up front so a missing dependency fails before any
# work is done. The attach order (ggplot2, dendextend, cowplot) is the same
# as in the original script.
library(ggplot2)
library(dendextend) # provides the ggplot() method used on `dend` below
library(cowplot)

# Create a dataset: 4 species x 3 conditions, random non-negative values.
# No seed is set, so the values differ between runs.
specie <- rep(c("sorgho", "poacee", "banana", "triticum"), each = 3)
condition <- rep(c("normal", "stress", "Nitrogen"), times = 4)
value <- abs(rnorm(12, mean = 0, sd = 15))
data <- data.frame(specie, condition, value)

# Cluster the species on their mean value and convert to a dendrogram.
dend <- as.dendrogram(hclust(dist(with(data, tapply(value, specie, mean)))))

# Order the bar-chart factor levels by the dendrogram's leaf labels.
data$specie <- factor(data$specie, levels = labels(dend))

# Dendrogram panel (horizontal) and stacked percent bar panel (flipped).
p1 <- ggplot(dend, horiz = TRUE)
p2 <- ggplot(data, aes(x = specie, y = value, fill = condition)) +
  geom_col(position = "fill") +
  coord_flip()

# Place the two panels side by side, aligned horizontally.
plot_grid(p1, p2, align = "h")
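# library
library(ggplot2)
# create a dataset
specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition=rep(c("normal" , "stress" , "Nitrogen") , 4)
value=abs(rnorm(12 , 0 , 15))
data=data.frame(specie,condition,value)

dend <- …(此处代码在抓取时被截断,完整代码见上文)

几乎三年过去了,据我所知,仍然没有哪个软件包能在 ggplot 中把堆叠条形图与层次聚类结合起来。以下是我基于该帖子的解决方案: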

library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)
## generate example data ####
set.seed(500)

combined_matrix …(此处代码在抓取时被截断)

这是我对 Roman 的脚本的改写版本。它在条形内显示百分比,并使用 `vegan::reorder.hclust` 对树状图的分支重新排序,使第一列值最高的行倾向于排在顶部,最后一列值最高的行倾向于排在底部。我还去掉了多余的边距、刻度和坐标轴。

库(tidyverse)
图书馆(GGO)
图书馆(素食主义者)
库(颜色空间)
图书馆(cowplot)
t=读取表格(text=“西班牙_EN 0.028152 0.971828 0.000010 0.000010
挪威中石器时代0.784705 0.083387 0.000010 0.131898
俄罗斯_Sunghir4 0.000010 0.000010 0.999970 0.000010
伊朗0.000010 0.492331 0.383227 0.124433
俄罗斯魔鬼洞穴0.000010 0.000010 0.000010 0.999970
意大利北维拉布鲁纳汞柱0.999970.0000100.0000100.000010
俄罗斯卡雷利亚0.527887 0.133179 0.072342 0.266593
俄罗斯尤亚纳乌上涨0.000010 0.000014 0.999660.000010
佐治亚州科蒂亚斯0.000010 0.537322 0.381313 0.081355
中国海岸岛0.0000100.0000100.1486520.851328
土耳其0.000010 0.999970 0.000010 0.000010
美国古代贝林吉安0.008591 0.000010 0.095008 0.896391
俄罗斯西德尔基诺汞柱0.624076 0.045350 0.105615 0.224958
俄罗斯科利马M 0.020197 0.000010 0.000010 0.979783
中国天元0.000010 0.000010 0.423731 0.576249“,行名称=1)
hc=hclust(距离(t),方法=“病房D2”)
hc=重新排序(hc,wts=-as.matrix(t)%*%seq(ncol(t))^2)#素食者::重新排序.hclust
tree=ggdendro::dendro_数据(如树状图(hc),type=“rectangle”)
p1=ggplot(ggdendro::段(树))+
几何图形段(aes(x=y,y=x,xend=yend,yend=xend),lineend=“round”,尺寸=0.4)+
scale_x_continuous(expand=expansion(add=c(0.01)))+#不要在顶级节点之间裁剪半行
刻度连续(极限=0.5+c(0,nrow(t)),扩展=c(0,0))+
主题(
axis.text=元素_blank(),
axis.ticks=元素_blank(),
axis.ticks.length=单位(0,“pt”),#删除ticks占用的额外空间
axis.title=元素_blank(),
panel.background=element_rect(fill=“white”),
panel.grid=element\u blank(),
绘图页边距=边距(5,5,5,0)
)
t=t[hc$标签[hc$订单],]
t2=数据帧(V1=行名(t)[row(t)],V2=列名(t)[col(t)],V3=未命名(do.call(c,t)))
实验室=圆形(100*t2$V3)
实验室[lab==0]=“”
p2=ggplot(t2,aes(x=因子(V1,级别=行名(t)),y=V3,填充=V2))+
几何图形条(stat=“identity”,宽度=1,位置=位置填充(反向=T))+
几何图形文本(aes(标签=实验室),位置=位置堆栈(vjust=0.5,反向=T),大小=3.5)+
coord_flip()+
比例x离散(扩展=c(0,0))+
比例y离散(扩展=c(0,0))+
刻度填充手册(数值=颜色空间::十六进制(HSV(头部(0360,长度输出=ncol(t)+1),-1),.5,1)))+
主题(
axis.text=元素\文本(color=“black”,size=11),
axis.text.x=元素_blank(),
axis.ticks=元素_blank(),
axis.title=元素_blank(),
legend.position=“无”,
绘图。边距=边距(5,0,5,5)
)
cowplot::plot_网格(p2,p1,rel_宽度=c(1,4))
保存(“a.png”,高度=0.25*nrow(t),宽度=7)

此外,ggh4x 中还有 `scale_x_dendrogram` 和 `scale_y_dendrogram`,它们使用 `ggdendro::dendro_data`。然而,我无法让它们配合 `coord_flip` 用于水平堆叠的条形图。

库(ggh4x)
t=水头(美国试验,20)
t2=数据帧(V1=行名(t)[row(t)],V2=列名(t)[col(t)],V3=未命名(do.call(c,t)))
hc=hclust(距离(t))
ggplot(t2,aes(x=factor(V1,level=rownames(t)),y=V3,fill=V2))+
几何图形(stat=“identity”,宽度=1,位置=position\u堆栈(反向=F))+
几何图形文本(aes(标签=圆形(V3)),位置=位置堆栈(vjust=0.5,反向=F),大小=3)+
比例x树状图(hclust=hc)+
比例y离散(扩展=c(0,0))+
#刻度填充手册(数值=颜色空间::十六进制(HSV(头部(0360,长度输出=ncol(t)+1),-1),.5,1)))+
主题(
axis.text=元素\文本(color=“black”,size=11),
axis.text.x=元素\文本(角度=90,hjust=1,vjust=0.5),
axis.text.y=元素_blank(),
axis.ticks=元素_blank(),
轴刻度长度=单位(14,“pt”),#树状图高度
axis.title=元素_blank(),
图例.对正=c(0,1),
图例.key=element_rect(fill=NA),#删除颜色方块周围的灰色边框
图例.余量=余量(-6,0,0,0),
图例位置=c(0,1),
legend.title=element_blank(),
panel.background=element_rect(fill=“white”),
绘图。边距=边距(5,0,5,5)
)
ggsave(“a.png”,高度=6,宽度=6)

编辑:第三个选项是使用 `circlize` 包。

库(圆圈)
图书馆(素食主义者)#用于重新订购.hclust(可能被“系列化”掩盖)
图书馆(Dendestend)#用于彩色分支机构
t=read.table(text=“Kalmyk 0.119357 0.725057 0.000010 0.037803 0.117774
吉尔吉斯共和国0.039367 0.512079 0.230150 0.095038 0.123366
阿尔泰安努切坎0.034095 0.000010 0.919478 0.000010 0.046407
阿塞拜疆0.051638 0.004671 0.010727 0.902646 0.030318
乌兹别克语0.102725 0.273261 0.001854 0.452126 0.170033
萨拉尔0.000010 0.539636 0.460334 0.000010 0.000010
喀山鞑靼0.113456 0.057026 0.000010 0
library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)

## generate example data ####
# 14 samples (s1..s14) x 4 categories; columns a-c are uniform draws on
# [0, 33] and column d is whatever remains to make each row total 100.
set.seed(500)
combined_matrix <- as.data.frame(
  matrix(
    runif(3 * 14, min = 0, max = 33), # column-major fill: a, then b, then c
    ncol = 3,
    dimnames = list(paste0("s", seq_len(14)), c("a", "b", "c"))
  )
)
combined_matrix$d <- with(combined_matrix, 100 - a - b - c)

# Bray-Curtis distance matrix between samples
dm <- vegan::vegdist(combined_matrix, method = "bray")
# UPGMA tree on those distances, converted to a dendrogram
dendUPGMA <- as.dendrogram(phangorn::upgma(dm))
# Draw a dendrogram on top of vertical stacked bars, one bar per sample.
#
# Arguments:
#   df       data frame with sample names as row names and one numeric column
#            per category. Each row is rescaled to sum to 1, so the input may
#            be counts, percentages or proportions.
#   dend     object accepted by dendro_data(); its leaf labels are joined to
#            the row names of `df`, so they are expected to match.
#   taxonomy single string: name given to the category column and used as the
#            fill legend title.
#
# Returns the combined plot built by plot_grid() (dendrogram above bars).
plot_dendro_bars_v <- function(df, dend, taxonomy) {
  stopifnot(is.character(taxonomy), length(taxonomy) == 1L)

  # Convert the dendrogram to segment data.
  dend_data <- dendro_data(dend, type = "rectangle")
  segment_data <- dend_data[["segments"]]

  # One row per leaf: x position of the sample and the bar width.
  sample_pos_table <- with(
    dend_data$labels,
    data.frame(x_center = x, sample = as.character(label), width = 0.9)
  )

  # Long format with per-sample cumulative extents. Frequency is normalised
  # once, so ymin/ymax, the tile centre and the tile height all use the same
  # scale even when a row does not sum to 100.
  ptdf <- rownames_to_column(df, var = "sample") %>%
    pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
    group_by(sample) %>%
    mutate(
      Frequency = Frequency / sum(Frequency),
      ymax = cumsum(Frequency),
      ymin = ymax - Frequency,
      y_center = ymax - Frequency / 2
    ) %>%
    ungroup() %>%
    left_join(sample_pos_table, by = "sample") %>%
    mutate(
      xmin = x_center - width / 2,
      xmax = x_center + width / 2
    )

  # Shared x limits so both panels line up; 0.1 of extra spacing per side.
  axis_limits <- with(
    sample_pos_table,
    c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))
  ) + 0.1 * c(-1, 1)

  # Stacked bars drawn as tiles, plus a black outline around each full bar.
  plt_hbars <- ggplot(
    ptdf,
    aes(
      x = x_center, y = y_center, fill = .data[[taxonomy]],
      xmin = xmin, xmax = xmax, height = Frequency, width = width
    )
  ) +
    geom_tile() +
    geom_rect(ymin = 0, ymax = 1, color = "black", fill = "transparent") +
    scale_fill_rickandmorty() +
    scale_y_continuous(expand = c(0, 0)) +
    # Label each bar with its sample name at the dendrogram leaf position.
    scale_x_continuous(
      breaks = sample_pos_table[, "x_center"],
      labels = sample_pos_table$sample,
      limits = axis_limits,
      expand = c(0, 0)
    ) +
    # fill title set explicitly so the legend shows the `taxonomy` string
    labs(x = "", y = "Frequency", fill = taxonomy) +
    theme_bw() +
    theme(
      # margin: top, right, bottom, and left
      plot.margin = unit(c(-0.9, 0.2, 1, 0.2), "cm"),
      panel.grid.minor = element_blank()
    )

  # Dendrogram panel: same breaks and limits as the bars, blank tick labels.
  plt_dendr <- ggplot(segment_data) +
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
    scale_y_continuous(expand = c(0, 0.05)) +
    scale_x_continuous(
      breaks = sample_pos_table$x_center,
      labels = rep("", nrow(sample_pos_table)),
      limits = axis_limits,
      expand = c(0, 0)
    ) +
    labs(x = "", y = "Distance", colour = "", size = "") +
    theme_bw() +
    theme(
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank()
    )

  # Combine: dendrogram on top, bars below, left/right axes aligned.
  plot_grid(
    plt_dendr, plt_hbars,
    align = "v", ncol = 1, axis = "lr", rel_heights = c(0.3, 1)
  )
}
# Render the vertical layout for the example data.
plot_dendro_bars_v(
  df = combined_matrix,
  dend = dendUPGMA,
  taxonomy = "example"
)
  # Draw a dendrogram to the left of horizontal stacked bars, one bar per
  # sample.
  #
  # Arguments:
  #   df       data frame with sample names as row names and one numeric
  #            column per category. Each row is rescaled to sum to 1, so the
  #            input may be counts, percentages or proportions.
  #   dend     object accepted by dendro_data(); its leaf labels are joined to
  #            the row names of `df`, so they are expected to match.
  #   taxonomy single string: name given to the category column and used as
  #            the fill legend title.
  #
  # Returns the combined plot built by plot_grid() (dendrogram left of bars).
  plot_dendro_bars_h <- function(df, dend, taxonomy) {
    stopifnot(is.character(taxonomy), length(taxonomy) == 1L)

    # Convert the dendrogram to segment data, swapping x and y so the tree
    # grows horizontally.
    dend_data <- dendro_data(dend, type = "rectangle")
    segment_data <- with(
      segment(dend_data),
      data.frame(x = y, y = x, xend = yend, yend = xend)
    )

    # One row per leaf: y position of the sample and the bar height.
    sample_pos_table <- with(
      dend_data$labels,
      data.frame(y_center = x, sample = as.character(label), height = 0.9)
    )

    # Long format with per-sample cumulative extents. Frequency is normalised
    # once, so xmin/xmax, the tile centre and the tile width all use the same
    # scale even when a row does not sum to 100.
    ptdf <- rownames_to_column(df, var = "sample") %>%
      pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
      group_by(sample) %>%
      mutate(
        Frequency = Frequency / sum(Frequency),
        xmax = cumsum(Frequency),
        xmin = xmax - Frequency,
        x_center = xmax - Frequency / 2
      ) %>%
      ungroup() %>%
      left_join(sample_pos_table, by = "sample") %>%
      mutate(
        ymin = y_center - height / 2,
        ymax = y_center + height / 2
      )

    # Shared y limits so both panels line up; 0.1 of extra spacing per side.
    axis_limits <- with(
      sample_pos_table,
      c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))
    ) + 0.1 * c(-1, 1)

    # Stacked bars drawn as tiles, plus a black outline around each full bar.
    plt_hbars <- ggplot(
      ptdf,
      aes(
        x = x_center, y = y_center, fill = .data[[taxonomy]],
        ymin = ymin, ymax = ymax, height = height, width = Frequency
      )
    ) +
      geom_tile() +
      geom_rect(xmin = 0, xmax = 1, color = "black", fill = "transparent") +
      scale_fill_rickandmorty() +
      scale_x_continuous(expand = c(0, 0)) +
      # Tick labels are blank here; the dendrogram panel carries the names.
      scale_y_continuous(
        breaks = sample_pos_table[, "y_center"],
        labels = rep("", nrow(sample_pos_table)),
        limits = axis_limits,
        expand = c(0, 0)
      ) +
      # fill title set explicitly so the legend shows the `taxonomy` string
      labs(x = "Frequency", y = "", fill = taxonomy) +
      theme_bw() +
      theme(
        # margin: top, right, bottom, and left
        plot.margin = unit(c(1, 0.2, 0.2, -0.9), "cm"),
        panel.grid.minor = element_blank()
      )

    # Dendrogram panel: reversed x axis so the root sits on the left, and
    # sample names as y tick labels.
    plt_dendr <- ggplot(segment_data) +
      geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
      scale_x_reverse(expand = c(0, 0.05)) +
      scale_y_continuous(
        breaks = sample_pos_table$y_center,
        labels = sample_pos_table$sample,
        limits = axis_limits,
        expand = c(0, 0)
      ) +
      labs(x = "Distance", y = "", colour = "", size = "") +
      theme_bw() +
      theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()
      )

    # Combine: dendrogram on the left, bars on the right.
    plot_grid(plt_dendr, plt_hbars, align = "h", rel_widths = c(0.3, 1))
  }
# Render the horizontal layout for the example data.
plot_dendro_bars_h(
  df = combined_matrix,
  dend = dendUPGMA,
  taxonomy = "example"
)