R 高亮显示ggplot堆叠条形图中的一个因子

R 高亮显示ggplot堆叠条形图中的一个因子,r,ggplot2,R,Ggplot2,我有几项实验的数据,这些实验给出了一个物种的DNA在样本中的比例,我想用叠加条形图来表示。示例数据是 sample_df <- structure(list(sample = c("R17108_BSSE_QGF_132757_HHWVVDRXX_1_G44937_CCGTGAAG_CAGTGGAT_S29_L001", "R17108_BSSE_QGF_132757_HHWVVDRXX_1_G44937_CCGTGAAG_CAGTGGAT_S29_L001

我有几项实验的数据,这些实验给出了一个物种的DNA在样本中的比例,我想用叠加条形图来表示。示例数据是

# Example data: 40 rows, one per (sample, species) pair, stored as a tibble
# (class "tbl_df").
#   sample               - sample identifier; used as the x axis of the plots
#   name                 - species name reported for that sample
#   fraction_total_reads - numeric value per row; presumably the fraction of
#                          the sample's reads assigned to that species
#                          (TODO confirm against the data source)
sample_df <- structure(list(sample = c("R17108_BSSE_QGF_132757_HHWVVDRXX_1_G44937_CCGTGAAG_CAGTGGAT_S29_L001", 
"R17108_BSSE_QGF_132757_HHWVVDRXX_1_G44937_CCGTGAAG_CAGTGGAT_S29_L001", 
"R17108_BSSE_QGF_132757_HHWVVDRXX_1_G44937_CCGTGAAG_CAGTGGAT_S29_L001", 
"R18676", "R18676", "R23399_COW6673A59_S33", "R23399_COW6673A59_S33", 
"R23464_COW5599A32_S33", "R23464_COW5599A32_S33", "R23464_COW5599A32_S33", 
"R24033_concatreseq", "R24033_concatreseq", "R24033_concatreseq", 
"R24033_concatreseq", "R24033_concatreseq", "R24033_concatreseq", 
"R24033_concatreseq", "R24033", "R24033", "R24033", "R24033", 
"R24033", "R24033", "R30216_concatreseq", "R30216_concatreseq", 
"R30216", "R30216", "R31417_concatreseq", "R31417", "R32064", 
"R32064", "R32064", "R32064", "R4752_BSSE_QGF_132888_HHWVVDRXX_1_G45072_CAGGAGCC_GTCCAATC_S159_L001", 
"R4752_BSSE_QGF_132888_HHWVVDRXX_1_G45072_CAGGAGCC_GTCCAATC_S159_L001", 
"R4752_BSSE_QGF_132888_HHWVVDRXX_1_G45072_CAGGAGCC_GTCCAATC_S159_L001", 
"R4752_BSSE_QGF_132888_HHWVVDRXX_1_G45072_CAGGAGCC_GTCCAATC_S159_L001", 
"R4752_BSSE_QGF_132888_HHWVVDRXX_1_G45072_CAGGAGCC_GTCCAATC_S159_L001", 
"R4775_LFO46Pool105_3311__L5_ACCACTGT_L005", "R4775_LFO46Pool105_3311__L5_ACCACTGT_L005"
), name = c("Microbacterium sp. LKL04", "Microbacterium oleivorans", 
"Mycobacterium tuberculosis", "Staphylococcus cohnii", "Mycobacterium tuberculosis", 
"Paraburkholderia fungorum", "Paraburkholderia xenovorans", "Paraburkholderia fungorum", 
"Paraburkholderia aromaticivorans", "Paraburkholderia xenovorans", 
"Bacillus safensis", "Bacillus sp. WP8", "Bacillus sp. PAMC28571", 
"Bacillus sp. PAMC22265", "Bacillus pumilus", "Bacillus altitudinis", 
"Bacillus subtilis", "Bacillus safensis", "Bacillus sp. WP8", 
"Bacillus sp. PAMC28571", "Bacillus sp. PAMC22265", "Bacillus pumilus", 
"Bacillus altitudinis", "Mycobacterium avium", "Mycobacterium tuberculosis", 
"Mycobacterium avium", "Mycobacterium tuberculosis", "Mycobacterium avium", 
"Mycobacterium avium", "Staphylococcus aureus", "Paenibacillus sp. 32O-W", 
"Mycobacterium tuberculosis", "Homo sapiens", "Ralstonia pickettii", 
"Ralstonia mannitolilytica", "Ralstonia insidiosa", "Ralstonia solanacearum", 
"Mycobacterium tuberculosis", "Paenibacillus naphthalenovorans", 
"Paenibacillus sp. B01"), fraction_total_reads = c(0.29347, 0.09071, 
0.46242, 0.6525, 0.32403, 0.92541, 0.01772, 0.8842, 0.04011, 
0.01561, 0.72733, 0.02744, 0.11121, 0.02673, 0.03845, 0.02282, 
0.01176, 0.73674, 0.02711, 0.12122, 0.01858, 0.03677, 0.02115, 
0.97964, 0.01579, 0.98397, 0.01227, 0.9907, 0.99348, 0.43967, 
0.01337, 0.4288, 0.02825, 0.54439, 0.08077, 0.05916, 0.01978, 
0.23135, 0.70247, 0.02424)), row.names = c(NA, -40L), class = c("tbl_df", 
"tbl", "data.frame"))
得到

问题是我主要对结核分枝杆菌感兴趣,但它的颜色经常淹没在其他分枝杆菌的颜色之中,所以并不突出。有没有一种方法可以只为某一个物种指定特定的、不同的颜色、背景或边框,而不必为所有物种逐一指定颜色?我的真实数据包含超过1000个物种,逐一指定将非常费力。在其他地方,我看到有人建议创建一个这样的新列:

# Add a logical flag marking the rows that belong to the species of interest.
# The comparison already yields TRUE/FALSE (and NA for missing names), so no
# ifelse(..., TRUE, FALSE) wrapper is needed.
sample_df2 <- sample_df %>%
  mutate(is_mtb = name == "Mycobacterium tuberculosis")
但这会丢失太多信息,因为我看不到其他物种是什么


对我来说,最理想的是得到一个像第一个图那样的图,但以某种方式突出显示结核分枝杆菌,并让它位于每个条的底部。

你可以这样处理:

library(ggplot2)

# Named palette: one hue_pal() colour per distinct species, keyed by the
# species name so that single entries can be overridden afterwards.
mynames <- unique(sample_df$name)
mycolours <- setNames(scales::hue_pal()(length(mynames)), mynames)

# Override only the species that has to stand out.
mycolours["Mycobacterium tuberculosis"] <- "red"

# Stacked bars per sample with a black outline around every segment; the
# manual fill scale applies the palette built above.
ggplot(sample_df, aes(x = sample, y = fraction_total_reads, fill = name)) +
  geom_col(position = "stack", colour = "black") +
  scale_fill_manual(values = mycolours) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
library(ggplot2)
# 为每个名称定义默认颜色

这也可能会有所帮助,与下面的方法类似。我用另一个选项编辑了我的答案,使其更加突出,以备您需要。另外,我建议您尝试添加
coord_flip() + theme(legend.position = "bottom") + scale_y_continuous(labels = scales::percent)
,以提高图表的可读性
# Stacked bars per sample, filled only by the is_mtb flag (species names are
# not shown); x axis labels are rotated 90 degrees.
ggplot(sample_df2, aes(x = sample, y = fraction_total_reads, fill = is_mtb)) +
  geom_col(position = "stack") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
library(ggplot2)

# One colour from scales::hue_pal() for each distinct species name
mynames <- unique(sample_df$name)
mycolours <- scales::hue_pal()(length(mynames))
names(mycolours) <- mynames

# Replace the colour of the single species to highlight
mycolours["Mycobacterium tuberculosis"] <- "red"

# Stacked bar chart: one bar per sample, one segment per species,
# every segment outlined in black and filled from the named palette
ggplot(
  data = sample_df,
  mapping = aes(x = sample, y = fraction_total_reads, fill = name)
) +
  geom_bar(stat = "identity", position = "stack", colour = "black") +
  scale_fill_manual(values = mycolours) +
  theme(axis.text.x = element_text(hjust = 1, vjust = 0.5, angle = 90))
library(ggplot2)

# Three named lookup vectors (fill, outline, opacity), each keyed by species
# name, with the entry for the species of interest overridden.
mynames <- unique(sample_df$name)

# Fill: hue_pal() colours, red for the highlighted species.
myfills <- setNames(scales::hue_pal()(length(mynames)), mynames)
myfills["Mycobacterium tuberculosis"] <- "red"

# Outline: black for every species, red for the highlighted one.
mycolours <- setNames(rep("black", length(mynames)), mynames)
mycolours["Mycobacterium tuberculosis"] <- "red"

# Opacity: 0.6 for every species, fully opaque for the highlighted one.
myalphas <- setNames(rep(0.6, length(mynames)), mynames)
myalphas["Mycobacterium tuberculosis"] <- 1

# Stacked bars per sample; fill, outline colour and alpha all map to the
# species name and are resolved through the manual scales above.
ggplot(
  sample_df,
  aes(
    x = sample,
    y = fraction_total_reads,
    fill = name,
    colour = name,
    alpha = name
  )
) +
  geom_col(position = "stack") +
  scale_fill_manual(values = myfills) +
  scale_colour_manual(values = mycolours) +
  scale_alpha_manual(values = myalphas) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))