Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/heroku/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 如何沿着染色体图形绘制位置_R_Plot_Bioinformatics_Genetics - Fatal编程技术网

R 如何沿着染色体图形绘制位置

R 如何沿着染色体图形绘制位置,r,plot,bioinformatics,genetics,R,Plot,Bioinformatics,Genetics,我想绘制一幅图,描绘我所研究的生物体的14条线性染色体,按比例,在每条染色体的指定位置用彩条标出。理想情况下,我希望使用R,因为这是我唯一有经验的编程语言 我探索了各种方法,例如基因组图,但我发现这比我想要的更复杂/显示的数据比我拥有的(例如显示细胞基因带)多得多,并且通常是人类染色体特有的 我基本上只想要14个以下尺寸的灰色条: chromosome size 1 640851 2 947102

我想绘制一幅图,描绘我所研究的生物体的14条线性染色体,按比例,在每条染色体的指定位置用彩条标出。理想情况下,我希望使用R,因为这是我唯一有经验的编程语言

我探索了各种方法,例如基因组图,但我发现这比我想要的更复杂/显示的数据比我拥有的(例如显示细胞基因带)多得多,并且通常是人类染色体特有的

我基本上只想要14个以下尺寸的灰色条:

chromosome           size
         1         640851
         2         947102
         3        1067971
         4        1200490
         5        1343557
         6        1418242
         7        1445207
         8        1472805
         9        1541735
        10        1687656
        11        2038340
        12        2271494
        13        2925236
        14        3291936
然后在染色体长度上画出大约150个位置的彩色标记。例如,以下这些位置的标记:

Chromosome        Position
         3          817702
        12         1556936
        13         1131566
理想情况下,我也希望能够根据位置指定一些不同的颜色,例如

Chromosome        Position        Type
         3          817702           A
        12         1556936           A
        13         1131566           A
         5         1041685           B
        11          488717           B
        14         1776463           B
例如,“A”标记为蓝色,“B”标记为绿色

这张图片中粘贴了一个与我想制作的非常类似的情节(来自Bopp等人的《PlOS Genetics 2013》;9(2):e1003293):


有人能推荐一种方法吗?它不一定要是生物信息学软件包,只要有别的方法能让我用 R 生成 14 个按特定比例缩放的条,并在条上的指定位置做标记即可。例如,我一直在考虑修改 ggplot2 中的简单条形图,但我不知道如何在条形图的特定位置添加标记。

只需保存 `barplot` 调用的返回值,然后调用 `segments` 在适当位置进行标记即可。例如:

# Draw one grey, borderless bar per chromosome. The value returned by
# barplot() is kept because it holds the x-coordinates of the bar midpoints.
bp <- barplot(dat$size, border = NA, col = "grey80")

# Overlay one horizontal tick per marker: each tick is centred on the midpoint
# of its chromosome's bar, extends 0.5 x-units to either side, sits at the
# marker's position on the y-axis, and is coloured by the marker's Type.
segments(
  x0 = bp[marks$Chromosome, ] - 0.5,
  y0 = marks$Position,
  x1 = bp[marks$Chromosome, ] + 0.5,
  y1 = marks$Position,
  col = marks$Type,
  lwd = 2,
  lend = 1
)

以下是绘制此类图的一般解决方案:

为此,我选择使用 `geom_rect`,因为它允许对形状大小进行更精细的调整,并允许形状随分辨率缩放;我认为 `geom_segment` 的宽度不可缩放。

还要注意的是,使用这种方法,基因改变位置的标记是按比例绘制的,这意味着它们可能很薄,以至于在绘图上不容易看到;如果您愿意,您可以自行将其调整到最小尺寸

加载数据 结果

数据

idiogramFISH + ggplot
您可以使用 `geom_segment` 来绘制线段……一些(非常)粗糙的代码:
参见问题:用数据绘制染色体示意图(ideogram)。

非常感谢,`geom_segment` 正是我所需要的!干杯。

另外,请看:

是的,有许多程序和软件包可用于此,但我认为能够在原生 `ggplot2` 中自己完成这项工作有很大的优势。

@Will Hamilton 我很想看看您的最终代码示例,也许您可以把它添加为答案?

非常感谢,我最后在 ggplot2 中使用了 `geom_segment`,只是因为我更喜欢使用 ggplot2 处理其他绘图参数,但这种方法也非常有效。干杯。
# Chromosome sizes: one row per chromosome (numbered 1-14) with its integer
# length in the `size` column.
dat <- data.frame(
  chromosome = 1:14,
  size = c(
    640851L, 947102L, 1067971L, 1200490L, 1343557L, 1418242L, 1445207L,
    1472805L, 1541735L, 1687656L, 2038340L, 2271494L, 2925236L, 3291936L
  )
)

# Marker table: the chromosome each marker lies on, its position along that
# chromosome, and a two-level factor (`A` / `B`) giving the marker type.
marks <- data.frame(
  Chromosome = c(3L, 12L, 13L, 5L, 11L, 14L),
  Position = c(817702L, 1556936L, 1131566L, 1041685L, 488717L, 1776463L),
  Type = factor(c("A", "A", "A", "B", "B", "B"), levels = c("A", "B"))
)
library("ggplot2") # for the plot
library("ggrepel") # for spreading text labels on the plot, you can replace with `geom_text` if you want
library("scales") # for axis labels notation

# insert your steps to load data from tabular files or other sources here; 
# dummy datasets taken directly from files shown in this example

# Copy number alterations for the sample: one row per gene with its
# chromosome, start/end coordinates, copy number (`cn`) and the alteration
# label (`CNA`, either "loss" or "gain").
sample_cns <- structure(
  list(
    gene = c(
      "AC116366.7", "ANKRD24", "APC", "SNAPC3", "ARID1A",
      "ATM", "BOD1L1", "BRCA1", "C11orf65", "CHD5"
    ),
    chromosome = c(
      "chr5", "chr19", "chr5", "chr9", "chr1",
      "chr11", "chr4", "chr17", "chr11", "chr1"
    ),
    start = c(
      131893016L, 4183350L, 112043414L, 15465517L, 27022894L,
      108098351L, 13571634L, 41197694L, 108180886L, 6166339L
    ),
    end = c(
      131978056L, 4224502L, 112179823L, 15465578L, 27107247L,
      108236235L, 13629211L, 41276113L, 108236235L, 6240083L
    ),
    cn = c(1L, 1L, 1L, 7L, 1L, 1L, 3L, 3L, 1L, 1L),
    CNA = c(
      "loss", "loss", "loss", "gain", "loss",
      "loss", "gain", "gain", "loss", "loss"
    )
  ),
  class = "data.frame",
  row.names = c(NA, 10L)
)

# > head(sample_cns)
#         gene chromosome     start       end cn  CNA
# 1 AC116366.7       chr5 131893016 131978056  1 loss
# 2    ANKRD24      chr19   4183350   4224502  1 loss
# 3        APC       chr5 112043414 112179823  1 loss
# 4     SNAPC3       chr9  15465517  15465578  7 gain
# 5     ARID1A       chr1  27022894  27107247  1 loss
# 6        ATM      chr11 108098351 108236235  1 loss

# hg19 chromosome sizes: chromosome name (chrM first, then chr1-chr22, chrX,
# chrY) paired with its integer length.
chrom_sizes <- data.frame(
  chromosome = c("chrM", paste0("chr", 1:22), "chrX", "chrY"),
  size = c(
    16571L,
    249250621L, 243199373L, 198022430L, 191154276L, 180915260L, 171115067L,
    159138663L, 146364022L, 141213431L, 135534747L, 135006516L, 133851895L,
    115169878L, 107349540L, 102531392L, 90354753L, 81195210L, 78077248L,
    59128983L, 63025520L, 48129895L, 51304566L,
    155270560L, 59373566L
  ),
  stringsAsFactors = FALSE
)

# > head(chrom_sizes)
#   chromosome      size
# 1       chrM     16571
# 2       chr1 249250621
# 3       chr2 243199373
# 4       chr3 198022430
# 5       chr4 191154276
# 6       chr5 180915260


# hg19 centromere locations: start and end coordinate of the centromere band
# on each chromosome (chr1-chr9, chrX, chrY, then chr10-chr22).
centromeres <- data.frame(
  chromosome = c(paste0("chr", 1:9), "chrX", "chrY", paste0("chr", 10:22)),
  start = c(
    121535434L, 92326171L, 90504854L, 49660117L, 46405641L, 58830166L,
    58054331L, 43838887L, 47367679L, 58632012L, 10104553L, 39254935L,
    51644205L, 34856694L, 16000000L, 16000000L, 17000000L, 35335801L,
    22263006L, 15460898L, 24681782L, 26369569L, 11288129L, 13000000L
  ),
  end = c(
    124535434L, 95326171L, 93504854L, 52660117L, 49405641L, 61830166L,
    61054331L, 46838887L, 50367679L, 61632012L, 13104553L, 42254935L,
    54644205L, 37856694L, 19000000L, 19000000L, 20000000L, 38335801L,
    25263006L, 18460898L, 27681782L, 29369569L, 14288129L, 16000000L
  ),
  stringsAsFactors = FALSE
)

# > head(centromeres)
#   chromosome     start       end
# 1       chr1 121535434 124535434
# 2       chr2  92326171  95326171
# 3       chr3  90504854  93504854
# 4       chr4  49660117  52660117
# 5       chr5  46405641  49405641
# 6       chr6  58830166  61830166
# Shared chromosome ordering used to build the factor levels for every
# dataset: chr1-chr22, then chrX, chrY, chrM.
chrom_order <- c(paste0("chr", 1:22), "chrX", "chrY", "chrM")

# Named character vector: names are the chromosome labels, values are their
# 1-based rank ("1" .. "25") in the ordering above.
chrom_key <- setNames(as.character(seq_along(chrom_order)), chrom_order)

# Turn the ordering itself into a factor whose levels are listed in reverse.
chrom_order <- factor(chrom_order, levels = rev(chrom_order))

# Convert the chromosome column of each dataset to a factor built from
# `chrom_order`.
# NOTE(review): `chrom_order` is itself a factor when passed as `levels`
# here, so the resulting level order may follow its element order
# (chr1 .. chrM) rather than its reversed levels -- confirm before relying on
# `as.numeric(chromosome)` downstream.
chrom_sizes$chromosome <- factor(chrom_sizes$chromosome, levels = chrom_order)
sample_cns$chromosome <- factor(sample_cns$chromosome, levels = chrom_order)
centromeres$chromosome <- factor(centromeres$chromosome, levels = chrom_order)
# Fill colours for the plot, keyed by copy-number alteration type.
group.colors <- c(gain = "red", loss = "blue")

# Chromosome overview: one outlined bar per chromosome with the centromere
# and copy-number-altered regions drawn on top, and the altered genes
# labelled. Layers are added in drawing order (outlines, centromeres, CNA
# bands, labels); scales, labels and theme are grouped around them.
ggplot(data = chrom_sizes) +
  # rotate the plot 90 degrees so chromosome lengths run horizontally
  coord_flip() +
  # give the appearance of a discrete axis labelled with the chrom names
  scale_x_discrete(name = "chromosome", limits = names(chrom_key)) +
  # suppress scientific notation on the length axis
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = group.colors) +
  # base rectangles for the chroms, positioned by the numeric factor code
  geom_rect(
    mapping = aes(
      xmin = as.numeric(chromosome) - 0.2,
      xmax = as.numeric(chromosome) + 0.2,
      ymin = 0,
      ymax = size
    ),
    colour = "black",
    fill = "white"
  ) +
  # bands for centromeres
  geom_rect(
    data = centromeres,
    mapping = aes(
      xmin = as.numeric(chromosome) - 0.2,
      xmax = as.numeric(chromosome) + 0.2,
      ymin = start,
      ymax = end
    )
  ) +
  # bands for the copy-number-altered regions, filled by alteration type
  geom_rect(
    data = sample_cns,
    mapping = aes(
      xmin = as.numeric(chromosome) - 0.2,
      xmax = as.numeric(chromosome) + 0.2,
      ymin = start,
      ymax = end,
      fill = CNA
    )
  ) +
  # labels for the 'gain' genes
  geom_text_repel(
    data = subset(sample_cns, CNA == "gain"),
    mapping = aes(x = chromosome, y = start, label = gene),
    color = "red",
    show.legend = FALSE
  ) +
  # labels for the 'loss' genes
  geom_text_repel(
    data = subset(sample_cns, CNA == "loss"),
    mapping = aes(x = chromosome, y = start, label = gene),
    color = "blue",
    show.legend = FALSE
  ) +
  labs(title = "Copy Number Alterations", y = "region (bp)") +
  # black & white look: no grid lines, no panel background
  theme(
    axis.text.x = element_text(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
# Input data frames, parsed from inline whitespace-separated text.

# Chromosome sizes: `chrName` (chromosome number) and `chrSize` (length).
# `TRUE` is spelled out because the `T` shortcut is an ordinary variable that
# can be reassigned.
dfChrSize <- read.table(text = "chrName           chrSize
         1         640851
         2         947102
         3        1067971
         4        1200490
         5        1343557
         6        1418242
         7        1445207
         8        1472805
         9        1541735
        10        1687656
        11        2038340
        12        2271494
        13        2925236
        14        3291936", header = TRUE)

# Marker positions: chromosome, position of the mark, its size and its name.
# `stringsAsFactors = FALSE` keeps `markName` as a character column on every
# R version.
dfMarkPos <- read.table(text = "chrName   markPos markSize markName
3          817702 50000 type1
12         1556936  50000 type2
13         1131566  50000 type2", header = TRUE, stringsAsFactors = FALSE)
# Install idiogramFISH only when it is not already available, so re-running
# the script does not reinstall the package (or require network access)
# every time.
if (!requireNamespace("idiogramFISH", quietly = TRUE)) {
  install.packages("idiogramFISH")
}
library(idiogramFISH) # v. 1.16.1

# Remove all figure margins (order: bottom, left, top, right) for the
# idiogram, keeping the previous settings so they can be restored afterwards.
old_par <- par(mar = c(0, 0, 0, 0))

# Draw the chromosomes from dfChrSize with the marks from dfMarkPos.
plotIdiograms(dfChrSize, dfMarkPos = dfMarkPos,
              karIndex = FALSE,
              karHeight = 4,
              orderChr = "original",
              chrWidth = .2,
              chrSpacing = .5,
              legendHeight = 2,
              chromatids = FALSE,
              rulerIntervalMb = 1000000,
              useMinorTicks = TRUE,   # ruler
              xlimLeftMod = 2,        # modify left margin
              ylimBotMod = -3,        # modify bottom margin
              classMbName = "",       # chr. title
              yPosRulerTitle = 3,     # ruler title pos.
              xPosRulerTitle = 3)

# Put the graphics margins back so later base plots are not affected.
par(old_par)
# Build the data used by the ggplot below from the chromosome and mark
# tables; the result's `dataChr` and `dataMark` elements are plotted as
# polygons.
chrAndMarksMap <- mapGGChrMark(dfChrSize, dfMarkPos, chrSpacing = 0.8)

# ggplot

library(ggplot2)

ggplot() +
  # chromosomes: one grey polygon per `Chr` group
  geom_polygon(
    data = chrAndMarksMap$dataChr,
    mapping = aes(x = x, y = y, group = Chr),
    color = "gray",
    fill = "gray"
  ) +
  # marks: one polygon per `id`, outlined and filled by mark name
  geom_polygon(
    data = chrAndMarksMap$dataMark,
    mapping = aes(
      x = x,
      y = y,
      group = id,
      color = markName,
      fill = markName
    )
  ) +
  # hand-drawn y-axis line from 0 to 3,500,000 at the left edge of the panel
  # (the theme's own axis line is blanked below)
  geom_segment(aes(y = 0, yend = 3500000, x = -Inf, xend = -Inf)) +
  # one x tick per chromosome
  scale_x_continuous(breaks = seq(1, nrow(dfChrSize), 1)) +
  # y ticks every 500,000, labelled 0 to 3.5 to match the "Mb" axis title
  scale_y_continuous(
    breaks = seq(0, 3500000, 500000),
    labels = seq(0, 3.5, 0.5)
  ) +
  labs(y = "Mb") +
  theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(angle = 0),
    legend.title = element_blank()
  )