Python 在X轴上方或图表顶部添加特征

Python 在X轴上方或图表顶部添加特征,python,matplotlib,Python,Matplotlib,我有一个图表,看起来像: 现在我想根据两个X坐标在X轴上方添加一些附加信息 例如,将值1376和1837连接起来,并对它们进行注释,使其看起来像(我知道它看起来很糟糕,但只是你有了一个想法。当然,文本的位置并不理想): 还有几个区域可以重叠。我试着用plt.arrow(1376,01837,0)来做这件事,但箭头并没有停在1837。它一直延伸到X轴的末端。我也尝试过使用基本的文本注释工具,但我从来没有得到我想要的。另一个解决方案是在标题下方的图表顶部添加信息。因此,您关于顶部或底部的任何想法

我有一个图表,看起来像:

现在我想根据两个X坐标在X轴上方添加一些附加信息

例如,将值1376和1837连接起来,并对它们进行注释,使其看起来像(我知道它看起来很糟糕,但只是你有了一个想法。当然,文本的位置并不理想):


还有几个区域可以重叠。我试着用
plt.arrow(1376, 0, 1837, 0)
来做这件事,但箭头并没有停在1837。它一直延伸到X轴的末端。我也尝试过使用基本的文本注释工具,但我从来没有得到我想要的。另一个解决方案是在标题下方的图表顶部添加信息。因此,您关于顶部或底部的任何想法都会有所帮助

一个可能的解决方案,虽然这是一个有点手动的过程,并不理想(如果你有很多这样的东西,可能会有点乏味),就是简单地在图形上绘制一条额外的线。你可以指定要绘制线的x坐标,y坐标将是图形上的垂直位置

import matplotlib.pyplot as plt
import numpy as np

# sample curve that the annotations are drawn on top of
x = np.arange(0, 10, 0.1)
y = np.sin(x)

fig, ax = plt.subplots()
ax.plot(x, y)

# One entry per annotated region:
#   (x extent of the bar, line width, label text, label anchor point).
# The bars sit at y = -1; the labels are anchored slightly higher (y = -0.95).
markers = [
    ([2, 4], 1, 'Test 1', (2.5, -0.95)),
    ([6, 8], 3, 'Test 2', (6.5, -0.95)),  # thicker bar via a larger lw
]
for x_range, width, caption, anchor in markers:
    ax.plot(x_range, [-1, -1], color="red", lw=width)
    ax.annotate(caption, xy=anchor)

plt.show()
这会产生如下图形:


根据需要绘制的这些图的数量,您可能希望自动化区域/间隔列表的过程。当然,问题在于如何处理重叠的时间间隔。下面的代码试图在解决时间间隔重叠的同时自动化该过程

#/usr/bin/env python
#-*-编码:utf-8-*-
将numpy作为np导入
将matplotlib.pyplot作为plt导入
来自itertools进口链、组合
def注释_间隔(间隔,标签,y0=0,dy=-1,ax=None):
"""
在下面用条形图和居中标签注释间隔。
论据:
----------
区间-(N,2)数组
间隔列表
字符串的标签-(N,)iterable
相应标签列表
y0-整数/浮点(默认值为0)
注释的基线y值
dy-int/float(默认值-1)
沿y方向移动以避免注释重叠
ax-matplotlib轴对象(默认plt.gca())
要注释的轴
"""
如果ax为无:
ax=plt.gca()
#为每个间隔分配y值;解决重叠
y=y0+\u获取\u级别(间隔)*dy
对于(开始、停止)、yy、zip标签(间隔、y、标签):
ax.绘图([开始,停止],[yy,yy],lw=3)
ax.文本(开始+(停止-开始)/2.,yy,标签,
水平对齐='中心',垂直对齐='底部')
def_获取_液位(间隔):
"""
为每个间隔指定一个“级别”,以确保没有两个重叠的间隔在同一级别上。
在创建新标高之前,尽可能多地填充较低的标高。
"""
#初始化输出
n=长度(间隔)
级别=np.零((n))
#解决重叠
重叠=\u获取\u重叠(间隔)
如果np.有(重叠):
包含_重叠,=np.其中(np.any(重叠,轴=0))
剩余=列表(包含重叠)
ctr=0
而len(剩余)>0:
索引=\u获取\u最长\u非重叠\u集(间隔[剩余])
最长=[索引中ii的剩余.pop(ii)[:-1]]
级别[最长]=ctr
ctr+=1
回报水平
def_get_重叠(间隔):
"""
论据:
----------
区间-(N,2)数组
间隔列表
返回:
--------
重叠-(N,N)数组
重叠类型(如有)
重叠[ii,jj]=0-无重叠
重叠[ii,jj]=1-区间[ii]内区间[jj]的起点
重叠[ii,jj]=2-区间[ii]内区间[jj]的停止
重叠[ii,jj]=3-由区间[ii]封装的区间[jj]
重叠[ii,jj]=4-区间[jj]封装区间[ii]
"""
n=长度(间隔)
重叠=np.zero((n,n),dtype=np.int)
对于ii,枚举(间隔)中的(开始、停止):
对于枚举(间隔)中的jj,(s,t):
如果二jj:
重叠[ii,jj]+=int((s>=开始)和(s<停止))
重叠[ii,jj]+=2*int((t>=start)和(t(1,)(2,)(3,)(1,2)(1,3)(2,3)(1,2,3)
s=列表(iterable)#允许重复元素
返回链。从_iterable(范围(len(s)+1)内r的组合(s,r))
def test():
导入字符串
n=6
间隔=np.sort(np.random.rand(n,2),轴=1)
标签=[字符串中的字母对应字母。ascii_小写[:n]]
注释_间隔(间隔、标签)
plt.show()

为了澄清,您是要指示基因组区域,即x值的范围,还是要指示特定的x值对?我要指示基因组区域。此外,您的箭头不起作用,因为调用签名是
plt.arrow(x,y,dx,dy)
。您需要类似于
plt.arrow(1376, 0, 1837-1376, 0)
的东西。不过,换作是我,只会画一条线。谢谢你提供的信息。我对Python不太熟悉,所以到目前为止,我对现有的图表已经相当满意了。我会去试试看
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
from itertools import chain, combinations

def annotate_intervals(intervals, labels, y0=0, dy=-1, ax=None):
    """
    Annotate each interval with a horizontal bar and a centred label.

    Every interval is drawn as a bar from its start to its stop. The label is
    horizontally centred on the bar and anchored by its bottom edge at the
    bar's y value. Intervals that overlap are placed on different levels;
    level k is drawn at ``y0 + k * dy``.

    Arguments:
    ----------
    intervals - (N, 2) array-like
        (start, stop) pairs; lists/tuples are converted to a NumPy array
    labels - (N, ) iterable of strings
        list of corresponding labels
    y0 - int/float (default 0)
        baseline y value of annotations
    dy - int/float (default -1)
        shift in y between successive levels, used to avoid overlaps
    ax - matplotlib axis object (default plt.gca())
        axis to annotate
    """

    if ax is None:
        ax = plt.gca()

    # The level assignment indexes `intervals` with lists of indices, which
    # only works on NumPy arrays; coerce here so that plain Python sequences
    # are accepted as well.
    intervals = np.asarray(intervals)

    # assign y values to each interval; resolve overlaps
    y = y0 + _get_levels(intervals) * dy

    for (start, stop), yy, label in zip(intervals, y, labels):
        ax.plot([start, stop], [yy, yy], lw=3)
        ax.text(start + (stop-start)/2., yy, label,
                horizontalalignment='center', verticalalignment='bottom')

def _get_levels(intervals):
    """
    Compute a drawing level for every interval.

    Intervals that take part in no overlap stay on level 0. The remaining
    ones are distributed greedily: each pass picks the non-overlapping subset
    with the largest total length, puts it on the current level, and moves on
    to the next level with whatever is left.

    Arguments:
    ----------
    intervals - (N, 2) array
        list of intervals (indexed with index lists, so a NumPy array)

    Returns:
    --------
    levels - (N, ) float array
        level of each interval, starting at 0
    """

    levels = np.zeros(len(intervals))

    overlaps = _get_overlaps(intervals)
    if not np.any(overlaps):
        # nothing collides: everything stays on level 0
        return levels

    # indices of the intervals that are involved in at least one overlap
    pending = list(np.where(np.any(overlaps, axis=0))[0])

    level = 0
    while pending:
        # positions refer to entries of `pending`, not to `intervals`
        positions = _get_longest_non_overlapping_set(intervals[pending])
        # pop from the back first so the earlier positions stay valid
        members = [pending.pop(pos) for pos in reversed(positions)]
        levels[members] = level
        level += 1

    return levels

def _get_overlaps(intervals):
    """
    Classify the pairwise overlap between intervals.

    Intervals are compared so that two intervals which merely touch
    (one stops exactly where the other starts) do not count as overlapping,
    and the result is non-zero at [ii,jj] exactly when it is non-zero at
    [jj,ii].

    Arguments:
    ----------
    intervals - (N, 2) array-like
        list of (start, stop) intervals

    Returns:
    --------
    overlap - (N, N) integer array
        type of overlap (if any); the diagonal is always 0

    overlap[ii,jj] = 0 - no overlap
    overlap[ii,jj] = 1 - start of interval[jj] within interval[ii]
    overlap[ii,jj] = 2 - stop  of interval[jj] within interval[ii]
    overlap[ii,jj] = 3 - interval[jj] encapsulated by interval[ii]
    overlap[ii,jj] = 4 - interval[jj] encapsulates interval[ii]

    Identical intervals encapsulate each other and are reported as 3 in both
    directions.
    """

    intervals = np.asarray(intervals)
    n = len(intervals)
    # builtin int: the np.int alias no longer exists in current NumPy
    overlap = np.zeros((n, n), dtype=int)
    for ii, (start, stop) in enumerate(intervals):
        for jj, (s, t) in enumerate(intervals):
            if ii != jj:
                # start point belongs to [start, stop)
                overlap[ii, jj] += int(start <= s < stop)
                # stop point belongs to (start, stop]; mirroring the start
                # test keeps the classification symmetric at the boundaries
                overlap[ii, jj] += 2 * int(start < t <= stop)

    # If interval[jj] encapsulates interval[ii], the point tests above do not
    # yield 4 for [ii,jj]; derive it from the transposed "encapsulated by"
    # entries. Entries that are already 3 are left alone so that every value
    # stays within the documented codes 0-4.
    mask = overlap == 3
    overlap[mask.T & ~mask] = 4

    return overlap

def _get_longest_non_overlapping_set(intervals):
    """
    Find the non-overlapping subset of intervals with the largest total length.

    Exhaustive search: every non-empty subset of interval indices is
    enumerated, subsets that contain an overlap score 0, all others score the
    sum of their interval lengths, and the first best-scoring subset wins.
    The number of subsets doubles with every additional interval.

    Arguments:
    ----------
    intervals - (N, 2) array
        list of intervals

    Returns:
    --------
    tuple of indices into `intervals` forming the selected subset
    """
    spans = np.diff(intervals, axis=1)  # stop - start, one row per interval

    # all subsets of the index range; the leading entry is the empty set
    candidates = list(_get_powerset(np.arange(len(intervals))))[1:]

    scores = np.zeros(len(candidates))
    for pos, candidate in enumerate(candidates):
        members = np.array(candidate)
        if np.any(_get_overlaps(intervals[members])):
            continue  # invalid subset keeps its score of 0
        scores[pos] = np.sum(spans[members])

    return candidates[np.argmax(scores)]

def _get_powerset(iterable):
    """
    Return an iterator over all subsets of `iterable`, smallest first.

    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)

    Elements are distinguished by position, so repeated values yield
    repeated subsets.
    """
    items = list(iterable)
    sizes = range(len(items) + 1)
    subsets_by_size = (combinations(items, size) for size in sizes)
    return chain.from_iterable(subsets_by_size)

def test():
    """Annotate six random intervals in [0, 1) labelled 'a'..'f' and show the plot."""
    from string import ascii_lowercase

    count = 6
    # sorting each row puts the smaller value first, i.e. start <= stop
    bounds = np.sort(np.random.rand(count, 2), axis=1)
    names = list(ascii_lowercase[:count])
    annotate_intervals(bounds, names)
    plt.show()