Python 在X轴上方或图表顶部添加特征
我有一个图表(原帖此处附有图片)。现在我想根据两个X坐标,在X轴上方添加一些附加信息。例如,把X值1376和1837用一条线连接起来,并为这一段加上注释(原帖此处附有示意图:效果很粗糙,文本的位置也不理想,只是为了说明想法)。标签:python, matplotlib
还有几个区域可以重叠。我试着用
plt.arrow(1376, 0, 1837, 0)
来做这件事,但箭头并没有停在1837。它一直延伸到X轴的末端。我也尝试过使用基本的文本注释工具,但我从来没有得到我想要的。另一个解决方案是在标题下方的图表顶部添加信息。因此,您关于顶部或底部的任何想法都会有所帮助 一个可能的解决方案,虽然这是一个有点手动的过程,并不理想(如果你有很多这样的东西,可能会有点乏味),就是简单地在图形上绘制一条额外的线。你可以指定要绘制线的x坐标,y坐标将是图形上的垂直位置
import matplotlib.pyplot as plt
import numpy as np
# Sample curve: a sine wave evaluated every 0.1 on [0, 10).
x = np.arange(0, 10, 0.1)
y = np.sin(x)

fig, ax = plt.subplots()
ax.plot(x, y)

# One entry per annotated range:
#   (x-extent of the bar, line width of the bar, label text, x position of the label)
# Every bar is drawn at y = -1; its label is anchored slightly higher, at y = -0.95.
markers = [
    ((2, 4), 1, 'Test 1', 2.5),
    ((6, 8), 3, 'Test 2', 6.5),  # lw controls the thickness of the bar
]
for (left, right), width, caption, caption_x in markers:
    ax.plot([left, right], [-1, -1], color="red", lw=width)
    ax.annotate(caption, xy=(caption_x, -0.95))

plt.show()
这会产生如下图形:
如果需要绘制很多这样的标注,您可能希望根据一个区间列表自动完成这一过程。当然,问题在于如何处理相互重叠的区间。下面的代码在自动化该过程的同时,尝试解决区间重叠的问题:
#/usr/bin/env python
#-*-编码:utf-8-*-
将numpy作为np导入
将matplotlib.pyplot作为plt导入
来自itertools进口链、组合
def注释_间隔(间隔,标签,y0=0,dy=-1,ax=None):
"""
在下面用条形图和居中标签注释间隔。
论据:
----------
区间-(N,2)数组
间隔列表
字符串的标签-(N,)iterable
相应标签列表
y0-整数/浮点(默认值为0)
注释的基线y值
dy-int/float(默认值-1)
沿y方向移动以避免注释重叠
ax-matplotlib轴对象(默认plt.gca())
要注释的轴
"""
如果ax为无:
ax=plt.gca()
#为每个间隔分配y值;解决重叠
y=y0+\u获取\u级别(间隔)*dy
对于(开始、停止)、yy、zip标签(间隔、y、标签):
ax.绘图([开始,停止],[yy,yy],lw=3)
ax.文本(开始+(停止-开始)/2.,yy,标签,
水平对齐='中心',垂直对齐='底部')
def_获取_液位(间隔):
"""
为每个间隔指定一个“级别”,以确保没有两个重叠的间隔在同一级别上。
在创建新标高之前,尽可能多地填充较低的标高。
"""
#初始化输出
n=长度(间隔)
级别=np.零((n))
#解决重叠
重叠=\u获取\u重叠(间隔)
如果np.有(重叠):
包含_重叠,=np.其中(np.any(重叠,轴=0))
剩余=列表(包含重叠)
ctr=0
而len(剩余)>0:
索引=\u获取\u最长\u非重叠\u集(间隔[剩余])
最长=[索引中ii的剩余.pop(ii)[:-1]]
级别[最长]=ctr
ctr+=1
回报水平
def_get_重叠(间隔):
"""
论据:
----------
区间-(N,2)数组
间隔列表
返回:
--------
重叠-(N,N)数组
重叠类型(如有)
重叠[ii,jj]=0-无重叠
重叠[ii,jj]=1-区间[ii]内区间[jj]的起点
重叠[ii,jj]=2-区间[ii]内区间[jj]的停止
重叠[ii,jj]=3-由区间[ii]封装的区间[jj]
重叠[ii,jj]=4-区间[jj]封装区间[ii]
"""
n=长度(间隔)
重叠=np.zero((n,n),dtype=np.int)
对于ii,枚举(间隔)中的(开始、停止):
对于枚举(间隔)中的jj,(s,t):
如果二jj:
重叠[ii,jj]+=int((s>=开始)和(s<停止))
重叠[ii,jj]+=2*int((t>=start)和(t(1,)(2,)(3,)(1,2)(1,3)(2,3)(1,2,3)
s=列表(iterable)#允许重复元素
返回链。从_iterable(范围(len(s)+1)内r的组合(s,r))
def test():
导入字符串
n=6
间隔=np.sort(np.random.rand(n,2),轴=1)
标签=[字符串中的字母对应字母。ascii_小写[:n]]
注释_间隔(间隔、标签)
plt.show()
为了澄清,您是要指示基因组区域,即x值的范围,还是要指示特定的x值对?我要指示基因组区域。此外,您的箭头不起作用,因为调用签名是plt.arrow(x,y,dx,dy)
。您需要类似于plt.arrow(1376, 0, 1837-1376, 0)
的东西。不过,换作是我,只会画一条线。谢谢你提供的信息。我对Python不太熟悉,所以到目前为止,我对现在画出的图已经相当满意了。我会去看看。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from itertools import chain, combinations
def annotate_intervals(intervals, labels, y0=0, dy=-1, ax=None):
    """
    Annotate each interval with a horizontal bar and a centred label.

    The label is horizontally centred on the bar and anchored by its bottom
    edge at the bar's y value, i.e. the text sits directly on top of the bar.

    Arguments:
    ----------
    intervals - (N, 2) array-like
        (start, stop) pairs; a nested list/tuple is accepted as well as an array
    labels - (N, ) iterable of strings
        list of corresponding labels
    y0 - int/float (default 0)
        baseline y value of annotations
    dy - int/float (default -1)
        shift in y per level, used to keep overlapping annotations apart
    ax - matplotlib axis object (default plt.gca())
        axis to annotate

    Returns:
    --------
    None; the bars and labels are drawn onto `ax`.
    """
    if ax is None:
        ax = plt.gca()

    # The level assignment indexes `intervals` with a list of positions,
    # which only works on an ndarray; coerce once so plain lists work too.
    intervals = np.asarray(intervals)

    # assign y values to each interval; resolve overlaps
    y = y0 + _get_levels(intervals) * dy

    for (start, stop), yy, label in zip(intervals, y, labels):
        ax.plot([start, stop], [yy, yy], lw=3)
        # Place the text at the midpoint of the bar.
        ax.text(start + (stop - start) / 2., yy, label,
                horizontalalignment='center', verticalalignment='bottom')
def _get_levels(intervals):
    """
    Assign a 'level' to each interval such that no two overlapping intervals are on the same level.
    Fill lower levels as much as possible before creating a new level.

    Arguments:
    ----------
    intervals - (N, 2) array-like
        (start, stop) pairs

    Returns:
    --------
    levels - (N, ) float array
        level index of each interval; intervals that overlap nothing stay on level 0
    """
    # Positions are selected below with a list index, which needs an ndarray.
    intervals = np.asarray(intervals)

    # initialise output
    n = len(intervals)
    levels = np.zeros((n))

    # resolve overlaps
    overlaps = _get_overlaps(intervals)
    if np.any(overlaps):
        # The overlap matrix is not symmetric in its non-zero pattern (e.g. two
        # intervals sharing the same stop value are only flagged in one
        # direction), so an interval is involved in an overlap if either its
        # row or its column has a non-zero entry.
        involved = np.any(overlaps, axis=0) | np.any(overlaps, axis=1)
        contains_overlaps, = np.where(involved)
        remaining = list(contains_overlaps)

        ctr = 0
        while remaining:
            # `indices` are positions within `remaining`, in ascending order.
            indices = _get_longest_non_overlapping_set(intervals[remaining])
            # Pop from the back so the earlier positions stay valid.
            longest = [remaining.pop(ii) for ii in indices[::-1]]
            levels[longest] = ctr
            ctr += 1

    return levels
def _get_overlaps(intervals):
    """
    Classify the pairwise overlap between intervals.

    A point x counts as lying within an interval when start <= x < stop.

    Arguments:
    ----------
    intervals - (N, 2) array
        list of intervals

    Returns:
    --------
    overlap - (N, N) integer array
        type of overlap (if any)
        overlap[ii,jj] = 0 - no overlap
        overlap[ii,jj] = 1 - start of interval[jj] within interval[ii]
        overlap[ii,jj] = 2 - stop of interval[jj] within interval[ii]
        overlap[ii,jj] = 3 - interval[jj] encapsulated by interval[ii]
        overlap[ii,jj] = 4 - interval[jj] encapsulates interval[ii]
        The diagonal is always 0.
    """
    n = len(intervals)
    # Builtin `int` instead of the `np.int` alias, which no longer exists in
    # current NumPy releases.
    overlap = np.zeros((n, n), dtype=int)

    for ii, (start, stop) in enumerate(intervals):
        for jj, (s, t) in enumerate(intervals):
            if ii != jj:
                overlap[ii, jj] += int((s >= start) and (s < stop))
                overlap[ii, jj] += 2 * int((t >= start) and (t < stop))

    # if interval[jj] encapsulates interval[ii], overlaps[ii,jj] is still 0;
    # mark it by adding 4 at the transposed position of every 3.
    mask = overlap == 3
    overlap[mask.T] += 4

    return overlap
def _get_longest_non_overlapping_set(intervals):
    """
    Find the set of mutually non-overlapping intervals with the largest total length.

    Brute-force approach:
    1) Get all possible sets of intervals.
    2) Filter for non-overlapping sets.
    3) Determine total length of intervals for each.
    4) Select set with highest total.

    The number of candidate sets doubles with every additional interval.

    Arguments:
    ----------
    intervals - (N, 2) array
        list of intervals

    Returns:
    --------
    selection - tuple of ints
        positions (into `intervals`, ascending) of the chosen intervals;
        on ties the candidate that comes first in the powerset order wins
    """
    intervals = np.asarray(intervals)
    indices = np.arange(len(intervals))
    lengths = np.diff(intervals, axis=1)

    # Every entry of the overlap matrix depends only on the two intervals it
    # compares, so compute the matrix once and look up the sub-block for each
    # candidate set instead of recomputing it per set.
    overlaps = _get_overlaps(intervals)

    powerset = list(_get_powerset(indices))
    powerset = powerset[1:]  # exclude empty set

    # Sets that contain an overlap keep a total of 0.
    total_lengths = np.zeros((len(powerset)))
    for ii, selection in enumerate(powerset):
        selection = np.array(selection)
        if not np.any(overlaps[selection][:, selection]):
            total_lengths[ii] = np.sum(lengths[selection])

    return powerset[np.argmax(total_lengths)]
def _get_powerset(iterable):
    """
    Return an iterator over every subset of `iterable`, as tuples.

    Subsets come out ordered by size, starting with the empty tuple:
    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    Repeated elements are treated as distinct.
    """
    pool = list(iterable)
    sizes = range(len(pool) + 1)
    subsets_by_size = (combinations(pool, size) for size in sizes)
    return chain.from_iterable(subsets_by_size)
def test():
    """
    Visual smoke test: annotate six random intervals labelled 'a'..'f' and show the figure.
    """
    import string

    n = 6

    # Draw n random pairs in [0, 1) and order each pair so that start <= stop.
    endpoints = np.random.rand(n, 2)
    intervals = np.sort(endpoints, axis=1)

    # One lowercase letter per interval.
    labels = list(string.ascii_lowercase[:n])

    annotate_intervals(intervals, labels)
    plt.show()