Matplotlib 将分割的violinplot分成两半,以比较尾部数据
标签:matplotlib, seaborn, violin-plot
有没有办法把 seaborn 的“分割”(split=True)小提琴图(或其他类型的小提琴图)的两半在物理上分开?我想比较两种不同的处理方式,但分布有一条很细的尾巴,很难(甚至不可能)分辨分割小提琴图的其中一半还是两半都一直延伸到了尾巴的顶端。我的一个想法是:如果两半之间稍微留出一点间隔,而不是紧贴在一起,就更容易准确地读出数据。这是我的代码:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
# Question script: load the LMP values of two dispatch runs from one Excel
# sheet, stack them into a single frame and draw a split violin plot with one
# half per run.

# load data into a dataframe
# NOTE: `sheetname` / `parse_cols` are the old spellings of these options and
# are rejected by current pandas; `sheet_name` / `usecols` are the supported
# names. A list of ints passed to `usecols` selects columns by position.
df1 = pd.read_excel('Modeling analysis charts.xlsx',
                    sheet_name='lmps',
                    usecols=[0, 5],
                    skiprows=0,
                    header=1)
# identify which dispatch run this data is from
df1['Run'] = 'Scheduling'
# load data into a dataframe (same sheet, different pair of columns)
df2 = pd.read_excel('Modeling analysis charts.xlsx',
                    sheet_name='lmps',
                    usecols=[7, 12],
                    skiprows=0,
                    header=1)
# identify which dispatch run this data is from
df2['Run'] = 'Pricing'
# drop rows with missing data
df1 = df1.dropna(how='any')
df2 = df2.dropna(how='any')
# merge data from different runs
df = pd.concat([df1, df2])
# LMPs are all opposite of actual values, so correct that
df['LMP'] = -df['LMP']
fontsize = 10
style.use('fivethirtyeight')
fig, axes = plt.subplots()
# split=True draws the two 'Run' levels as the two halves of each violin;
# cut=0 stops the density estimate at the extreme data points.
sns.violinplot(x='Scenario', y='LMP', hue='Run', split=True, data=df,
               inner=None, scale='area', bw=0.2, cut=0, linewidth=0.5,
               ax=axes)
axes.set_title('Day Ahead Market')
#axes.set_ylim([-15,90])
axes.yaxis.grid(True)
axes.set_xlabel('Scenario')
axes.set_ylabel('LMP ($/MWh)')
#plt.savefig('DAMarket.pdf', bbox_inches='tight')
plt.show()
编辑:出于历史原因,这是公认的答案,但请看一看@conchoecia更新、更干净的实现。
好主意。我的实现的基本思路是:先绘制完整的图形,取出对应于两个半小提琴的 patch(PolyCollection),然后把这些 patch 的路径向左或向右平移。希望代码不言自明;如果有不清楚的地方,请在评论中告诉我。
我对上面 @Paul 的答案进行了扩展,使其更加健壮。现在它同时支持垂直和水平两种方向;另外我还让它能够处理 inner='sticks'(小提琴内部的短线),因为我的应用需要这个功能。
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections
import seaborn as sns
import pandas as pd
def offset_violinplot_halves(ax, delta, width, inner, direction):
    """
    Push the two halves of split violinplots apart.

    This is meant for violinplots drawn by Seaborn with `split=True`, so that
    the tails of the halves can be compared or something else can be drawn
    between them. Stick lines are assigned to a half on the assumption that
    Seaborn centres every violin on a non-negative integer position.

    Args:
      ax: The axis that contains the violinplots.
      delta: Distance by which each half is moved away from the centre line.
      width: The total width of the violinplot, as passed to
        sns.violinplot(). Not used by the function; kept so existing calls
        keep working.
      inner: The `inner` option given to Seaborn. Halves are moved only when
        this is None or a stick style ('stick' / 'sticks'); stick lines are
        moved together with their half.
      direction: Orientation of the violinplot, 'horizontal' or 'vertical'.
    Returns:
      None, `ax` is modified in place.
    Raises:
      ValueError: if `direction` is neither 'horizontal' nor 'vertical'.
    """
    if direction not in ('horizontal', 'vertical'):
        raise ValueError(
            "direction must be 'horizontal' or 'vertical', got %r" % (direction,))
    horizontal = direction == 'horizontal'
    # Accept both spellings: 'stick' as well as 'sticks'.
    has_sticks = isinstance(inner, str) and inner.startswith('stick')

    if has_sticks:
        for line in ax.get_lines():
            # Horizontal violins spread along y, vertical ones along x.
            raw = line.get_ydata() if horizontal else line.get_xdata()
            # Work on a float copy so list or integer data can be shifted too.
            coords = np.asarray(raw, dtype=float)
            if coords.size < 2:
                continue
            # A stick whose two end points fall into different integer bins is
            # treated as belonging to the lower-coordinate half (top / left)
            # and moved in the negative direction; otherwise it is moved in
            # the positive direction.
            if np.floor(coords[0]) < np.floor(coords[1]):
                moved = coords - delta
            else:
                moved = coords + delta
            if horizontal:
                line.set_ydata(moved)
            else:
                line.set_xdata(moved)

    if not (has_sticks or inner is None):
        # Other inner styles are left untouched.
        return

    # (vertex column, sign) of the shift for every kind of half
    shifts = {'top': (1, -1), 'bottom': (1, 1),
              'left': (0, -1), 'right': (0, 1)}
    for item in ax.collections:
        # axis contains PolyCollections and PathCollections
        if not isinstance(item, matplotlib.collections.PolyCollection):
            continue
        # each half is expected to consist of exactly one path
        path, = item.get_paths()
        vertices = path.vertices
        column, sign = shifts[_wedge_dir(vertices, direction)]
        # the vertex array is modified in place, which moves the drawn patch
        vertices[:, column] += sign * delta
def_wedge_dir(顶点,方向):
"""
Args:
matplotlib.collections.PolyCollection中的顶点
方向必须是“水平”或“垂直”,具体取决于
你的阴谋已经策划好了。
返回:
-['top','bottom','left','right']中的字符串,用于确定
小提琴图的一半是相对于中心的。
"""
如果方向==“水平”:
结果=(方向,len(集合(顶点[1:5,1]))==1)
elif方向==“垂直”:
结果=(方向,len(集合(顶点[-3:-1,0]))==1)
结果_键={('horizontal',True):'bottom',
('horizontal',False):'top',
('vertical',True):'left',
('vertical',False):'right'}
#如果启动后的第一对x/y值相同,则
#是输入方向。如果不是,则相反
返回结果\u键[结果]
# Demo: draw one horizontal and one vertical split violinplot with stick
# lines and push the halves of both apart.

# create some data
n = 100  # number of samples
c = ['cats', 'rats', 'bears', 'pears', 'snares']  # classes
y = np.random.randn(n)
x = np.random.choice(c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame(dict(x=x, y=y, z=z))
print('done making data')
# initialise new axes;
fig, (ax1, ax2) = plt.subplots(2)
inner = "sticks"  # Note: 'box' is default
width = 0.75
delta = 0.05
# the violins are drawn narrower by delta; this is the width handed to both
# sns.violinplot() and offset_violinplot_halves()
final_width = width - delta
print(data)
# numeric values on x, categories on y -> horizontal violins
sns.violinplot(data=data, x='y', y='x',
               split=True, hue='z',
               ax=ax1, inner=inner, width=final_width,
               bw=0.2)
# categories on x, numeric values on y -> vertical violins
sns.violinplot(data=data, x='x', y='y',
               split=True, hue='z',
               ax=ax2, inner=inner, width=final_width,
               bw=0.2)
offset_violinplot_halves(ax1, delta, final_width, inner, 'horizontal')
offset_violinplot_halves(ax2, delta, final_width, inner, 'vertical')
plt.show()
我会直接计算 KDE 并对它们进行比较,而不是试图从一个刻意设计成高层接口的绘图函数中提取信息。另一个想法是把最大值绘制出来。
import numpy as np
import matplotlib.pyplot as plt;
import matplotlib.collections
import seaborn as sns
import pandas as pd
# Synthetic data: a numeric value, an integer class label and a boolean
# sub-class that selects the half of the violin.
n = 10000  # number of samples
c = 5  # classes
y = np.random.randn(n)
x = np.random.randint(0, c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame({'x': x, 'y': y, 'z': z})

# Start from a fresh axis: the index arithmetic below relies on the
# collections on the axis all coming from this one violinplot call.
fig, ax = plt.subplots(1, 1)

# plot
inner = None  # Note: 'box' is default
ax = sns.violinplot(data=data, x='x', y='y', hue='z', split=True, inner=inner, ax=ax)

# offset stuff
delta = 0.02
# Without an inner style the collections are handled in groups of two
# (even index -> left, odd index -> right); with one, in groups of three
# where the third member of each group is left where it is.
group_size = 3 if inner else 2
shift_for_slot = {0: -delta, 1: delta}
for ii, item in enumerate(ax.collections):
    # axis contains PolyCollections and PathCollections
    if not isinstance(item, matplotlib.collections.PolyCollection):
        continue
    # get path (exactly one per collection is expected)
    path, = item.get_paths()
    shift = shift_for_slot.get(ii % group_size)
    if shift is not None:
        # shift x-coordinates of path in place
        path.vertices[:, 0] += shift
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections
import seaborn as sns
import pandas as pd
def offset_violinplot_halves(ax, delta, width, inner, direction):
    """
    Push the two halves of split violinplots apart.

    This is meant for violinplots drawn by Seaborn with `split=True`, so that
    the tails of the halves can be compared or something else can be drawn
    between them. Stick lines are assigned to a half on the assumption that
    Seaborn centres every violin on a non-negative integer position.

    Args:
      ax: The axis that contains the violinplots.
      delta: Distance by which each half is moved away from the centre line.
      width: The total width of the violinplot, as passed to
        sns.violinplot(). Not used by the function; kept so existing calls
        keep working.
      inner: The `inner` option given to Seaborn. Halves are moved only when
        this is None or a stick style ('stick' / 'sticks'); stick lines are
        moved together with their half.
      direction: Orientation of the violinplot, 'horizontal' or 'vertical'.
    Returns:
      None, `ax` is modified in place.
    Raises:
      ValueError: if `direction` is neither 'horizontal' nor 'vertical'.
    """
    if direction not in ('horizontal', 'vertical'):
        raise ValueError(
            "direction must be 'horizontal' or 'vertical', got %r" % (direction,))
    horizontal = direction == 'horizontal'
    # Accept both spellings: 'stick' as well as 'sticks'.
    has_sticks = isinstance(inner, str) and inner.startswith('stick')

    if has_sticks:
        for line in ax.get_lines():
            # Horizontal violins spread along y, vertical ones along x.
            raw = line.get_ydata() if horizontal else line.get_xdata()
            # Work on a float copy so list or integer data can be shifted too.
            coords = np.asarray(raw, dtype=float)
            if coords.size < 2:
                continue
            # A stick whose two end points fall into different integer bins is
            # treated as belonging to the lower-coordinate half (top / left)
            # and moved in the negative direction; otherwise it is moved in
            # the positive direction.
            if np.floor(coords[0]) < np.floor(coords[1]):
                moved = coords - delta
            else:
                moved = coords + delta
            if horizontal:
                line.set_ydata(moved)
            else:
                line.set_xdata(moved)

    if not (has_sticks or inner is None):
        # Other inner styles are left untouched.
        return

    # (vertex column, sign) of the shift for every kind of half
    shifts = {'top': (1, -1), 'bottom': (1, 1),
              'left': (0, -1), 'right': (0, 1)}
    for item in ax.collections:
        # axis contains PolyCollections and PathCollections
        if not isinstance(item, matplotlib.collections.PolyCollection):
            continue
        # each half is expected to consist of exactly one path
        path, = item.get_paths()
        vertices = path.vertices
        column, sign = shifts[_wedge_dir(vertices, direction)]
        # the vertex array is modified in place, which moves the drawn patch
        vertices[:, column] += sign * delta
def _wedge_dir(vertices, direction):
"""
Args:
<vertices> The vertices from matplotlib.collections.PolyCollection
<direction> Direction must be 'horizontal' or 'vertical' according to how
your plot is laid out.
Returns:
- a string in ['top', 'bottom', 'left', 'right'] that determines where the
half of the violinplot is relative to the center.
"""
if direction == 'horizontal':
result = (direction, len(set(vertices[1:5,1])) == 1)
elif direction == 'vertical':
result = (direction, len(set(vertices[-3:-1,0])) == 1)
outcome_key = {('horizontal', True): 'bottom',
('horizontal', False): 'top',
('vertical', True): 'left',
('vertical', False): 'right'}
# if the first couple x/y values after the start are the same, it
# is the input direction. If not, it is the opposite
return outcome_key[result]
# Demo: draw one horizontal and one vertical split violinplot with stick
# lines and push the halves of both apart.

# create some data
n = 100  # number of samples
c = ['cats', 'rats', 'bears', 'pears', 'snares']  # classes
y = np.random.randn(n)
x = np.random.choice(c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame(dict(x=x, y=y, z=z))
print('done making data')
# initialise new axes;
fig, (ax1, ax2) = plt.subplots(2)
inner = "sticks"  # Note: 'box' is default
width = 0.75
delta = 0.05
# the violins are drawn narrower by delta; this is the width handed to both
# sns.violinplot() and offset_violinplot_halves()
final_width = width - delta
print(data)
# numeric values on x, categories on y -> horizontal violins
sns.violinplot(data=data, x='y', y='x',
               split=True, hue='z',
               ax=ax1, inner=inner, width=final_width,
               bw=0.2)
# categories on x, numeric values on y -> vertical violins
sns.violinplot(data=data, x='x', y='y',
               split=True, hue='z',
               ax=ax2, inner=inner, width=final_width,
               bw=0.2)
offset_violinplot_halves(ax1, delta, final_width, inner, 'horizontal')
offset_violinplot_halves(ax2, delta, final_width, inner, 'vertical')
plt.show()