Matplotlib 将分割的violinplot分成两半,以比较尾部数据

Matplotlib 将分割的violinplot分成两半,以比较尾部数据,matplotlib,seaborn,violin-plot,Matplotlib,Seaborn,Violin Plot,有没有一种方法可以将“分裂”的海生小提琴图(或其他类型的小提琴图)的两半进行物理分离?我试着比较两种不同的处理方法,但是有一条很细的尾巴,很难(不可能)分辨出分裂小提琴的一个或两个半边是否一直延伸到尾巴的顶端 我的一个想法是,如果两个部分稍微分开,而不是紧挨着彼此,那么就很容易准确地吸收数据 这是我的密码: import pandas as pd import numpy as np import matplotlib.pyplot as plt from matplotlib import

有没有一种方法可以将“拆分(split)”的 Seaborn 小提琴图(或其他类型的小提琴图)的两半在物理上分开?我想比较两种不同的处理方法,但是图中有一条很细的尾巴,很难(甚至不可能)分辨出拆分小提琴的其中一半还是两半都一直延伸到了尾巴的顶端。

我的一个想法是,如果两半稍微分开一点,而不是紧挨着彼此,那么就更容易准确地读懂数据。

这是我的代码:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns

# Load the columns for the scheduling run into a dataframe.
# `sheet_name` / `usecols` are the current spellings of the `sheetname` /
# `parse_cols` keywords, which pandas deprecated in 0.21 and later removed.
df1 = pd.read_excel('Modeling analysis charts.xlsx',
                    sheet_name='lmps',
                    usecols=[0, 5],
                    skiprows=0,
                    header=1)

# identify which dispatch run this data is from
df1['Run'] = 'Scheduling'

# Load the columns for the pricing run from the same sheet.
df2 = pd.read_excel('Modeling analysis charts.xlsx',
                    sheet_name='lmps',
                    usecols=[7, 12],
                    skiprows=0,
                    header=1)

# identify which dispatch run this data is from
df2['Run'] = 'Pricing'

# drop rows with missing data
df1 = df1.dropna(how='any')
df2 = df2.dropna(how='any')

# merge data from different runs
df = pd.concat([df1, df2])

# LMPs are all opposite of actual values, so correct that
df['LMP'] = -df['LMP']

fontsize = 10

style.use('fivethirtyeight')

fig, axes = plt.subplots()

# One split violin per scenario: the two halves are the two dispatch runs.
sns.violinplot(x='Scenario', y='LMP', hue='Run', split=True, data=df,
               inner=None, scale='area', bw=0.2, cut=0, linewidth=0.5,
               ax=axes)
axes.set_title('Day Ahead Market')

#axes.set_ylim([-15,90])
axes.yaxis.grid(True)
axes.set_xlabel('Scenario')
axes.set_ylabel('LMP ($/MWh)')

#plt.savefig('DAMarket.pdf', bbox_inches='tight')

plt.show()
编辑:出于历史原因,这是公认的答案,但请看一看@conchoecia更新、更干净的实现。 好主意。我实现的基本思想是绘制整个图形,抓取对应于两个半小提琴的补丁,然后向左或向右移动这些补丁的路径。希望代码是自解释的,否则请在注释中告诉我


我对上面 @Paul 的答案进行了扩展,使其更加健壮。现在它既支持垂直方向也支持水平方向,并且我实现了对 inner="sticks"(内部短线)的处理,因为这适合我的应用场景。

将numpy导入为np
将matplotlib.pyplot作为plt导入
导入matplotlib.collections
导入seaborn作为sns
作为pd进口熊猫
def偏移量曲线图(最大、三角形、宽度、内部、方向):
"""
此函数用于偏移violinplot的一半以比较尾部
或者在它们之间绘制其他东西。这是专门设计的
对于Seaborn使用选项“split=True”的violinplots。
对于线条,这是在假设Seaborn使用
以整数为中心。
Args:
包含VIOLINPLOT的轴。
放在小提琴图两半之间的空间量
传递给sns.violinplot()的violinplot的总宽度
海洋生物的内部结构类型
violinplot的方向。“hotizontal”或“vertical”。
返回:
-NA,直接修改
"""
#补偿材料
如果内部==‘粘滞’:
lines=ax.get_lines()
对于行中的行:
如果方向==“水平”:
data=line.get_ydata()
打印(数据)
如果int(数据[0]+1)/int(数据[1]+1)<1:
#类型为顶部,移动为负,水平方向向后
数据-=增量
其他:
#类型为底部,移动位置,水平方向向后
数据+=增量
行。设置数据(数据)
elif方向==“垂直”:
数据=行。获取扩展数据()
打印(数据)
如果int(数据[0]+1)/int(数据[1]+1)<1:
#类型为左,移动为负
数据-=增量
其他:
#类型为左,移动位置
数据+=增量
行。设置扩展数据(数据)
对于ii,枚举中的项目(ax.集合):
#axis包含多个集合和路径集合
如果isinstance(项,matplotlib.collections.PolyCollection):
#获取路径
路径,=项。获取路径()
顶点=路径。顶点
half_type=_wedge_dir(顶点,方向)
#移动路径的x坐标
如果输入[top',[bottom']:
如果内部在[“粘住”,无]:
如果half_type==‘top’:#->up
顶点[:,1]=delta
elif half_type==‘bottom’:#->down
顶点[:,1]+=delta
elif half_键入[‘左’、‘右’]:
如果内部在[“粘住”,无]:
如果half_type==‘left’:#->left
顶点[:,0]=delta
elif half_type==‘right’:#->向下
顶点[:,0]+=delta
def_wedge_dir(顶点,方向):
"""
Args:
matplotlib.collections.PolyCollection中的顶点
方向必须是“水平”或“垂直”,具体取决于
你的阴谋已经策划好了。
返回:
-['top','bottom','left','right']中的字符串,用于确定
小提琴图的一半是相对于中心的。
"""
如果方向==“水平”:
结果=(方向,len(集合(顶点[1:5,1]))==1)
elif方向==“垂直”:
结果=(方向,len(集合(顶点[-3:-1,0]))==1)
结果_键={('horizontal',True):'bottom',
('horizontal',False):'top',
('vertical',True):'left',
('vertical',False):'right'}
#如果启动后的第一对x/y值相同,则
#是输入方向。如果不是,则相反
返回结果\u键[结果]
#创建一些数据
n=100#样本数量
c=[‘猫’、‘老鼠’、‘熊’、‘梨’、‘圈套’]#类
y=np.random.randn(n)
x=np.随机选择(c,大小=n)
z=np.random.rand(n)>0.5#子类
数据=局部数据帧(dict(x=x,y=y,z=z))
打印('完成制作数据')
#初始化新轴;
图(ax1,ax2)=plt.子批次(2)
internal=“sticks”#注意:“box”是默认值
宽度=0.75
δ=0.05
最终宽度=宽度-增量
打印(数据)
sns.violinplot(数据=数据,x='y',y='x',
split=True,hue=z,
ax=ax1,内部为“='sticks',
体重=0.2)
sns.violinplot(数据=数据,x='x',y='y',
split=True,hue=z,
ax=ax2,内部为“='sticks',
体重=0.2)
偏移曲线图(ax1,三角形,最终宽度,内部“水平”)
偏移曲线图(ax2,三角形,最终宽度,内部“垂直”)
plt.show()

我会直接计算 KDE 并比较它们,而不是试图从一个刻意设计为高层接口的绘图函数中提取信息。另一个想法是绘制最大值。
import numpy as np
import matplotlib.pyplot as plt;
import matplotlib.collections
import seaborn as sns
import pandas as pd

# Synthetic data: `n` normal draws spread over `c` integer classes, each draw
# tagged with a random boolean sub-class that serves as the split hue.
n = 10000  # number of samples
c = 5  # classes
y = np.random.randn(n)
x = np.random.randint(0, c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame({'x': x, 'y': y, 'z': z})

# Start from a fresh axis: the index-based bookkeeping below relies on the
# violin polygons being the only collections present, in drawing order.
fig, ax = plt.subplots(1, 1)

# Draw the split violins.
inner = None  # Note: 'box' is default
ax = sns.violinplot(data=data, x='x', y='y', hue='z', split=True,
                    inner=inner, ax=ax)

# Push the two halves of every violin apart by `delta` on each side.
delta = 0.02
for index, collection in enumerate(ax.collections):
    # The axis holds PolyCollections and PathCollections; only the former
    # are moved.
    if not isinstance(collection, matplotlib.collections.PolyCollection):
        continue

    violin_path, = collection.get_paths()
    points = violin_path.vertices

    if not inner:
        # Collections alternate between the two halves: even -> left,
        # odd -> right.
        sign = 1 if index % 2 else -1
    else:
        # inner='box' adds a third kind of PolyCollection per violin,
        # which is left where it is.
        position = index % 3
        if position == 2:
            continue
        sign = -1 if position == 0 else 1

    # Shift the x-coordinates of the path in place.
    points[:, 0] += sign * delta
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.collections
import seaborn as sns
import pandas as pd

def offset_violinplot_halves(ax, delta, width, inner, direction):
    """
    This function offsets the halves of a violinplot to compare tails
    or to plot something else in between them. This is specifically designed
    for violinplots by Seaborn that use the option `split=True`.

    For lines, this works on the assumption that Seaborn plots everything with
     integers as the center: a stick whose first point lies in a lower integer
     interval than its second point is treated as belonging to the left/top
     half, every other stick as belonging to the right/bottom half.

    Args:
     <ax>    The axis that contains the violinplots.
     <delta> The distance each half is moved away from the center, so the
              resulting gap between the halves is 2 * delta.
     <width> The total width of the violinplot, as passed to sns.violinplot().
              Not used by the implementation; kept for existing callers.
     <inner> The `inner` option used for the violinplot. Stick lines are moved
              only for 'sticks'; polygons are moved only for 'sticks' or None.
     <direction> Orientation of violinplot. 'horizontal' or 'vertical'.

    Returns:
     - NA, modifies the <ax> directly

    Raises:
     ValueError: if <direction> is neither 'horizontal' nor 'vertical'.
    """
    if direction not in ('horizontal', 'vertical'):
        raise ValueError(
            "direction must be 'horizontal' or 'vertical', got %r" % (direction,))
    horizontal = direction == 'horizontal'

    # Move the inner stick lines along the categorical axis.
    if inner == 'sticks':
        for line in ax.get_lines():
            raw = line.get_ydata() if horizontal else line.get_xdata()
            data = np.asarray(raw, dtype=float)
            if data.size < 2:
                # Too short to be a stick; nothing to classify.
                continue
            # Comparing floors instead of dividing truncated values gives the
            # same answer for non-negative centers and cannot divide by zero
            # for lines at negative coordinates.
            if np.floor(data[0]) < np.floor(data[1]):
                shifted = data - delta  # left half (top half when horizontal)
            else:
                shifted = data + delta  # right half (bottom half when horizontal)
            if horizontal:
                line.set_ydata(shifted)
            else:
                line.set_xdata(shifted)

    # half -> (vertex column, sign of the shift); column 0 holds x, column 1 y.
    shifts = {'top': (1, -1), 'bottom': (1, 1),
              'left': (0, -1), 'right': (0, 1)}
    move_polygons = inner in ('sticks', None)

    for item in ax.collections:
        # axis contains PolyCollections and PathCollections
        if not isinstance(item, matplotlib.collections.PolyCollection):
            continue
        # Each violin half is expected to consist of exactly one path.
        path, = item.get_paths()
        vertices = path.vertices
        half_type = _wedge_dir(vertices, direction)
        if move_polygons:
            column, sign = shifts[half_type]
            # In-place update so the path held by the collection moves.
            vertices[:, column] += sign * delta

def _wedge_dir(vertices, direction):
    """
    Decide which half of a split violin a polygon's vertices describe.

    The decision is a heuristic on the vertex layout: for 'horizontal' the
    half is 'bottom' when vertices 1..4 all share one value in column 1,
    otherwise 'top'; for 'vertical' the half is 'left' when the third-to-last
    and second-to-last vertices share one value in column 0, otherwise 'right'.

    Args:
      <vertices>  The vertices from matplotlib.collections.PolyCollection,
                   indexed as vertices[row, column].
      <direction> Direction must be 'horizontal' or 'vertical' according to how
                   your plot is laid out.
    Returns:
      - a string in ['top', 'bottom', 'left', 'right'] that determines where the
         half of the violinplot is relative to the center.
    Raises:
      ValueError: if <direction> is neither 'horizontal' nor 'vertical'.
    """
    if direction == 'horizontal':
        constant = len(set(vertices[1:5, 1])) == 1
        return 'bottom' if constant else 'top'
    if direction == 'vertical':
        constant = len(set(vertices[-3:-1, 0])) == 1
        return 'left' if constant else 'right'
    # Previously this fell through to an UnboundLocalError; fail with a clear
    # message instead.
    raise ValueError(
        "direction must be 'horizontal' or 'vertical', got %r" % (direction,))

# Synthetic demo data: `n` normal draws, each assigned a random class label
# from `c` and a random boolean sub-class used as the split hue.
n = 100  # number of samples
c = ['cats', 'rats', 'bears', 'pears', 'snares']  # classes
y = np.random.randn(n)
x = np.random.choice(c, size=n)
z = np.random.rand(n) > 0.5  # sub-class
data = pd.DataFrame({'x': x, 'y': y, 'z': z})
print('done making data')

# One axis per orientation.
fig, (ax1, ax2) = plt.subplots(2)

inner = "sticks"  # Note: 'box' is default
width = 0.75
delta = 0.05
final_width = width - delta
print(data)

# (axis, column on x, column on y, orientation of the resulting violins)
panels = [
    (ax1, 'y', 'x', 'horizontal'),
    (ax2, 'x', 'y', 'vertical'),
]

# Draw both plots first, then separate the halves on each axis.
for axis, x_column, y_column, _ in panels:
    sns.violinplot(data=data, x=x_column, y=y_column,
                   split=True, hue='z',
                   ax=axis, inner='sticks',
                   bw=0.2)

for axis, _, _, orientation in panels:
    offset_violinplot_halves(axis, delta, final_width, inner, orientation)

plt.show()