Numpy: show confidence limits and prediction limits in a scatter plot


I have two arrays of data, for heights and weights:

import numpy as np, matplotlib.pyplot as plt

heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65])
weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])

plt.plot(heights,weights,'bo')
plt.show()
I'd like to make a plot similar to this one:

[figure: scatter data with a fitted line, 95% confidence limits, and 95% prediction limits]
Any ideas are welcome.

You can use the seaborn plotting library to create the plot you want.

In [18]: import seaborn as sns

In [19]: heights = np.array([50,52,53,54,58,60,62,64,66,67, 68,70,72,74,76,55,50,45,65])
    ...: weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])
    ...: 

In [20]: sns.regplot(heights,weights, color ='blue')
Out[20]: <matplotlib.axes.AxesSubplot at 0x13644f60>
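Note: in newer seaborn releases (0.12 and later) regplot no longer accepts the data arrays as positional arguments, so the call above may need to be rewritten with keywords. A minimal sketch, assuming the same heights and weights arrays:

import seaborn as sns

# regplot draws the scatter points, the regression line, and a 95% confidence band by default
sns.regplot(x=heights, y=weights, color="blue")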

Here's what I put together. I tried to closely emulate your screenshot.

Given

Some detailed helper functions for plotting confidence intervals.

import numpy as np
import scipy as sp
import scipy.stats as stats
import matplotlib.pyplot as plt


%matplotlib inline


def plot_ci_manual(t, s_err, n, x, x2, y2, ax=None):
    """Return an axes of confidence bands using a simple approach.

    Notes
    -----
    .. math:: \left| \: \hat{\mu}_{y|x0} - \mu_{y|x0} \: \right| \; \leq \; T_{n-2}^{.975} \; \hat{\sigma} \; \sqrt{\frac{1}{n}+\frac{(x_0-\bar{x})^2}{\sum_{i=1}^n{(x_i-\bar{x})^2}}}
    .. math:: \hat{\sigma} = \sqrt{\sum_{i=1}^n{\frac{(y_i-\hat{y})^2}{n-2}}}

    References
    ----------
    .. [1] M. Duarte.  "Curve fitting," Jupyter Notebook.
       http://nbviewer.ipython.org/github/demotu/BMC/blob/master/notebooks/CurveFitting.ipynb

    """
    if ax is None:
        ax = plt.gca()

    ci = t * s_err * np.sqrt(1/n + (x2 - np.mean(x))**2 / np.sum((x - np.mean(x))**2))
    ax.fill_between(x2, y2 + ci, y2 - ci, color="#b9cfe7", edgecolor="none")

    return ax


def plot_ci_bootstrap(xs, ys, resid, nboot=500, ax=None):
    """Return an axes of confidence bands using a bootstrap approach.

    Notes
    -----
    The bootstrap approach iteratively resamples residuals.
    It plots `nboot` number of straight lines and outlines the shape of a band.
    The density of overlapping lines indicates improved confidence.

    Returns
    -------
    ax : axes
        - Cluster of lines
        - Upper and Lower bounds (high and low) (optional)  Note: sensitive to outliers

    References
    ----------
    .. [1] J. Stults. "Visualizing Confidence Intervals", Various Consequences.
       http://www.variousconsequences.com/2010/02/visualizing-confidence-intervals.html

    """ 
    if ax is None:
        ax = plt.gca()

    bootindex = np.random.randint

    for _ in range(nboot):
        # Resample residuals with replacement (indices 0 .. len(resid) - 1)
        resamp_resid = resid[bootindex(0, len(resid), len(resid))]
        # Fit a line to the data with the resampled residuals added
        pc = np.polyfit(xs, ys + resamp_resid, 1)
        # Plot bootstrap cluster
        ax.plot(xs, np.polyval(pc, xs), "b-", linewidth=2, alpha=3.0 / float(nboot))

    return ax
Code

# Computations ----------------------------------------------------------------
# Raw Data
heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65])
weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])

x = heights
y = weights

# Modeling with Numpy
def equation(a, b):
    """Return a 1D polynomial."""
    return np.polyval(a, b) 

p, cov = np.polyfit(x, y, 1, cov=True)                     # parameters and covariance matrix from the fit of a 1-D polynomial
y_model = equation(p, x)                                   # model using the fit parameters; NOTE: parameters here are coefficients

# Statistics
n = weights.size                                           # number of observations
m = p.size                                                 # number of parameters
dof = n - m                                                # degrees of freedom
t = stats.t.ppf(0.975, n - m)                              # used for CI and PI bands

# Estimates of Error in Data/Model
resid = y - y_model                           
chi2 = np.sum((resid / y_model)**2)                        # chi-squared; estimates error in data
chi2_red = chi2 / dof                                      # reduced chi-squared; measures goodness of fit
s_err = np.sqrt(np.sum(resid**2) / dof)                    # standard deviation of the error


# Plotting --------------------------------------------------------------------
fig, ax = plt.subplots(figsize=(8, 6))

# Data
ax.plot(
    x, y, "o", color="#b9cfe7", markersize=8, 
    markeredgewidth=1, markeredgecolor="b", markerfacecolor="None"
)

# Fit
ax.plot(x, y_model, "-", color="0.1", linewidth=1.5, alpha=0.5, label="Fit")  

x2 = np.linspace(np.min(x), np.max(x), 100)
y2 = equation(p, x2)

# Confidence Interval (select one)
plot_ci_manual(t, s_err, n, x, x2, y2, ax=ax)
#plot_ci_bootstrap(x, y, resid, ax=ax)

# Prediction Interval
pi = t * s_err * np.sqrt(1 + 1/n + (x2 - np.mean(x))**2 / np.sum((x - np.mean(x))**2))   
ax.fill_between(x2, y2 + pi, y2 - pi, color="None", linestyle="--")
ax.plot(x2, y2 - pi, "--", color="0.5", label="95% Prediction Limits")
ax.plot(x2, y2 + pi, "--", color="0.5")


# Figure Modifications --------------------------------------------------------
# Borders
ax.spines["top"].set_color("0.5")
ax.spines["bottom"].set_color("0.5")
ax.spines["left"].set_color("0.5")
ax.spines["right"].set_color("0.5")
ax.get_xaxis().set_tick_params(direction="out")
ax.get_yaxis().set_tick_params(direction="out")
ax.xaxis.tick_bottom()
ax.yaxis.tick_left() 

# Labels
plt.title("Fit Plot for Weight", fontsize="14", fontweight="bold")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.xlim(np.min(x) - 1, np.max(x) + 1)

# Custom legend
handles, labels = ax.get_legend_handles_labels()
display = (0, 1)
anyArtist = plt.Line2D((0, 1), (0, 0), color="#b9cfe7")    # create custom artists
legend = plt.legend(
    [handle for i, handle in enumerate(handles) if i in display] + [anyArtist],
    [label for i, label in enumerate(labels) if i in display] + ["95% Confidence Limits"],
    loc=9, bbox_to_anchor=(0, -0.21, 1., 0.102), ncol=3, mode="expand"
)  
legend.get_frame().set_edgecolor("0.5")

# Save Figure
plt.tight_layout()
plt.savefig("filename.png", bbox_extra_artists=(legend,), bbox_inches="tight")

plt.show()
Output

Using plot_ci_manual():

[figure: fitted line with the shaded 95% confidence band and dashed 95% prediction limits]

Using plot_ci_bootstrap():

[figure: the same plot, with the confidence band outlined by the cluster of bootstrap fit lines]

Hope this helps. Cheers.


Details

  • I believe that because the legend is outside the figure, it does not show up in the matplotlib popup window. It works fine in Jupyter using %matplotlib inline.

  • The primary confidence interval code (plot_ci_manual()) was adapted from another source that produces a plot similar to the OP's. You can select a more advanced technique by uncommenting the second option, plot_ci_bootstrap().

  • Updates

    • This post has been updated with revised code compatible with Python 3.
    • stats.t.ppf() accepts the lower tail probability. According to the resources below, t = sp.stats.t.ppf(0.95, n - m) was corrected to t = sp.stats.t.ppf(0.975, n - m) to reflect a two-sided 95% t-statistic (or a one-sided 97.5% t-statistic). (Thanks @Bonlenfum and @tryptofan.)
    • y2 was updated to respond more flexibly to a given model (@RENATION).
    • An abstracted equation function was added to wrap the model function. Non-linear regressions are possible, although not demonstrated; amend the appropriate variables as needed (thanks @PJW).

  • See also

    • This post on plotting bands with the statsmodels library (a minimal sketch of that route follows this list).
    • This post on plotting bands and computing confidence intervals with the uncertainties library (install it with care, in a separate environment).
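The statsmodels route mentioned above computes both band types directly from an OLS fit. Here is a minimal sketch of that approach (not part of the original answer; it assumes statsmodels is installed and reuses the heights/weights data):

import numpy as np
import statsmodels.api as sm

heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65])
weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])

# Fit weight ~ height with an intercept term
X = sm.add_constant(heights)
results = sm.OLS(weights, X).fit()

# Evaluate the bands on a fine grid of heights
x_new = np.linspace(heights.min(), heights.max(), 100)
pred = results.get_prediction(sm.add_constant(x_new)).summary_frame(alpha=0.05)

# mean_ci_lower/upper  -> 95% confidence limits of the fitted mean
# obs_ci_lower/upper   -> 95% prediction limits for new observations
print(pred[["mean", "mean_ci_lower", "mean_ci_upper",
            "obs_ci_lower", "obs_ci_upper"]].head())

The four interval columns can be passed straight to ax.fill_between() (or plotted as dashed lines) to reproduce the shaded bands above.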
For one of my projects I needed to create intervals for time-series modelling and, to make the procedure more efficient, I created tsmoothie: a python library for time-series smoothing and outlier detection in a vectorized way.

It provides different smoothing algorithms together with the possibility to compute intervals.

In the case of linear regression:

    import numpy as np
    import matplotlib.pyplot as plt
    from tsmoothie.smoother import *
    from tsmoothie.utils_func import sim_randomwalk
    
    # generate 10 randomwalks of length 50
    np.random.seed(33)
    data = sim_randomwalk(n_series=10, timesteps=50, 
                          process_noise=10, measure_noise=30)
    
    # operate smoothing
    smoother = PolynomialSmoother(degree=1)
    smoother.smooth(data)
    
    # generate intervals
    low_pi, up_pi = smoother.get_intervals('prediction_interval', confidence=0.05)
    low_ci, up_ci = smoother.get_intervals('confidence_interval', confidence=0.05)
    
    # plot the first smoothed timeseries with intervals
    plt.figure(figsize=(11,6))
    plt.plot(smoother.smooth_data[0], linewidth=3, color='blue')
    plt.plot(smoother.data[0], '.k')
    plt.fill_between(range(len(smoother.data[0])), low_pi[0], up_pi[0], alpha=0.3, color='blue')
    plt.fill_between(range(len(smoother.data[0])), low_ci[0], up_ci[0], alpha=0.3, color='blue')
    

For a regression of degree greater than 1:

    # operate smoothing
    smoother = PolynomialSmoother(degree=5)
    smoother.smooth(data)
    
    # generate intervals
    low_pi, up_pi = smoother.get_intervals('prediction_interval', confidence=0.05)
    low_ci, up_ci = smoother.get_intervals('confidence_interval', confidence=0.05)
    
    # plot the first smoothed timeseries with intervals
    plt.figure(figsize=(11,6))
    plt.plot(smoother.smooth_data[0], linewidth=3, color='blue')
    plt.plot(smoother.data[0], '.k')
    plt.fill_between(range(len(smoother.data[0])), low_pi[0], up_pi[0], alpha=0.3, color='blue')
    plt.fill_between(range(len(smoother.data[0])), low_ci[0], up_ci[0], alpha=0.3, color='blue')
    


I also point out that tsmoothie can carry out the smoothing of multiple time series in a vectorized way. Hope this can help someone.

I need to make this kind of plot from time to time... this was my first time doing it with Python/Jupyter, and this post helped me a lot, especially the detailed answer by pylang.

I know there are "easier" ways to get to the goal, but I think this way is much more didactic and lets me learn, step by step, what is going on. I even learned here that there are "prediction intervals"! Thanks.

Below is the pylang code in a more straightforward form, including the calculation of Pearson's correlation (and so r²) and of the mean squared error (MSE). Of course, the final plot (!) must be adapted for every data set...

    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.stats as stats

    heights = np.array([50,52,53,54,58,60,62,64,66,67,68,70,72,74,76,55,50,45,65])
    weights = np.array([25,50,55,75,80,85,50,65,85,55,45,45,50,75,95,65,50,40,45])

    x = heights
    y = weights

    slope, intercept = np.polyfit(x, y, 1)           # linear model adjustment

    y_model = np.polyval([slope, intercept], x)      # modeling...

    x_mean = np.mean(x)
    y_mean = np.mean(y)
    n = x.size                                       # number of samples
    m = 2                                            # number of parameters
    dof = n - m                                      # degrees of freedom
    t = stats.t.ppf(0.975, dof)                      # Student's statistic of interval confidence

    residual = y - y_model

    std_error = (np.sum(residual**2) / dof)**.5      # standard deviation of the error

    # calculating r2
    # https://www.statisticshowto.com/probability-and-statistics/coefficient-of-determination-r-squared/
    # Pearson's correlation coefficient
    numerator = np.sum((x - x_mean) * (y - y_mean))
    denominator = (np.sum((x - x_mean)**2) * np.sum((y - y_mean)**2))**.5
    correlation_coef = numerator / denominator
    r2 = correlation_coef**2

    # mean squared error
    MSE = 1/n * np.sum((y - y_model)**2)

    # to plot the adjusted model
    x_line = np.linspace(np.min(x), np.max(x), 100)
    y_line = np.polyval([slope, intercept], x_line)

    # confidence interval
    ci = t * std_error * (1/n + (x_line - x_mean)**2 / np.sum((x - x_mean)**2))**.5
    # prediction interval
    pi = t * std_error * (1 + 1/n + (x_line - x_mean)**2 / np.sum((x - x_mean)**2))**.5

    ############### Plotting
    plt.rcParams.update({'font.size': 14})
    fig = plt.figure()
    ax = fig.add_axes([.1, .1, .8, .8])

    ax.plot(x, y, 'o', color='royalblue')
    ax.plot(x_line, y_line, color='royalblue')
    ax.fill_between(x_line, y_line + pi, y_line - pi, color='lightcyan', label='95% prediction interval')
    ax.fill_between(x_line, y_line + ci, y_line - ci, color='skyblue', label='95% confidence interval')

    ax.set_xlabel('x')
    ax.set_ylabel('y')

    # rounding and position must be adapted for each case and preference
    a = str(np.round(intercept))
    b = str(np.round(slope, 2))
    r2s = str(np.round(r2, 2))
    MSEs = str(np.round(MSE))

    ax.text(45, 110, 'y = ' + a + ' + ' + b + ' x')
    ax.text(45, 100, '$r^2$ = ' + r2s + '     MSE = ' + MSEs)

    plt.legend(bbox_to_anchor=(1, .25), fontsize=12)
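As a quick sanity check on the hand-computed correlation above (a small addition, assuming the variable names from the code), NumPy and SciPy reproduce the same r²:

    # Cross-check of the manual Pearson/r2 computation
    print(np.corrcoef(x, y)[0, 1]**2)       # should equal r2
    print(stats.pearsonr(x, y)[0]**2)       # same value; pearsonr also returns a p-value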