Python 3.x 截距为零的线性回归和斜率值的不确定性

Python 3.x 截距为零的线性回归和斜率值的不确定性,python-3.x,linear-regression,Python 3.x,Linear Regression,我想用python进行线性回归,有两个要求: 截距被迫归零 在输出中,我希望有斜率参数的不确定性,以及p值,r平方 据我所知,stats.linregress执行第一个要求,np.linalg.lstsq执行第二个要求。有人能帮我找到最简单的方法吗 多谢各位, 卡米尔 这里的想法是,我们在没有+b的情况下对线性函数进行回归,因为b会上下移动y轴截距,因此当该值为0时,我们得到截距为(0,0)的线性回归 使用scipy.optimize.curve_fit的一个好处是,您可以对任何公式进行回归-

我想用python进行线性回归,有两个要求:

  • 截距强制为零

  • 在输出中,我希望有斜率参数的不确定性,以及p值,r平方

据我所知,stats.linregress执行第一个要求,np.linalg.lstsq执行第二个要求。有人能帮我找到最简单的方法吗?

多谢各位, 卡米尔

这里的思路是:我们对不含 +b 项的线性函数进行回归。因为 b 决定直线在 y 轴上的截距(使直线上下平移),所以当 b 为 0 时,得到的就是经过原点 (0,0) 的线性回归。


使用 scipy.optimize.curve_fit 的一个好处是,您可以对任意公式进行回归——尽管 R 平方在曲线回归中多少有些多余。

此示例提供了您问题中要求的统计信息,还绘制了拟合函数与数据的对比图

from scipy.optimize import curve_fit
import numpy as np
import scipy.odr
import scipy.stats
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt

# Sample observations: nine x values and the nine matching y values.
xData = np.array([
    5.357, 5.797, 5.936,
    6.161, 6.697, 6.731,
    6.775, 8.442, 9.861,
])
yData = np.array([
    0.376, 0.874, 1.049,
    1.327, 2.054, 2.077,
    2.138, 4.744, 7.104,
])

def func(x, b0):
    """Evaluate the zero-intercept linear model ``b0 * x``.

    x  -- value(s) at which the model is evaluated
    b0 -- slope of the line through the origin
    """
    scaled = b0 * x
    return scaled

# Starting guess for the slope: mean of the y data divided by mean of the x data.
initialParameters = numpy.array([yData.mean() / xData.mean()])

def f_wrapper_for_odr(beta, x):
    """Call ``func`` with the ``(beta, x)`` parameter order used for ODR.

    ``beta`` is unpacked into the positional arguments that follow ``x``
    in ``func``.
    """
    parameters = tuple(beta)
    return func(x, *parameters)

# Least-squares fit of the zero-intercept model y = b0 * x.
fittedParameters, cov = curve_fit(func, xData, yData, p0=initialParameters)

# Obtain parameter statistics at the curve_fit solution: ODR is started from
# the fitted parameters with maxit=0 (no iterations) and fit_type=2
# (ordinary least squares).  The public scipy.odr names are used rather than
# the scipy.odr.odrpack module path.
model = scipy.odr.Model(f_wrapper_for_odr)
data = scipy.odr.Data(xData, yData)
myodr = scipy.odr.ODR(data, model, beta0=fittedParameters, maxit=0)
myodr.set_job(fit_type=2)
fittedParameterstatistics = myodr.run()

df_e = len(xData) - len(fittedParameters)  # degrees of freedom, error
cov_beta = fittedParameterstatistics.cov_beta  # parameter covariance matrix from ODR
sd_beta = fittedParameterstatistics.sd_beta  # parameter standard errors from ODR

# Two-sided 95% confidence interval for every parameter.
t_df = scipy.stats.t.ppf(0.975, df_e)
ci = [
    [beta - t_df * sd, beta + t_df * sd]
    for beta, sd in zip(fittedParameters, sd_beta)
]

tstat_beta = fittedParameters / sd_beta  # coeff t-statistics
# Two-sided p-values; the survival function keeps precision for large |t|,
# where 1 - cdf would round to zero.
pstat_beta = 2.0 * scipy.stats.t.sf(np.abs(tstat_beta), df_e)

for beta, (ci_low, ci_high), tstat, pstat in zip(fittedParameters, ci, tstat_beta, pstat_beta):
    print('parameter:', beta)
    print('   conf interval:', ci_low, ci_high)
    print('   tstat:', tstat)
    print('   pstat:', pstat)
    print()

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = np.square(absError)  # squared errors
MSE = np.mean(SE)  # mean squared errors
RMSE = np.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  The residual term must be the plain mean
# of squared residuals (not their variance): without an intercept the
# residuals need not average to zero, and subtracting their mean would
# understate the error and inflate R-squared.
Rsquared = 1.0 - (MSE / np.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Display the raw data together with the fitted model curve.

    graphWidth and graphHeight are each divided by 100.0 to form the
    ``figsize`` given to matplotlib along with ``dpi=100``.  The function
    reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters``.
    """
    figureSize = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=figureSize, dpi=100)
    axes = figure.add_subplot(111)

    # raw data, drawn with the 'D' marker format
    axes.plot(xData, yData, 'D')

    # fitted equation evaluated across the range spanned by the x data
    xLow, xHigh = min(xData), max(xData)
    xModel = numpy.linspace(xLow, xHigh)
    yModel = func(xModel, *fittedParameters)
    axes.plot(xModel, yModel)

    # axis labels
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # release pyplot figures once the window is done

# Requested graph dimensions, passed straight to the plotting routine.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)
from scipy.optimize import curve_fit
import numpy as np
import scipy.odr
import scipy.stats
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt

# Sample observations: nine x values and the nine matching y values.
xData = np.array([
    5.357, 5.797, 5.936,
    6.161, 6.697, 6.731,
    6.775, 8.442, 9.861,
])
yData = np.array([
    0.376, 0.874, 1.049,
    1.327, 2.054, 2.077,
    2.138, 4.744, 7.104,
])

def func(x, b0):
    """Evaluate the zero-intercept linear model ``b0 * x``.

    x  -- value(s) at which the model is evaluated
    b0 -- slope of the line through the origin
    """
    scaled = b0 * x
    return scaled

# Starting guess for the slope: mean of the y data divided by mean of the x data.
initialParameters = numpy.array([yData.mean() / xData.mean()])

def f_wrapper_for_odr(beta, x):
    """Call ``func`` with the ``(beta, x)`` parameter order used for ODR.

    ``beta`` is unpacked into the positional arguments that follow ``x``
    in ``func``.
    """
    parameters = tuple(beta)
    return func(x, *parameters)

# Least-squares fit of the zero-intercept model y = b0 * x.
fittedParameters, cov = curve_fit(func, xData, yData, p0=initialParameters)

# Obtain parameter statistics at the curve_fit solution: ODR is started from
# the fitted parameters with maxit=0 (no iterations) and fit_type=2
# (ordinary least squares).  The public scipy.odr names are used rather than
# the scipy.odr.odrpack module path.
model = scipy.odr.Model(f_wrapper_for_odr)
data = scipy.odr.Data(xData, yData)
myodr = scipy.odr.ODR(data, model, beta0=fittedParameters, maxit=0)
myodr.set_job(fit_type=2)
fittedParameterstatistics = myodr.run()

df_e = len(xData) - len(fittedParameters)  # degrees of freedom, error
cov_beta = fittedParameterstatistics.cov_beta  # parameter covariance matrix from ODR
sd_beta = fittedParameterstatistics.sd_beta  # parameter standard errors from ODR

# Two-sided 95% confidence interval for every parameter.
t_df = scipy.stats.t.ppf(0.975, df_e)
ci = [
    [beta - t_df * sd, beta + t_df * sd]
    for beta, sd in zip(fittedParameters, sd_beta)
]

tstat_beta = fittedParameters / sd_beta  # coeff t-statistics
# Two-sided p-values; the survival function keeps precision for large |t|,
# where 1 - cdf would round to zero.
pstat_beta = 2.0 * scipy.stats.t.sf(np.abs(tstat_beta), df_e)

for beta, (ci_low, ci_high), tstat, pstat in zip(fittedParameters, ci, tstat_beta, pstat_beta):
    print('parameter:', beta)
    print('   conf interval:', ci_low, ci_high)
    print('   tstat:', tstat)
    print('   pstat:', pstat)
    print()

modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = np.square(absError)  # squared errors
MSE = np.mean(SE)  # mean squared errors
RMSE = np.sqrt(MSE)  # Root Mean Squared Error, RMSE
# R-squared = 1 - SS_res / SS_tot.  The residual term must be the plain mean
# of squared residuals (not their variance): without an intercept the
# residuals need not average to zero, and subtracting their mean would
# understate the error and inflate R-squared.
Rsquared = 1.0 - (MSE / np.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Display the raw data together with the fitted model curve.

    graphWidth and graphHeight are each divided by 100.0 to form the
    ``figsize`` given to matplotlib along with ``dpi=100``.  The function
    reads the module-level ``xData``, ``yData``, ``func`` and
    ``fittedParameters``.
    """
    figureSize = (graphWidth / 100.0, graphHeight / 100.0)
    figure = plt.figure(figsize=figureSize, dpi=100)
    axes = figure.add_subplot(111)

    # raw data, drawn with the 'D' marker format
    axes.plot(xData, yData, 'D')

    # fitted equation evaluated across the range spanned by the x data
    xLow, xHigh = min(xData), max(xData)
    xModel = numpy.linspace(xLow, xHigh)
    yModel = func(xModel, *fittedParameters)
    axes.plot(xModel, yModel)

    # axis labels
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # release pyplot figures once the window is done

# Requested graph dimensions, passed straight to the plotting routine.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)