Python “步行者不是”；“步行”；基于MCMC的模型拟合_Python_Mathematical Optimization_Bayesian_Mcmc_Emcee

Python “步行者不是”；“步行”；基于MCMC的模型拟合

python

Python “步行者不是”；“步行”；基于MCMC的模型拟合,python,mathematical-optimization,bayesian,mcmc,emcee,Python,Mathematical Optimization,Bayesian,Mcmc,Emcee,我在执行模型的MCMC分析时遇到困难。我相信这与我在模型中有一个不完整的伽马函数有关我试图最小化高斯对数似然，但似乎步行者被困在井里，而不是试图最小化似然函数。如下图所示，其中y轴为模型参数，x轴为步数。该图像显示了漫游者如何不探索参数空间。我添加了另一幅图像来演示如何正确探索参数空间及我在下面添加了一些代码来演示我在做什么，其中x、y和yerr是大约4000个点的数组。该代码适用于其他模型，但仅在该模型上中断，因此它必须是其他模型所没有的内在特性。其他模型最明显的变化是添加了不完全伽马函

我在执行模型的MCMC分析时遇到困难。我相信这与我在模型中有一个不完整的伽马函数有关

我试图最小化高斯对数似然，但似乎步行者被困在井里，而不是试图最小化似然函数。如下图所示，其中y轴为模型参数，x轴为步数。该图像显示了漫游者如何不探索参数空间。我添加了另一幅图像来演示如何正确探索参数空间

及

我在下面添加了一些代码来演示我在做什么，其中x、y和yerr是大约4000个点的数组。该代码适用于其他模型，但仅在该模型上中断，因此它必须是其他模型所没有的内在特性。其他模型最明显的变化是添加了不完全伽马函数，否则它的函数形式与其他模型非常相似

我正在安装的模型具有以下形式：

def singinhomobremss(freq,S_norm,alpha,p,freq_peak): # Single inhomogeneous free-free emission model
    return S_norm*(p+1)*(np.power((freq/freq_peak),(2.1*(p+1)-alpha))*special.gammainc((p+1),(freq/freq_peak)**(-2.1)))*special.gamma(p+1)

请注意，我使用的是python包emcee（我会发布链接，但我没有足够的声誉…）。我真的不明白为什么步行者拒绝为这个模型“步行”，而他们却为其他模型“步行”。非常感谢您的帮助，但我知道这是一个合理的利基领域

import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt
import scipy.special as special # For access to the incomplete gamma function.
import emcee
import triangle 
import inspect

def singinhomobremss(freq,S_norm,alpha,p,freq_peak): # Single inhomogeneous free-free emission model
    return S_norm*(p+1)*(np.power((freq/freq_peak),(2.1*(p+1)-alpha))*special.gammainc((p+1),(freq/freq_peak)**(-2.1)))*special.gamma(p+1)

# initial guess for a fit

p0guess = [7, 0.6, -0.5, 3.5]

# Defining log-likelihood function
def lnlike(theta,x,y,yerr):

    S_norm,alpha,p,freq_peak = theta
    model = singinhomobremss(x,S_norm,alpha,p,freq_peak)
    inv_sigma = 1.0/(yerr**2)

    return -0.5*(np.sum((y-model)**2*inv_sigma - np.log(inv_sigma))) 
    # Use the scipy.opt model to find the optimum of this likelihood function

nll = lambda *args: -lnlike(*args)
result = opt.fmin(nll,p0guess,args=(x,y,yerr),full_output='true')
S_norm_ml,alpha_ml,p_ml,freq_peak_ml = result[0]

# Defining priors
def lnprior(theta):
    S_norm,alpha,p,freq_peak = theta
    if S_norm_ml/100. < S_norm < S_norm_ml/100. and alpha_ml/100. < alpha < alpha_ml*100. and p_ml/100. < p < p_ml*100. and freq_peak_ml/100. < freq_peak < freq_peak_ml*100:
        return 0.00 
    return -np.inf      

# Combining this prior with the definition of the likelihood function, the probablity fucntion is:

def lnprob(theta, x, y, yerr):
    lp = lnprior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)

# Now implement emcee

ndim, nwalkers, nsteps = len(inspect.getargspec(singinhomobremss)[0])-1, 200, 2000 

# Initialising the walkers in a Gaussian ball around maximum likelihood result
#pos = [result['x'] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]
pos = [result[0] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)]

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args = (x,y,yerr))
sampler.run_mcmc(pos, nsteps) # This is the workhorse step.

# Now plotting the walks of the walkers with each step, for each parameter. If they have converged on a good value they should have clumped together.

fig = plt.figure(2,figsize=(10, 10))
fig.clf()
for j in range(ndim):
    ax = fig.add_subplot(ndim,1,j+1)
    ax.plot(np.array([sampler.chain[:,i,j] for i in range(nsteps)]),"k", alpha = 0.3)
    ax.set_ylabel((r'$S_{norm}$',r'$\alpha$',r'$p$',r'$\nu_{peak}$')[j], fontsize = 15)
plt.xlabel('Steps', fontsize = 15)
fig.show()

# To me it looks like the burn in period is well and truly over by 400 steps. So I will exclude those. 
print 'The burnin applied was 400. Make sure the walkers have converged after that many steps.'
samples = sampler.chain[:,400:,:].reshape((-1,ndim))

# Plotting the histograms of the fit.
trifig = triangle.corner(samples, labels = [r'$S_{norm}$',r'$\alpha$',r'$p$',r'$\nu_{peak}$'])

# Finally to get the 1 sigma final uncertainties you do
S_norm_singinhomobremss_mcmc, alpha_singinhomobremss_mcmc, p_singinhomobremss_mcmc, freq_peak_singinhomobremss_mcmc = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]), zip(*np.percentile(samples,[16,50,84], axis = 0))) # Uncertainites based on the 16th, 50th and 84th percentile.

plt.figure()
plt.clf()
plt.errorbar(nu, flux, flux_err, marker = '.', color = 'gray', linestyle='none', label = 'Data',alpha=0.2)  
plt.loglog(nu_peaklong,singinhomobremss(nu_peaklong, *poptsinginhomobremss), 'saddlebrown',label="Best fit inhomogeneous model from least-square")
plt.loglog(nu_peaklong,singinhomobremss(nu_peaklong,S_norm_singinhomobremss_mcmc[0], alpha_singinhomobremss_mcmc[0], p_singinhomobremss_mcmc[0], freq_peak_singinhomobremss_mcmc[0]),color = 'r', linestyle='-', label="Best fit inhomogeneous free-free model.")
plt.title('PKS 1718-649 Epoch - '+filename, fontsize = 15)
minnu = np.array(min(nu))-0.05*np.array(min(nu))
plt.legend(loc='lower center', fontsize=10) # make a legend in the best location
plt.xlabel('Frequency (GHz)', fontsize = 15)
plt.ylabel('Flux Density (Jy)', fontsize = 15)
plt.axis([min(nu)-0.1*min(nu), max(nu)+0.1*max(nu), min(flux)-0.1*min(flux), max(flux)+0.1*max(flux)])
plt.rc('xtick',labelsize=15)
plt.rc('ytick',labelsize=15)
plt.xticks([2,4,6,8,10]) #Setting grid line positions.
plt.yticks([3,4,5])
plt.grid(True)
plt.show()

将numpy导入为np
将matplotlib.pyplot作为plt导入
导入scipy.optimize作为选项
将scipy.special导入为special#以访问不完整的gamma函数。
进口主持
导入三角形
进口检验
def singinhomobremss（频率、S_范数、α、p、频率峰值）：#单一非均匀自由-自由发射模型
返回S_范数*（p+1）*（np.功率（（频率/频率峰值），（2.1*（p+1）-alpha））*特殊.gammainc（（p+1），（频率/频率峰值）**（-2.1））*特殊.gamma（p+1）
#猜测是否合适
p0guess=[7,0.6，-0.5,3.5]
#对数似然函数的定义
def lnlike（θ，x，y，yerr）：
S_范数，α，p，频率峰值=θ
模型=singinhomobremss（x，S_范数，α，p，频率峰值）
投资西格玛=1.0/（年**2）
返回值-0.5*（np.总和（（y型）**2*库存西格玛-np.对数（库存西格玛）））
#使用scipy.opt模型找到该似然函数的最佳值
nll=lambda*args:-lnlike（*args）
结果=opt.fmin（nll，p0guess，args=（x，y，yerr），完整输出='true'）
S_norm_ml，alpha_ml，p_ml，freq_peak_ml=结果[0]
#定义先验
def lnprior（θ）：
S_范数，α，p，频率峰值=θ
如果S_norm_ml/100.

我已经解决了这个问题！只要在这里写下解决方案，以备将来有人遇到类似情况时参考。步行者没有步行，因为lnprob是-inf。解决方案实际上是补救性的，没有任何实际效果