Python: converting a mixture of Gaussians to PyMC3

I am learning PyMC3 and want to work through a simple mixture-of-Gaussians example. I found an example and wanted to convert it to PyMC3, but I currently hit an error when I try to draw the traceplot.

import numpy as np
import matplotlib.pyplot as plt
import pymc as pm  # this early PyMC3 build installs as the `pymc` package (see the traceback paths below)

n1 = 500
n2 = 200
n = n1+n2

mean1 = 21.8
mean2 = 42.0

precision = 0.1

sigma = np.sqrt(1 / precision)

# precision = 1/sigma^2
print "sigma1: %s" % sigma1
print "sigma2: %s" % sigma2

data1 = np.random.normal(mean1,sigma,n1)
data2 = np.random.normal(mean2,sigma,n2)

data = np.concatenate([data1 , data2])
#np.random.shuffle(data)

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, xlabel='x', ylabel='y', title='mixture of 2 gaussians')
ax.plot(range(0,n1+n2), data, 'x', label='data')
plt.legend(loc=0)

with pm.Model() as model:
    #priors
    p = pm.Uniform( "p", 0 , 1) #this is the fraction that come from mean1 vs mean2

    ber = pm.Bernoulli( "ber", p = p) # produces 1 with proportion p.

    precision = pm.Gamma('precision', alpha=0.1, beta=0.1)

    mean1 = pm.Normal( "mean1", 0, 0.01 ) #better to use normals versus Uniforms (unless you are certain the value is  truncated at 0 and 200 
    mean2 = pm.Normal( "mean2", 0, 0.01 )

    mean = pm.Deterministic('mean', ber*mean1 + (1-ber)*mean2)

    process = pm.Normal('process', mu=mean, tau=precision, observed=data)

    # inference
    step = pm.Metropolis()
    trace = pm.sample(10000, step)
    pm.traceplot(trace)
Error:

sigma1: 3.16227766017
sigma2: 1.69030850946
 [-----------------100%-----------------] 10000 of 10000 complete in 4.4 sec
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-10-eb728824de83> in <module>()
     44     step = pm.Metropolis()
     45     trace = pm.sample(10000, step)
---> 46     pm.traceplot(trace)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in traceplot(trace, vars, figsize, lines, combined, grid)
     70                 ax[i, 0].set_xlim(mind - .5, maxd + .5)
     71             else:
---> 72                 kdeplot_op(ax[i, 0], d)
     73             ax[i, 0].set_title(str(v))
     74             ax[i, 0].grid(grid)

/usr/lib/python2.7/site-packages/pymc-3.0-py2.7.egg/pymc/plots.pyc in kdeplot_op(ax, data)
     94     for i in range(data.shape[1]):
     95         d = data[:, i]
---> 96         density = kde.gaussian_kde(d)
     97         l = np.min(d)
     98         u = np.max(d)

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in __init__(self, dataset, bw_method)
    186 
    187         self.d, self.n = self.dataset.shape
--> 188         self.set_bandwidth(bw_method=bw_method)
    189 
    190     def evaluate(self, points):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in set_bandwidth(self, bw_method)
    496             raise ValueError(msg)
    497 
--> 498         self._compute_covariance()
    499 
    500     def _compute_covariance(self):

/usr/lib64/python2.7/site-packages/scipy/stats/kde.pyc in _compute_covariance(self)
    507             self._data_covariance = atleast_2d(np.cov(self.dataset, rowvar=1,
    508                                                bias=False))
--> 509             self._data_inv_cov = linalg.inv(self._data_covariance)
    510 
    511         self.covariance = self._data_covariance * self.factor**2

/usr/lib64/python2.7/site-packages/scipy/linalg/basic.pyc in inv(a, overwrite_a, check_finite)
    381         inv_a, info = getri(lu, piv, lwork=lwork, overwrite_lu=1)
    382     if info > 0:
--> 383         raise LinAlgError("singular matrix")
    384     if info < 0:
    385         raise ValueError('illegal value in %d-th argument of internal '

LinAlgError: singular matrix

Thanks to Fonnesbeck for answering this on the GitHub issue tracker.

Here is the updated code:

with pm.Model() as model:
    # priors
    p = pm.Uniform("p", 0, 1)  # the fraction of the data that comes from mean1 vs mean2

    ber = pm.Bernoulli("ber", p=p, shape=len(data))  # produces 1 with proportion p

    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2

    mean = pm.Normal("mean", 0, 0.01, shape=2)

    mu = pm.Deterministic('mu', mean[ber])

    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

with model:
    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.BinaryMetropolis([ber])
    trace = pm.sample(10000, [step1, step2])

You need to use BinaryMetropolis when inferring a Bernoulli random variable.
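
As an aside on the original error: the traceback shows scipy's gaussian_kde inverting the covariance of the sampled values, so the "singular matrix" LinAlgError appears when a plotted variable never moves during sampling (zero variance). A minimal workaround sketch, assuming the trace from the updated model above and the `vars` keyword visible in this version's traceplot signature in the traceback:

pm.traceplot(trace, vars=['p', 'sigma', 'mean'])  # plot only the continuous variables; skip the discrete per-observation 'ber' indicators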

I know this question is old, but I have been trying out different PyMC3 usage examples to get comfortable with modeling in PyMC3. The answer given above does not work in the current release of PyMC3 (it did not correctly separate the two means). The minimal changes I had to make to get it working were:

1)
# mean = pm.Normal("mean", 0, 0.01, shape=2 )
mean = pm.Uniform('mean', 15, 60, shape=2)
2)
# step2 = pm.BinaryMetropolis([ber])
step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])
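
For reference, here is a sketch of the whole updated model with those two changes folded in. This just stitches together the snippets above; it assumes `data` from the question and that the PyMC3 package is imported as `pm`:

with pm.Model() as model:
    p = pm.Uniform("p", 0, 1)                        # mixture weight
    ber = pm.Bernoulli("ber", p=p, shape=len(data))  # per-observation component indicator

    sigma = pm.Uniform('sigma', 0, 100)
    precision = sigma**-2

    mean = pm.Uniform('mean', 15, 60, shape=2)       # change 1: Uniform instead of a very flat Normal
    mu = pm.Deterministic('mu', mean[ber])

    process = pm.Normal('process', mu=mu, tau=precision, observed=data)

with model:
    step1 = pm.Metropolis([p, sigma, mean])
    step2 = pm.ElemwiseCategorical(vars=[ber], values=[0, 1])  # change 2: ElemwiseCategorical instead of BinaryMetropolis
    trace = pm.sample(10000, [step1, step2])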

Just in case anyone else runs into a similar problem.

A simpler and faster version is the following:

import theano.tensor as tt  # needed for tt.stack below

with pm.Model() as model2:
    p = pm.Beta("p", 1., 1.)
    means = pm.Uniform('mean', 15, 60, shape=2)
    sigma = pm.Uniform('sigma', 0, 20, testval=5)

    process = pm.NormalMixture('obs', tt.stack([p, 1-p]), means, sd=sigma, observed=data)

with model2:
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)
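
A small follow-up sketch for checking what the sampler found (it assumes the `trace` from `model2` above; the 2000-draw burn-in is an arbitrary choice):

burned = trace[2000:]               # drop the first 2000 draws as burn-in
print(burned['p'].mean())           # posterior mean of the mixture weight
print(burned['mean'].mean(axis=0))  # posterior means of the two component means
print(burned['sigma'].mean())       # posterior mean of the shared sigma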
