Python 使用pymc3分类分布和具有多个试验的数据

Python 使用pymc3分类分布和具有多个试验的数据,python,bayesian,pymc,pymc3,Python,Bayesian,Pymc,Pymc3,我试图在一个实验的几个试验(或重复运行)中检测切换点(可能会因试验而异)。下面是一个我正在尝试做的虚拟示例: import numpy as np import pymc3 as pm import theano.tensor as tt # Data has 2 trials/independent runs, each with 500 observations (which can be 0, 1 or 2) # Data switches from uniform probabilit

我试图在一个实验的几个试验(或重复运行)中检测切换点(可能会因试验而异)。下面是一个我正在尝试做的虚拟示例:

import numpy as np
import pymc3 as pm
import theano.tensor as tt

# Data has 2 trials/independent runs, each with 500 observations (which can be 0, 1 or 2)
# Data switches from uniform probabilities to (1/6,2/3,1/6) at 250 on both trials
data = np.zeros((2,500))
data[0, :250] = np.random.choice(np.arange(3), size = 250, p = [1/3, 1/3, 1/3])
data[0, 250:] = np.random.choice(np.arange(3), size = 250, p =  [1/6, 2/3, 1/6])
data[1, :250] = np.random.choice(np.arange(3), size = 250, p =  [1/3, 1/3, 1/3])
data[1, 250:] = np.random.choice(np.arange(3), size = 250, p =  [1/6, 2/3, 1/6])

with pm.Model() as model:
    # 2 emission 'states', each with 3 possible emissions
    p = pm.Dirichlet('p', np.ones(3), shape = (2, 3)) 
    switchpoint = pm.DiscreteUniform('switchpoint', lower = 0, upper = 500)
    state = []  

    for i in range(2):
        state.append(tt.switch(switchpoint >= np.arange(500), 0, 1))
    obs = pm.Categorical('obs', p = p[state], observed = data)
然而,分类分布似乎不能处理同一数据集中的多个重复试验,当分布试图将概率总和为1时,我得到一个错误:

ValueError                                Traceback (most recent call last)
<ipython-input-27-7ccdbe0bf9c9> in <module>()
      1 with model:
----> 2         obs = pm.Categorical('obs', p = p[state], observed = data)

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
     34                 raise TypeError("observed needs to be data but got: {}".format(type(data)))
     35             total_size = kwargs.pop('total_size', None)
---> 36             dist = cls.dist(*args, **kwargs)
     37             return model.Var(name, dist, data, total_size)
     38         else:

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
     45     def dist(cls, *args, **kwargs):
     46         dist = object.__new__(cls)
---> 47         dist.__init__(*args, **kwargs)
     48         return dist
     49 

/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/discrete.py in __init__(self, p, *args, **kwargs)
    433             self.k = tt.shape(p)[-1]
    434         self.p = p = tt.as_tensor_variable(p)
--> 435         self.p = (p.T / tt.sum(p, -1)).T
    436         self.mode = tt.argmax(p)
    437 

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/tensor/var.py in __truediv__(self, other)
    202 
    203     def __truediv__(self, other):
--> 204         return theano.tensor.basic.true_div(self, other)
    205 
    206     def __floordiv__(self, other):

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    666                 thunk.outputs = [storage_map[v] for v in node.outputs]
    667 
--> 668                 required = thunk()
    669                 assert not required  # We provided all inputs
    670 

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in rval()
    881 
    882         def rval():
--> 883             fill_storage()
    884             for o in node.outputs:
    885                 compute_map[o][0] = True

/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/cc.py in __call__(self)
   1705                 print(self.error_storage, file=sys.stderr)
   1706                 raise
-> 1707             reraise(exc_type, exc_value, exc_trace)
   1708 
   1709 

/home/narendra/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
    684         if value.__traceback__ is not tb:
    685             raise value.with_traceback(tb)
--> 686         raise value
    687 
    688 else:

ValueError: Input dimension mis-match. (input[0].shape[1] = 500, input[1].shape[1] = 2)
ValueError回溯(最近一次调用)
在()
1、型号:
---->2 obs=pm.分类('obs',p=p[状态],观察值=数据)
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in新
34 raise TypeError(“观察到的需要是数据,但得到:{}”。格式(类型(数据)))
35总尺寸=kwargs.pop(“总尺寸”,无)
--->36 dist=cls.dist(*args,**kwargs)
37返回模型.Var(名称、地区、数据、总大小)
38其他:
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distributions/distribution.py(cls,*args,**kwargs)
45 def区(cls、*ARG、**kwargs):
46 dist=对象。\uuuu新建\uuuuuu(cls)
--->47区初始值(*args,**kwargs)
48返回区
49
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/discrete.py in uuuuuu init_uuuu(self,p,*args,**kwargs)
433 self.k=tt.shape(p)[-1]
434 self.p=p=tt.as\u tensor\u变量(p)
-->435 self.p=(p.T/tt.sum(p,-1)).T
436 self.mode=tt.argmax(p)
437
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/tensor/var.py in\uuuu\u\u truediv\uuuu(self,other)
202
203 def uu truediv uu(自身、其他):
-->204返回无张量基本真div(self,other)
205
206定义地板(自身、其他):
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in.\uuuuu调用(self,*输入,**kwargs)
666 thunk.outputs=[节点.outputs中v的存储映射[v]
667
-->668必需=thunk()
669断言不需要#我们提供了所有输入
670
/rval()中的home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py
881
882 def rval():
-->883填充存储()
884用于节点中的o。输出:
885计算映射[o][0]=真
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/cc.py in\uuuuuuuu call\uuuuuuu(self)
1705打印(self.error\u存储,file=sys.stderr)
1706提高
->1707重放(exc_类型、exc_值、exc_跟踪)
1708
1709
/home/narendra/anaconda3/lib/python3.6/site-packages/six.py在reraise中(tp,value,tb)
684如果值.\uuuu回溯\uuuuuu不是tb:
685带回溯(tb)的提升值
-->686提高价值
687
688其他:
ValueError:输入维度不匹配。(输入[0]。形状[1]=500,输入[1]。形状[1]=2)
在pymc3的几个试验中,有没有办法处理分类排放?单独分析每个试验对我来说不是一个选择,因为我想用实际数据构建一个层次模型,在多次重复试验中在不同类型的状态之间切换,每个状态都有其自己的分类发射概率