Python 使用pymc3分类分布和具有多个试验的数据
我试图在一个实验的几个试验(或重复运行)中检测切换点(可能会因试验而异)。下面是一个我正在尝试做的虚拟示例:Python 使用pymc3分类分布和具有多个试验的数据,python,bayesian,pymc,pymc3,Python,Bayesian,Pymc,Pymc3,我试图在一个实验的几个试验(或重复运行)中检测切换点(可能会因试验而异)。下面是一个我正在尝试做的虚拟示例: import numpy as np import pymc3 as pm import theano.tensor as tt # Data has 2 trials/independent runs, each with 500 observations (which can be 0, 1 or 2) # Data switches from uniform probabilit
import numpy as np
import pymc3 as pm
import theano.tensor as tt
# Data has 2 trials/independent runs, each with 500 observations (which can be 0, 1 or 2)
# Data switches from uniform probabilities to (1/6,2/3,1/6) at 250 on both trials
data = np.zeros((2,500))
data[0, :250] = np.random.choice(np.arange(3), size = 250, p = [1/3, 1/3, 1/3])
data[0, 250:] = np.random.choice(np.arange(3), size = 250, p = [1/6, 2/3, 1/6])
data[1, :250] = np.random.choice(np.arange(3), size = 250, p = [1/3, 1/3, 1/3])
data[1, 250:] = np.random.choice(np.arange(3), size = 250, p = [1/6, 2/3, 1/6])
with pm.Model() as model:
# 2 emission 'states', each with 3 possible emissions
p = pm.Dirichlet('p', np.ones(3), shape = (2, 3))
switchpoint = pm.DiscreteUniform('switchpoint', lower = 0, upper = 500)
state = []
for i in range(2):
state.append(tt.switch(switchpoint >= np.arange(500), 0, 1))
obs = pm.Categorical('obs', p = p[state], observed = data)
然而,分类分布似乎不能处理同一数据集中的多个重复试验,当分布试图将概率总和为1时,我得到一个错误:
ValueError Traceback (most recent call last)
<ipython-input-27-7ccdbe0bf9c9> in <module>()
1 with model:
----> 2 obs = pm.Categorical('obs', p = p[state], observed = data)
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
34 raise TypeError("observed needs to be data but got: {}".format(type(data)))
35 total_size = kwargs.pop('total_size', None)
---> 36 dist = cls.dist(*args, **kwargs)
37 return model.Var(name, dist, data, total_size)
38 else:
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
45 def dist(cls, *args, **kwargs):
46 dist = object.__new__(cls)
---> 47 dist.__init__(*args, **kwargs)
48 return dist
49
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/discrete.py in __init__(self, p, *args, **kwargs)
433 self.k = tt.shape(p)[-1]
434 self.p = p = tt.as_tensor_variable(p)
--> 435 self.p = (p.T / tt.sum(p, -1)).T
436 self.mode = tt.argmax(p)
437
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/tensor/var.py in __truediv__(self, other)
202
203 def __truediv__(self, other):
--> 204 return theano.tensor.basic.true_div(self, other)
205
206 def __floordiv__(self, other):
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
666 thunk.outputs = [storage_map[v] for v in node.outputs]
667
--> 668 required = thunk()
669 assert not required # We provided all inputs
670
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in rval()
881
882 def rval():
--> 883 fill_storage()
884 for o in node.outputs:
885 compute_map[o][0] = True
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/cc.py in __call__(self)
1705 print(self.error_storage, file=sys.stderr)
1706 raise
-> 1707 reraise(exc_type, exc_value, exc_trace)
1708
1709
/home/narendra/anaconda3/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
685 raise value.with_traceback(tb)
--> 686 raise value
687
688 else:
ValueError: Input dimension mis-match. (input[0].shape[1] = 500, input[1].shape[1] = 2)
ValueError回溯(最近一次调用)
在()
1、型号:
---->2 obs=pm.分类('obs',p=p[状态],观察值=数据)
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distribution.py in新
34 raise TypeError(“观察到的需要是数据,但得到:{}”。格式(类型(数据)))
35总尺寸=kwargs.pop(“总尺寸”,无)
--->36 dist=cls.dist(*args,**kwargs)
37返回模型.Var(名称、地区、数据、总大小)
38其他:
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/distributions/distribution.py(cls,*args,**kwargs)
45 def区(cls、*ARG、**kwargs):
46 dist=对象。\uuuu新建\uuuuuu(cls)
--->47区初始值(*args,**kwargs)
48返回区
49
/home/narendra/anaconda3/lib/python3.6/site-packages/pymc3-3.0-py3.6.egg/pymc3/distributions/discrete.py in uuuuuu init_uuuu(self,p,*args,**kwargs)
433 self.k=tt.shape(p)[-1]
434 self.p=p=tt.as\u tensor\u变量(p)
-->435 self.p=(p.T/tt.sum(p,-1)).T
436 self.mode=tt.argmax(p)
437
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/tensor/var.py in\uuuu\u\u truediv\uuuu(self,other)
202
203 def uu truediv uu(自身、其他):
-->204返回无张量基本真div(self,other)
205
206定义地板(自身、其他):
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in.\uuuuu调用(self,*输入,**kwargs)
666 thunk.outputs=[节点.outputs中v的存储映射[v]
667
-->668必需=thunk()
669断言不需要#我们提供了所有输入
670
/rval()中的home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/op.py
881
882 def rval():
-->883填充存储()
884用于节点中的o。输出:
885计算映射[o][0]=真
/home/narendra/anaconda3/lib/python3.6/site-packages/theano/gof/cc.py in\uuuuuuuu call\uuuuuuu(self)
1705打印(self.error\u存储,file=sys.stderr)
1706提高
->1707重放(exc_类型、exc_值、exc_跟踪)
1708
1709
/home/narendra/anaconda3/lib/python3.6/site-packages/six.py在reraise中(tp,value,tb)
684如果值.\uuuu回溯\uuuuuu不是tb:
685带回溯(tb)的提升值
-->686提高价值
687
688其他:
ValueError:输入维度不匹配。(输入[0]。形状[1]=500,输入[1]。形状[1]=2)
在pymc3的几个试验中,有没有办法处理分类排放?单独分析每个试验对我来说不是一个选择,因为我想用实际数据构建一个层次模型,在多次重复试验中在不同类型的状态之间切换,每个状态都有其自己的分类发射概率