Python "维数错误"(Wrong number of dimensions):Theano LSTM 中的错误

Python ';尺寸编号错误';Theano-LSTM中的错误,python,numpy,deep-learning,theano,lstm,Python,Numpy,Deep Learning,Theano,Lstm,我正在尝试为自己的数据重新创建LSTM示例 Traceback (most recent call last): File "lstm.py", line 124, in <module> train_rnn(train_data) File "lstm.py", line 120, in train_rnn train_cost = learn_rnn_fn(i, o) File "/usr/local/lib/python3.5/site-packag

我正在尝试为自己的数据重新创建LSTM示例

Traceback (most recent call last):
  File "lstm.py", line 124, in <module>
    train_rnn(train_data)
  File "lstm.py", line 120, in train_rnn
    train_cost = learn_rnn_fn(i, o)
  File "/usr/local/lib/python3.5/site-packages/theano/compile/function_module.py", line 788, in __call__
    allow_downcast=s.allow_downcast)
  File "/usr/local/lib/python3.5/site-packages/theano/tensor/type.py", line 178, in filter
    data.shape))
TypeError: ('Bad input argument to theano function with name "lstm.py:108" at index 0 (0-based)', 'Wrong number of dimensions: expected 2, got 0 with shape ().')
train_data
在该转换后成为二维numpy矩阵

def sigma(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, built with ``T.exp``."""
    return 1 / (1 + T.exp(-x))


# Non-linearity used by the LSTM step for the cell candidate and for the
# cell state when producing the hidden output.
act = T.tanh

def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_ho, W_cy, b_o, W_hy, b_y):
    """Compute one time step of the LSTM; intended as the ``fn`` of ``theano.scan``.

    Parameters
    ----------
    x_t
        Input at the current time step.
    h_tm1, c_tm1
        Hidden output and cell state produced by the previous step.
    W_xi, W_hi, W_ci, b_i
        Input-gate weights (from input, hidden output, cell state) and bias.
    W_xf, W_hf, W_cf, b_f
        Forget-gate weights and bias.
    W_xc, W_hc, b_c
        Cell-candidate weights and bias.
    W_xy, W_ho, W_cy, b_o
        Output-gate weights (from input, hidden output, new cell state) and
        bias.  ``W_xy`` / ``W_cy`` are the input->gate and cell->gate matrices.
    W_hy, b_y
        Weights and bias of the read-out applied to the hidden output.

    Returns
    -------
    list
        ``[h_t, c_t, y_t]``: new hidden output, new cell state, step output.
    """
    # The signature names the output-gate matrices W_xy / W_cy, but the gate
    # equation is written with W_xo / W_co.  Bind the arguments to those names
    # so the step uses what it is handed instead of silently resolving
    # W_xo / W_co to module-level variables.
    W_xo, W_co = W_xy, W_cy

    i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
    f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
    c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
    # The output gate looks at the freshly computed cell state c_t, not c_tm1.
    o_t = sigma(theano.dot(x_t, W_xo) + theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co) + b_o)
    h_t = o_t * act(c_t)
    y_t = sigma(theano.dot(h_t, W_hy) + b_y)
    return [h_t, c_t, y_t]

def sample_weights(sizeX, sizeY):
    """Return a random ``(sizeX, sizeY)`` matrix scaled by its largest singular value.

    Every row is drawn independently from the uniform distribution on
    [-1, 1) and stored in the module-level ``dtype``.  The matrix is then
    divided by the first singular value reported by ``np.linalg.svd``.
    """
    # Draw row by row so the random stream is consumed in row order.
    rows = [np.random.uniform(low=-1., high=1., size=(sizeY,)) for _ in range(sizeX)]
    weights = np.array(rows, dtype=dtype).reshape(sizeX, sizeY)

    # svd returns (u, s, vh); only the singular values are needed here.
    singular_values = np.linalg.svd(weights)[1]
    return weights / singular_values[0]

# Layer sizes: input width, one shared width for the hidden state and every
# gate, and output width.
n_in = 1
n_hidden = n_i = n_c = n_o = n_f = 10
n_y = 1

# Input gate: weights from input, previous hidden output, previous cell state; bias.
W_xi = theano.shared(sample_weights(n_in, n_i))
W_hi = theano.shared(sample_weights(n_hidden, n_i))  
W_ci = theano.shared(sample_weights(n_c, n_i))  
b_i = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_i)))
# Forget gate: same layout; its bias is drawn from [0, 1) rather than [-0.5, 0.5).
W_xf = theano.shared(sample_weights(n_in, n_f)) 
W_hf = theano.shared(sample_weights(n_hidden, n_f))
W_cf = theano.shared(sample_weights(n_c, n_f))
b_f = theano.shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_f)))
# Cell candidate: weights from input and previous hidden output; zero bias.
W_xc = theano.shared(sample_weights(n_in, n_c))  
W_hc = theano.shared(sample_weights(n_hidden, n_c))
b_c = theano.shared(np.zeros(n_c, dtype=dtype))
# Output gate: weights from input, previous hidden output, new cell state; bias.
W_xo = theano.shared(sample_weights(n_in, n_o))
W_ho = theano.shared(sample_weights(n_hidden, n_o))
W_co = theano.shared(sample_weights(n_c, n_o))
b_o = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_o)))
# Read-out from the hidden output to the step output; zero bias.
W_hy = theano.shared(sample_weights(n_hidden, n_y))
b_y = theano.shared(np.zeros(n_y, dtype=dtype))
# Initial cell state (all zeros) and the initial hidden output derived from it.
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
h0 = T.tanh(c0)

# NOTE(review): params is not referenced again in this excerpt.
params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y, c0]
# Symbolic inputs: both are declared as matrices, so the compiled function
# only accepts 2-D arrays for the input sequence and for the target.
v = T.matrix(dtype=dtype)
target = T.matrix(dtype=dtype)

# Apply one_lstm_step along the first axis of v.  h_t and c_t are fed back
# into the next step (initialised with h0 / c0); y_t has no initial value
# (None) and is only collected.
[h_vals, _, y_vals], _ = theano.scan(fn=one_lstm_step, 
                                    sequences = dict(input=v, taps=[0]), 
                                    outputs_info = [h0, c0, None ], # corresponds to return type of fn
                                    non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y] )


# Mean binary cross-entropy between target and the collected step outputs.
cost = -T.mean(target * T.log(y_vals)+ (1.- target) * T.log(1. - y_vals))

# NOTE(review): the update list is empty, so calling learn_rnn_fn only
# evaluates the cost; no shared variable is modified by it.
updates=[]

learn_rnn_fn = theano.function(inputs = [v, target],
                                outputs = cost,
                                updates = updates)

nb_epochs=1
# np.ndarray(...) leaves the buffer uninitialised; train_rnn assigns one entry per epoch.
train_errors = np.ndarray(nb_epochs)

def train_rnn(train_data):
    """Feed randomly chosen samples of *train_data* to ``learn_rnn_fn``.

    For each of ``nb_epochs`` epochs, ``len(train_data)`` rows are picked at
    random (with replacement).  Each row is unpacked into an input ``i`` and
    a target ``o``, and the costs returned by ``learn_rnn_fn`` are summed
    into ``train_errors[epoch]``.

    Parameters
    ----------
    train_data
        Indexable collection whose items unpack into ``(input, target)``.
    """
    for x in range(nb_epochs):
        error = 0.
        print(train_data)
        for _ in range(len(train_data)):
            index = np.random.randint(0, len(train_data))
            i, o = train_data[index]
            # learn_rnn_fn is compiled for two matrix inputs.  Passing the
            # raw values fails with "Wrong number of dimensions: expected 2,
            # got 0" when they are scalars, so shape them as
            # (steps, n_in) / (steps, n_y) in the graph's dtype first.
            seq_in = np.asarray(i, dtype=dtype).reshape(-1, n_in)
            seq_out = np.asarray(o, dtype=dtype).reshape(-1, n_y)
            train_cost = learn_rnn_fn(seq_in, seq_out)
            error += train_cost
        train_errors[x] = error


train_rnn(train_data)

调试表明变量
i
o
的形状不合适。我试图重塑数据,但这会导致其他数据类型问题。

函数 create_dataset 返回一个 numpy 数组。然而,当调用 i, o = train_data[index] 时,您试图一次取得两个值。例如,您可以先把该值赋给一个临时变量,然后根据需要再拆分它。

编辑
变量
i
o
与函数
learn_rnn_fn
所期望的类型不同。它需要numpy矩阵。

函数 create_dataset 返回一个 numpy 数组。然而,当调用 i, o = train_data[index] 时,您试图一次取得两个值。例如,您可以先把该值赋给一个临时变量,然后根据需要再拆分它。

编辑
变量
i
o
与函数
learn_rnn_fn
所期望的类型不同。它需要numpy矩阵。

你试过重现这个错误吗?事实上,我可以得到
i
o
的形状,但问题在于
i
o
的格式我无法调试(我应该做什么重塑)。我现在没有Theano,你能打印
i
o
的形状吗?尽管你的回答不完全正确,但它让我重新思考,发现自己犯了一个愚蠢的错误。我的模型需要矩阵输入,而我提供的是标量。所以我把
train_cost = learn_rnn_fn(i, o)
改为
train_cost = learn_rnn_fn(np.matrix(i), np.matrix([o]))
。你能更新你的答案吗,这样我就可以接受了。否则,我将需要写一个新的答案。完成!很高兴你发现了你的错误!你试过重现这个错误吗?事实上,我可以得到
i
o
的形状,但问题在于
i
o
的格式我无法调试(我应该做什么重塑)。我现在没有Theano,你能打印
i
o
的形状吗?尽管你的回答不完全正确,但它让我重新思考,发现自己犯了一个愚蠢的错误。我的模型需要矩阵输入,而我提供的是标量。所以我把
train_cost = learn_rnn_fn(i, o)
改为
train_cost = learn_rnn_fn(np.matrix(i), np.matrix([o]))
。你能更新你的答案吗,这样我就可以接受了。否则,我将需要写一个新的答案。完成!很高兴你发现了你的错误!
def sigma(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, built with ``T.exp``."""
    return 1 / (1 + T.exp(-x))


# Non-linearity used by the LSTM step for the cell candidate and for the
# cell state when producing the hidden output.
act = T.tanh

def one_lstm_step(x_t, h_tm1, c_tm1, W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xy, W_ho, W_cy, b_o, W_hy, b_y):
    """Compute one time step of the LSTM; intended as the ``fn`` of ``theano.scan``.

    Parameters
    ----------
    x_t
        Input at the current time step.
    h_tm1, c_tm1
        Hidden output and cell state produced by the previous step.
    W_xi, W_hi, W_ci, b_i
        Input-gate weights (from input, hidden output, cell state) and bias.
    W_xf, W_hf, W_cf, b_f
        Forget-gate weights and bias.
    W_xc, W_hc, b_c
        Cell-candidate weights and bias.
    W_xy, W_ho, W_cy, b_o
        Output-gate weights (from input, hidden output, new cell state) and
        bias.  ``W_xy`` / ``W_cy`` are the input->gate and cell->gate matrices.
    W_hy, b_y
        Weights and bias of the read-out applied to the hidden output.

    Returns
    -------
    list
        ``[h_t, c_t, y_t]``: new hidden output, new cell state, step output.
    """
    # The signature names the output-gate matrices W_xy / W_cy, but the gate
    # equation is written with W_xo / W_co.  Bind the arguments to those names
    # so the step uses what it is handed instead of silently resolving
    # W_xo / W_co to module-level variables.
    W_xo, W_co = W_xy, W_cy

    i_t = sigma(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi) + theano.dot(c_tm1, W_ci) + b_i)
    f_t = sigma(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf) + theano.dot(c_tm1, W_cf) + b_f)
    c_t = f_t * c_tm1 + i_t * act(theano.dot(x_t, W_xc) + theano.dot(h_tm1, W_hc) + b_c)
    # The output gate looks at the freshly computed cell state c_t, not c_tm1.
    o_t = sigma(theano.dot(x_t, W_xo) + theano.dot(h_tm1, W_ho) + theano.dot(c_t, W_co) + b_o)
    h_t = o_t * act(c_t)
    y_t = sigma(theano.dot(h_t, W_hy) + b_y)
    return [h_t, c_t, y_t]

def sample_weights(sizeX, sizeY):
    """Return a random ``(sizeX, sizeY)`` matrix scaled by its largest singular value.

    Every row is drawn independently from the uniform distribution on
    [-1, 1) and stored in the module-level ``dtype``.  The matrix is then
    divided by the first singular value reported by ``np.linalg.svd``.
    """
    # Draw row by row so the random stream is consumed in row order.
    rows = [np.random.uniform(low=-1., high=1., size=(sizeY,)) for _ in range(sizeX)]
    weights = np.array(rows, dtype=dtype).reshape(sizeX, sizeY)

    # svd returns (u, s, vh); only the singular values are needed here.
    singular_values = np.linalg.svd(weights)[1]
    return weights / singular_values[0]

# Layer sizes: input width, one shared width for the hidden state and every
# gate, and output width.
n_in = 1
n_hidden = n_i = n_c = n_o = n_f = 10
n_y = 1

# Input gate: weights from input, previous hidden output, previous cell state; bias.
W_xi = theano.shared(sample_weights(n_in, n_i))
W_hi = theano.shared(sample_weights(n_hidden, n_i))  
W_ci = theano.shared(sample_weights(n_c, n_i))  
b_i = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_i)))
# Forget gate: same layout; its bias is drawn from [0, 1) rather than [-0.5, 0.5).
W_xf = theano.shared(sample_weights(n_in, n_f)) 
W_hf = theano.shared(sample_weights(n_hidden, n_f))
W_cf = theano.shared(sample_weights(n_c, n_f))
b_f = theano.shared(np.cast[dtype](np.random.uniform(0, 1.,size = n_f)))
# Cell candidate: weights from input and previous hidden output; zero bias.
W_xc = theano.shared(sample_weights(n_in, n_c))  
W_hc = theano.shared(sample_weights(n_hidden, n_c))
b_c = theano.shared(np.zeros(n_c, dtype=dtype))
# Output gate: weights from input, previous hidden output, new cell state; bias.
W_xo = theano.shared(sample_weights(n_in, n_o))
W_ho = theano.shared(sample_weights(n_hidden, n_o))
W_co = theano.shared(sample_weights(n_c, n_o))
b_o = theano.shared(np.cast[dtype](np.random.uniform(-0.5,.5,size = n_o)))
# Read-out from the hidden output to the step output; zero bias.
W_hy = theano.shared(sample_weights(n_hidden, n_y))
b_y = theano.shared(np.zeros(n_y, dtype=dtype))
# Initial cell state (all zeros) and the initial hidden output derived from it.
c0 = theano.shared(np.zeros(n_hidden, dtype=dtype))
h0 = T.tanh(c0)

# NOTE(review): params is not referenced again in this excerpt.
params = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y, c0]
# Symbolic inputs: both are declared as matrices, so the compiled function
# only accepts 2-D arrays for the input sequence and for the target.
v = T.matrix(dtype=dtype)
target = T.matrix(dtype=dtype)

# Apply one_lstm_step along the first axis of v.  h_t and c_t are fed back
# into the next step (initialised with h0 / c0); y_t has no initial value
# (None) and is only collected.
[h_vals, _, y_vals], _ = theano.scan(fn=one_lstm_step, 
                                    sequences = dict(input=v, taps=[0]), 
                                    outputs_info = [h0, c0, None ], # corresponds to return type of fn
                                    non_sequences = [W_xi, W_hi, W_ci, b_i, W_xf, W_hf, W_cf, b_f, W_xc, W_hc, b_c, W_xo, W_ho, W_co, b_o, W_hy, b_y] )


# Mean binary cross-entropy between target and the collected step outputs.
cost = -T.mean(target * T.log(y_vals)+ (1.- target) * T.log(1. - y_vals))

# NOTE(review): the update list is empty, so calling learn_rnn_fn only
# evaluates the cost; no shared variable is modified by it.
updates=[]

learn_rnn_fn = theano.function(inputs = [v, target],
                                outputs = cost,
                                updates = updates)

nb_epochs=1
# np.ndarray(...) leaves the buffer uninitialised; train_rnn assigns one entry per epoch.
train_errors = np.ndarray(nb_epochs)

def train_rnn(train_data):
    """Feed randomly chosen samples of *train_data* to ``learn_rnn_fn``.

    For each of ``nb_epochs`` epochs, ``len(train_data)`` rows are picked at
    random (with replacement).  Each row is unpacked into an input ``i`` and
    a target ``o``, and the costs returned by ``learn_rnn_fn`` are summed
    into ``train_errors[epoch]``.

    Parameters
    ----------
    train_data
        Indexable collection whose items unpack into ``(input, target)``.
    """
    for x in range(nb_epochs):
        error = 0.
        print(train_data)
        for _ in range(len(train_data)):
            index = np.random.randint(0, len(train_data))
            i, o = train_data[index]
            # learn_rnn_fn is compiled for two matrix inputs.  Passing the
            # raw values fails with "Wrong number of dimensions: expected 2,
            # got 0" when they are scalars, so shape them as
            # (steps, n_in) / (steps, n_y) in the graph's dtype first.
            seq_in = np.asarray(i, dtype=dtype).reshape(-1, n_in)
            seq_out = np.asarray(o, dtype=dtype).reshape(-1, n_y)
            train_cost = learn_rnn_fn(seq_in, seq_out)
            error += train_cost
        train_errors[x] = error


train_rnn(train_data)