Python 使用RNN和层类在Keras中实现最小LSTMCell

Python 使用RNN和层类在Keras中实现最小LSTMCell,python,tensorflow,keras,lstm,Python,Tensorflow,Keras,Lstm,我正在尝试实现一个简单的LSTMCell,而不使用tf.keras.layers.LSTMCell类中默认实现的“fancy kwargs”,它遵循如下示意图模型。它实际上没有直接的目的,我只是想练习实现一个比示例部分中描述的更复杂的RNNCell。我的代码如下: from keras import Input from keras.layers import Layer, RNN from keras.models import Model import keras.backend as K

我正在尝试实现一个简单的LSTMCell,而不使用tf.keras.layers.LSTMCell类中默认实现的“fancy kwargs”,它遵循如下示意图模型。它实际上没有直接的目的,我只是想练习实现一个比示例部分中描述的更复杂的RNNCell。我的代码如下:

from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K

class CustomLSTMCell(Layer):

    def __init__(self, units, **kwargs):
        self.state_size = units
        super(CustomLSTMCell, self).__init__(**kwargs)

    def build(self, input_shape):

        self.forget_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='forget_w')
        self.forget_b = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='forget_b')

        self.input_w1 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='input_w1')
        self.input_b1 = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='input_b1')
        self.input_w2 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='input_w2')
        self.input_b2 = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='input_b2')

        self.output_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='output_w')
        self.output_b = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='output_b')

        self.built = True

    def merge_with_state(self, inputs):
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)

    def forget_gate(self):
        forget = K.dot(self.forget_w, self.stateH) + self.forget_b
        forget = K.sigmoid(forget)
        self.stateC = self.stateC * forget

    def input_gate(self):
        candidate = K.dot(self.input_w1, self.stateH) + self.input_b1
        candidate = K.tanh(candidate)

        amount = K.dot(self.input_w2, self.stateH) + self.input_b2
        amount = K.tanh(amount)

        self.stateC = self.stateC + amount * candidate

    def output_gate(self):
        self.stateH = K.dot(self.output_w, self.stateH) + self.output_b
        self.stateH = K.sigmoid(self.stateH)

        self.stateH = self.stateH * K.tanh(self.stateC)

    def call(self, inputs, states):

        self.stateH = states[0]
        self.stateC = states[1]

        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()

        return self.stateH, [self.stateH, self.stateC]

# Testing
inp = Input(shape=(None, 3))
lstm = RNN(CustomLSTMCell(10))(inp)

model = Model(inputs=inp, outputs=lstm)
inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
pred = model.predict(inp_value)
print(pred)
 from keras import Input
 from keras.layers import Layer, RNN
 from keras.models import Model
 import keras.backend as K

 class CustomLSTMCell(Layer):

     def __init__(self, units, **kwargs):
         self.state_size = [units, units]
         super(CustomLSTMCell, self).__init__(**kwargs)

     def build(self, input_shape):

         self.forget_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='forget_w')
         self.forget_b = self.add_weight(shape=(1, self.state_size[0]),
                                         initializer='uniform',
                                         name='forget_b')

         self.input_w1 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='input_w1')
         self.input_b1 = self.add_weight(shape=(1, self.state_size[0]),
                                         initializer='uniform',
                                         name='input_b1')
         self.input_w2 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='input_w2')
         self.input_b2 = self.add_weight(shape=(1, self.state_size[0],),
                                         initializer='uniform',
                                         name='input_b2')

         self.output_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='output_w')
         self.output_b = self.add_weight(shape=(1, self.state_size[0],),
                                         initializer='uniform',
                                         name='output_b')

         self.built = True

     def merge_with_state(self, inputs):
         self.stateH = K.concatenate([self.stateH, inputs], axis=-1)

     def forget_gate(self):        
         forget = K.batch_dot(self.forget_w, self.stateH) + self.forget_b
         forget = K.sigmoid(forget)
         self.stateC = self.stateC * forget

     def input_gate(self):
         candidate = K.batch_dot(self.input_w1, self.stateH) + self.input_b1
         candidate = K.tanh(candidate)

         amount = K.batch_dot(self.input_w2, self.stateH) + self.input_b2
         amount = K.sigmoid(amount)

         self.stateC = self.stateC + amount * candidate

     def output_gate(self):
         self.stateH = K.batch_dot(self.output_w, self.stateH) + self.output_b
         self.stateH = K.sigmoid(self.stateH)

         self.stateH = self.stateH * K.tanh(self.stateC)

     def call(self, inputs, states):

         self.stateH = states[0]
         self.stateC = states[1]

         self.merge_with_state(inputs)
         self.forget_gate()
         self.input_gate()
         self.output_gate()

         return self.stateH, [self.stateH, self.stateC]

 inp = Input(shape=(None, 3))
 lstm = RNN(CustomLSTMCell(10))(inp)

 model = Model(inputs=inp, outputs=lstm)
 inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
 pred = model.predict(inp_value)
 print(pred)
但是,当我尝试测试它时,出现了一个异常,并显示以下消息:

IndexError: tuple index out of range
中,在我为
self.stateC
设置值的行中调用
函数。在这里,我认为最初
函数调用的
状态
参数是一个张量,而不是张量列表,所以这就是我出错的原因。因此,我在类
\uuu init\uuuu
中添加了一个
self.ready\u called=False
行,并在
调用
函数中添加了以下部分:

 if not self.already_called:
        self.stateH = K.ones(self.state_size)
        self.stateC = K.ones(self.state_size)
        self.already_called = True
    else:
        self.stateH = states[0]
        self.stateC = states[1]
 ValueError: Shape must be rank 1 but is rank 2 for 'rnn_1/concat' (op: 'ConcatV2') with input shapes: [10], [?,3], [].
希望它能消除这个问题。这导致
merge_with_state
函数出现另一个错误:

 if not self.already_called:
        self.stateH = K.ones(self.state_size)
        self.stateC = K.ones(self.state_size)
        self.already_called = True
    else:
        self.stateH = states[0]
        self.stateC = states[1]
 ValueError: Shape must be rank 1 but is rank 2 for 'rnn_1/concat' (op: 'ConcatV2') with input shapes: [10], [?,3], [].

我真的不明白,因为RNN层应该只“显示”形状为(3)而不是(无,3)的CustomLSTMCell张量,因为轴0是它应该迭代的轴。在这一点上,我确信我做了一些非常错误的事情,应该向社区寻求帮助。基本上,我的问题是:我的代码出了什么问题?如果“几乎所有东西”都出了问题,那么我应该如何从头开始实现LSTMCell?

好的,看来我成功地解决了这个问题。事实证明,阅读文档总是很有用的,在本例中是。首先,
已经调用的
属性是不必要的,因为问题在于
初始化
函数的第一行:
状态大小
属性应该是一个整数列表,而不仅仅是一个整数,如下所示:
self.state大小=[units,units]
(因为我们需要两个状态来表示大小为
单位的LSTM,而不是一个)。当我纠正它时,我得到了一个不同的错误:
忘记门中的张量在维度上不兼容以进行加法。这是因为RNN一次看到整个批次,而不是批次中的每个元素(因此轴0处的
None
形状)。对它的修正是向轴0处大小为1的每个张量添加一个额外的维度,如下所示:

 self.forget_w = self.add_weight(shape=(1, self.state_size, self.state_size + input_shape[-1]),
                                initializer='uniform',
                                name='forget_w')
我不得不使用
K.batch\u dot
函数来代替dot产品。因此,整个工作代码如下所示:

from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K

class CustomLSTMCell(Layer):

    def __init__(self, units, **kwargs):
        self.state_size = units
        super(CustomLSTMCell, self).__init__(**kwargs)

    def build(self, input_shape):

        self.forget_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='forget_w')
        self.forget_b = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='forget_b')

        self.input_w1 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='input_w1')
        self.input_b1 = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='input_b1')
        self.input_w2 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='input_w2')
        self.input_b2 = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='input_b2')

        self.output_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                    initializer='uniform',
                                    name='output_w')
        self.output_b = self.add_weight(shape=(self.state_size,),
                                    initializer='uniform',
                                    name='output_b')

        self.built = True

    def merge_with_state(self, inputs):
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)

    def forget_gate(self):
        forget = K.dot(self.forget_w, self.stateH) + self.forget_b
        forget = K.sigmoid(forget)
        self.stateC = self.stateC * forget

    def input_gate(self):
        candidate = K.dot(self.input_w1, self.stateH) + self.input_b1
        candidate = K.tanh(candidate)

        amount = K.dot(self.input_w2, self.stateH) + self.input_b2
        amount = K.tanh(amount)

        self.stateC = self.stateC + amount * candidate

    def output_gate(self):
        self.stateH = K.dot(self.output_w, self.stateH) + self.output_b
        self.stateH = K.sigmoid(self.stateH)

        self.stateH = self.stateH * K.tanh(self.stateC)

    def call(self, inputs, states):

        self.stateH = states[0]
        self.stateC = states[1]

        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()

        return self.stateH, [self.stateH, self.stateC]

# Testing
inp = Input(shape=(None, 3))
lstm = RNN(CustomLSTMCell(10))(inp)

model = Model(inputs=inp, outputs=lstm)
inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
pred = model.predict(inp_value)
print(pred)
 from keras import Input
 from keras.layers import Layer, RNN
 from keras.models import Model
 import keras.backend as K

 class CustomLSTMCell(Layer):

     def __init__(self, units, **kwargs):
         self.state_size = [units, units]
         super(CustomLSTMCell, self).__init__(**kwargs)

     def build(self, input_shape):

         self.forget_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='forget_w')
         self.forget_b = self.add_weight(shape=(1, self.state_size[0]),
                                         initializer='uniform',
                                         name='forget_b')

         self.input_w1 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='input_w1')
         self.input_b1 = self.add_weight(shape=(1, self.state_size[0]),
                                         initializer='uniform',
                                         name='input_b1')
         self.input_w2 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='input_w2')
         self.input_b2 = self.add_weight(shape=(1, self.state_size[0],),
                                         initializer='uniform',
                                         name='input_b2')

         self.output_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                         initializer='uniform',
                                         name='output_w')
         self.output_b = self.add_weight(shape=(1, self.state_size[0],),
                                         initializer='uniform',
                                         name='output_b')

         self.built = True

     def merge_with_state(self, inputs):
         self.stateH = K.concatenate([self.stateH, inputs], axis=-1)

     def forget_gate(self):        
         forget = K.batch_dot(self.forget_w, self.stateH) + self.forget_b
         forget = K.sigmoid(forget)
         self.stateC = self.stateC * forget

     def input_gate(self):
         candidate = K.batch_dot(self.input_w1, self.stateH) + self.input_b1
         candidate = K.tanh(candidate)

         amount = K.batch_dot(self.input_w2, self.stateH) + self.input_b2
         amount = K.sigmoid(amount)

         self.stateC = self.stateC + amount * candidate

     def output_gate(self):
         self.stateH = K.batch_dot(self.output_w, self.stateH) + self.output_b
         self.stateH = K.sigmoid(self.stateH)

         self.stateH = self.stateH * K.tanh(self.stateC)

     def call(self, inputs, states):

         self.stateH = states[0]
         self.stateC = states[1]

         self.merge_with_state(inputs)
         self.forget_gate()
         self.input_gate()
         self.output_gate()

         return self.stateH, [self.stateH, self.stateC]

 inp = Input(shape=(None, 3))
 lstm = RNN(CustomLSTMCell(10))(inp)

 model = Model(inputs=inp, outputs=lstm)
 inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
 pred = model.predict(inp_value)
 print(pred)
编辑:在问题中,我对链接的模型犯了一个错误,在
输入门中使用了tanh函数,而不是sigmoid。在这里,我在代码中编辑了它,因此现在它是正确的