Python 使用RNN和层类在Keras中实现最小LSTMCell
我正在尝试实现一个简单的 LSTMCell,而不使用 tf.keras.layers.LSTMCell 类中默认实现的各种 "fancy kwargs",它遵循如下示意图模型。它实际上没有直接的目的,我只是想练习实现一个比文档示例部分中描述的更复杂的 RNNCell。我的代码如下:
from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K
class CustomLSTMCell(Layer):
    """Minimal LSTM cell intended to be wrapped by keras.layers.RNN.

    NOTE(review): this is the original, non-working version from the
    question. `state_size` is a plain int, so the RNN wrapper supplies
    only ONE state tensor to `call`; `states[1]` below then raises the
    IndexError quoted further down in this document.
    """
    def __init__(self, units, **kwargs):
        # BUG: a two-state cell (hidden H and cell C) needs a list such
        # as [units, units]; a bare int declares a single state.
        self.state_size = units
        super(CustomLSTMCell, self).__init__(**kwargs)
    def build(self, input_shape):
        # One weight matrix + bias per gate. Each matrix multiplies the
        # concatenation [H, x], hence state_size + input_dim columns.
        self.forget_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='forget_w')
        self.forget_b = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='forget_b')
        self.input_w1 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w1')
        self.input_b1 = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='input_b1')
        self.input_w2 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w2')
        self.input_b2 = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='input_b2')
        self.output_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='output_w')
        self.output_b = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='output_b')
        self.built = True
    def merge_with_state(self, inputs):
        # H <- concat([H, x]) along the feature axis.
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)
    def forget_gate(self):
        # C <- C * sigmoid(Wf . [H, x] + bf)
        # NOTE(review): K.dot here does not account for the batch axis;
        # the working version later in this document uses K.batch_dot.
        forget = K.dot(self.forget_w, self.stateH) + self.forget_b
        forget = K.sigmoid(forget)
        self.stateC = self.stateC * forget
    def input_gate(self):
        # C <- C + amount * candidate. This version applies tanh to
        # `amount`; the final edit at the end of the document changes it
        # to sigmoid.
        candidate = K.dot(self.input_w1, self.stateH) + self.input_b1
        candidate = K.tanh(candidate)
        amount = K.dot(self.input_w2, self.stateH) + self.input_b2
        amount = K.tanh(amount)
        self.stateC = self.stateC + amount * candidate
    def output_gate(self):
        # H <- sigmoid(Wo . [H, x] + bo) * tanh(C)
        self.stateH = K.dot(self.output_w, self.stateH) + self.output_b
        self.stateH = K.sigmoid(self.stateH)
        self.stateH = self.stateH * K.tanh(self.stateC)
    def call(self, inputs, states):
        # `states` is the list of state tensors supplied by the RNN
        # wrapper; with the scalar state_size above it has length 1.
        self.stateH = states[0]
        self.stateC = states[1]  # <- raises IndexError (see text below)
        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()
        return self.stateH, [self.stateH, self.stateC]
# Testing: run the custom cell over a toy sequence.
inp = Input(shape=(None, 3))           # (batch, timesteps, 3)
lstm = RNN(CustomLSTMCell(10))(inp)    # final hidden state, width 10
model = Model(inputs=inp, outputs=lstm)
# NOTE(review): this literal is nested 4 levels deep -> shape (1, 1, 3, 3),
# while the model expects 3-D (batch, timesteps, 3). Presumably one bracket
# level too many -- confirm against the original post.
inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
pred = model.predict(inp_value)
print(pred)
from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K
class CustomLSTMCell(Layer):
    """Minimal LSTM cell for keras.layers.RNN (working version).

    Fixes relative to the question's code:
      * `state_size` is the list [units, units], so the RNN wrapper
        supplies both the hidden state H and the cell state C.
      * every weight carries a leading dimension of 1 and is applied
        with K.batch_dot, which handles the batch axis.
    """
    def __init__(self, units, **kwargs):
        # Two states (H and C), each of width `units`.
        self.state_size = [units, units]
        super(CustomLSTMCell, self).__init__(**kwargs)
    def build(self, input_shape):
        # Per gate: one (1, units, units + input_dim) matrix acting on
        # the concatenation [H, x], and one (1, units) bias.
        self.forget_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                        initializer='uniform',
                                        name='forget_w')
        self.forget_b = self.add_weight(shape=(1, self.state_size[0]),
                                        initializer='uniform',
                                        name='forget_b')
        self.input_w1 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w1')
        self.input_b1 = self.add_weight(shape=(1, self.state_size[0]),
                                        initializer='uniform',
                                        name='input_b1')
        self.input_w2 = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w2')
        self.input_b2 = self.add_weight(shape=(1, self.state_size[0],),
                                        initializer='uniform',
                                        name='input_b2')
        self.output_w = self.add_weight(shape=(1, self.state_size[0], self.state_size[0] + input_shape[-1]),
                                        initializer='uniform',
                                        name='output_w')
        self.output_b = self.add_weight(shape=(1, self.state_size[0],),
                                        initializer='uniform',
                                        name='output_b')
        self.built = True
    def merge_with_state(self, inputs):
        # H <- concat([H, x]) along the feature axis.
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)
    def forget_gate(self):
        # C <- C * sigmoid(Wf . [H, x] + bf)
        forget = K.batch_dot(self.forget_w, self.stateH) + self.forget_b
        forget = K.sigmoid(forget)
        self.stateC = self.stateC * forget
    def input_gate(self):
        # C <- C + sigmoid(Wi2 . [H, x] + bi2) * tanh(Wi1 . [H, x] + bi1)
        candidate = K.batch_dot(self.input_w1, self.stateH) + self.input_b1
        candidate = K.tanh(candidate)
        amount = K.batch_dot(self.input_w2, self.stateH) + self.input_b2
        amount = K.sigmoid(amount)
        self.stateC = self.stateC + amount * candidate
    def output_gate(self):
        # H <- sigmoid(Wo . [H, x] + bo) * tanh(C)
        self.stateH = K.batch_dot(self.output_w, self.stateH) + self.output_b
        self.stateH = K.sigmoid(self.stateH)
        self.stateH = self.stateH * K.tanh(self.stateC)
    def call(self, inputs, states):
        # states = [H, C] as declared by state_size.
        self.stateH = states[0]
        self.stateC = states[1]
        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()
        # Output the new hidden state; return both states for the next step.
        return self.stateH, [self.stateH, self.stateC]
# Testing: run the custom cell over a toy sequence.
inp = Input(shape=(None, 3))           # (batch, timesteps, 3)
lstm = RNN(CustomLSTMCell(10))(inp)    # final hidden state, width 10
model = Model(inputs=inp, outputs=lstm)
# NOTE(review): nested 4 levels deep -> shape (1, 1, 3, 3); the model
# expects 3-D (batch, timesteps, 3). Likely a paste artifact -- confirm.
inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
pred = model.predict(inp_value)
print(pred)
但是,当我尝试测试它时,出现了一个异常,并显示以下消息:
IndexError: tuple index out of range
错误发生在 call 函数中,即我为 self.stateC
赋值的那一行。在这里,我认为最初调用 call 时传入的 states
参数是一个张量,而不是张量列表,所以这就是我出错的原因。因此,我在类的 __init__
中添加了一行 self.already_called = False,
并在 call
函数中添加了以下部分:
if not self.already_called:
self.stateH = K.ones(self.state_size)
self.stateC = K.ones(self.state_size)
self.already_called = True
else:
self.stateH = states[0]
self.stateC = states[1]
ValueError: Shape must be rank 1 but is rank 2 for 'rnn_1/concat' (op: 'ConcatV2') with input shapes: [10], [?,3], [].
希望它能消除这个问题。然而,这导致 merge_with_state
函数中出现了另一个错误:
if not self.already_called:
self.stateH = K.ones(self.state_size)
self.stateC = K.ones(self.state_size)
self.already_called = True
else:
self.stateH = states[0]
self.stateC = states[1]
ValueError: Shape must be rank 1 but is rank 2 for 'rnn_1/concat' (op: 'ConcatV2') with input shapes: [10], [?,3], [].
我真的不明白,因为 RNN 层应该只让 CustomLSTMCell 看到形状为 (3,) 而不是 (None, 3) 的张量,因为轴 0 是它应该迭代的轴。在这一点上,我确信我做了一些非常错误的事情,应该向社区寻求帮助。基本上,我的问题是:我的代码出了什么问题?如果"几乎所有东西"都出了问题,那么我应该如何从头开始实现 LSTMCell?好的,看来我成功地解决了这个问题。事实证明,阅读文档总是很有用的。首先,already_called
属性是不必要的,因为问题出在 __init__
函数的第一行:state_size
属性应该是一个整数列表,而不仅仅是一个整数,如下所示:self.state_size = [units, units]
(因为大小为 units 的 LSTM 需要两个状态,而不是一个)。当我纠正它时,我得到了一个不同的错误:忘记门中的张量在维度上不兼容,无法进行加法。这是因为 RNN 一次看到整个批次,而不是批次中的每个元素(因此轴 0 处的形状为 None)。
对它的修正是给每个张量在轴 0 处添加一个大小为 1 的额外维度,如下所示:
self.forget_w = self.add_weight(shape=(1, self.state_size, self.state_size + input_shape[-1]),
initializer='uniform',
name='forget_w')
此外,我不得不使用 K.batch_dot
函数来代替 K.dot 点积。因此,完整的可工作代码如下所示:
from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K
class CustomLSTMCell(Layer):
    """Minimal LSTM cell intended to be wrapped by keras.layers.RNN.

    NOTE(review): duplicate of the question's original, non-working
    version. `state_size` is a plain int, so the RNN wrapper supplies
    only ONE state tensor to `call`; `states[1]` below raises IndexError.
    """
    def __init__(self, units, **kwargs):
        # BUG: a two-state cell (hidden H and cell C) needs a list such
        # as [units, units]; a bare int declares a single state.
        self.state_size = units
        super(CustomLSTMCell, self).__init__(**kwargs)
    def build(self, input_shape):
        # One weight matrix + bias per gate. Each matrix multiplies the
        # concatenation [H, x], hence state_size + input_dim columns.
        self.forget_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='forget_w')
        self.forget_b = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='forget_b')
        self.input_w1 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w1')
        self.input_b1 = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='input_b1')
        self.input_w2 = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='input_w2')
        self.input_b2 = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='input_b2')
        self.output_w = self.add_weight(shape=(self.state_size, self.state_size + input_shape[-1]),
                                        initializer='uniform',
                                        name='output_w')
        self.output_b = self.add_weight(shape=(self.state_size,),
                                        initializer='uniform',
                                        name='output_b')
        self.built = True
    def merge_with_state(self, inputs):
        # H <- concat([H, x]) along the feature axis.
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)
    def forget_gate(self):
        # C <- C * sigmoid(Wf . [H, x] + bf)
        # NOTE(review): K.dot does not account for the batch axis here;
        # the working version uses K.batch_dot instead.
        forget = K.dot(self.forget_w, self.stateH) + self.forget_b
        forget = K.sigmoid(forget)
        self.stateC = self.stateC * forget
    def input_gate(self):
        # C <- C + amount * candidate. This version applies tanh to
        # `amount`; the final edit changes it to sigmoid.
        candidate = K.dot(self.input_w1, self.stateH) + self.input_b1
        candidate = K.tanh(candidate)
        amount = K.dot(self.input_w2, self.stateH) + self.input_b2
        amount = K.tanh(amount)
        self.stateC = self.stateC + amount * candidate
    def output_gate(self):
        # H <- sigmoid(Wo . [H, x] + bo) * tanh(C)
        self.stateH = K.dot(self.output_w, self.stateH) + self.output_b
        self.stateH = K.sigmoid(self.stateH)
        self.stateH = self.stateH * K.tanh(self.stateC)
    def call(self, inputs, states):
        # `states` is the list of state tensors supplied by the RNN
        # wrapper; with the scalar state_size above it has length 1.
        self.stateH = states[0]
        self.stateC = states[1]  # <- raises IndexError
        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()
        return self.stateH, [self.stateH, self.stateC]
# Testing: run the custom cell over a toy sequence.
inp = Input(shape=(None, 3))           # (batch, timesteps, 3)
lstm = RNN(CustomLSTMCell(10))(inp)    # final hidden state, width 10
model = Model(inputs=inp, outputs=lstm)
# NOTE(review): nested 4 levels deep -> shape (1, 1, 3, 3); the model
# expects 3-D (batch, timesteps, 3). Likely a paste artifact -- confirm.
inp_value = [[[[1,2,3], [2,3,4], [3,4,5]]]]
pred = model.predict(inp_value)
print(pred)
from keras import Input
from keras.layers import Layer, RNN
from keras.models import Model
import keras.backend as K
class CustomLSTMCell(Layer):
    """A minimal LSTM cell compatible with keras.layers.RNN.

    The cell keeps two per-sample states of width ``units``: the hidden
    state H and the cell state C. Every gate weight carries a leading
    dimension of 1 and is applied with K.batch_dot so that it works on
    a whole batch at once.
    """

    def __init__(self, units, **kwargs):
        # Two state tensors (H and C), each of width `units`.
        self.state_size = [units, units]
        super(CustomLSTMCell, self).__init__(**kwargs)

    def build(self, input_shape):
        n = self.state_size[0]
        cols = n + input_shape[-1]  # weights act on the [H, x] concatenation

        def make_gate(w_name, b_name):
            # One (1, n, n + input_dim) matrix plus one (1, n) bias.
            w = self.add_weight(shape=(1, n, cols),
                                initializer='uniform',
                                name=w_name)
            b = self.add_weight(shape=(1, n),
                                initializer='uniform',
                                name=b_name)
            return w, b

        self.forget_w, self.forget_b = make_gate('forget_w', 'forget_b')
        self.input_w1, self.input_b1 = make_gate('input_w1', 'input_b1')
        self.input_w2, self.input_b2 = make_gate('input_w2', 'input_b2')
        self.output_w, self.output_b = make_gate('output_w', 'output_b')
        self.built = True

    def merge_with_state(self, inputs):
        # H <- concat([H, x]) along the feature axis.
        self.stateH = K.concatenate([self.stateH, inputs], axis=-1)

    def forget_gate(self):
        # C <- C * sigmoid(Wf . [H, x] + bf)
        keep = K.sigmoid(K.batch_dot(self.forget_w, self.stateH) + self.forget_b)
        self.stateC = self.stateC * keep

    def input_gate(self):
        # C <- C + sigmoid(Wi2 . [H, x] + bi2) * tanh(Wi1 . [H, x] + bi1)
        new_values = K.tanh(K.batch_dot(self.input_w1, self.stateH) + self.input_b1)
        write_gate = K.sigmoid(K.batch_dot(self.input_w2, self.stateH) + self.input_b2)
        self.stateC = self.stateC + write_gate * new_values

    def output_gate(self):
        # H <- sigmoid(Wo . [H, x] + bo) * tanh(C)
        expose = K.sigmoid(K.batch_dot(self.output_w, self.stateH) + self.output_b)
        self.stateH = expose * K.tanh(self.stateC)

    def call(self, inputs, states):
        # states = [H, C] as declared by state_size; run the gates in
        # order and emit the new hidden state.
        self.stateH, self.stateC = states[0], states[1]
        self.merge_with_state(inputs)
        self.forget_gate()
        self.input_gate()
        self.output_gate()
        return self.stateH, [self.stateH, self.stateC]
# Smoke test: feed one toy sequence through the custom cell and print
# the resulting hidden state.
sequence_in = Input(shape=(None, 3))
hidden_out = RNN(CustomLSTMCell(10))(sequence_in)
model = Model(inputs=sequence_in, outputs=hidden_out)
sample = [[[[1,2,3], [2,3,4], [3,4,5]]]]
print(model.predict(sample))
编辑:在问题中,我对链接的模型犯了一个错误,在输入门中使用了tanh函数,而不是sigmoid。在这里,我在代码中编辑了它,因此现在它是正确的