Neural network 关于LSTM编码器-解码器模型不工作_Neural Network_Sequence_Lstm_Attention Model

Neural network 关于LSTM编码器-解码器模型不工作

neural-network

Neural network 关于LSTM编码器-解码器模型不工作,neural-network,sequence,lstm,attention-model,Neural Network,Sequence,Lstm,Attention Model,我使用带注意力机制的编码器-解码器 LSTM 模型来实现句子自动补全，但无论输入是什么，该模型都预测相同的序列。我什么都试过了。我也在使用嵌入层。任何帮助都将不胜感激。这是我的代码： BUFFER_SIZE = len(input_data) BATCH_SIZE = 128 embedding_dim = 300 units = 128 vocab_in_size = len(input_lang.word2idx) vocab_out_size = len(target_lang.word2idx) df

我使用带注意力机制的编码器-解码器 LSTM 模型来实现句子自动补全，

但无论输入是什么，该模型都预测相同的序列。我什么都试过了

我也在使用嵌入层。任何帮助都将不胜感激

这是我的代码：

# --- Hyper-parameters and vocabulary sizes ---------------------------------
# NOTE(review): input_data, input_lang, target_lang, df and len_input are
# defined outside the visible code.
# BUFFER_SIZE is not referenced again in the visible code.
BUFFER_SIZE = len(input_data)
BATCH_SIZE = 128
embedding_dim = 300
units = 128
vocab_in_size = len(input_lang.word2idx)
vocab_out_size = len(target_lang.word2idx)
# Notebook-style inspection: the value of this expression is not assigned.
df.iloc[170:190]

# Notebook-style inspection: the value of this expression is not assigned.
input_data.shape[1]

# --- Training graph: encoder -----------------------------------------------
# Token ids of length len_input -> embedding -> LSTM that returns both the
# full output sequence and its final hidden/cell states.
# NOTE(review): neither Embedding below is given mask_zero; if id 0 is the
# padding id (sentence_to_vector leaves unused positions at 0), padded steps
# are not masked -- confirm whether that is intended.
encoder_inputs = Input(shape=(len_input,))
encoder_emb = Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)
encoder_lstm = LSTM(units=units, return_sequences=True,return_state=True)
hidden = encoder_emb(encoder_inputs)
# NOTE(review): encoder_out is not referenced again in the visible code, so no
# attention over the encoder outputs is applied here; only the final states
# are passed on to the decoder.
encoder_out, fstate_h, fstate_c  = encoder_lstm(hidden)


encoder_states = [fstate_h, fstate_c]

# --- Training graph: decoder -----------------------------------------------
# Variable-length decoder input, embedded and fed to an LSTM whose initial
# state is the encoder's final [h, c].
decoder_inputs = Input(shape=(None,))
decoder_emb = Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)
decoder_lstm = LSTM(units=units, return_sequences=True, return_state=True)

# The decoder's own final states are discarded during training.
decoder_lstm_out, _, _ = decoder_lstm(decoder_emb(decoder_inputs), initial_state=encoder_states)

# Output head applied to the decoder LSTM output:
# Dropout(0.2) -> Dense(units, relu) -> Dropout(0.2) -> Dense(vocab_out_size, softmax).
decoder_d1 = Dense(units, activation="relu")
decoder_d2 = Dense(vocab_out_size, activation="softmax")
decoder_out = decoder_d2(Dropout(rate=.2)(decoder_d1(Dropout(rate=.2)(decoder_lstm_out))))

model = Model(inputs = [encoder_inputs, decoder_inputs], outputs= decoder_out)

# NOTE(review): tf.train.AdamOptimizer is a TensorFlow 1.x API -- confirm the
# TensorFlow version this script targets.
model.compile(optimizer=tf.train.AdamOptimizer(), loss="sparse_categorical_crossentropy", metrics=['sparse_categorical_accuracy'])
model.summary()

# --- Training --------------------------------------------------------------
# input_data feeds encoder_inputs, teacher_data feeds decoder_inputs, and
# target_data supplies the labels; validation_split=0.2 reserves a fraction
# of the data for validation.
epochs = 10
history = model.fit([input_data, teacher_data], target_data,
                 batch_size= BATCH_SIZE,
                 epochs=epochs,
                 validation_split=0.2)

# --- Inference models ------------------------------------------------------
# Encoder-only model: maps an input sequence to the encoder's final [h, c].
encoder_model = Model(encoder_inputs, encoder_states)
# Inputs of the step-wise decoder: the previous token(s) plus the LSTM
# hidden and cell states carried over from the previous step.
inf_decoder_inputs = Input(shape=(None,), name="inf_decoder_inputs")

state_input_h = Input(shape=(units,), name="state_input_h")
state_input_c = Input(shape=(units,), name="state_input_c")

decoder_res, decoder_h, decoder_c = decoder_lstm(

    decoder_emb(inf_decoder_inputs), initial_state=[state_input_h, state_input_c])

inf\u decoder\u out=解码器d2（解码器d1（解码器res））
inf\u模型=模型（输入=[inf\u解码器\u输入，状态\u输入\u h，状态\u输入\u c]，
输出=[inf\u解码器\u输出、解码器\u h、解码器\u c]）
定义句子到向量（句子，lang）：
前=句子
vec=np.零（len_输入）
语句列表=[lang.word2idx[s]表示在预拆分（“”）中的s
对于枚举中的i，w（句子列表）：
vec[i]=w
返回向量
def翻译（输入句子、输入模型、输入模型）：
sv=句子到向量（输入句子，输入语言）
sv=sv。重塑（1，透镜（sv））
[sh，sc]=infenc_模型预测（x=sv）
i=0
start\u vec=target\u lang.word2idx[“”]
stop\u vec=target\u lang.word2idx[“”]
cur_vec=np.零（（1,1））
cur_vec[0,0]=开始_vec
cur_word=“”
输出_句子=“”
而cur_word！=”“我<（len_target-1）：
i+=1
如果cur_word！="":
输出句子=输出句子+当前单词
x_in=[cur_vec，sh，sc]
[nvec，sh，sc]=infmodel.predict（x=x_in）
cur_vec[0,0]=np.argmax（nvec[0,0]）
打印（cur_vec[0,0]）
cur_word=target_lang.idx2word[np.argmax（nvec[0,0]）]
打印（cur_word）
返回输出语句
测试=['gov无法']
target_lang.idx2word[1]
输出=[]
output.append（{“Input seq”：t.lower（），“Pred.seq”：translate（t.lower（），encoder\u model，inf\u model）}）
结果\u df=pd.DataFrame.from\u dict（输出）
头部测向结果（透镜（测试））

# Step-wise decoder used at inference time. It reuses the dense layers
# decoder_d1 and decoder_d2 on decoder_res; no Dropout layers are inserted
# on this path.
inf_decoder_out = decoder_d2(decoder_d1(decoder_res))
# The model takes the decoder token input plus the previous LSTM states and
# returns the output distribution together with the updated states, so the
# caller can feed the states back in on the next step.
inf_model = Model(inputs=[inf_decoder_inputs, state_input_h, state_input_c], 
                  outputs=[inf_decoder_out, decoder_h, decoder_c])

def sentence_to_vector(sentence, lang, max_len=None):
    """Encode a space-separated sentence as a fixed-length vector of word ids.

    Args:
        sentence: Text whose tokens are separated by single spaces.
        lang: Object exposing a ``word2idx`` mapping from token to id.
        max_len: Length of the returned vector. When omitted, the
            module-level ``len_input`` is used, as before.

    Returns:
        A 1-D ``numpy`` array of length ``max_len`` (default float dtype of
        ``np.zeros``) holding the token ids in order; positions after the
        last token stay 0.

    Raises:
        KeyError: If a token is not present in ``lang.word2idx``.
        IndexError: If the sentence has more tokens than ``max_len``.
    """
    if max_len is None:
        max_len = len_input
    vec = np.zeros(max_len)
    # Split on a single space to match the original tokenisation exactly.
    token_ids = [lang.word2idx[token] for token in sentence.split(' ')]
    if len(token_ids) > max_len:
        # Same exception type the element-wise assignment used to raise,
        # but with a message that says what went wrong.
        raise IndexError(
            "sentence has %d tokens but the vector length is %d"
            % (len(token_ids), max_len)
        )
    vec[:len(token_ids)] = token_ids
    return vec

def translate(input_sentence, infenc_model, infmodel):
    """Greedily decode a completion for ``input_sentence``.

    The sentence is encoded with ``sentence_to_vector`` using the
    module-level ``input_lang``, passed through ``infenc_model`` to obtain
    the initial decoder states, and then decoded one token at a time with
    ``infmodel``, always picking the arg-max token.

    Args:
        input_sentence: Space-separated source sentence.
        infenc_model: Model whose ``predict`` returns the two initial
            decoder states for the encoded sentence.
        infmodel: Model whose ``predict`` takes ``[token, h, c]`` and returns
            ``[token_scores, h, c]``.

    Returns:
        The predicted words joined by single spaces, without the
        ``"<start>"``/``"<end>"`` markers and without a leading space.
        Decoding stops at ``"<end>"`` or after ``len_target - 1`` steps
        (``len_target`` is a module-level name).
    """
    sv = sentence_to_vector(input_sentence, input_lang)
    sv = sv.reshape(1, len(sv))
    sh, sc = infenc_model.predict(x=sv)

    # Single-token decoder input, seeded with the start marker and updated
    # in place with each predicted token id.
    cur_vec = np.zeros((1, 1))
    cur_vec[0, 0] = target_lang.word2idx["<start>"]

    words = []
    for _ in range(len_target - 1):
        nvec, sh, sc = infmodel.predict(x=[cur_vec, sh, sc])
        token_id = int(np.argmax(nvec[0, 0]))
        cur_vec[0, 0] = token_id
        # Per-step trace of the predicted id and word, kept from the
        # original implementation.
        print(cur_vec[0, 0])
        cur_word = target_lang.idx2word[token_id]
        print(cur_word)
        if cur_word == "<end>":
            break
        # Appending right after prediction keeps the token produced on the
        # last allowed step, which was previously dropped.
        if cur_word != "<start>":
            words.append(cur_word)
    return " ".join(words)

# Sentences to complete with the inference models.
test = ['gov is unable']
# Notebook-style inspection: the value of this expression is not assigned.
target_lang.idx2word[1]

# One record per test sentence: the lower-cased input and its prediction.
output = []
for t in test:
    lowered = t.lower()
    output.append({"Input seq": lowered, "Pred. Seq": translate(lowered, encoder_model, inf_model)})

results_df = pd.DataFrame.from_dict(output)
results_df.head(len(test))