乱码Keras LSTM深度拼写输出

乱码Keras LSTM深度拼写输出（标签：keras, deep-learning, lstm, autocorrect）。（此行原为页面抽取时重复拼接的标题、标签、截断的正文与代码片段；完整正文与代码见下文。）

我正在尝试建立一个基于LSTM的拼写修正程序。它在训练期间效果很好。请参阅随附的屏幕截图

我做了一些轻微的修改,所以输入的是一个不正确的单词,主要是像name这样的专有名词,并尝试为它提供建议。我正确地进行了文本矢量化,但仍然从LSTM获得了乱七八糟的输出。以下是11个时代之后的故事

这是我的python模块

import numpy as np
from numpy import zeros as np_zeros
from keras.models import model_from_json

# Fix the NumPy RNG seed so runs are reproducible.
seed = 7
np.random.seed(seed)

class CharacterTable(object):
    """
    Given a set of characters:
    + Encode them to a one hot integer representation
    + Decode the one hot integer representation to their character output
    + Decode a vector of probabilities to their character output
    """

    def __init__(self, chars):
        """chars: iterable of characters; duplicates are dropped, order is sorted."""
        self.chars = sorted(set(chars))
        # char -> index and index -> char lookup tables for one-hot coding.
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
        self.size = len(self.chars)

    def encode(self, C, maxlen):
        """Encode string C as a (maxlen, vocab) one-hot bool matrix.

        Positions past len(C) stay all-zero.  C must not be longer than
        maxlen (IndexError) and must contain only known characters
        (KeyError otherwise).
        """
        # np.bool was deprecated and removed in NumPy 1.24; the builtin
        # bool dtype is the supported spelling and behaves identically.
        X = np_zeros((maxlen, len(self.chars)), dtype=bool)
        for i, c in enumerate(C):
            X[i, self.char_indices[c]] = 1
        return X

    def decode(self, X, calc_argmax=True):
        """Decode one-hot / probability rows back to a string.

        With calc_argmax=False, X is taken to already be a sequence of
        integer character indices.
        """
        if calc_argmax:
            X = X.argmax(axis=-1)
        return ''.join(self.indices_char[x] for x in X)


def load_saved_model(checkpoint_filename=None, dataset_params_filename=None,
                     json_filename='/deepspell/models_gpu_1/model_gpu_1.json'):
    """Rebuild the Keras model from its JSON architecture file.

    checkpoint_filename: path to an .hdf5 weights file, or None to skip
        loading weights.
    dataset_params_filename: unused; kept only for backward compatibility
        with existing callers.
    json_filename: path to the serialized model architecture (new optional
        parameter; default preserves the previously hard-coded path).
    Returns the (uncompiled) Keras model.
    """
    # 'with' guarantees the file handle is closed even if reading raises.
    with open(json_filename, 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    print("Loaded Model")

    if checkpoint_filename is not None:
        model.load_weights(checkpoint_filename)
        print("Loaded Model Weights")

    return model

def vectorize(questions, max_length=None, table=None):
    """Vectorize the questions list into a one-hot bool tensor.

    questions: list of strings.
    max_length: time-step dimension; defaults to the module-level
        x_max_length (new optional parameter, backward compatible).
    table: object exposing .size and .char_indices (a CharacterTable);
        defaults to the module-level character_table.
    Returns an array of shape (samples, time steps, features).
    Raises KeyError on characters unknown to the table, IndexError on
    strings longer than max_length.
    """
    if max_length is None:
        max_length = x_max_length
    if table is None:
        table = character_table
    # np.bool was removed in NumPy 1.24 — use the builtin bool dtype.
    X = np_zeros((len(questions), max_length, table.size), dtype=bool)
    for i, sentence in enumerate(questions):
        for j, c in enumerate(sentence):
            X[i, j, table.char_indices[c]] = 1
    return X

def run_autocorrect_name(text):
    """Print and return the model's spelling suggestion for `text`.

    text: string to auto-correct; must contain only characters known to
    the module-level character_table (KeyError otherwise).
    """
    text_vect = vectorize([text])
    # Sequential.predict_classes was deprecated and removed (TF 2.6);
    # taking argmax over the per-timestep class probabilities is the
    # documented equivalent.
    pred_classes = np.argmax(model.predict(text_vect, verbose=0), axis=-1)
    suggestion = character_table.decode(pred_classes[0], calc_argmax=False)
    print("Suggestion for {} : {} ".format(text, suggestion))
    return suggestion

# NOTE(review): CHARS is never read below — `chars` is built from the two
# sets instead; confirm no external consumer before removing it.
CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .")
# chars_answer : lowercase a-z, dot, dash, space (output-side vocabulary)
# chars_question : lowercase + uppercase a-z, dot, dash, space (input-side vocabulary)
chars_answer = {'k', '.', 'x', 'l', 'n', 't', 'p', 'm', 'b', 'r', 'e', 'i', 'g', 'y', 'z', 'u', 'f', 'q', 'o', 's', 'd', 'j', ' ', 'v', 'h', 'a', '-', 'c', 'w'}
chars_question = {'I', 'k', '.', 'x', 'l', 'n', 'Q', 't', 'm', 'p', 'b', 'r', 'e', 'i', 'K', 'S', 'g', 'H', 'M', 'y', 'z', 'u', 'W', 'R', 'L', 'U', 'f', 'q', 'X', 'G', 'F', 'Y', 'A', 'T', 'o', 'J', 's', 'Z', 'd', 'j', 'D', 'O', ' ', 'v', 'V', 'C', 'h', 'E', 'N', 'B', 'a', '-', 'P', 'c', 'w'}
# One merged, sorted vocabulary so questions and answers share an index space.
# (sorted() already returns a list; the extra list() call was redundant.)
chars = sorted(set.union(chars_answer, chars_question))
x_max_length = 40
character_table = CharacterTable(chars)
# Checkpoint after 11 epochs.  Use a dedicated name for the path instead of
# temporarily binding the string to `model` and then rebinding `model` to
# the loaded network — that shadowing made the code hard to follow.
checkpoint_path = '/deepspell/models_gpu_1/weights.11-0.11.hdf5'
# load the model and its trained weights
model = load_saved_model(checkpoint_filename=checkpoint_path)
# Compile model (required to make predictions)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

test_str = 'Taylor Swift'
run_autocorrect_name(test_str)
上面的输出是

Suggestion for Taylor Swift : anezalrr  gilluuhhooooosssaaa........... 

评论：增加历元数（训练步数）。—— 回复 @AmirHadifar：我确实用了 2 天跑满 70 个历元（GTX 1050 工作站），但 val_acc 没有提升；第 11 个历元之后 val_acc 几乎不再增加，所以我使用了那个检查点的模型。有没有其他加快模型训练的方法？你认为我的矢量化代码有什么问题吗？