Python 3.x ValueError:检查输入时出错:预期嵌入_1_输入具有形状(4),但获得具有形状(1,)的数组

Python 3.x ValueError:检查输入时出错:预期嵌入_1_输入具有形状(4),但获得具有形状(1,)的数组,python-3.x,keras,deep-learning,Python 3.x,Keras,Deep Learning,我当时正在进行seq2seq翻译,却被困在这里:- def createModel(engVocab, frVocab, size, englishMaxlength, frenchMaxLength): model = Sequential() model.add(Embedding(input_dim = engVocab, output_dim = size, input_length = englishMaxlength, mask_zero = True)) model.add(LST

我当时正在进行seq2seq翻译,却被困在这里:-

def createModel(engVocab, frVocab, size, englishMaxlength, frenchMaxLength):
model = Sequential()
model.add(Embedding(input_dim = engVocab, output_dim = size, input_length = englishMaxlength, mask_zero = True))
model.add(LSTM(units = size))
model.add(RepeatVector(frenchMaxLength))
model.add(LSTM(units = size, return_sequences = True))
model.add(TimeDistributed(Dense(frenchVocabsize, activation = 'softmax')))
return model

def DataGenerator(trainingDataEnglish, trainingDataFrench):
    while True:
        l = len(trainingDataFrench)
        for i in range(l):
            yield(trainingDataEnglish[i], trainingDataFrench[i])
我创建的测试和培训数据如下:-

def encodeSequences(trainingData, tokenizer, maxlength):
encoder = tokenizer.texts_to_sequences(trainingData)
encoder = pad_sequences(encoder, maxlen=maxlength, padding='pre')
return encoder

def encodeOutput(testData, vocabSize):
y = []
for sequence in testData:
    Seq = to_categorical(sequence, num_classes=vocabSize)
    y.append(Seq)
y = np.array(y)
return y

samples = 7000
trainingSize = 6000
trainEng = english[:trainingSize] #array of strings
trainFr = french[:trainingSize] #array of strings
testEng = english[trainingSize:samples] #array of strings
testFr = french[trainingSize:samples] #array of strings
englishTokenizer = createTokenizer(trainEng)
frenchTokenizer = createTokenizer(trainFr)
englishVocabSize = len(englishTokenizer.word_index) + 1
trainX = encodeSequences(trainEng, englishTokenizer, englishMaxlength)
trainY = encodeSequences(trainFr, frenchTokenizer, frenchMaxLength)
trainY = encodeOutput(trainY, frenchVocabsize)


testX = encodeSequences(testEng, englishTokenizer, englishMaxlength)
testY = encodeSequences(testFr, frenchTokenizer, frenchMaxLength)
testY = encodeOutput(testY, frenchVocabsize)
EncodeSequence和encodeOutput的使用如下:-

def encodeSequences(trainingData, tokenizer, maxlength):
encoder = tokenizer.texts_to_sequences(trainingData)
encoder = pad_sequences(encoder, maxlen=maxlength, padding='pre')
return encoder

def encodeOutput(testData, vocabSize):
y = []
for sequence in testData:
    Seq = to_categorical(sequence, num_classes=vocabSize)
    y.append(Seq)
y = np.array(y)
return y

samples = 7000
trainingSize = 6000
trainEng = english[:trainingSize] #array of strings
trainFr = french[:trainingSize] #array of strings
testEng = english[trainingSize:samples] #array of strings
testFr = french[trainingSize:samples] #array of strings
englishTokenizer = createTokenizer(trainEng)
frenchTokenizer = createTokenizer(trainFr)
englishVocabSize = len(englishTokenizer.word_index) + 1
trainX = encodeSequences(trainEng, englishTokenizer, englishMaxlength)
trainY = encodeSequences(trainFr, frenchTokenizer, frenchMaxLength)
trainY = encodeOutput(trainY, frenchVocabsize)


testX = encodeSequences(testEng, englishTokenizer, englishMaxlength)
testY = encodeSequences(testFr, frenchTokenizer, frenchMaxLength)
testY = encodeOutput(testY, frenchVocabsize)
最后:

model = createModel(engVocab = englishVocabSize, frVocab = frenchVocabsize, size = 256, englishMaxlength = englishMaxlength, frenchMaxLength = frenchMaxLength)
print(model.summary())
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')

steps = len(trainX)
generator = DataGenerator(trainX, trainY)
model.fit_generator(generator, epochs = epochs, steps_per_epoch = steps, validation_data = (testX, testY))
model.save('Model.h5')
我得到以下错误:-

ValueError: Error when checking input: expected embedding_1_input to have shape (4,) but got array with shape (1,)
我该如何解决这个问题? 我哪里出错了? 请帮忙。
提前谢谢。

您在哪里调用
encodeSequences
?您的数据生成器很可能存在填充问题?我已更新了encodeSequences的使用,请检查