Python 3.x ValueError:检查输入时出错:预期嵌入_1_输入具有形状(4),但获得具有形状(1,)的数组
我当时正在进行seq2seq翻译,却被困在这里:-Python 3.x ValueError:检查输入时出错:预期嵌入_1_输入具有形状(4),但获得具有形状(1,)的数组,python-3.x,keras,deep-learning,Python 3.x,Keras,Deep Learning,我当时正在进行seq2seq翻译,却被困在这里:- def createModel(engVocab, frVocab, size, englishMaxlength, frenchMaxLength): model = Sequential() model.add(Embedding(input_dim = engVocab, output_dim = size, input_length = englishMaxlength, mask_zero = True)) model.add(LST
def createModel(engVocab, frVocab, size, englishMaxlength, frenchMaxLength):
model = Sequential()
model.add(Embedding(input_dim = engVocab, output_dim = size, input_length = englishMaxlength, mask_zero = True))
model.add(LSTM(units = size))
model.add(RepeatVector(frenchMaxLength))
model.add(LSTM(units = size, return_sequences = True))
model.add(TimeDistributed(Dense(frenchVocabsize, activation = 'softmax')))
return model
def DataGenerator(trainingDataEnglish, trainingDataFrench):
while True:
l = len(trainingDataFrench)
for i in range(l):
yield(trainingDataEnglish[i], trainingDataFrench[i])
我创建的测试和培训数据如下:-
def encodeSequences(trainingData, tokenizer, maxlength):
encoder = tokenizer.texts_to_sequences(trainingData)
encoder = pad_sequences(encoder, maxlen=maxlength, padding='pre')
return encoder
def encodeOutput(testData, vocabSize):
y = []
for sequence in testData:
Seq = to_categorical(sequence, num_classes=vocabSize)
y.append(Seq)
y = np.array(y)
return y
samples = 7000
trainingSize = 6000
trainEng = english[:trainingSize] #array of strings
trainFr = french[:trainingSize] #array of strings
testEng = english[trainingSize:samples] #array of strings
testFr = french[trainingSize:samples] #array of strings
englishTokenizer = createTokenizer(trainEng)
frenchTokenizer = createTokenizer(trainFr)
englishVocabSize = len(englishTokenizer.word_index) + 1
trainX = encodeSequences(trainEng, englishTokenizer, englishMaxlength)
trainY = encodeSequences(trainFr, frenchTokenizer, frenchMaxLength)
trainY = encodeOutput(trainY, frenchVocabsize)
testX = encodeSequences(testEng, englishTokenizer, englishMaxlength)
testY = encodeSequences(testFr, frenchTokenizer, frenchMaxLength)
testY = encodeOutput(testY, frenchVocabsize)
EncodeSequence和encodeOutput的使用如下:-
def encodeSequences(trainingData, tokenizer, maxlength):
encoder = tokenizer.texts_to_sequences(trainingData)
encoder = pad_sequences(encoder, maxlen=maxlength, padding='pre')
return encoder
def encodeOutput(testData, vocabSize):
y = []
for sequence in testData:
Seq = to_categorical(sequence, num_classes=vocabSize)
y.append(Seq)
y = np.array(y)
return y
samples = 7000
trainingSize = 6000
trainEng = english[:trainingSize] #array of strings
trainFr = french[:trainingSize] #array of strings
testEng = english[trainingSize:samples] #array of strings
testFr = french[trainingSize:samples] #array of strings
englishTokenizer = createTokenizer(trainEng)
frenchTokenizer = createTokenizer(trainFr)
englishVocabSize = len(englishTokenizer.word_index) + 1
trainX = encodeSequences(trainEng, englishTokenizer, englishMaxlength)
trainY = encodeSequences(trainFr, frenchTokenizer, frenchMaxLength)
trainY = encodeOutput(trainY, frenchVocabsize)
testX = encodeSequences(testEng, englishTokenizer, englishMaxlength)
testY = encodeSequences(testFr, frenchTokenizer, frenchMaxLength)
testY = encodeOutput(testY, frenchVocabsize)
最后:
model = createModel(engVocab = englishVocabSize, frVocab = frenchVocabsize, size = 256, englishMaxlength = englishMaxlength, frenchMaxLength = frenchMaxLength)
print(model.summary())
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy')
steps = len(trainX)
generator = DataGenerator(trainX, trainY)
model.fit_generator(generator, epochs = epochs, steps_per_epoch = steps, validation_data = (testX, testY))
model.save('Model.h5')
我得到以下错误:-
ValueError: Error when checking input: expected embedding_1_input to have shape (4,) but got array with shape (1,)
我该如何解决这个问题?
我哪里出错了?
请帮忙。
提前谢谢。您在哪里调用
encodeSequences
?您的数据生成器很可能存在填充问题?我已更新了encodeSequences的使用,请检查