Python 用于查找文档间文本相似性的keras LSTM层的维数错误
我写这段代码是为了使用暹罗方法来计算两个文档的相似性。我附上了我的模型的一个片段。我在LSTM层中得到一个关于维度的错误。有人能帮我吗Python 用于查找文档间文本相似性的keras LSTM层的维数错误,python,multidimensional-array,keras,lstm,Python,Multidimensional Array,Keras,Lstm,我写这段代码是为了使用暹罗方法来计算两个文档的相似性。我附上了我的模型的一个片段。我在LSTM层中得到一个关于维度的错误。有人能帮我吗 #Setting Parameters for the model max_features = 20000 maxlen = 200 class DocSim(object): def __init__(self, w2v_model , stopwords=[]): self.w2v_model = w2v_model
#Setting Parameters for the model
max_features = 20000
maxlen = 200
class DocSim(object):
def __init__(self, w2v_model , stopwords=[]):
self.w2v_model = w2v_model
self.stopwords = stopwords
def vectorize(self, doc):
"""Identify the vector values for each word in the given document"""
doc = doc.lower()
words = [w for w in doc.split(" ") if w not in self.stopwords]
word_vecs = []
for word in words:
try:
vec = self.w2v_model[word]
word_vecs.append(vec)
except KeyError:
# Ignore, if the word doesn't exist in the vocabulary
pass
# Assuming that document vector is the mean of all the word vectors
vector = np.mean(word_vecs, axis=0)
return vector
def Siamese_cosine_sim(self, vectorA, vectorB):
# Input for variable-length sequences of integers
inputs = keras.Input(shape=(None,), dtype="int32")
# Add 2 bidirectional LSTMs
vectorA = layers.LSTM(64, return_sequences=True)(vectorA)
vectorA = layers.LSTM(64)(vectorA)
# Add 2 bidirectional LSTMs
vectorB = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(vectorB)
vectorB = layers.Bidirectional(layers.LSTM(64))(vectorB)
# Add a classifier
outputs = layers.Dense(1, activation="sigmoid")(left_doc,right_doc)
"""Find the cosine similarity distance between two vectors."""
csim = np.dot(left_doc, right_doc) / (np.linalg.norm(left_doc) * np.linalg.norm(right_doc))
if np.isnan(np.sum(csim)):
return 0
return csim
我犯了一个错误
Value error: Input 0 of layer lstm_9 is incompatible with the layer: expected ndim=3, found ndim=1. Full shape received: [300]