Python: Why does a fixed vocabulary order matter for word embeddings?


While recently working through this, I noticed that the order of the vocabulary affects the prediction results.

Below is sample code that illustrates the problem; it is adapted from earlier code by Robert Guthrie.

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim

torch.manual_seed(1)

CONTEXT_SIZE = 2
EMBEDDING_DIM = 4

test_sentence = r"""<s> The mathematician ran . <\s>
<s> The mathematician ran to the store . <\s>
<s> The physicist ran to the store . <\s>
<s> The philosopher thought about it . <\s>
<s> The mathematician solved the open problem . <\s>""".split()

# build a list of tuples.  Each tuple is ([ word_i-2, word_i-1 ], target word)
trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
            for i in range(len(test_sentence) - 2)]

# fix the order of vocabulary
# if the sorted() is removed, the predicting result will be unstable.
vocab = sorted(list(set(test_sentence)))
word_to_ix = {word: i for i, word in enumerate(vocab)}


class NGramLanguageModeler(nn.Module):

    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLanguageModeler, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = functional.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = functional.log_softmax(out, dim=1)
        return log_probs


loss_function = nn.NLLLoss()
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# train the model
for epoch in range(20):
    for context, target in trigrams:
        context_indices = [word_to_ix[w] for w in context]
        context_var = autograd.Variable(torch.LongTensor(context_indices))

        model.zero_grad()
        log_probs = model(context_var)

        loss = loss_function(log_probs, autograd.Variable(torch.LongTensor([word_to_ix[target]])))

        loss.backward()
        optimizer.step()

context_tuple = ("<s>", "The")

context_indices = [word_to_ix[w] for w in context_tuple]
context_var = autograd.Variable(torch.LongTensor(context_indices))

model.zero_grad()
log_probs = model(context_var)

sims = []
probs = []
candidates = ["philosopher", "physicist"]

# to calculate which word is closer to mathematician according to cosine similarities
related_embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix["mathematician"]])))

for word in candidates:
    # Probability
    probs.append(log_probs[0][word_to_ix[word]])
    # Cosine similarity
    embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix[word]])))
    sims.append(functional.cosine_similarity(embedding, related_embedding))

print("Predicted word (probability): %s" % (candidates[0] if probs[0] > probs[1] else candidates[1]))
print("Predicted word (cosine similarity): %s" % (candidates[0] if sims[0] > sims[1] else candidates[1]))
If the sorted() call used to build the vocab variable is removed, the results will be different. Since I have already fixed PyTorch's random seed, why are the results not reproducible?
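For reference, here is a minimal sketch (reusing only the test_sentence from the code above, no model involved) that builds the word-to-index mapping with and without sorted(); running it in separate interpreter sessions makes it easy to compare whether the mapping itself stays the same between runs.

# Minimal sketch: build the vocabulary with and without sorted() and print the
# resulting word-to-index mappings so they can be compared across separate runs.
test_sentence = r"""<s> The mathematician ran . <\s>
<s> The mathematician ran to the store . <\s>
<s> The physicist ran to the store . <\s>
<s> The philosopher thought about it . <\s>
<s> The mathematician solved the open problem . <\s>""".split()

unsorted_vocab = list(set(test_sentence))  # iteration order of a set of strings may differ between runs
sorted_vocab = sorted(set(test_sentence))  # deterministic order

print({word: i for i, word in enumerate(unsorted_vocab)})
print({word: i for i, word in enumerate(sorted_vocab)})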