Python RNN variational autoencoder reconstructs well, but generation is poor
I am trying to reproduce this result by training an RNN-based variational autoencoder. Although reconstruction of the original text works very well, generation of new text is quite poor. I have given my model architecture below; it is loosely based on this. While debugging, I noticed that the output is always the token inside pred = [[self.start_idx]], repeated max_len times. In this case that is the <SOS> start token, as can be seen in the example sentences at the end of the post.
import torch
import torch.nn as nn

class SentenceVAE(nn.Module):
    def __init__(self, embedding_size, vocab_size, hidden_size, latent_dim, dropout, device,
                 max_len=50, pad_idx=0, start_idx=1, end_idx=2, unk_idx=3):
        super(SentenceVAE, self).__init__()
        self.tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
        self.embed = nn.Embedding(vocab_size, embedding_size, pad_idx)
        self.hidden_to_mu = nn.Linear(hidden_size, latent_dim)
        self.hidden_to_logvar = nn.Linear(hidden_size, latent_dim)
        self.dropout = nn.Dropout(dropout)
        self.word_dropout = dropout  # probability of replacing a decoder input token with <unk> in forward()
        self.encoder_gru = nn.GRU(embedding_size, hidden_size, batch_first=True)
        self.decoder_gru = nn.GRU(embedding_size, hidden_size, batch_first=True)
        # maps the latent code z to the initial hidden state of the decoder GRU
        self.flow_fc = nn.Sequential(
            nn.Linear(latent_dim, 1024),
            nn.GELU(),
            nn.Linear(1024, hidden_size)
        )
        self.out = nn.Linear(hidden_size, vocab_size)
        self.device = device
        self.latent_dim = latent_dim
        self.unk_idx = unk_idx
        self.start_idx = start_idx
        self.end_idx = end_idx
        self.pad_idx = pad_idx
    def reparameterize(self, mu, logvar):
        # z = mu + sigma * eps with eps ~ N(0, I)
        eps = torch.randn_like(logvar)
        std = torch.exp(0.5 * logvar)
        return mu + eps * std

    def decode(self, hidden, dec_in):
        decoder_input = self.embed(dec_in)
        if len(hidden.size()) < 3:
            hidden = hidden.unsqueeze(0)  # GRU expects hidden of shape (num_layers, batch, hidden_size)
        outputs, hidden = self.decoder_gru(decoder_input, hidden)
        out = self.out(outputs)
        return out, hidden
    def sample_sentence(self, z=None):
        # greedy decoding from a latent code (sampled from the prior if none is given)
        max_len = 20
        batch = 1
        if z is None:
            z = torch.randn((batch, self.latent_dim))
        z = z.to(self.device)
        hidden = self.flow_fc(z)
        pred = [[self.start_idx]]
        out_sent = []
        for i in range(max_len):
            pred_tensor = torch.tensor(pred)
            pred_tensor = pred_tensor.to(self.device)
            preds, hidden = self.decode(hidden, pred_tensor)
            preds = preds[:, -1, :]
            pred_index = torch.argmax(preds, dim=-1)
            pred[0] = [pred_index.item()]  # feed the prediction back as the next decoder input
            out_sent.append(pred_index.item())
            if pred_index.item() == self.end_idx:
                break
        return out_sent
    def forward(self, x):
        enc_in, dec_in = x, x
        encoder_input = self.embed(enc_in)
        _, rnn_hidden = self.encoder_gru(encoder_input)
        rnn_hidden = rnn_hidden.squeeze(0)
        mu = self.hidden_to_mu(rnn_hidden)
        logvar = self.hidden_to_logvar(rnn_hidden)
        z = self.reparameterize(mu, logvar)
        # Randomly replace decoder input words with <unk> (word dropout),
        # leaving <SOS>, <EOS> and <pad> untouched
        dec_in_copy = dec_in.clone()
        prob = torch.rand(dec_in.size(), device=dec_in.device)
        prob[(dec_in == self.start_idx) | (dec_in == self.end_idx) | (dec_in == self.pad_idx)] = 1
        dec_in_copy[prob < self.word_dropout] = self.unk_idx
        hidden = self.flow_fc(z)
        out, _ = self.decode(hidden, dec_in_copy)
        return mu, logvar, out
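The training loop is not shown above. For a VAE like this, the mu, logvar and out returned by forward would typically be combined into a reconstruction cross-entropy plus a KL term; the following is only a minimal sketch of a single training step under that assumption, not my original training code (train_step, optimizer, batch and kl_weight are hypothetical names):

import torch.nn.functional as F

def train_step(model, optimizer, batch, kl_weight=1.0):
    # batch: LongTensor of token ids, shape (batch_size, seq_len), starting with <SOS>
    mu, logvar, out = model(batch)
    # reconstruction loss: logits at position t predict the token at position t + 1
    logits = out[:, :-1, :].reshape(-1, out.size(-1))
    targets = batch[:, 1:].reshape(-1)
    recon = F.cross_entropy(logits, targets, ignore_index=model.pad_idx)
    # KL divergence between q(z|x) = N(mu, sigma^2) and the prior N(0, I)
    kl = -0.5 * torch.mean(torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1))
    loss = recon + kl_weight * kl
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return recon.item(), kl.item()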
<SOS> gondry 's direction is adequate ... but what gives human nature its unique feel is kaufman 's script . <EOS>
<SOS> it 's direction is adequate ... but what gives human nature its unique feel is kaufman 's approach . <EOS>
<SOS> there seems to be no clear path as to where the story 's going , or how long it 's going to take to get there . <EOS>
<SOS> there seems to be no amount path , to where the most 's going , or even long it 's going to take to get there . <EOS>
<SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS> <SOS>