Deep learning: LSTM layer returns nan when fed by its own output in PyTorch

I am trying to generate time-series data with an LSTM and a mixture density network, as described here. A link to my implementation is here; the repository contains a toy dataset for training the network. During training, the LSTM layer returns nan for its hidden state after a single iteration. A similar issue has been reported before. For convenience, the code is below:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ts = torch.load('LDS_Toy_Data.pt')

def detach(states):
    return [state.detach() for state in states]

class MDNLSTM(nn.Module):
    def __init__(self, d_obs, d_lat=2, n_gaussians=2, n_layers=1):
        super(MDNLSTM, self).__init__()

        self.d_obs = d_obs
        self.d_lat = d_lat
        self.n_gaussians = n_gaussians
        self.n_layers = n_layers

        self.lstm = nn.LSTM(d_obs, d_lat, n_layers, batch_first=True)
        self.fcPi = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcMu = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcSigma = nn.Linear(d_lat, n_gaussians*d_obs)

    def get_mixture_coef(self, y):
        time_steps = y.size(1)
        pi, mu, sigma = self.fcPi(y), self.fcMu(y), self.fcSigma(y)

        pi = pi.view(-1, time_steps, self.n_gaussians, self.d_obs)
        mu = mu.view(-1, time_steps, self.n_gaussians, self.d_obs)
        sigma = sigma.view(-1, time_steps, self.n_gaussians, self.d_obs)

        pi = F.softmax(pi, 2)
        sigma = torch.exp(sigma)
        return pi, mu, sigma


    def forward(self, x, h):
        y, (h, c) = self.lstm(x, h)
        #print(h)
        pi, mu, sigma = self.get_mixture_coef(y)
        return (pi, mu, sigma), (h, c)

    def init_hidden(self, bsz):
        return (torch.zeros(self.n_layers, bsz, self.d_lat).to(device),
                torch.zeros(self.n_layers, bsz, self.d_lat).to(device))

def mdn_loss_fn(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    loss = torch.exp(m.log_prob(y))
    loss = torch.sum(loss * pi, dim=2)
    loss = -torch.log(loss)
    return loss.mean()

def criterion(y, pi, mu, sigma):
    y = y.unsqueeze(2)
    return mdn_loss_fn(y, pi, mu, sigma)

DOBS = 10
DLAT = 2
INSTS = 100
seqlen = 30
epochs = 200


mdnlstm = MDNLSTM(DOBS, DLAT).to(device)
optimizer = torch.optim.Adam(mdnlstm.parameters())

z =  torch.from_numpy(ts[:INSTS,:,:]).float().to(device)

# hiddens=[]
# Train the model
for epoch in range(epochs):
    # Set initial hidden and cell states
    hidden = mdnlstm.init_hidden(INSTS)

    for i in range(0, z.size(1) - seqlen, seqlen):
        # Get mini-batch inputs and targets
        inputs = z[:, i:i+seqlen, :]
        targets = z[:, (i+1):(i+1)+seqlen, :]

        hidden = detach(hidden)
#         hiddens.append(hidden)
        (pi, mu, sigma), hidden = mdnlstm(inputs, hidden)
        loss = criterion(targets, pi, mu, sigma)
        mdnlstm.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 100 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}'
               .format(epoch, epochs, loss.item()))

I would appreciate any help with this.

This problem was caused by the log and exp operations not being done in a numerically stable way. Below is an implementation of the weighted log-sum-exp trick I used, which solves the issue:

def weighted_logsumexp(x, w, dim=None, keepdim=False):
    if dim is None:
        x, dim = x.view(-1), 0
    xm, _ = torch.max(x, dim, keepdim=True)
    x = torch.where(
        # to prevent nasty nan's
        (xm == float('inf')) | (xm == float('-inf')),
        xm,
        xm + torch.log(torch.sum(torch.exp(x - xm) * w, dim, keepdim=True)))
    return x if keepdim else x.squeeze(dim)
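
As a quick sanity check (my own example, not part of the original answer), log-probabilities that are very negative underflow to zero under the naive exp/sum/log computation used in mdn_loss_fn, whereas weighted_logsumexp stays finite:

import torch

# Two mixture components with equal weights; the log-probabilities are far too
# negative for exp() to represent in float32, so it silently underflows to 0.
log_probs = torch.tensor([[-800.0, -900.0]])
weights = torch.tensor([[0.5, 0.5]])

naive = torch.log(torch.sum(torch.exp(log_probs) * weights, dim=1))  # tensor([-inf])
stable = weighted_logsumexp(log_probs, weights, dim=1)               # tensor([-800.6931]), finite

print(naive, stable)

The -inf produced by the naive version is what eventually turns the loss and the LSTM's hidden state into nan once gradients flow back through it.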
Using it, I implemented a stable loss function:

def mdn_loss_stable(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    m_lp_y = m.log_prob(y)
    loss = -weighted_logsumexp(m_lp_y, pi, dim=2)
    return loss.mean()

This worked like a charm. In general, the problem is that torch does not report underflow.
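
For completeness, here is a sketch of how the stable loss might be dropped into the training loop from the question (criterion_stable is a hypothetical wrapper name, not part of the original answer); it unsqueezes the target over the Gaussian dimension exactly as the original criterion did:

def criterion_stable(y, pi, mu, sigma):
    # broadcast the target over the mixture-component dimension, as in the original criterion
    y = y.unsqueeze(2)
    return mdn_loss_stable(y, pi, mu, sigma)

# inside the training loop, replacing the original criterion call:
# (pi, mu, sigma), hidden = mdnlstm(inputs, hidden)
# loss = criterion_stable(targets, pi, mu, sigma)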
