Python Conv Variational Autoencoder loss is NaN
I am training a variational autoencoder. Suddenly my loss blows up and then keeps getting worse, and I don't know why. When I print the output of the encoder, the encoder is already producing NaN values after a few batches! So it does not happen in the sampling part. Could there be something wrong with my loss function? I'm using the Adam optimizer btw, with a learning rate of 0.002.
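Since the encoder itself already produces NaNs after a few batches (see the model code below), it can help to pin down which layer first emits a non-finite value before looking at the loss. The following is only a minimal diagnostic sketch; the hook-based check and the vae name are assumptions layered on top of the code in the question:

import torch

# Make autograd fail loudly at the first backward op that produces NaN/Inf
# (slow, for debugging only).
torch.autograd.set_detect_anomaly(True)

def nan_hook(module, inputs, output):
    # Forward hook: flag the first module whose output contains NaN/Inf.
    out = output[0] if isinstance(output, (tuple, list)) else output
    if isinstance(out, torch.Tensor) and not torch.isfinite(out).all():
        raise RuntimeError(f"Non-finite output in {module.__class__.__name__}")

# 'vae' is the model instance used later in the question (name assumed).
for submodule in vae.modules():
    submodule.register_forward_hook(nan_hook)

With that in place, the exact layer (conv, batch norm, linear) that first goes non-finite shows up in the traceback, instead of only a NaN loss at the end.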
import numpy as np
import torch
import torch.nn as nn


class VAE(nn.Module):
    def __init__(self, input_shape, z_dim):
        super().__init__()
        self.z_dim = z_dim
        self.input_shape = input_shape
        # encoder
        self.encoder_conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.Conv2d(64, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.Conv2d(64, 64, 3, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU()
        )
        self.conv_out_size = self._get_conv_out_size(input_shape)
        self.mu = nn.Sequential(
            nn.Linear(self.conv_out_size, z_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.2)
        )
        self.log_var = nn.Sequential(
            nn.Linear(self.conv_out_size, z_dim),
            nn.LeakyReLU(),
            nn.Dropout(0.2)
        )
        # decoder
        self.decoder_linear = nn.Sequential(
            nn.Linear(z_dim, self.conv_out_size),
            nn.LeakyReLU(),
            nn.Dropout(0.2)
        )
        self.decoder_conv = nn.Sequential(
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ConvTranspose2d(64, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ConvTranspose2d(64, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ConvTranspose2d(64, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ConvTranspose2d(32, 1, 3, stride=1, padding=(5, 3)),
            nn.Sigmoid()
        )

    def sampling(self, mu, log_var):
        # reparameterization trick: z = mu + eps * sigma
        epsilon = torch.Tensor(np.random.normal(size=(self.z_dim), scale=1.0)).cuda()
        return mu + epsilon * torch.exp(log_var / 2)

    def forward_encoder(self, x):
        x = self.encoder_conv(x)
        x = x.view(x.size()[0], -1)
        mu_p = self.mu(x)
        log_var_p = self.log_var(x)
        return [mu_p, log_var_p]

    def forward_decoder(self, x):
        x = self.decoder_linear(x)
        x = x.view(x.size()[0], *self.conv_out_shape[1:])
        x = self.decoder_conv(x)
        return x

    def forward(self, x):
        mu_p, log_var_p = self.forward_encoder(x)
        x = self.sampling(mu_p, log_var_p)
        images_p = self.forward_decoder(x)
        return [mu_p, log_var_p, images_p]

    def _get_conv_out_size(self, shape):
        # run a dummy tensor through the encoder to get the flattened size
        out = self.encoder_conv(torch.zeros(1, *shape))
        self.conv_out_shape = out.size()
        return int(np.prod(self.conv_out_shape))

    def forward_no_epsilon(self, x):
        # deterministic forward pass that skips sampling and decodes mu directly
        mu_p, log_var_p = self.forward_encoder(x)
        x = mu_p
        images_p = self.forward_decoder(x)
        return images_p
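One thing worth noting about the sampling method above: epsilon is drawn with NumPy as a single vector of size z_dim rather than per sample, and .cuda() hard-codes the device. A more typical reparameterization, written here purely as an illustrative sketch (the clamp on log_var is my own safeguard against exp() overflow, not part of the original code), would look like:

def sampling_sketch(mu, log_var):
    # sample epsilon with the same shape, device and dtype as mu
    epsilon = torch.randn_like(mu)
    # clamp log_var so exp(log_var / 2) cannot overflow to inf (assumption)
    log_var = torch.clamp(log_var, min=-10.0, max=10.0)
    return mu + epsilon * torch.exp(log_var / 2)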
Loss:

Training:
mu_v, log_var_v, images_out_v = vae(images_v)
r_loss_v = r_loss(images_out_v, labels_v)   # reconstruction loss
kl_loss_v = kl_loss(mu_v, log_var_v)        # KL divergence term
loss = kl_loss_v + r_loss_v * 10000.0       # reconstruction term weighted by 10000
loss.backward()
optimizer.step()
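The definitions of r_loss and kl_loss are not shown in the question, so purely as a point of comparison, a common way to write the two terms looks like the sketch below (the function names mirror the ones above, but the bodies and the clamp are assumptions, not the original code):

import torch
import torch.nn.functional as F

def r_loss(images_out, labels):
    # per-pixel binary cross-entropy averaged over the batch;
    # assumes targets lie in [0, 1], matching the decoder's Sigmoid output
    return F.binary_cross_entropy(images_out, labels, reduction="mean")

def kl_loss(mu, log_var):
    # closed-form KL divergence between N(mu, sigma^2) and N(0, 1),
    # summed over latent dims and averaged over the batch;
    # the clamp is only a safeguard against exp() overflow (assumption)
    log_var = torch.clamp(log_var, min=-10.0, max=10.0)
    return torch.mean(-0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp(), dim=1))

If the actual kl_loss exponentiates an unclamped log_var, or the reconstruction loss takes a log of values that reach exactly 0 or 1, either term can blow up to Inf/NaN, and a weight of 10000 combined with a learning rate of 0.002 will amplify any such spike.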