Pytorch RuntimeError:四维权重128 256应为四维输入,但得到的是大小为[32,128]的二维输入

Pytorch RuntimeError:四维权重128 256应为四维输入,但得到的是大小为[32,128]的二维输入,pytorch,dimensions,generative-adversarial-network,Pytorch,Dimensions,Generative Adversarial Network,我正在使用条件GAN作为基础模型创建一个图像生成器。我遇到了一个错误,我不知道如何调试,即使是在网上搜索解决方案之后。我不确定我是否应该更改培训设置或对模型进行一些调整,或者做其他事情。如果您能提供帮助,我们将不胜感激 我正在使用的CGAN模型: class Generator(nn.Module): def __init__(self, classes, channels, img_size, latent_dim): super(Generator, self).__

我正在使用条件GAN作为基础模型创建一个图像生成器。我遇到了一个错误,我不知道如何调试,即使是在网上搜索解决方案之后。我不确定我是否应该更改培训设置或对模型进行一些调整,或者做其他事情。如果您能提供帮助,我们将不胜感激

我正在使用的CGAN模型:

class Generator(nn.Module):
    def __init__(self, classes, channels, img_size, latent_dim):
        super(Generator, self).__init__()
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.img_shape = (self.channels, self.img_size, self.img_size)
        self.label_embedding = nn.Embedding(self.classes, self.classes) # process label information, behave as a lookup table

        self.model = nn.Sequential(
            *self._create_layer_1(self.latent_dim + self.classes, 128, False),
            *self._create_layer_2(128, 256),
            *self._create_layer_2(256, 512),
            *self._create_layer_2(512, 1024),
            nn.Linear(1024, int(np.prod(self.img_shape))),
            nn.Tanh()
        )

    def _create_layer_1(self, size_in, size_out, normalize=True):
        layers = [nn.Linear(size_in, size_out)]
        if normalize:
            layers.append(nn.BatchNorm1d(size_out))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def _create_layer_2(self, size_in, size_out, normalize=True):
        layers = [nn.ConvTranspose2d(size_in, size_out, 4, 2, 1, bias=False)]
        if normalize:
            layers.append(nn.BatchNorm1d(size_out))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def forward(self, noise, labels):
        z = torch.cat((self.label_embedding(labels), noise), -1)
        x = self.model(z)
        x = x.view(x.size(0), *self.img_shape)
        return x


class Discriminator(nn.Module):
    def __init__(self, classes, channels, img_size, latent_dim):
        super(Discriminator, self).__init__()
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.img_shape = (self.channels, self.img_size, self.img_size)
        self.label_embedding = nn.Embedding(self.classes, self.classes)
        self.adv_loss = torch.nn.BCELoss()

        self.model = nn.Sequential(
            *self._create_layer_1(self.classes + int(np.prod(self.img_shape)), 1024, False, True),
            *self._create_layer_2(1024, 512, True, True),
            *self._create_layer_2(512, 256, True, True),
            *self._create_layer_2(256, 128, False, False),
            *self._create_layer_1(128, 1, False, False),
            nn.Sigmoid()
        )

    def _create_layer_1(self, size_in, size_out, drop_out=True, act_func=True):
        layers = [nn.Linear(size_in, size_out)]
        if drop_out:
            layers.append(nn.Dropout(0.4))
        if act_func:
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def _create_layer_2(self, size_in, size_out, drop_out=True, act_func=True):
        layers = [nn.Conv2d(size_in, size_out, 4, 2, 1, bias=False)]
        if drop_out:
            layers.append(nn.Dropout(0.4))
        if act_func:
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def forward(self, image, labels):
        x = torch.cat((image.view(image.size(0), -1), self.label_embedding(labels)), -1)
        return self.model(x)

    def loss(self, output, label):
        return self.adv_loss(output, label)

用于初始化模型的代码:

class Model(object):
    def __init__(self,
                 name,
                 device,
                 data_loader,
                 classes,
                 channels,
                 img_size,
                 latent_dim,
                 style_dim=3):
        self.name = name
        self.device = device
        self.data_loader = data_loader
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.style_dim = style_dim
        self.netG = cganG(self.classes, self.channels, self.img_size, self.latent_dim)
        self.netG.to(self.device)
        self.netD = cganD(self.classes, self.channels, self.img_size, self.latent_dim)
        self.netD.to(self.device)
        self.optim_G = None
        self.optim_D = None

    @property
    def generator(self):
        return self.netG

    @property
    def discriminator(self):
        return self.netD

    def create_optim(self, lr, alpha=0.5, beta=0.999):
        self.optim_G = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        self.netG.parameters()),
                                        lr=lr,
                                        betas=(alpha, beta))
        self.optim_D = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        self.netD.parameters()),
                                        lr=lr,
                                        betas=(alpha, beta))

    def _to_onehot(self, var, dim):
        res = torch.zeros((var.shape[0], dim), device=self.device)
        res[range(var.shape[0]), var] = 1.
        return res

    def train(self,
              epochs,
              log_interval=100,
              out_dir='',
              verbose=True):
        self.netG.train()
        self.netD.train()
        viz_z = torch.zeros((self.data_loader.batch_size, self.latent_dim), device=self.device)
        viz_noise = torch.randn(self.data_loader.batch_size, self.latent_dim, device=self.device)
        nrows = self.data_loader.batch_size // 8
        viz_label = torch.LongTensor(np.array([num for _ in range(nrows) for num in range(8)])).to(self.device)
        viz_onehot = self._to_onehot(viz_label, dim=self.classes)
        viz_style = torch.zeros((self.data_loader.batch_size, self.style_dim), device=self.device)
        total_time = time.time()
        for epoch in range(epochs):
            batch_time = time.time()
            for batch_idx, (data, target) in enumerate(self.data_loader):
                data, target = data.to(self.device), target.to(self.device)
                batch_size = data.size(0)
                real_label = torch.full((batch_size, 1), 1., device=self.device)
                fake_label = torch.full((batch_size, 1), 0., device=self.device)

                # Train G
                self.netG.zero_grad()
                z_noise = torch.randn(batch_size, self.latent_dim, device=self.device)
                x_fake_labels = torch.randint(0, self.classes, (batch_size,), device=self.device)
                x_fake = self.netG(z_noise, x_fake_labels)
                y_fake_g = self.netD(x_fake, x_fake_labels)
                g_loss = self.netD.loss(y_fake_g, real_label)
                g_loss.backward()
                self.optim_G.step()

                # Train D
                self.netD.zero_grad()
                y_real = self.netD(data, target)
                d_real_loss = self.netD.loss(y_real, real_label)
                y_fake_d = self.netD(x_fake.detach(), x_fake_labels)
                d_fake_loss = self.netD.loss(y_fake_d, fake_label)
                d_loss = (d_real_loss + d_fake_loss) / 2
                d_loss.backward()
                self.optim_D.step()

                if verbose and batch_idx % log_interval == 0 and batch_idx > 0:
                    print('Epoch {} [{}/{}] loss_D: {:.4f} loss_G: {:.4f} time: {:.2f}'.format(
                            epoch, batch_idx, len(self.data_loader),
                            d_loss.mean().item(),
                            g_loss.mean().item(),
                            time.time() - batch_time))
                    vutils.save_image(data, os.path.join(out_dir, 'real_samples.png'), normalize=True)
                    with torch.no_grad():
                        viz_sample = self.netG(viz_noise, viz_label)
                        vutils.save_image(viz_sample, os.path.join(out_dir, 'fake_samples_{}.png'.format(epoch)), nrow=8, normalize=True)
                    batch_time = time.time()   
            
            torch.save(self.netG.state_dict(), os.path.join(out_dir, 'netG_{}.pth'.format(epoch)))
            torch.save(self.netD.state_dict(), os.path.join(out_dir, 'netD_{}.pth'.format(epoch)))

            self.save_to(path=out_dir, name=self.name, verbose=False)
        if verbose:
            print('Total train time: {:.2f}'.format(time.time() - total_time))
培训设置代码:

def main():
    device = torch.device("cuda:0" if FLAGS.cuda else "cpu")
    if FLAGS.train:
        dataloader = torch.utils.data.DataLoader(
            dset.ImageFolder(FLAGS.data_dir, transforms.Compose([
                transforms.Resize(FLAGS.img_size),
                transforms.CenterCrop(FLAGS.img_size),
                transforms.ToTensor()
                ])),
                batch_size=FLAGS.batch_size,
                shuffle=True, 
                num_workers=4, 
                pin_memory=True
                )
        model = Model(FLAGS.model, device, dataloader, FLAGS.classes, FLAGS.channels, FLAGS.img_size, FLAGS.latent_dim)
        model.create_optim(FLAGS.lr)

        # Train
        print("Start training...\n")
        model.train(FLAGS.epochs, FLAGS.log_interval, FLAGS.out_dir, True)

if __name__ == '__main__':
    from utils import boolean_string
    parser.add_argument('--cuda', type=boolean_string, default=True, help='enable CUDA.')
    parser.add_argument('--train', type=boolean_string, default=True, help='train mode or eval mode.')
    parser.add_argument('--data_dir', type=str, default='../datasets', help='Directory for dataset.')
    parser.add_argument('--out_dir', type=str, default='output', help='Directory for output.')
    parser.add_argument('--epochs', type=int, default=800, help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='size of batches')
    parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
    parser.add_argument('--latent_dim', type=int, default=62, help='latent space dimension')
    parser.add_argument('--classes', type=int, default=25, help='number of classes')
    parser.add_argument('--img_size', type=int, default=128, help='size of images')
    parser.add_argument('--channels', type=int, default=3, help='number of image channels')
设置:

PyTorch version: 1.1.0
CUDA version: 9.0.176

         Args         |    Type    |    Value
--------------------------------------------------
  cuda                |  bool      |  True
  train               |  bool      |  True
  resume              |  bool      |  False
  data_dir            |  str       |  ../datasets
  out_dir             |  str       |  output
  epochs              |  int       |  800
  batch_size          |  int       |  32
  lr                  |  float     |  0.0002
  latent_dim          |  int       |  62
  classes             |  int       |  25
  img_size            |  int       |  128
  channels            |  int       |  3
作为输入的图像大小:

torch.Size([32, 3, 128, 128])
模型结构:

Generator(
  (label_embedding): Embedding(25, 25)
  (model): Sequential(
    (0): Linear(in_features=87, out_features=128, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): ConvTranspose2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace)
    (5): ConvTranspose2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace)
    (8): ConvTranspose2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace)
    (11): Linear(in_features=1024, out_features=49152, bias=True)
    (12): Tanh()
  )
)

Discriminator(
  (label_embedding): Embedding(25, 25)
  (adv_loss): BCELoss()
  (model): Sequential(
    (0): Linear(in_features=49177, out_features=1024, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): Conv2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): Dropout(p=0.4)
    (4): LeakyReLU(negative_slope=0.2, inplace)
    (5): Conv2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): Dropout(p=0.4)
    (7): LeakyReLU(negative_slope=0.2, inplace)
    (8): Conv2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): Linear(in_features=128, out_features=1, bias=True)
    (10): Sigmoid()
  )
)

我得到的错误是:

File "main.py", line 121, in <module>
    main()
  File "main.py", line 56, in main
    model.train(FLAGS.epochs, FLAGS.log_interval, FLAGS.out_dir, True)
  File "build_gan.py", line 123, in train
    x_fake = self.netG(z_noise, x_fake_labels)
  File "anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "cgan.py", line 42, in forward
    x = self.model(z)
  File "anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "anaconda3/lib/python3.6/site-packages/torch/nn/modules/container.py", line 92, in forward
    input = module(input)
  File "anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
    result = self.forward(*input, **kwargs)
  File "anaconda3/lib/python3.6/site-packages/torch/nn/modules/conv.py", line 796, in forward
    output_padding, self.groups, self.dilation)
RuntimeError: Expected 4-dimensional input for 4-dimensional weight 128 256, but got 2-dimensional input of size [32, 128] instead
文件“main.py”,第121行,在
main()
文件“main.py”,第56行,在main中
模型列车(FLAGS.epochs、FLAGS.log\u interval、FLAGS.out\u dir、True)
文件“build_gan.py”,第123行,列车中
x_-fake=self.netG(z_噪声,x_-fake_标签)
文件“anaconda3/lib/python3.6/site packages/torch/nn/modules/module.py”,第493行,在调用中__
结果=自我转发(*输入,**kwargs)
文件“cgan.py”,第42行,向前
x=自身模型(z)
文件“anaconda3/lib/python3.6/site packages/torch/nn/modules/module.py”,第493行,在调用中__
结果=自我转发(*输入,**kwargs)
文件“anaconda3/lib/python3.6/site packages/torch/nn/modules/container.py”,第92行,向前
输入=模块(输入)
文件“anaconda3/lib/python3.6/site packages/torch/nn/modules/module.py”,第493行,在调用中__
结果=自我转发(*输入,**kwargs)
文件“anaconda3/lib/python3.6/site packages/torch/nn/modules/conv.py”,第796行,向前
输出(填充、自组、自膨胀)
RuntimeError:四维权重128 256应为四维输入,但得到的是大小为[32,128]的二维输入

我使用我自己的图像数据集,有3个通道和25个类。我试图改变图像大小和内核大小,但仍然得到相同的错误。如果您能帮助我调试,我们将不胜感激。

问题实际上在于您的模型架构。您正试图在线性完全连接的层之后放置conv2d层。创建层1生成1d输出。您正试图将此1d输出馈送到需要多维输入的conv2d层

从您的代码中,我觉得让它一次运行的最好方法是从生成器类中完全删除“_create_layer_2”函数,并使用_create_layer_1函数定义所有层(以便所有层都是完全连接的层)。也为您的鉴别器执行此操作

如果您仍然需要使用conv2d。您应该将conv2d的输入重塑为2d张量。在最终线性层之前,还必须将2d张量展平为1d。或者您可以抛弃第一个线性nn.linear层,从conv2d开始

总结 在设计GAN时,您可能有开发CNN的经验。关键是,如果不使用适当的展平/重塑,您不会简单地将conv2d/conv层与线性层混合。

干杯