Python 如何将Conv2d推广到MaxPool2d到ConvTranspose2d?

Python 如何将Conv2d推广到MaxPool2d到ConvTranspose2d?,python,machine-learning,neural-network,pytorch,max-pooling,Python,Machine Learning,Neural Network,Pytorch,Max Pooling,我已经推广了PyTorch自动编码器实现。我使用的一个主要策略是重新调整序列的比例,该序列指示每层的节点。通过这种方式,我可以对各种网络大小进行实验 为了MRE,我将在一个有效的案例和一个无效的案例中提供原始的、非通用的代码。我正在搜索Conv2d、MaxPool2d和ConvTranspose2d之间的关系 # imports import torch import numpy as np import torch.nn as nn import torch.optim as optim im

我已经推广了PyTorch自动编码器实现。我使用的一个主要策略是重新调整序列的比例,该序列指示每层的节点。通过这种方式,我可以对各种网络大小进行实验

为了MRE,我将在一个有效的案例和一个无效的案例中提供原始的、非通用的代码。我正在搜索Conv2d、MaxPool2d和ConvTranspose2d之间的关系

# imports
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets    
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
# Original autoencoder class.
class autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        # encoder layers
        self.enc1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        #######################################################
        # The relationship here is beyond me.
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)  
        #######################################################
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=2, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.out = nn.Conv2d(64, 1, kernel_size=3, padding=1)
        
    def forward(self, x):
        # encode
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        x = self.pool(x)
        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        x = F.sigmoid(self.out(x))
        return x
    
def train(network, trainloader):
    train_loss = list()
    for epoch in range(10):
        running_loss = 0.0
        for data in trainloader:
            img, _ = data
            img_noisy = img + 0.5 * torch.randn(img.shape)
            img_noisy = np.clip(img_noisy, 0., 1.)
            img_noisy = img_noisy.to(device)
            optimizer.zero_grad()
            outputs = network(img_noisy)

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = datasets.FashionMNIST(
    root='./data',
    train=True, 
    download=True,
    transform=transform
)
testset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)
trainloader = DataLoader(
    trainset, 
    batch_size=16,
    shuffle=True
)
testloader = DataLoader(
    testset, 
    batch_size=16, 
    shuffle=True
)

net = autoencoder()
device = 'cpu'
net.to(device)  
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr = 1e-3)
train_loss = train(net, trainloader)
# MRE for the autoencoder class I generalize.
# I have only added one more layer, and not scaled
# the node per layer sequence, but expanded to 128.
class autoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        # encoder layers
        self.enc0 = nn.Conv2d(1, 128,  kernel_size=3, padding=1)
        self.enc1 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)  
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=2, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.dec5 = nn.ConvTranspose2d(64, 128, kernel_size=2, stride=2)
        self.out = nn.Conv2d(128, 1, kernel_size=3, padding=1)
        
    def forward(self, x):
        # encode
        x = F.relu(self.enc0(x))
        x = self.pool(x)
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        x = self.pool(x)
        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        x = F.relu(self.dec5(x))
        x = F.sigmoid(self.out(x))
        return x
    
def train(network, trainloader):
    train_loss = list()
    for epoch in range(10):
        running_loss = 0.0
        for data in trainloader:
            img, _ = data
            img_noisy = img + 0.5 * torch.randn(img.shape)
            img_noisy = np.clip(img_noisy, 0., 1.)
            img_noisy = img_noisy.to(device)
            optimizer.zero_grad()
            outputs = network(img_noisy)

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = datasets.FashionMNIST(
    root='./data',
    train=True, 
    download=True,
    transform=transform
)
testset = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform
)
trainloader = DataLoader(
    trainset, 
    batch_size=16,
    shuffle=True
)
testloader = DataLoader(
    testset, 
    batch_size=16, 
    shuffle=True
)

net = autoencoder()
device = 'cpu'
net.to(device)  
criterion = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr = 1e-3)
train_loss = train(net, trainloader)
# Error
Traceback (most recent call last):
  File "C:\Users\User\Desktop\ml_paper\mre.py", line 89, in <module>
    train_loss = train(net, trainloader)
  File "C:\Users\User\Desktop\ml_paper\mre.py", line 58, in train
    outputs = network(img_noisy)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\User\Desktop\ml_paper\mre.py", line 38, in forward
    x = self.pool(x)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\modules\pooling.py", line 159, in forward
    self.return_indices)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\_jit_internal.py", line 247, in fn
    return if_false(*args, **kwargs)
  File "C:\Users\User\Desktop\ml_paper\lib\site-packages\torch\nn\functional.py", line 576, in _max_pool2d
    input, kernel_size, stride, padding, dilation, ceil_mode)
RuntimeError: Given input size: (8x1x1). Calculated output size: (8x0x0). Output size is too small

import math
import torch.nn as nn
# infer kernel sizes for decoding layers.
series = [1, 8, 16, 32, 64, 128]
kernel_set = 3
tmp = nn.ModuleList()
for layer in range(1, len(series)):
    if layer == 1:
        tmp.append(nn.ConvTranspose2d(series[layer], series[layer], kernel_set, stride = 2))
    else:
        if layer > math.floor(len(series)/2):
            tmp.append(nn.ConvTranspose2d(series[layer - 1], series[layer], kernel_set - 1, stride = 2))
        else:
            tmp.append(nn.ConvTranspose2d(series[layer - 1], series[layer], kernel_set, stride = 2))
print(tmp)
ModuleList(
  (0): ConvTranspose2d(8, 8, kernel_size=(3, 3), stride=(2, 2))
  (1): ConvTranspose2d(8, 16, kernel_size=(3, 3), stride=(2, 2))
  (2): ConvTranspose2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (3): ConvTranspose2d(32, 64, kernel_size=(2, 2), stride=(2, 2))
  (4): ConvTranspose2d(64, 128, kernel_size=(2, 2), stride=(2, 2))
)