Generalization differences in PyTorch when using nn.Sequential?

I have a deep autoencoder network, and I have noticed that generalization performance is better when I use nn.Sequential than when I do not (i.e., when I pass the input through the layers explicitly). Has anyone else noticed this behavior, or can anyone explain why it happens? Does PyTorch handle regularization differently inside a Sequential block?

Below is a code snippet; the full code is attached at the end for reference. I switch between using nn.Sequential and not using it with the variable use_sequential. I find that my test accuracy is consistently worse when I do not use the Sequential modules than when I do.

class Net(nn.Module):
    def __init__(self, hidden_dim, in_dim, use_sequential):
        super(Net, self).__init__()
        self.use_sequential = use_sequential
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim

        self.sig = nn.Sigmoid()
        self.encoder = nn.Sequential(
                nn.Linear(in_dim, in_dim),
                nn.BatchNorm1d(in_dim),
                nn.Linear(in_dim, hidden_dim)
        )
        self.decoder = nn.Sequential(
                nn.Linear(hidden_dim, in_dim),
                nn.BatchNorm1d(in_dim),
                nn.Linear(in_dim, in_dim)
        )

    def encode(self, x):
        if self.use_sequential:
            x = self.encoder(x)
        else:
            x = self.lin1(x)
            x = self.batchnorm(x)
            x = self.lin2(x)
        return x

    def decode(self, x):
        if self.use_sequential:
            x = self.decoder(x)
        else:
            x = self.lin3(x)
            x = self.batchnorm(x)
            x = self.lin4(x)
        return x

    def forward(self, x):
        x = self.encode(x)
        x = self.decode(x)
        x = self.sig(x) # Sigmoid for BCELoss
        return x
Here is the output of the script. As you can see, the test accuracy/loss gets worse as the models train, even though their training accuracy/loss stays similar (I initialize all weights myself; see the full code at the end):

I have found that the problem lies in the BatchNorm1d layer, because the issue goes away when I remove it from the model. Is there some difference with BatchNorm1d inside a Sequential block, or am I making a mistake that I have overlooked? Thanks in advance for any help.
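As a quick sanity check (a minimal sketch, not part of the original script, assuming a standalone layer with copied state), wrapping a BatchNorm1d in nn.Sequential should not by itself change its output for the same input and the same parameters/buffers:

import torch
import torch.nn as nn

torch.manual_seed(0)
bn_direct = nn.BatchNorm1d(4)                       # standalone layer
bn_seq = nn.Sequential(nn.BatchNorm1d(4))           # same kind of layer wrapped in Sequential
bn_seq[0].load_state_dict(bn_direct.state_dict())   # copy parameters and running stats

x = torch.randn(8, 4)
print(torch.allclose(bn_direct(x), bn_seq(x)))      # expected: True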

Here is the full code:

import torch
import torch.nn as nn
from torch.utils import data
from torch.optim import Adam
from tqdm import tqdm

class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, data, labels):
        self.labels = labels
        self.data = data
    def __len__(self):
        'Denotes the total number of samples'
        return len(self.labels)
    def __getitem__(self, index):
        'Generates one sample of data'
        # Load data and get label
        X = self.data[index]
        y = self.labels[index]
        return X, y

class Net(nn.Module):
    def __init__(self, hidden_dim, in_dim, use_sequential):
        super(Net, self).__init__()
        self.use_sequential = use_sequential
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim

        self.sig = nn.Sigmoid()
        self.encoder = nn.Sequential(
                nn.Linear(in_dim, in_dim),
                nn.BatchNorm1d(in_dim),
                nn.Linear(in_dim, hidden_dim)
                )
        self.decoder = nn.Sequential(
                nn.Linear(hidden_dim, in_dim),
                nn.BatchNorm1d(in_dim),
                nn.Linear(in_dim, in_dim)
                )


        self.lin1 = nn.Linear(in_dim, in_dim)
        self.lin1.weight.data.fill_(0.01)
        self.lin1.bias.data.fill_(0.01)

        self.batchnorm = nn.BatchNorm1d(in_dim)
        self.batchnorm.weight.data.fill_(0.01)
        self.batchnorm.bias.data.fill_(0.01)

        self.lin2 = nn.Linear(in_dim, hidden_dim)
        self.lin2.weight.data.fill_(0.01)
        self.lin2.bias.data.fill_(0.01)

        self.lin3 = nn.Linear(hidden_dim, in_dim)
        self.lin3.weight.data.fill_(0.01)
        self.lin3.bias.data.fill_(0.01)

        self.lin4 = nn.Linear(in_dim, in_dim)
        self.lin4.weight.data.fill_(0.01)
        self.lin4.bias.data.fill_(0.01)

    def encode(self, x):
        if self.use_sequential:
            x = self.encoder(x)
        else:
            x = self.lin1(x)
            x = self.batchnorm(x)
            x = self.lin2(x)
        return x

    def decode(self, x):
        if self.use_sequential:
            x = self.decoder(x)
        else:
            x = self.lin3(x)
            x = self.batchnorm(x)
            x = self.lin4(x)
        return x

    def forward(self, x):
        x = self.encode(x)
        x = self.decode(x)
        x = self.sig(x) # Sigmoid for BCELoss
        return x

def accuracy(preds, labels):
    acc2 = 1 - torch.sum(torch.abs(preds-labels)).item() / (list(preds.size())[0]*list(preds.size())[1])
    return acc2

def generate_data(block_size):
    train_data = torch.randint(2, (10000, block_size)).float()
    test_data = torch.randint(2, (2500, block_size)).float()

    train_labels = train_data
    test_labels = test_data
    return train_data, train_labels, test_data, test_labels

def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.BatchNorm1d:
        m.weight.data.fill_(0.01)
        m.bias.data.fill_(0.01)
    if type(m) == nn.PReLU:
        m.weight.data.fill_(0.01)

########################## Train code ####################
IN_DIM = 4
HIDDEN_DIM = 32
EPOCHS = 200
BATCH_SIZE = 256

# Generate data
train_data, train_labels, test_data, test_labels = generate_data(IN_DIM)                                                   

# Data loading                                                                                                                                        
params = {'batch_size': BATCH_SIZE,                                                                                                              
          'shuffle': True,                                                                                                                            
          'num_workers': 8}                                                                                                                
training_set = Dataset(train_data, train_labels)                                                                                                      
training_loader = torch.utils.data.DataLoader(training_set, **params)                                                                                 

# Sequential and non-sequential models                                                                                                                                                                                                                                                          
model_seq = Net(hidden_dim=HIDDEN_DIM, in_dim=IN_DIM, use_sequential=True)   
model_non = Net(hidden_dim=HIDDEN_DIM, in_dim=IN_DIM, use_sequential=False)  
model_seq.apply(init_weights)
model_non.apply(init_weights)

loss_fn = nn.BCEWithLogitsLoss()  

optimizer_seq = Adam(model_seq.parameters(), lr=0.001)
optimizer_non = Adam(model_non.parameters(), lr=0.001)

# Training  
for epoch in range(EPOCHS):                                                                                                                                                          
    model_seq.train()  
    model_non.train()
    for batch_idx, (batch, labels) in enumerate(training_loader):                            

        # Train step for the sequential model
        output_seq = model_seq(batch)                                                                                                                     
        loss_seq = loss_fn(output_seq, labels) 
        optimizer_seq.zero_grad()                                                                                                                     
        loss_seq.backward()                                                                                                                           
        optimizer_seq.step()     

        # Train step for the non-sequential model
        output_non = model_non(batch)                                                                                                                     
        loss_non = loss_fn(output_non, labels) 
        optimizer_non.zero_grad()                                                                                                                     
        loss_non.backward()                                                                                                                           
        optimizer_non.step()

        if batch_idx % (BATCH_SIZE-1) == 0:                                                                                                  
            pred_seq = torch.round(output_seq)                                                                                                            
            acc_seq = accuracy(pred_seq, labels)                                                                                                    
            print('SEQUENTIAL TRAIN, Epoch %2d: loss=%.4f, acc=%.2f' % (epoch, loss_seq.item(), acc_seq))  

            pred_non = torch.round(output_non)                                                                                                            
            acc_non = accuracy(pred_non, labels)                                                                                                    
            print('NONSEQUENTIAL TRAIN, Epoch %2d: loss=%.4f, acc=%.2f' % (epoch, loss_non.item(), acc_non))          

            # Sequential Validation  
            model_seq.eval()
            val_output_seq = model_seq(test_data)
            val_loss_seq = loss_fn(val_output_seq, test_labels)
            val_pred_seq = torch.round(val_output_seq)
            val_acc_seq = accuracy(val_pred_seq, test_labels)
            print('---> SEQUENTIAL TEST: Epoch %2d: loss=%.4f, acc=%.5f' % (epoch, val_loss_seq.item(), val_acc_seq))
            model_seq.train()

            # Nonsequential Validation
            model_non.eval()  
            val_output_non = model_non(test_data)
            val_loss_non = loss_fn(val_output_non, test_labels)
            val_pred_non = torch.round(val_output_non)
            val_acc_non = accuracy(val_pred_non, test_labels)
            print('---> NONSEQUENTIAL TEST: Epoch %2d: loss=%.4f, acc=%.5f' % (epoch, val_loss_non.item(), val_acc_non))
            model_non.train()

            print('\n')

What are you actually comparing this way: nn.Sequential, nn.ModuleList, or something else? And how do you measure generalization in your case? Posting the code of the two approaches you are comparing would help.
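For reference (a minimal sketch, not from the original thread), the distinction the comment draws: nn.Sequential chains its children in a fixed order inside forward, while nn.ModuleList only registers the layers and leaves the forward pass to you:

import torch
import torch.nn as nn

class ListNet(nn.Module):
    def __init__(self):
        super().__init__()
        # ModuleList registers the layers (so their parameters are tracked),
        # but does not define how they are called
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.BatchNorm1d(4), nn.Linear(4, 32)])

    def forward(self, x):
        for layer in self.layers:   # the call order is written out explicitly
            x = layer(x)
        return x

# nn.Sequential defines the same forward pass implicitly
seq = nn.Sequential(nn.Linear(4, 4), nn.BatchNorm1d(4), nn.Linear(4, 32))
x = torch.randn(8, 4)
print(ListNet()(x).shape, seq(x).shape)   # both torch.Size([8, 32])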