PyTorch 1D Dropout causing unstable learning

I'm implementing an Inception-like CNN in PyTorch. After the block of convolutional layers I have three fully connected linear layers, followed by a sigmoid activation to produce my final regression output. I'm testing the effect of the dropout layers in this network, but they give me some unexpected results.

Here is the code:

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam

# InceptionBlock, device, train_loader, test_loader and the LR scheduler
# are defined elsewhere (not shown).

class MyInception(nn.Module):
    def __init__(self, in_channels, verbose=False):
        super(MyInception, self).__init__()
        self.v = verbose
        ic=in_channels; oc=16
        self.inceptionBlock1 = InceptionBlock(in_channels=ic, out_channels=oc, maxpool=False, verbose=verbose) 
        self.inceptionBlock2 = InceptionBlock(in_channels=oc * 6, out_channels=oc, maxpool=False, verbose=verbose) 
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.regressor = nn.Sequential(
            nn.Linear(oc * 6 * 35 * 35, 1024, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),  # <--- Dropout 1
            nn.Linear(1024, 128, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.2, inplace=False),  # <--- Dropout 2
            nn.Linear(128, 1, bias=True),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.inceptionBlock1(x)
        x = self.inceptionBlock2(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.regressor(x)
        return x


def train(epochs=10, dot_every=25):
    running = pd.DataFrame(columns=['Epoch','Round','TrainLoss','TestLoss','LearningRate'])
    for epoch in range(epochs):
        train_losses = []
        model.train()
        counter = 0

        for images, targets in train_loader:
            images = images.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(torch.flatten(outputs), targets)
            train_losses.append( loss.item() )
            loss.backward()
            optimizer.step()

            counter += 1
            if counter % dot_every == 0: 
                print(".",  end='.', flush=True)
                test_loss = test()
            else:
                test_loss = -1.
            lr = np.squeeze(scheduler.get_lr())
            running = running.append(pd.Series([epoch, counter, loss.item(), test_loss, lr], index=running.columns), ignore_index=True)

        test_loss = test()
        train_loss = np.mean(np.asarray(train_losses))
        running = running.append(pd.Series([epoch, counter, train_loss, test_loss, lr], index=running.columns), ignore_index=True)
        print("")
        print(f"Epoch {epoch+1}, Train Loss: {np.round(train_loss,4)}, Test Loss: {np.round(test_loss, 4)}, Learning Rate: {np.format_float_scientific(lr, precision=4)}")
    return running


def test():
    model.eval()
    test_losses = []
    for i, (images,targets) in enumerate(test_loader):
        images = images.to(device)
        targets = targets.to(device)
        outputs = model(images)
        loss = loss_fn(torch.flatten(outputs), targets)
        test_losses.append( loss.item() )

    mean_loss = np.mean(np.asarray(test_losses))
    return mean_loss

# instantiate the model
model = MyInception(in_channels=4, verbose=False).to(device)
# define the optimizer and loss function
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
loss_fn = nn.MSELoss()

# run it
results = train(epochs=10, dot_every=20)

Below is a plot of the MSE loss on the training data: red = no dropout, green = second dropout only, blue = first dropout only, purple = both dropouts. The runs with dropout show a large jump in loss at the epoch boundaries (vertical dashed lines), and the run with both dropouts shows an even bigger jump at the start of epoch 10.

What really matters is the test loss, and after the fifth epoch the two cases are more stable and don't differ much, so perhaps I shouldn't care. But I'd like to know what is going on.


I figured it out. I realized that in the test() call I was switching the model from model.train() to model.eval() without ever setting it back to train mode afterwards. Since dropout layers behave differently in train and eval mode, adding the dropout layers exposed this bug.
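
For reference, here is a minimal sketch of the fix under the same setup as in the question (model, loss_fn, test_loader, device as defined above): remember the mode the model was in before evaluation and restore it afterwards, so dropout is active again for the next training steps. Wrapping evaluation in torch.no_grad() is unrelated to the bug but is a common addition.

def test():
    was_training = model.training          # remember the current mode
    model.eval()                           # dropout is inactive in eval mode
    test_losses = []
    with torch.no_grad():                  # gradients are not needed for evaluation
        for images, targets in test_loader:
            images = images.to(device)
            targets = targets.to(device)
            outputs = model(images)
            test_losses.append(loss_fn(torch.flatten(outputs), targets).item())
    if was_training:
        model.train()                      # put dropout back into training mode
    return np.mean(np.asarray(test_losses))

Equivalently, the original test() can be left as-is and model.train() called right after each test() call inside the training loop; either way the dropout layers are re-enabled when training resumes.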