PyTorch UNet semantic segmentation


I have a Tensorflow v1 version of a UNet that trains very well with SGD at a learning rate of 0.05.

I rewrote the network in PyTorch because I wanted to expose some functionality that isn't as easy to get at in Tensorflow.

My model always predicts an empty mask, so I tried to get the model to overfit on a single image.

It can overfit a single example image and predict a mask, but only with Adam at a learning rate of 0.0005 and 1000 epochs. My old model could do it in around 10 epochs.

I can't see what I'm doing wrong. I must be doing something wrong, because this is a small problem that should need very little tuning.

import numpy as np
import cv2
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, relu=True):
        super().__init__()
        if relu:
            self.double_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            )
        else:
            self.double_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
            )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True, relu=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)

        self.conv = DoubleConv(in_channels, out_channels, relu=relu)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 512)
        self.up1 = Up(1024, 256, bilinear)
        self.up2 = Up(512, 128, bilinear)
        self.up3 = Up(256, 64, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

def decode_segmap(image, num_classes=3):

    label_colors = np.array([(128, 0, 0),
                             (0, 128, 0),
                             (0, 0, 128)])

    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)

    for l in range(0, num_classes):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]

    rgb = np.stack([r, g, b], axis=2)
    return rgb


def load_batch(batch_size):
    rotated_frame = Image.open('0test.png')
    rotated_gt = Image.open('0label.png')

    trf = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize(mean = [0.2455],  std = [0.2684])])

    rotated_frame = trf(rotated_frame).unsqueeze(0)

    trf = transforms.Compose([
                    transforms.ToTensor()])
    rotated_gt = trf(rotated_gt).unsqueeze(0)

    rotated_frame = torch.mean(rotated_frame, 1).unsqueeze(1)
    rotated_gt = torch.mean(rotated_gt, 1).unsqueeze(1)

    return rotated_frame.to(device), rotated_gt.type(torch.long).to(device).squeeze(1)


net = UNet(1, 3)
net.to(device=device)

# Loss
#optimizer = optim.RMSprop(net.parameters(), lr=0.005, weight_decay=1e-8)
optimizer = optim.SGD(net.parameters(), lr=0.0005)
#optimizer = optim.Adam(net.parameters(), lr=0.0005)

criterion = nn.CrossEntropyLoss()

# Load data
rotated_frame, rotated_gt = load_batch(1)
print(rotated_frame.shape)
print(rotated_gt.shape)

# Train
epochs = 1000
losses = [] 
for epoch in range(epochs):
    predicted = net(rotated_frame)
    loss = criterion(predicted, rotated_gt)
    losses.append(loss)
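    # NB: optimizer.zero_grad() is never called, so gradients accumulate across iterations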
    loss.backward()
    optimizer.step()
    print('Epoch {}/{} Loss: {}'.format(epoch, epochs, loss))

output = torch.argmax(predicted.squeeze(), dim=0).detach().cpu().numpy()

a, b = np.min(output), np.max(output)
print('Predicted: min: {} max: {}'.format(a, b))
print(output.shape)
rgb = decode_segmap(output)
plt.imshow(rgb)
plt.savefig('predicted_argmaxed.png')

gt = rotated_gt.squeeze().detach().cpu().numpy()
a, b = np.min(gt), np.max(gt)
print('Gt: min: {} max: {}'.format(a, b))
rgb = decode_segmap(gt)
plt.imshow(rgb)
plt.savefig('gt_argmaxed.png')
The example images are below:

[example input and ground-truth images]

Any help would be greatly appreciated.

If you are using CrossEntropyLoss, have you tried adding weights for the classes?

weights = torch.tensor([0.75, 1], dtype=torch.float)
criterion = torch.nn.CrossEntropyLoss(weight=weights, reduction='none').to(device)

If your model produces an empty mask (e.g. an all-white mask), it can in theory still minimize the loss, because the class that fills the whole image appears to be the dominant one; depending on how many classes you are trying to predict, you could add more weight to the border classes.

The weights you see above are the ones I used for binary classification, where one class was 70% of the pixels and the other 30%.

Otherwise, the BN that Nate mentioned would also help. Your learning rate also seems a bit too low.

Edit: just for clarification, from the docs:

weight (Tensor, optional) – a manual rescaling weight given to each class. If given, has to be a Tensor of size C
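
For the 3-class setup in the question specifically, here is a minimal sketch (my own illustration, not part of the original answer) of deriving the weight tensor as inverse class frequencies from the label tensor in the script above:

# hypothetical sketch: weight each class by the inverse of its pixel frequency in rotated_gt
counts = torch.bincount(rotated_gt.flatten(), minlength=3).float()
weights = counts.sum() / (counts + 1e-6)   # rare classes get large weights; epsilon avoids division by zero
criterion = nn.CrossEntropyLoss(weight=weights.to(device))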

I don't see any BN in your DoubleConv. That might be the culprit. Also, are you sure you want categorical cross-entropy here? If you want binary cross-entropy, use BCELoss instead. Do you get a float image in your decode_segmap output? (Your example output image doesn't look binary.) If so, apply a sigmoid in each channel. Then, for debugging, dump its values and check whether the image is entirely zeros (or ones), or whether there are some float values in between. You may want to drop the argmax on the prediction to look at the actual net output.
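
For reference, a minimal sketch of the suggested fix: inserting nn.BatchNorm2d between each convolution and its ReLU. The block's own docstring, "(convolution => [BN] => ReLU) * 2", already describes this, but the posted code never implements it (the relu flag from the original is omitted here for brevity):

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),   # normalize activations before the nonlinearity
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

With BN in place, training typically tolerates a much higher learning rate, which also lines up with the answer above calling 0.0005 a bit too low.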