PyTorch UNet semantic segmentation


I have a Tensorflow v1 version of a UNet that trains very well with SGD at a learning rate of 0.05.

I rewrote the network in PyTorch because I wanted to expose some functionality that isn't as easy to get at in Tensorflow.

My model always predicts an empty mask, so I tried to get the model to overfit on a single image.

It can overfit a single example image and predict a mask, but only with Adam at a learning rate of 0.0005 and 1000 epochs. My old model could do it in around 10 epochs.

I can't see what I'm doing wrong. I must be doing something wrong, because this is a small problem that should need very little tuning.

import numpy as np
import cv2
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(42)

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, relu=True):
        super().__init__()
        if relu:
            self.double_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            )
        else:
            self.double_conv = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
            )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True, relu=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)

        self.conv = DoubleConv(in_channels, out_channels, relu=relu)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 512)
        self.up1 = Up(1024, 256, bilinear)
        self.up2 = Up(512, 128, bilinear)
        self.up3 = Up(256, 64, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

def decode_segmap(image, num_classes=3):

    label_colors = np.array([(128, 0, 0),
                             (0, 128, 0),
                             (0, 0, 128)])

    r = np.zeros_like(image).astype(np.uint8)
    g = np.zeros_like(image).astype(np.uint8)
    b = np.zeros_like(image).astype(np.uint8)

    for l in range(0, num_classes):
        idx = image == l
        r[idx] = label_colors[l, 0]
        g[idx] = label_colors[l, 1]
        b[idx] = label_colors[l, 2]

    rgb = np.stack([r, g, b], axis=2)
    return rgb


def load_batch(batch_size):
    rotated_frame = Image.open('0test.png')
    rotated_gt = Image.open('0label.png')

    trf = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize(mean = [0.2455],  std = [0.2684])])

    rotated_frame = trf(rotated_frame).unsqueeze(0)

    trf = transforms.Compose([
                    transforms.ToTensor()])
    rotated_gt = trf(rotated_gt).unsqueeze(0)

    rotated_frame = torch.mean(rotated_frame, 1).unsqueeze(1)
    rotated_gt = torch.mean(rotated_gt, 1).unsqueeze(1)

    return rotated_frame.to(device), rotated_gt.type(torch.long).to(device).squeeze(1)


net = UNet(1, 3)
net.to(device=device)

# Loss
#optimizer = optim.RMSprop(net.parameters(), lr=0.005, weight_decay=1e-8)
optimizer = optim.SGD(net.parameters(), lr=0.0005)
#optimizer = optim.Adam(net.parameters(), lr=0.0005)

criterion = nn.CrossEntropyLoss()

# Load data
rotated_frame, rotated_gt = load_batch(1)
print(rotated_frame.shape)
print(rotated_gt.shape)

# Train
epochs = 1000
losses = [] 
for epoch in range(epochs):
    predicted = net(rotated_frame)
    loss = criterion(predicted, rotated_gt)
    losses.append(loss)
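    # NB: optimizer.zero_grad() is never called, so gradients accumulate across iterations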
    loss.backward()
    optimizer.step()
    print('Epoch {}/{} Loss: {}'.format(epoch, epochs, loss))

output = torch.argmax(predicted.squeeze(), dim=0).detach().cpu().numpy()

a, b = np.min(output), np.max(output)
print('Predicted: min: {} max: {}'.format(a, b))
print(output.shape)
rgb = decode_segmap(output)
plt.imshow(rgb)
plt.savefig('predicted_argmaxed.png')

gt = rotated_gt.squeeze().detach().cpu().numpy()
a, b = np.min(gt), np.max(gt)
print('Gt: min: {} max: {}'.format(a, b))
rgb = decode_segmap(gt)
plt.imshow(rgb)
plt.savefig('gt_argmaxed.png')
The example images are below:

[example input and ground-truth images]

Any help would be greatly appreciated.

If you are using CrossEntropyLoss, have you tried adding weights for the classes?

weights = torch.tensor([0.75, 1], dtype=torch.float)
criterion = torch.nn.CrossEntropyLoss(weight=weights, reduction='none').to(device)

If your model produces an empty mask (e.g. an all-white mask), it can in theory still minimize the loss, because the class that fills the whole image appears to be the dominant one; depending on how many classes you are trying to predict, you could add more weight to the border classes.

The weights you see above are the ones I used for binary classification, where one class was 70% of the pixels and the other 30%.

Otherwise, the BN that Nate mentioned would also help. Your learning rate also seems a bit too low.

Edit: just for clarification, from the docs:

weight (Tensor, optional) – a manual rescaling weight given to each class. If given, has to be a Tensor of size C
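
For the 3-class setup in the question specifically, here is a minimal sketch (my own illustration, not part of the original answer) of deriving the weight tensor as inverse class frequencies from the label tensor in the script above:

# hypothetical sketch: weight each class by the inverse of its pixel frequency in rotated_gt
counts = torch.bincount(rotated_gt.flatten(), minlength=3).float()
weights = counts.sum() / (counts + 1e-6)   # rare classes get large weights; epsilon avoids division by zero
criterion = nn.CrossEntropyLoss(weight=weights.to(device))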

I don't see any BN in your DoubleConv. That might be the culprit. Also, are you sure you want categorical cross-entropy here? If you want binary cross-entropy, use BCELoss instead. Do you get a float image in your decode_segmap output? (Your example output image doesn't look binary.) If so, apply a sigmoid in each channel. Then, for debugging, dump its values and check whether the image is entirely zeros (or ones), or whether there are some float values in between. You may want to drop the argmax on the prediction to look at the actual net output.
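
For reference, a minimal sketch of the suggested fix: inserting nn.BatchNorm2d between each convolution and its ReLU. The block's own docstring, "(convolution => [BN] => ReLU) * 2", already describes this, but the posted code never implements it (the relu flag from the original is omitted here for brevity):

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),   # normalize activations before the nonlinearity
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

With BN in place, training typically tolerates a much higher learning rate, which also lines up with the answer above calling 0.0005 a bit too low.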