Python 基于面片的图像训练及其在图像中的概率组合_Python_Python 3.x_Tensorflow_Keras_Neural Network

Python 基于图像块（patch）的图像训练及其输出概率的组合

python python-3.x tensorflow keras neural-network

Python 基于面片的图像训练及其在图像中的概率组合,python,python-3.x,tensorflow,keras,neural-network,Python,Python 3.x,Tensorflow,Keras,Neural Network,首先，我实现了一个简单的VGG16图像分类网络 model = keras.applications.vgg16.VGG16(include_top = False, weights = None, input_shape = (32,32,3), pooling = 'max', classes = 10) 其输入形状为32 x 32。现在，我正在尝试实现一个基于补

首先，我实现了一个简单的

VGG16

图像分类网络

# Build an untrained VGG16 for 32x32 RGB inputs with global max pooling.
# NOTE(review): per the Keras documentation `classes` is only used when
# include_top=True, so with include_top=False this presumably yields a
# feature extractor without a 10-way classifier head -- confirm.
model = keras.applications.vgg16.VGG16(
    include_top=False,
    weights=None,
    input_shape=(32, 32, 3),
    pooling='max',
    classes=10,
)

其输入形状为

32 x 32

。现在，我正在尝试实现一个基于补丁的神经网络。其主要思想是，从输入图像中，提取4个像这样的图像块

并用提取出的图像块进行训练（

大小调整为32 x 32

，因为它是我们模型的输入形状），最后，结合它们的四个输出概率，找到最终的输出结果（使用normalization&argmax）。像这样,

我该怎么做？

提前感谢你的帮助

注意：

我猜使用

lambda层

是可能的

我的简单VGG分类实现是。

我使用 MNIST 数据集，把每张图像切成 4 个图像块（patch），然后将它们作为一个批次（batch）传入模型：

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt

# Load MNIST and wrap each split in a tf.data pipeline of (image, label).
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

patch_s = 18
# The stride is the image side minus the patch side; the reshape in
# get_patches expects this to produce exactly four patches per image.
stride = xtrain.shape[1] - patch_s


def get_patches(x, y):
    """Cut one image into four patches; the label is passed through."""
    # extract_patches wants a 4-D tensor, so add channel and batch axes.
    batched = tf.expand_dims(x[..., None], 0)
    flat_patches = tf.image.extract_patches(
        images=batched,
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    return tf.reshape(flat_patches, (4, patch_s, patch_s, 1)), y


train = train.map(get_patches)
test = test.map(get_patches)

# Preview the four patches of the first training image on a 2x2 grid.
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for position, patch in enumerate(images, start=1):
    ax = plt.subplot(2, 2, position)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(patch)
plt.show()

然后，在训练循环中，我得到这4个输出中每一个的损失：

def compute_loss(model, x, y, training):
  """Return the cross-entropy averaged over the four patches of one image.

  Args:
    model: callable mapping the patch batch to per-patch class probabilities.
    x: the patches of one image, passed to the model as a single batch.
    y: the label of the image; it is shared by every patch.
    training: forwarded to the model call.

  Returns:
    The per-patch losses reduced with a mean over axis 0.
  """
  # Pass the input positionally: the model's call() names its first
  # parameter `inputs`, so the keyword `x=` would not bind to it.
  out = model(x, training=training)
  # All four patches come from the same image, so tile the label 4 times.
  repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
  # The model in this file ends in a softmax layer, so `out` already holds
  # probabilities; from_logits=True would treat them as raw logits.
  loss = loss_object(y_true=repeated_y, y_pred=out, from_logits=False)
  return tf.reduce_mean(loss, axis=0)

然后我沿轴 0 取平均值，把 4 个图像块的结果合并在一起。以下是完整的可运行代码：

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt

# Load MNIST and wrap each split in a tf.data pipeline of (image, label).
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

patch_s = 18
# The stride is the image side minus the patch side; the reshape in
# get_patches expects this to produce exactly four patches per image.
stride = xtrain.shape[1] - patch_s


def get_patches(x, y):
    """Cut one image into four patches; the label is passed through."""
    # extract_patches wants a 4-D tensor, so add channel and batch axes.
    batched = tf.expand_dims(x[..., None], 0)
    flat_patches = tf.image.extract_patches(
        images=batched,
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    return tf.reshape(flat_patches, (4, patch_s, patch_s, 1)), y


train = train.map(get_patches)
test = test.map(get_patches)

# Preview the four patches of the first training image on a 2x2 grid.
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for position, patch in enumerate(images, start=1):
    ax = plt.subplot(2, 2, position)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(patch)
plt.show()

def prepare(inputs, targets):
    """Scale the pixel values by 1/255 and one-hot encode the label.

    Args:
        inputs: image data to be divided by 255.
        targets: class indices, encoded into vectors of depth 10.

    Returns:
        A ``(scaled_inputs, one_hot_targets)`` tuple.
    """
    scaled = tf.divide(inputs, 255)
    encoded = tf.one_hot(targets, depth=10)
    return scaled, encoded

# Keep the first 10,000 training and 1,000 test elements, then normalise
# each element with `prepare`.
train = train.take(10_000).map(prepare)
test = test.take(1_000).map(prepare)

class MyCNN(K.Model):
    """Small CNN classifier: three conv/max-pool stages and a dense head.

    The last Dense layer has 10 units with a softmax activation, so the
    model returns class probabilities rather than logits.
    """

    def __init__(self):
        super().__init__()
        # Shared layer factories so every stage uses the same kernel/pool size.
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))

        self.conv1 = Conv(filters=16)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=32)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=64)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, inputs, training=None, **kwargs):
        """Run the forward pass.

        Args:
            inputs: batch of images fed to the first convolution.
            training: forwarded to the Dropout layer so that dropout is
                applied only when the caller requests training behaviour.
            **kwargs: accepted for signature compatibility; unused.

        Returns:
            The softmax output of the final Dense layer.
        """
        x = self.conv1(inputs)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        # Forward the flag explicitly instead of relying on implicit
        # propagation from the enclosing model call.
        x = self.drop1(x, training=training)
        x = self.dens2(x)
        return x

# Instantiate the network.
model = MyCNN()

# Functional categorical cross-entropy, called by compute_loss.
loss_object = tf.losses.categorical_crossentropy

def compute_loss(model, x, y, training):
  """Return the cross-entropy averaged over the four patches of one image.

  Args:
    model: callable mapping the patch batch to per-patch class probabilities.
    x: the patches of one image, passed to the model as a single batch.
    y: the label of the image; it is shared by every patch.
    training: forwarded to the model call.

  Returns:
    The per-patch losses reduced with a mean over axis 0.
  """
  out = model(x, training=training)
  # All four patches come from the same image, so tile the label 4 times.
  repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
  # The model in this file ends in a softmax layer, so `out` already holds
  # probabilities; from_logits=True would treat them as raw logits.
  loss = loss_object(y_true=repeated_y, y_pred=out, from_logits=False)
  return tf.reduce_mean(loss, axis=0)

def get_grad(model, x, y, training=True):
    """Compute the loss and its gradients w.r.t. the trainable variables.

    Args:
        model: the network being optimised.
        x: the patches of one image.
        y: the label of that image.
        training: forwarded to the forward pass. Defaults to True so that
            dropout is active while gradients are taken.

    Returns:
        A ``(loss, gradients)`` tuple; gradients follow the order of
        ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)


optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d}" \
          " Loss: {:.3f} Acc: {:.3%} TLoss: {:.3f} TAcc: {:.3%}"

for epoch in range(1, 10 + 1):
    # Fresh metric objects every epoch so values do not carry over.
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        # Gradient step runs in training mode (dropout enabled).
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))

    for x, y in test:
        # Evaluation only needs the forward pass: no tape, no gradients.
        loss_value = compute_loss(model, x, y, training=False)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))

    print(verbose.format(epoch,
                         train_loss.result(),
                         train_acc.result(),
                         test_loss.result(),
                         test_acc.result()))

剧透警告：图像块这么小时效果并不好。请让图像块的边长大于 18（原图边长为 28），以获得更好的性能。

我使用 MNIST 数据集，把每张图像切成 4 个图像块（patch），然后将它们作为一个批次（batch）传入模型：

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt

# Load MNIST and wrap each split in a tf.data pipeline of (image, label).
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

patch_s = 18
# The stride is the image side minus the patch side; the reshape in
# get_patches expects this to produce exactly four patches per image.
stride = xtrain.shape[1] - patch_s


def get_patches(x, y):
    """Cut one image into four patches; the label is passed through."""
    # extract_patches wants a 4-D tensor, so add channel and batch axes.
    batched = tf.expand_dims(x[..., None], 0)
    flat_patches = tf.image.extract_patches(
        images=batched,
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    return tf.reshape(flat_patches, (4, patch_s, patch_s, 1)), y


train = train.map(get_patches)
test = test.map(get_patches)

# Preview the four patches of the first training image on a 2x2 grid.
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for position, patch in enumerate(images, start=1):
    ax = plt.subplot(2, 2, position)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(patch)
plt.show()

然后，在训练循环中，我得到这4个输出中每一个的损失：

def compute_loss(model, x, y, training):
  """Return the cross-entropy averaged over the four patches of one image.

  Args:
    model: callable mapping the patch batch to per-patch class probabilities.
    x: the patches of one image, passed to the model as a single batch.
    y: the label of the image; it is shared by every patch.
    training: forwarded to the model call.

  Returns:
    The per-patch losses reduced with a mean over axis 0.
  """
  # Pass the input positionally: the model's call() names its first
  # parameter `inputs`, so the keyword `x=` would not bind to it.
  out = model(x, training=training)
  # All four patches come from the same image, so tile the label 4 times.
  repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
  # The model in this file ends in a softmax layer, so `out` already holds
  # probabilities; from_logits=True would treat them as raw logits.
  loss = loss_object(y_true=repeated_y, y_pred=out, from_logits=False)
  return tf.reduce_mean(loss, axis=0)

然后我沿轴 0 取平均值，把 4 个图像块的结果合并在一起。以下是完整的可运行代码：

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial
import matplotlib.pyplot as plt

# Load MNIST and wrap each split in a tf.data pipeline of (image, label).
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

patch_s = 18
# The stride is the image side minus the patch side; the reshape in
# get_patches expects this to produce exactly four patches per image.
stride = xtrain.shape[1] - patch_s


def get_patches(x, y):
    """Cut one image into four patches; the label is passed through."""
    # extract_patches wants a 4-D tensor, so add channel and batch axes.
    batched = tf.expand_dims(x[..., None], 0)
    flat_patches = tf.image.extract_patches(
        images=batched,
        sizes=[1, patch_s, patch_s, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID')
    return tf.reshape(flat_patches, (4, patch_s, patch_s, 1)), y


train = train.map(get_patches)
test = test.map(get_patches)

# Preview the four patches of the first training image on a 2x2 grid.
fig = plt.figure()
plt.subplots_adjust(wspace=.1, hspace=.2)
images, labels = next(iter(train))
for position, patch in enumerate(images, start=1):
    ax = plt.subplot(2, 2, position)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.imshow(patch)
plt.show()

def prepare(inputs, targets):
    """Scale the pixel values by 1/255 and one-hot encode the label.

    Args:
        inputs: image data to be divided by 255.
        targets: class indices, encoded into vectors of depth 10.

    Returns:
        A ``(scaled_inputs, one_hot_targets)`` tuple.
    """
    scaled = tf.divide(inputs, 255)
    encoded = tf.one_hot(targets, depth=10)
    return scaled, encoded

# Keep the first 10,000 training and 1,000 test elements, then normalise
# each element with `prepare`.
train = train.take(10_000).map(prepare)
test = test.take(1_000).map(prepare)

class MyCNN(K.Model):
    """Small CNN classifier: three conv/max-pool stages and a dense head.

    The last Dense layer has 10 units with a softmax activation, so the
    model returns class probabilities rather than logits.
    """

    def __init__(self):
        super().__init__()
        # Shared layer factories so every stage uses the same kernel/pool size.
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))

        self.conv1 = Conv(filters=16)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=32)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=64)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, inputs, training=None, **kwargs):
        """Run the forward pass.

        Args:
            inputs: batch of images fed to the first convolution.
            training: forwarded to the Dropout layer so that dropout is
                applied only when the caller requests training behaviour.
            **kwargs: accepted for signature compatibility; unused.

        Returns:
            The softmax output of the final Dense layer.
        """
        x = self.conv1(inputs)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        # Forward the flag explicitly instead of relying on implicit
        # propagation from the enclosing model call.
        x = self.drop1(x, training=training)
        x = self.dens2(x)
        return x

# Instantiate the network.
model = MyCNN()

# Functional categorical cross-entropy, called by compute_loss.
loss_object = tf.losses.categorical_crossentropy

def compute_loss(model, x, y, training):
  """Return the cross-entropy averaged over the four patches of one image.

  Args:
    model: callable mapping the patch batch to per-patch class probabilities.
    x: the patches of one image, passed to the model as a single batch.
    y: the label of the image; it is shared by every patch.
    training: forwarded to the model call.

  Returns:
    The per-patch losses reduced with a mean over axis 0.
  """
  out = model(x, training=training)
  # All four patches come from the same image, so tile the label 4 times.
  repeated_y = tf.repeat(tf.expand_dims(y, 0), repeats=4, axis=0)
  # The model in this file ends in a softmax layer, so `out` already holds
  # probabilities; from_logits=True would treat them as raw logits.
  loss = loss_object(y_true=repeated_y, y_pred=out, from_logits=False)
  return tf.reduce_mean(loss, axis=0)

def get_grad(model, x, y, training=True):
    """Compute the loss and its gradients w.r.t. the trainable variables.

    Args:
        model: the network being optimised.
        x: the patches of one image.
        y: the label of that image.
        training: forwarded to the forward pass. Defaults to True so that
            dropout is active while gradients are taken.

    Returns:
        A ``(loss, gradients)`` tuple; gradients follow the order of
        ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)


optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d}" \
          " Loss: {:.3f} Acc: {:.3%} TLoss: {:.3f} TAcc: {:.3%}"

for epoch in range(1, 10 + 1):
    # Fresh metric objects every epoch so values do not carry over.
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        # Gradient step runs in training mode (dropout enabled).
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))

    for x, y in test:
        # Evaluation only needs the forward pass: no tape, no gradients.
        loss_value = compute_loss(model, x, y, training=False)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))

    print(verbose.format(epoch,
                         train_loss.result(),
                         train_acc.result(),
                         test_loss.result(),
                         test_acc.result()))

剧透警告：图像块这么小时效果并不好。请让图像块的边长大于 18（原图边长为 28），以获得更好的性能。