Python: how do I fit a model with two outputs through a single loss function?


I have a model that must return coordinates and, along with them, a confidence. My loss function has to take both the target coordinates and the target availabilities into account. Here is what my loss function looks like:

def loss(targets, target_availabilities, preds, confidences):
    # my loss function here
    return loss

TensorFlow's functional API shows how to pass two different outputs through separate loss functions (or through the same loss function, which returns one loss value per pair of y_true and y_pred). How should I compile and fit my model so that it passes the targets, target availabilities, predictions, and confidences through a single loss function?

I would suggest using a custom training loop for this. It gives you much more flexibility: you can do any kind of computation in your loss function, as long as it returns a single value. Suppose you want to do this:

transformed_output = (y_pred * confidence) - availability
You can implement this in a custom loss function (assuming your neural network architecture returns these three values):
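
A minimal sketch of such a loss function, assuming the predictions are classification logits scored with categorical cross-entropy (the exact combination and base loss are placeholders):

import tensorflow as tf

loss_object = tf.losses.CategoricalCrossentropy(from_logits=True)

def custom_loss(y_true, y_pred, confidence, availability):
    # Combine the three outputs into one prediction tensor, then
    # score it with an ordinary loss that returns a single scalar.
    transformed_output = (y_pred * confidence) - availability
    return loss_object(y_true=y_true, y_pred=transformed_output)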

This returns a single value, and TensorFlow will try to minimize it, whatever it is.

Here's a complete example. Suppose this is the availability:

<tf.Tensor: shape=(1, 10), dtype=float32, 
numpy=array([[0., 0., 0., 0., 1., 0., 0., 0., 1., 1.]], dtype=float32)>
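
A random 0/1 tensor like this can be generated the same way the model below does it:

availability = tf.cast(
    tf.random.uniform((1, 10), 0, 2, dtype=tf.int32), tf.float32)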
Let's train a CNN to classify MNIST with this custom loss function:

import tensorflow as tf

(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

# Scale pixels to [0, 1], add a channel dimension, and one-hot encode labels.
unsqueeze = lambda x, y: (tf.expand_dims(
    tf.divide(
        tf.cast(x, tf.float32), 255), -1),
                          tf.one_hot(y, depth=10))

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain)).\
    shuffle(64).\
    batch(64).\
    map(unsqueeze).\
    prefetch(1)

test = tf.data.Dataset.from_tensor_slices((xtest, ytest)).\
    shuffle(64).\
    batch(64).\
    map(unsqueeze).\
    prefetch(1)

class CNN(tf.keras.Model):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3),
                                            strides=(1, 1),
                                            input_shape=(28, 28, 1))
        self.maxp1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.conv2 = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3),
                                            strides=(1, 1))
        self.maxp2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2))
        self.flat1 = tf.keras.layers.Flatten()
        self.dens1 = tf.keras.layers.Dense(64, activation='relu')
        self.drop1 = tf.keras.layers.Dropout(5e-1)
        self.dens3 = tf.keras.layers.Dense(10)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.flat1(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens3(x)
        # Stand-in outputs: random availability and confidence tensors.
        # In a real model these would come from dedicated output heads.
        availability = tf.cast(tf.random.uniform((len(x), 10), 0, 2,
                                                 dtype=tf.int32), tf.float32)
        confidences = tf.random.uniform((len(x), 10), 0, 1, dtype=tf.float32)
        return x, availability, confidences

model = CNN()

loss_object = tf.losses.CategoricalCrossentropy(from_logits=True)


def compute_loss(model, x, y, training):
    # Combine the model's three outputs into one prediction tensor
    # and score it with a single scalar loss.
    out, avail, conf = model(x, training=training)
    transformed_output = tf.add(tf.multiply(out, conf), avail)
    loss = loss_object(y_true=y, y_pred=transformed_output)
    return loss


def get_grad(model, x, y, training):
    # Record the forward pass so the loss can be differentiated with
    # respect to the model's trainable variables.
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)


optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d} Loss: {:.3f} TLoss: {:.3f} Acc: {:.2%} TAcc: {:.2%}"


for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y, training=True)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        # Only the logits (first output) go into the accuracy metric.
        train_acc.update_state(y, model(x, training=True)[0])

    for x, y in test:
        loss_value, _ = get_grad(model, x, y, training=False)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False)[0])


    print(verbose.format(epoch,
                         train_loss.result(),
                         test_loss.result(),
                         train_acc.result(),
                         test_acc.result()))
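
If you would rather keep compile() and fit(), the same single-loss idea can be expressed by overriding train_step (a minimal sketch reusing the CNN class and loss_object from above; the FitCNN name is just for illustration):

class FitCNN(CNN):
    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            out, avail, conf = self(x, training=True)
            # Same combination as in compute_loss above.
            loss = loss_object(y, tf.add(tf.multiply(out, conf), avail))
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {'loss': loss}

fit_model = FitCNN()
fit_model.compile(optimizer=tf.optimizers.Adam())
fit_model.fit(train, epochs=10)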

I edited my answer; there was a small mistake in the print statement.

Do you have to create a separate class for the model, or can we just use the tf.keras.Model wrapper?

You can do either, as long as the model returns the three outputs mentioned above.
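
For example, a hypothetical functional-API equivalent could return the same three outputs, with the availability and confidence produced by dense heads instead of random tensors:

inputs = tf.keras.Input(shape=(28, 28, 1))
h = tf.keras.layers.Conv2D(16, (3, 3))(inputs)
h = tf.keras.layers.MaxPool2D((2, 2))(h)
h = tf.keras.layers.Flatten()(h)
h = tf.keras.layers.Dense(64, activation='relu')(h)
logits = tf.keras.layers.Dense(10)(h)
availability = tf.keras.layers.Dense(10, activation='sigmoid')(h)
confidences = tf.keras.layers.Dense(10, activation='sigmoid')(h)
functional_model = tf.keras.Model(inputs, [logits, availability, confidences])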