Python：在 TensorFlow 中编译自定义 Keras 模型时使用 run_eagerly=False 出错
我正在 TensorFlow 中开发一个自定义模型，尝试实现一个虚拟对抗训练（Virtual Adversarial Training, VAT）模型。该模型在分类任务中同时使用有标签和无标签的数据。因此，在模型的 train_step 中，我需要将批次数据划分为有标签（0 或 1）和无标签（-1）两部分。当使用 run_eagerly=True 编译模型时，它似乎能按预期工作；但当我使用 run_eagerly=False 时，它会给出以下错误：
ValueError: Number of mask dimensions must be specified, even if some dimensions are None. E.g. shape=[None] is ok, but shape=None is not.
这似乎产生于:
X_l, y_l = tf.boolean_mask(X, tf.logical_not(missing)), tf.boolean_mask(y, tf.logical_not(missing))
我不确定是什么原因导致了错误，但它似乎与只在 run_eagerly=False 时出现的奇怪张量形状问题有关。我需要 boolean_mask 功能来区分有标签和无标签的数据。希望有人能帮助我。为了重现该错误，我附上了模型代码和一个小的模拟示例。当设置 run_eagerly=False 时，该模拟就会产生上述错误。
提前谢谢
型号定义:
from tensorflow import keras
import tensorflow as tf
metric_acc = keras.metrics.BinaryAccuracy()
metric_loss = keras.metrics.Mean('loss')
class VAT(keras.Model):
    """Virtual Adversarial Training (VAT) model for semi-supervised binary
    classification.

    A two-hidden-layer MLP whose custom ``train_step`` splits every batch into
    labeled (y in {0, 1}) and unlabeled (y == -1) observations and adds a
    virtual adversarial regularization term computed on both subsets.

    Args:
        units_1: Width of the first hidden layer.
        units_2: Width of the second hidden layer.
        dropout: Dropout rate applied after each hidden layer.
        xi: Scale of the random probe used to estimate the adversarial direction.
        epsilon: Norm of the virtual adversarial perturbation added to inputs.
        alpha: Weight of the virtual adversarial loss term.
    """

    def __init__(self, units_1=16, units_2=16, dropout=0.3, xi=1e-6, epsilon=2.0, alpha=1.0):
        super(VAT, self).__init__()
        # Set model hyperparameters
        self.units_1 = units_1
        self.units_2 = units_2
        self.dropout = dropout
        self.xi = xi
        self.epsilon = epsilon
        self.alpha = alpha
        # First hidden layer
        self.dense1 = keras.layers.Dense(self.units_1)
        self.activation1 = keras.layers.Activation(tf.nn.leaky_relu)
        self.dropout1 = keras.layers.Dropout(self.dropout)
        # Second hidden layer
        self.dense2 = keras.layers.Dense(self.units_2)
        self.activation2 = keras.layers.Activation(tf.nn.leaky_relu)
        self.dropout2 = keras.layers.Dropout(self.dropout)
        # Output layer
        self.dense3 = keras.layers.Dense(1)
        self.activation3 = keras.layers.Activation("sigmoid")

    def call(self, inputs, training=None, mask=None):
        """Forward pass returning P(y = 1 | inputs), shape (batch, 1)."""
        x = self.dense1(inputs)
        x = self.activation1(x)
        # BUG FIX: the dropout layers were called with a hard-coded
        # training=True, which kept dropout active even for
        # self(X, training=False) and made inference stochastic. Propagate the
        # caller's training flag instead; training-time calls below still pass
        # training=True explicitly.
        x = self.dropout1(x, training=training)
        x = self.dense2(x)
        x = self.activation2(x)
        x = self.dropout2(x, training=training)
        x = self.dense3(x)
        return self.activation3(x)

    def generate_perturbation(self, inputs):
        """Return the unit-norm virtual adversarial direction for ``inputs``."""
        # Random probe direction, L2-normalized per example
        d = tf.random.normal(shape=tf.shape(inputs))
        d = tf.math.l2_normalize(d, axis=1)
        r = self.xi * d
        # Current predictions; computed outside the tape, so they act as
        # constant targets for the divergence below.
        p = self(inputs, training=True)
        with tf.GradientTape() as tape:
            tape.watch(r)
            # Predictions on the probed inputs
            p_perturbed = self(inputs + r, training=True)
            # Symmetric KL between the Bernoulli distributions p and p_perturbed
            D = keras.losses.KLD(p, p_perturbed) + keras.losses.KLD(1 - p, 1 - p_perturbed)
        gradient = tape.gradient(D, r)
        # The adversarial direction is the normalized gradient of the divergence
        r_vadv = tf.math.l2_normalize(gradient, axis=1)
        return r_vadv

    # NOTE: no @tf.function here — Keras wraps train_step itself when the model
    # is compiled with run_eagerly=False, and an explicit tf.function would
    # prevent run_eagerly=True from actually running eagerly.
    def train_step(self, data):
        """One optimization step over a batch of (X, y) with y == -1 marking
        unlabeled observations."""
        # Unpack data
        X, y = data
        # Boolean indices of the observations with a missing label (-1)
        missing = tf.squeeze(tf.equal(y, -1))
        # BUG FIX: in graph mode tf.squeeze can lose the static rank of the
        # mask, and tf.boolean_mask then fails with "Number of mask dimensions
        # must be specified". Pinning the rank to 1 restores run_eagerly=False.
        missing.set_shape([None])
        # Split the batch into labeled and unlabeled data
        X_l, y_l = tf.boolean_mask(X, tf.logical_not(missing)), tf.boolean_mask(y, tf.logical_not(missing))
        X_u = tf.boolean_mask(X, missing)
        # Virtual adversarial perturbations for both subsets
        r_l = self.generate_perturbation(X_l)
        r_u = self.generate_perturbation(X_u)
        with tf.GradientTape() as model_tape:
            model_tape.watch(self.trainable_variables)
            # Probabilities on the clean data
            prob_l, prob_u = self(X_l, training=True), self(X_u, training=True)
            # Probabilities on the adversarially perturbed data
            prob_r_l, prob_r_u = self(X_l + self.epsilon * r_l, training=True), self(X_u + self.epsilon * r_u, training=True)
            # Supervised + virtual adversarial loss
            loss = vat_loss(y_l, prob_l, prob_u, prob_r_l, prob_r_u, self.alpha)
        model_gradient = model_tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(model_gradient, self.trainable_variables))
        # Update the shared module-level metric trackers
        metric_acc.update_state(y_l, prob_l)
        metric_loss.update_state(loss)
        return {'loss': metric_loss.result(), 'accuracy': metric_acc.result()}

    @property
    def metrics(self):
        # Listing them here lets Keras reset them at the start of each epoch
        return [metric_loss, metric_acc]
def vat_loss(y_l, prob_l, prob_u, prob_r_l, prob_r_u, alpha):
    """Combined supervised + virtual adversarial loss.

    Args:
        y_l: Labels of the labeled subset, one per row of ``prob_l``.
        prob_l: Predictions on the clean labeled inputs.
        prob_u: Predictions on the clean unlabeled inputs.
        prob_r_l: Predictions on the perturbed labeled inputs.
        prob_r_u: Predictions on the perturbed unlabeled inputs.
        alpha: Weight of the virtual adversarial term.

    Returns:
        Scalar tensor ``R / N_l + alpha * R_vadv / (N_l + N_u)``.
    """
    N_l = tf.cast(tf.size(prob_l), dtype=tf.dtypes.float32)
    N_u = tf.cast(tf.size(prob_u), dtype=tf.dtypes.float32)
    # BUG FIX: the original branched with Python `if tf.equal(N_l, 0):`, which
    # only works eagerly — in graph mode (run_eagerly=False) a symbolic tensor
    # cannot be used as a Python boolean. The single expression below is
    # graph-safe and agrees with all three original branches, because each
    # reduce_sum over an empty subset is 0 and divide_no_nan turns the
    # corresponding 0/0 into 0.
    # Supervised term (0 when there are no labeled examples).
    R = tf.reduce_sum(keras.losses.binary_crossentropy(y_l, prob_l))
    # Symmetric-KL virtual adversarial term over both subsets.
    R_vadv = tf.reduce_sum(
        keras.losses.KLD(prob_l, prob_r_l)
        + keras.losses.KLD(1 - prob_l, 1 - prob_r_l)
    ) + tf.reduce_sum(
        keras.losses.KLD(prob_u, prob_r_u)
        + keras.losses.KLD(1 - prob_u, 1 - prob_r_u)
    )
    return tf.math.divide_no_nan(R, N_l) + alpha * tf.math.divide_no_nan(R_vadv, N_l + N_u)
模拟示例：
为了证明模型/代码按预期工作（使用 run_eagerly=True 时），我做了一个模拟示例。在这个示例中，观察值是否保留标签是有偏的。下图展示了模型使用的有标签观察值（黄色或紫色）和无标签观察值（蓝色）。
VAT 模型得到的精度约为 0.75，而参考模型约为 0.58。这些精度都是在未进行超参数调优的情况下得到的。
提前谢谢。
from modules.vat import VAT
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
def create_biased_sample(x, proportion_labeled):
    """Decide whether the observation ``x`` keeps its label.

    Draws a Bernoulli(proportion_labeled) indicator, but only observations
    whose first coordinate lies inside [0, 1] can ever be labeled — this is
    what makes the labeled subsample biased.

    Args:
        x: Feature vector; only ``x[0]`` is inspected.
        proportion_labeled: Probability that an eligible observation is labeled.

    Returns:
        bool: True when the observation should keep its label.
    """
    # Draw the indicator unconditionally so the RNG stream is consumed
    # identically for every observation, eligible or not.
    keep = np.random.choice([True, False], p=[proportion_labeled, 1 - proportion_labeled])
    # Guard clause: points outside [0, 1] on the first axis are never labeled.
    if x[0] < 0.0 or x[0] > 1.0:
        return False
    return keep
# Simulation parameters
N = 2000
proportion_labeled = 0.15
# Model training parameters
BATCH_SIZE = 128
# NOTE(review): BUFFER_SIZE is never used below — presumably a leftover from a
# tf.data shuffle pipeline; confirm before deleting.
BUFFER_SIZE = 60000
EPOCHS = 100
# Generate a dataset: two interleaving half-moons, cast to float32 for TF.
X, y = datasets.make_moons(n_samples=N, noise=.05, random_state=3)
X, y = X.astype('float32'), y.astype('float32')
# Reshape labels to a column vector to match the model's (batch, 1) output.
y = y.reshape(-1, 1)
# Split in train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5)
# Simulate missing labels: only points whose first coordinate lies in [0, 1]
# may keep a label, and then only with probability proportion_labeled.
sample_biased = lambda x: create_biased_sample(x, proportion_labeled)
labeled = np.array([sample_biased(k) for k in X_train])
# -1 marks an unlabeled observation for VAT.train_step.
y_train[~ labeled] = -1
# Estimate VAT model
vat = VAT(dropout=0.2, units_1=16, units_2=16, epsilon=0.5)
# NOTE(review): run_eagerly=True works around the boolean_mask shape error the
# question describes; with the set_shape fix in train_step, run_eagerly=False
# should work as well — verify.
vat.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), run_eagerly=True)
vat.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Estimate a reference model: same architecture as VAT, trained only on the
# labeled subset with plain binary cross-entropy.
reference = keras.models.Sequential([
keras.layers.Input(shape=(2,)),
keras.layers.Dense(16),
keras.layers.Activation(tf.nn.leaky_relu),
keras.layers.Dropout(0.2),
keras.layers.Dense(16),
keras.layers.Activation(tf.nn.leaky_relu),
keras.layers.Dropout(0.2),
keras.layers.Dense(1),
keras.layers.Activation("sigmoid")
])
reference.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01), loss=keras.losses.binary_crossentropy, run_eagerly=False)
reference.fit(X_train[y_train.flatten() != -1, :], y_train[y_train.flatten() != -1], batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Calculate out-of-sample accuracies
test_acc_vat = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, vat(X_test, training=False)))
test_acc_reference = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, reference(X_test, training=False)))
# Print results
print('Test accuracy of VAT: {}'.format(test_acc_vat))
print('Test accuracy of reference model: {}'.format(test_acc_reference))
# Plot scatter: all test points, plus the labeled training points colored by class.
plt.scatter(X_test[:, 0], X_test[:, 1])
plt.scatter(X_train[y_train.flatten() != -1, 0], X_train[y_train.flatten() != -1, 1], c=y_train.flatten()[y_train.flatten() != -1])
from modules.vat import VAT
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
def create_biased_sample(x, proportion_labeled):
    labeled = np.random.choice([True, False], p=[proportion_labeled, 1-proportion_labeled])
    if x[0] < 0.0:
        return False
    elif x[0] > 1.0:
        return False
    else:
        return labeled
# Simulation parameters
N = 2000
proportion_labeled = 0.15
# Model training parameters
BATCH_SIZE = 128
BUFFER_SIZE = 60000
EPOCHS = 100
# Generate a dataset
X, y = datasets.make_moons(n_samples=N, noise=.05, random_state=3)
X, y = X.astype('float32'), y.astype('float32')
y = y.reshape(-1, 1)
# Split in train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5)
# Simulate missing labels
sample_biased = lambda x: create_biased_sample(x, proportion_labeled)
labeled = np.array([sample_biased(k) for k in X_train])
y_train[~ labeled] = -1
# Estimate VAT model
vat = VAT(dropout=0.2, units_1=16, units_2=16, epsilon=0.5)
vat.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), run_eagerly=True)
vat.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Estimate a reference model
reference = keras.models.Sequential([
    keras.layers.Input(shape=(2,)),
    keras.layers.Dense(16),
    keras.layers.Activation(tf.nn.leaky_relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(16),
    keras.layers.Activation(tf.nn.leaky_relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1),
    keras.layers.Activation("sigmoid")
])
reference.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01), loss=keras.losses.binary_crossentropy, run_eagerly=False)
reference.fit(X_train[y_train.flatten() != -1, :], y_train[y_train.flatten() != -1], batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)
# Calculate out-of-sample accuracies
test_acc_vat = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, vat(X_test, training=False)))
test_acc_reference = tf.reduce_mean(keras.metrics.binary_accuracy(y_test, reference(X_test, training=False)))
# Print results
print('Test accuracy of VAT: {}'.format(test_acc_vat))
print('Test accuracy of reference model: {}'.format(test_acc_reference))
# Plot scatter
plt.scatter(X_test[:, 0], X_test[:, 1])
plt.scatter(X_train[y_train.flatten() != -1, 0], X_train[y_train.flatten() != -1, 1], c=y_train.flatten()[y_train.flatten() != -1])
对于任何感兴趣的人：我通过在 train_step() 方法中添加以下内容解决了这个问题：
missing.set_shape([None])
这一行应加在定义 missing 张量之后。我是参考以下讨论串解决这个问题的：