Python 3.x: TensorFlow 2.0 GradientTape with EarlyStopping
I am using Python 3.7.5 and TensorFlow 2.0's "GradientTape" API to classify the MNIST dataset with a 300-100 dense, fully-connected architecture. I want to combine TensorFlow's "EarlyStopping" with GradientTape() so that training stops based on a monitored variable and a patience parameter. My code is as follows:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Use tf.data to batch and shuffle the dataset
# (X_train, y_train, X_test, y_test are the flattened 784-feature MNIST arrays)
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(100).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

# Choose an optimizer and loss function for training.
# MNIST has 10 classes, so a categorical loss matches the softmax output;
# this assumes integer labels (use CategoricalCrossentropy for one-hot targets).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
def create_nn_gradienttape():
    """
    Create the dense 784-300-100-10 network
    (MNIST 300-100 architecture) for use
    with the GradientTape API.
    """
    model = Sequential()
    model.add(
        Dense(
            units = 300, activation = 'relu',
            kernel_initializer = tf.keras.initializers.GlorotNormal(),
            input_shape = (784,)
        )
    )
    model.add(
        Dense(
            units = 100, activation = 'relu',
            kernel_initializer = tf.keras.initializers.GlorotNormal()
        )
    )
    model.add(
        Dense(
            units = 10, activation = 'softmax'
        )
    )
    return model
# Instantiate the model to be trained using GradientTape
model = create_nn_gradienttape()

# Select metrics to measure the error & accuracy of the model.
# These metrics accumulate values over an epoch and are then
# printed as the overall result.
train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name = 'train_accuracy')
test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name = 'test_accuracy')
# Use tf.GradientTape to train the model
@tf.function
def train_step(data, labels):
    """
    Function to perform one step of gradient
    descent optimization.
    """
    with tf.GradientTape() as tape:
        # 'training=True' is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        # predictions = model(data, training=True)
        predictions = model(data)
        loss = loss_fn(labels, predictions)

    # 'gradients' is a list: one tensor per trainable variable
    gradients = tape.gradient(loss, model.trainable_variables)

    # IMPORTANT: multiply each gradient element-wise with its pruning mask.
    # 'mask_model_stripped' holds the layer-wise 0/1 masks; it is not defined
    # in this snippet (see the sketch after this code block).
    grad_mask_mul = []
    for grad_layer, mask in zip(gradients, mask_model_stripped.trainable_weights):
        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))

    # optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)
@tf.function
def test_step(data, labels):
    """
    Function to evaluate model performance
    on the test dataset.
    """
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(data)
    t_loss = loss_fn(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)
EPOCHS = 15

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for x, y in train_ds:
        train_step(x, y)

    for x_t, y_t in test_ds:
        test_step(x_t, y_t)

    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:.4f}'
    print(template.format(epoch + 1,
                          train_loss.result(), train_accuracy.result() * 100,
                          test_loss.result(), test_accuracy.result() * 100))
# Count the number of non-zero parameters in each layer and in total
# print("Layer-wise, the number of nonzero parameters per layer:\n")
model_sum_params = 0

for layer in model.trainable_weights:
    # print(tf.math.count_nonzero(layer, axis = None).numpy())
    model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()

print("Total number of non-zero trainable parameters = {0}\n".format(model_sum_params))
In the code above, how can I use 'tf.keras.callbacks.EarlyStopping' together with the GradientTape() API? (One way to drive the callback by hand is sketched after the comments below.)

Thanks.

Comments:

Why not use the callback with fit() instead of with GradientTape?

@thushv89 I have to apply layer-wise masking operations for magnitude-based sparse pruning, so I have to use GradientTape rather than the fit() method. Is there a way to use EarlyStopping with GradientTape()?

@Arun Did you solve this?

@M.Innat Yes, I solved it.

Could you post your solution?
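Keras callbacks are normally driven by fit(), but an EarlyStopping instance can also be stepped manually from a custom GradientTape loop: attach it to the model, call its lifecycle hooks yourself, and pass it a logs dict containing the monitored value. A minimal sketch, assuming the test loss computed above is the monitored quantity (one common approach, not the asker's unposted solution):

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = 'test_loss',            # key looked up in the 'logs' dict below
    patience = 3,                     # epochs without improvement before stopping
    restore_best_weights = True
)
early_stopping.set_model(model)       # the callback sets model.stop_training
early_stopping.on_train_begin()
model.stop_training = False

for epoch in range(EPOCHS):
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for x, y in train_ds:
        train_step(x, y)
    for x_t, y_t in test_ds:
        test_step(x_t, y_t)

    # Feed the monitored value to the callback, exactly as fit() would
    early_stopping.on_epoch_end(epoch, logs = {'test_loss': test_loss.result().numpy()})
    if model.stop_training:
        print("Early stopping triggered at epoch {0}".format(epoch + 1))
        break

early_stopping.on_train_end()         # restores the best weights if requested

The same effect can be had without the callback by tracking a best-loss variable and a patience counter by hand; the callback route is shown here because the question asks for tf.keras.callbacks.EarlyStopping specifically.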