Python 使用数据扩充丢失实例
我试图对 TF 数据集中的数据做数据增强,使用的是 TF 文档中的代码。我的问题是,增强之后剩下的实例反而比开始时少。我的目标是把增强后的数据添加到原始数据中。下面是我的代码:
# Load rock_paper_scissors and carve its single "train" split 80/10/10
# into train / validation / test subsets in one call.
subset_specs = ['train[:80%]', 'train[80%:90%]', 'train[90%:]']
(train_ds, val_ds, test_ds), metadata = tfds.load(
    'rock_paper_scissors',
    split=subset_specs,
    shuffle_files=False,
    with_info=True,
    as_supervised=True,
)
# Check the number of examples in each dataset to ensure the data split/loading worked.
# At this point the datasets are still unbatched, so cardinality counts
# individual (image, label) examples.
train_size = tf.data.experimental.cardinality(train_ds)
valid_size = tf.data.experimental.cardinality(val_ds)
test_size = tf.data.experimental.cardinality(test_ds)
print ('train: %d, valid: %d, test: %d' % (train_size, valid_size, test_size))
# The full train split has 2520 examples, so an 80/10/10 split gives
# train: 2016, valid: 252, test: 252 — which matches the observed output:
# train: 2016, valid: 252, test: 252
# Target edge length for every image fed to the model.
IMG_SIZE = 299

# Preprocessing pipeline: resize each image to IMG_SIZE x IMG_SIZE, then
# scale pixel values from [0, 255] down into [0, 1].
preprocessing_layers = [
    tf.keras.layers.experimental.preprocessing.Resizing(IMG_SIZE, IMG_SIZE),
    tf.keras.layers.experimental.preprocessing.Rescaling(1. / 255),
]
resize_and_rescale = tf.keras.Sequential(preprocessing_layers)
# Random-augmentation pipeline used (at training time only) to produce
# perturbed variants of the input images.
augmentation_layers = [
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
]
data_augmentation = tf.keras.Sequential(augmentation_layers)
# Number of examples per batch in every pipeline built by prepare().
batch_size = 32
# Let tf.data pick the degree of parallelism / prefetch depth at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
def prepare(ds, shuffle=False, augment=False):
    """Build an input pipeline from a dataset of (image, label) pairs.

    Resizes and rescales every image, optionally ADDS a randomly
    augmented copy of the whole dataset onto the original (doubling the
    number of training instances), then shuffles, batches and prefetches.

    Args:
        ds: a tf.data.Dataset yielding (image, label) pairs.
        shuffle: if True, shuffle the element order before batching.
        augment: if True, concatenate an augmented copy of the data.

    Returns:
        A batched, prefetched tf.data.Dataset. NOTE: after .batch(),
        tf.data cardinality reports the number of *batches*, not the
        number of individual examples.
    """
    # Resize and rescale all datasets
    ds = ds.map(lambda x, y: (resize_and_rescale(x), y),
                num_parallel_calls=AUTOTUNE)
    # Use data augmentation only on the training set.
    # BUG FIX: the original code mapped data_augmentation over the
    # dataset, which REPLACES each image with an augmented one and keeps
    # the instance count unchanged. To add augmented data to the
    # original data, map an augmented copy and concatenate it.
    if augment:
        augmented = ds.map(
            lambda x, y: (data_augmentation(x, training=True), y),
            num_parallel_calls=AUTOTUNE)
        ds = ds.concatenate(augmented)
    if shuffle:
        # Shuffle after concatenation so original and augmented
        # examples are interleaved rather than seen in two runs.
        ds = ds.shuffle(1000)
    # Batch all datasets
    ds = ds.batch(batch_size)
    # Use buffered prefetching on all datasets
    return ds.prefetch(buffer_size=AUTOTUNE)
# Build the input pipelines: only the training set is shuffled and augmented.
train_ds = prepare(train_ds, shuffle=True, augment=True)
val_ds = prepare(val_ds)
test_ds = prepare(test_ds)
# Re-check the dataset sizes after prepare(). The datasets are now
# batched, so cardinality() counts *batches* of batch_size examples,
# not individual examples: 2016 examples / 32 = 63 training batches.
# No instances are lost — they are just counted per batch. The labels
# below are fixed to say "batches" so the output is not misleading.
train_size = tf.data.experimental.cardinality(train_ds)
valid_size = tf.data.experimental.cardinality(val_ds)
test_size = tf.data.experimental.cardinality(test_ds)
print ('train batches: %d, valid batches: %d, test batches: %d'
       % (train_size, valid_size, test_size))
# Observed output: train: 63, valid: 8, test: 8 — these are BATCH counts
# (63 batches * 32 per batch = 2016 examples), so no instances were lost.