Python 使用和不使用数据生成器获得不同的训练损失
我一直在训练keras模型来检测面部标志。数据集大约为500MB,包含64x64个图像Python 使用和不使用数据生成器获得不同的训练损失,python,tensorflow,machine-learning,keras,training-data,Python,Tensorflow,Machine Learning,Keras,Training Data,我一直在训练keras模型来检测面部标志。数据集大约为500MB,包含64x64个图像 我将数据生成器类实现为,而不是将整个数据集加载到内存中。我发现我的损失(SmoothL1)减少到5.9,然后停止进一步减少。通过测试36幅图像进行预测,该模型每次都预测出完全相同的地标,如下所示,这很奇怪。 我的数据生成器类别如下: 导入tensorflow作为tf 将numpy作为np导入 进口cv2 class Data_Generator(tf.keras.utils.Sequence): d
class Data_Generator(tf.keras.utils.Sequence):
def __init__(self, data_csv, data_path, dim=(224, 224), grey_scale=False, batch_size=16, preprocess=None, shuffle=False, info={}):
self.data_csv = data_csv
self.PATH = data_path
self.batch_size = batch_size
self.preprocess = preprocess
self.shuffle = shuffle
self.info = info
self.w = dim[0]
self.h = dim[1]
if grey_scale:
self.channels = 1
else:
self.channels = 3
self.on_epoch_end()
def __len__(self):
return int(np.floor(len(self.data_csv) / self.batch_size))
def on_epoch_end(self):
self.indexes = np.arange(len(self.data_csv))
if self.shuffle:
np.random.shuffle(self.indexes)
def preprocess(self, img):
# preprocessing goes here
# ...
return img
def __getitem__(self, idx):
indexes = self.indexes[idx * self.batch_size: (idx + 1) * self.batch_size]
data_temp = self.data_csv.iloc[indexes, :]
imgs_batch = np.empty((self.batch_size, self.h, self.w, self.channels), dtype=np.float32)
points_batch = np.empty((self.batch_size, 40))
for index in range(len(data_temp)):
img_path = df.iloc[index, :]["image_name"]
self.info[index * self.batch_size + index] = img_path
points = df.iloc[index, :]["landmarks"]
points = np.fromstring(points, dtype=float, sep=' ').flatten()
# only selecting mounth (upper and lower lip)
points = points[96:]
if self.channels == 1:
img = cv2.imread(os.path.join(self.PATH, img_path), cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (self.w, self.h), interpolation=cv2.INTER_AREA)
img = np.expand_dims(img, axis=-1)
else:
img = cv2.imread(os.path.join(self.PATH, img_path))
img = cv2.resize(img, (self.w, self.h), interpolation=cv2.INTER_AREA)
imgs_batch[index, ] = img
points_batch[index, ] = points
if self.preprocess:
imgs_batch = self.preprocess(imgs_batch)
return imgs_batch, points_batch
train_data_generator = Data_Generator(train, data_path=PATH_TO_JPEGIMAGES, dim=input_shape, grey_scale=True, batch_size=64, shuffle=True)
validation_data_generator = Data_Generator(test, data_path=PATH_TO_JPEGIMAGES, grey_scale=True, dim=input_shape, batch_size=64)
model.fit(train_data_generator,
epochs=200,
verbose=1,
validation_data=validation_data_generator,
callbacks=[reduce_lr_loss, cb_checkpt, tensorboard_callback]
)