Python 使用自定义数据生成器拟合具有大量数据的Keras模型
我试图用大量的数据来拟合我的Keras模型 为此,我使用自定义数据生成器和Python 使用自定义数据生成器拟合具有大量数据的Keras模型,python,tensorflow,computer-vision,deep-learning,keras,Python,Tensorflow,Computer Vision,Deep Learning,Keras,我试图用大量的数据来拟合我的Keras模型 为此,我使用自定义数据生成器和model.fit\u生成器函数 然而,我似乎无法理解我是否正确地做到了这一点 以下是我所拥有的: from os.path import join import cv2 import numpy as np from keras.models import Sequential from keras.layers.core import Flatten, Dense, Dropout from keras.layers
model.fit_generator
函数
然而,我似乎无法理解我是否正确地做到了这一点
以下是我所拥有的:
from os.path import join
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
# The function returns a list of image names from folder
from data.preprocessing import get_list_of_images
class VGG19(object):
    """Wrapper around a Keras ``Sequential`` model trained from image folders.

    Training and validation images are streamed from disk in mini-batches so
    the full data set never has to be held in memory. The class label is taken
    from the file-name prefix: ``dog.*`` is class 0, everything else class 1.
    """

    def __init__(self, weights_path=None, train_folder='data/train', validation_folder='data/val'):
        """Build the model, optionally load weights, and compile it.

        :param weights_path: file passed to ``load_weights``; falsy to skip loading.
        :param train_folder: folder the training generator reads images from.
        :param validation_folder: folder the validation generator reads images from.
        """
        self.weights_path = weights_path
        self.model = self._init_model()

        # The folders, the augmentation config and the compiled state are set
        # unconditionally so that fit() is usable whether or not pretrained
        # weights were supplied.
        self.datagen = self._init_datagen()
        self.train_folder = train_folder
        self.validation_folder = validation_folder

        if weights_path:
            self.model.load_weights(weights_path)

        self.model.compile(
            loss='binary_crossentropy',
            optimizer='adam',
            metrics=['accuracy']
        )

    def fit(self, batch_size=32, nb_epoch=10):
        """Train the model with mini-batches streamed from the data folders.

        :param batch_size: number of images per batch produced by the generators.
        :param nb_epoch: number of passes over the training folder.
        """
        # One epoch is one pass over every listed file, not a single batch.
        # Files that cannot be decoded are skipped by the generator, in which
        # case an epoch simply borrows a few samples from the next pass.
        nb_train_samples = len(get_list_of_images(self.train_folder))
        nb_val_samples = len(get_list_of_images(self.validation_folder))

        self.model.fit_generator(
            self._generate_data_from_folder(self.train_folder, batch_size),
            nb_train_samples,
            nb_epoch,
            verbose=1,
            callbacks=[
                TensorBoard(log_dir='./logs', write_images=True),
                ModelCheckpoint(filepath='weights.{epoch:02d}-{val_loss:.2f}.hdf5', monitor='val_loss'),
                # NOTE(review): Keras' 'adam' default learning rate is 0.001,
                # so min_lr=0.001 leaves no room for a reduction -- confirm
                # the intended floor.
                ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.001)
            ],
            validation_data=self._generate_data_from_folder(self.validation_folder, batch_size),
            nb_val_samples=nb_val_samples
        )

    def predict(self, X, batch_size=32, verbose=1):
        """Return the model's ``predict`` output for ``X``."""
        return self.model.predict(X, batch_size=batch_size, verbose=verbose)

    def predict_proba(self, X, batch_size=32, verbose=1):
        """Return the model's ``predict_proba`` output for ``X``."""
        return self.model.predict_proba(X, batch_size=batch_size, verbose=verbose)

    def _init_model(self):
        """Create the (still empty) ``Sequential`` model."""
        model = Sequential()
        # model definition goes here...
        return model

    def _init_datagen(self):
        """Create the ``ImageDataGenerator`` holding the augmentation settings."""
        return ImageDataGenerator(
            featurewise_center=True,
            samplewise_center=False,
            featurewise_std_normalization=True,
            samplewise_std_normalization=False,
            zca_whitening=False,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            vertical_flip=True
        )

    @staticmethod
    def _label_from_filename(image_name):
        """Return 0 when the file name starts with ``dog.``, otherwise 1."""
        return 0 if image_name.split('.')[0] == 'dog' else 1

    @staticmethod
    def _iter_batches(samples, batch_size):
        """Group ``(x, y)`` pairs into ``(inputs, targets)`` array batches.

        :param samples: iterable of ``(image_array, label)`` pairs.
        :param batch_size: maximum number of pairs per batch (at least 1).
        :returns: generator of ``(np.ndarray, np.ndarray)`` tuples; the last
            batch may be smaller than ``batch_size``.
        :raises ValueError: if ``batch_size`` is smaller than 1, or (from
            ``np.stack``) if the images in one batch differ in shape.
        """
        if batch_size < 1:
            raise ValueError('batch_size must be >= 1, got {!r}'.format(batch_size))

        xs, ys = [], []
        for x, y in samples:
            xs.append(x)
            ys.append(y)
            if len(xs) == batch_size:
                yield np.stack(xs), np.asarray(ys)
                xs, ys = [], []
        # Emit the remainder so every sample of a pass is seen exactly once.
        if xs:
            yield np.stack(xs), np.asarray(ys)

    @staticmethod
    def _iter_samples(folder_path, image_names):
        """Yield ``(image, label)`` for every readable image in ``image_names``."""
        for image_name in image_names:
            x = cv2.imread(join(folder_path, image_name))
            # cv2.imread signals an unreadable file by returning None.
            if x is None:
                continue
            yield x, VGG19._label_from_filename(image_name)

    def _generate_data_from_folder(self, folder_path, batch_size=32):
        """Endlessly yield ``(inputs, targets)`` mini-batches from a folder.

        The folder is re-listed at the start of every pass. Images are used
        as loaded, so all files are assumed to share one shape -- resize them
        beforehand if they do not.

        :param folder_path: folder whose images are streamed.
        :param batch_size: number of images per yielded batch.
        :raises ValueError: if a full pass over the folder yields no image,
            which would otherwise make the loop spin forever.
        """
        while 1:
            image_names = get_list_of_images(folder_path)
            yielded = False
            samples = self._iter_samples(folder_path, image_names)
            for batch in self._iter_batches(samples, batch_size):
                yielded = True
                yield batch
            if not yielded:
                raise ValueError('no readable images found in {!r}'.format(folder_path))
我的数据集由以下名称的图像组成:
cat.[number].jpg,例如:cat.124.jpg
dog.[number].jpg,例如:dog.64.jpg
我的 _generate_data_from_folder 函数是否正确实现了小批量优化?
如何将 ImageDataGenerator 的用法(来自 _init_datagen 函数)添加到我的 _generate_data_from_folder 函数中?
好的,这里是指向我所做项目最终版本的 GitHub 链接:
希望它能帮到别人。
你所说的“添加用法”是什么意思?
@nemo 我的意思是我想使用 ImageDataGenerator 添加数据增强,但是我不知道该怎么做。我应该将 ImageDataGenerator.flow() 生成器放在我的另一个生成器中,还是以不同的方式使用它?我也不确定我的数据生成器函数是否正确。