Python:使用 Keras、TensorFlow 和 MobileNet 训练时,AWS P3 实例比本地 GPU 还慢
我目前正在用 Keras 和 TensorFlow 训练(微调)一个预训练的 MobileNet 模型。训练是在我的本地计算机上用 GTX 980 完成的。为了加快速度,我在 AWS 上创建了一个 p3.2xlarge 实例,使用基于 Ubuntu 的 Amazon 深度学习 AMI(原文此处的版本/链接信息已丢失)。用一些测试数据(约 300 张图像)运行时,我注意到本地计算机每个 epoch 大约需要 10 秒,而 AWS 需要 26 秒。我甚至用 p3.16xlarge 实例做了测试,但差别不大。查看 GPU 状态时(原文此处的工具名已丢失,推测为 nvidia-smi),所有显存(每个 GPU 16GB)都已占满。我尝试过不同的数据量、不同的 Keras 实现、不同的批量大小,以及提高 GPU 频率。列出设备时,GPU 显示为正在使用。运行缓慢的原因可能是什么?我用的是 Jupyter Notebook。下面是我的测试代码:
标签:python, amazon-web-services, tensorflow, amazon-ec2, keras
# Fine-tune a pretrained MobileNet as a two-class image classifier and save it.
#
# Pipeline: freeze all but the last four MobileNet layers, stack a small dense
# head on top, stream augmented images from disk, optionally replicate the
# model across several GPUs, train for three epochs, then save to disk.
import math

from keras import layers
from keras import models
from keras import optimizers
from keras.applications import MobileNet
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

# multi_gpu_model was referenced without ever being imported, so the call
# always failed with NameError and training silently stayed on one GPU.
# It only exists in some Keras releases, hence the guarded import.
try:
    from keras.utils import multi_gpu_model
except ImportError:
    multi_gpu_model = None

# Load MobileNet with its default arguments and freeze everything except
# the last four layers so only those (plus the new head) are trained.
# NOTE(review): the Keras default is include_top=True, so the new Dense head
# is fed the original classifier output rather than convolutional features.
# If feature extraction was intended, consider
# MobileNet(include_top=False, pooling='avg') - confirm before changing.
mobile_model = MobileNet()
for layer in mobile_model.layers[:-4]:
    layer.trainable = False

# Create the model: MobileNet base followed by the new classification head.
template_model = models.Sequential()
template_model.add(mobile_model)
template_model.add(layers.Dense(1024, activation='relu'))
template_model.add(layers.Dropout(0.5))
template_model.add(layers.Dense(2, activation='softmax'))

train_dir = "./painOrNoPain/train/"
validation_dir = "./painOrNoPain/valid/"
image_size = 224

# Training images are rescaled to [0, 1] and randomly augmented;
# validation images are only rescaled.
# NOTE(review): augmentation runs on the CPU inside the generator; if the GPU
# is starved, the workers / use_multiprocessing arguments of fit_generator
# are worth trying - not changed here.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
validation_datagen = ImageDataGenerator(rescale=1./255)

# Change the batchsize according to your system RAM
train_batchsize = 128
val_batchsize = 128

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_size, image_size),
    batch_size=train_batchsize,
    class_mode='categorical')
# shuffle=False keeps validation batches in directory order.
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(image_size, image_size),
    batch_size=val_batchsize,
    class_mode='categorical',
    shuffle=False)

# Best effort: replicate the model across GPUs when possible, otherwise keep
# training on the single-device model. Only the expected failure is caught,
# and it is reported instead of being swallowed.
model = template_model
if multi_gpu_model is None:
    print('multi_gpu_model is not available in this Keras version; '
          'training on a single device.')
else:
    try:
        model = multi_gpu_model(template_model)
    except ValueError as err:
        print('Multi-GPU training disabled: {}'.format(err))

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-4),
              metrics=['acc'])

# Step counts must be whole numbers; round up so the final, smaller batch of
# each pass is still consumed.
train_steps = int(math.ceil(
    train_generator.samples / train_generator.batch_size))
validation_steps = int(math.ceil(
    validation_generator.samples / validation_generator.batch_size))

# Train the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=3,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    verbose=2)

# Save the model. The template model shares its weights with the multi-GPU
# wrapper, and it is the one the Keras docs recommend saving.
template_model.save('small_last4.h5')
我也遇到了同样的问题。使用 Amazon AWS 的 GPU 运行,甚至比在我自己的笔记本电脑上用 CPU 运行还要慢。一种可能的解释是:CPU 和 GPU 之间的数据传输耗费了大量时间(原回答此处引用的链接已丢失)。
# Fine-tune a pretrained MobileNet as a two-class image classifier and save it.
#
# Pipeline: freeze all but the last four MobileNet layers, stack a small dense
# head on top, stream augmented images from disk, optionally replicate the
# model across several GPUs, train for three epochs, then save to disk.
import math

from keras import layers
from keras import models
from keras import optimizers
from keras.applications import MobileNet
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator

# multi_gpu_model was referenced without ever being imported, so the call
# always failed with NameError and training silently stayed on one GPU.
# It only exists in some Keras releases, hence the guarded import.
try:
    from keras.utils import multi_gpu_model
except ImportError:
    multi_gpu_model = None

# Load MobileNet with its default arguments and freeze everything except
# the last four layers so only those (plus the new head) are trained.
# NOTE(review): the Keras default is include_top=True, so the new Dense head
# is fed the original classifier output rather than convolutional features.
# If feature extraction was intended, consider
# MobileNet(include_top=False, pooling='avg') - confirm before changing.
mobile_model = MobileNet()
for layer in mobile_model.layers[:-4]:
    layer.trainable = False

# Create the model: MobileNet base followed by the new classification head.
template_model = models.Sequential()
template_model.add(mobile_model)
template_model.add(layers.Dense(1024, activation='relu'))
template_model.add(layers.Dropout(0.5))
template_model.add(layers.Dense(2, activation='softmax'))

train_dir = "./painOrNoPain/train/"
validation_dir = "./painOrNoPain/valid/"
image_size = 224

# Training images are rescaled to [0, 1] and randomly augmented;
# validation images are only rescaled.
# NOTE(review): augmentation runs on the CPU inside the generator; if the GPU
# is starved, the workers / use_multiprocessing arguments of fit_generator
# are worth trying - not changed here.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
validation_datagen = ImageDataGenerator(rescale=1./255)

# Change the batchsize according to your system RAM
train_batchsize = 128
val_batchsize = 128

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_size, image_size),
    batch_size=train_batchsize,
    class_mode='categorical')
# shuffle=False keeps validation batches in directory order.
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=(image_size, image_size),
    batch_size=val_batchsize,
    class_mode='categorical',
    shuffle=False)

# Best effort: replicate the model across GPUs when possible, otherwise keep
# training on the single-device model. Only the expected failure is caught,
# and it is reported instead of being swallowed.
model = template_model
if multi_gpu_model is None:
    print('multi_gpu_model is not available in this Keras version; '
          'training on a single device.')
else:
    try:
        model = multi_gpu_model(template_model)
    except ValueError as err:
        print('Multi-GPU training disabled: {}'.format(err))

# Compile the model
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-4),
              metrics=['acc'])

# Step counts must be whole numbers; round up so the final, smaller batch of
# each pass is still consumed.
train_steps = int(math.ceil(
    train_generator.samples / train_generator.batch_size))
validation_steps = int(math.ceil(
    validation_generator.samples / validation_generator.batch_size))

# Train the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=3,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    verbose=2)

# Save the model. The template model shares its weights with the multi-GPU
# wrapper, and it is the one the Keras docs recommend saving.
template_model.save('small_last4.h5')