Deep learning 如何避免在训练深度神经网络时耗尽内存?
在研究了FCN的基础上,我正在尝试在Pascal VOC 2012上从头开始实现FCN。但每次我试着训练它时,记忆就会消失。我试过:Deep learning 如何避免在训练深度神经网络时耗尽内存?,deep-learning,conv-neural-network,tensorflow2.0,data-augmentation,semantic-segmentation,Deep Learning,Conv Neural Network,Tensorflow2.0,Data Augmentation,Semantic Segmentation,在研究了FCN的基础上,我正在尝试在Pascal VOC 2012上从头开始实现FCN。但每次我试着训练它时,记忆就会消失。我试过: 将批量大小减少到最小值4 减少数据扩充 我不知道我还应该做些什么,到底是什么导致了这个问题。 我该怎么办 我的数据加载器如下所示: import tensorflow as tf import matplotlib.pyplot as plt from PIL import Image import numpy as np import os from alb
- 将批量大小减少到最小值4
- 减少数据扩充
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from albumentations import RandomRotate90
# Train and Val contains the respective paths for [TrainImage,Labels] and [ValImage,Labels] respectively.
num_classes = 21
Img_Width,Img_Height = 224,224
Train = tf.data.Dataset.from_tensor_slices(Train)
Val = tf.data.Dataset.from_tensor_slices(Val)
def Create_Mask(Img):
Seg_Labels = np.zeros((Img.shape[0],Img.shape[1],num_classes),dtype=np.float32)
for class_ in range(num_classes):
Seg_Labels[:,:,class_] = (Img == class_)
return tf.cast(Seg_Labels,dtype=tf.float32)
def Create_PreProcess_Mask_Img(Instance):
Img = Image.open(Instance[0].numpy())
Img = Img.resize((Img_Width,Img_Height),resample = Image.BILINEAR)
Img = np.asarray(Img)
Mask = Image.open(Instance[1].numpy())
Mask = Mask.resize((Img_Width,Img_Height),resample = Image.BILINEAR)
Mask = np.asarray(Mask)
# Since Mask is in 'P' mode it will automatically convert to labels using Color Palette
Normalization = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
if tf.random.uniform(()) > 0.5: # Applying data Augmentation
aug = RandomRotate90(p=1)
Augmented = aug(image = Img,mask = Mask)
Img = Augmented["image"]
Mask = Augmented["mask"]
return Normalization(Img),Create_Mask(Mask)
def Preprocess(Instance):
Img,Mask = tf.py_function(Create_PreProcess_Mask_Img,[Instance],[tf.float32,tf.float32])
return tf.ensure_shape(Img,[None,None,3]),tf.ensure_shape(Mask,[None,None,num_classes])
#tf.ensure_shape returns the matrix if shape matches else error
def DataLoader(dataset,BATCH_SIZE = 4,BUFFER_SIZE = 256):
data = dataset.map(Preprocess,num_parallel_calls = tf.data.AUTOTUNE)
data = data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat(1)
data = data.prefetch(buffer_size = tf.data.AUTOTUNE)
return data
Train = DataLoader(Train,BATCH_SIZE = 20)
Val = DataLoader(Val,BATCH_SIZE = 8)
我的模型:
# Lets Create Our FCN Model
def FCN_VGG8():
Input = tf.keras.layers.Input(shape = [Img_Width,Img_Height,3])
Conv1 = tf.keras.layers.Conv2D(64,kernel_size=3,strides = 1,padding="same",activation="relu")(Input)
Conv2 = tf.keras.layers.Conv2D(64,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv1)
Pool1 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv2)
Conv3 = tf.keras.layers.Conv2D(128,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool1)
Conv4 = tf.keras.layers.Conv2D(128,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv3)
Pool2 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv4)
Conv5 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool2)
Conv6 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv5)
Conv7 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv6)
Pool3 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv7)
Conv8 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool3)
Conv9 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv8)
Conv10 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv9)
Pool4 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv10)
Conv11 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool4)
Conv12 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv11)
Conv13 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv12)
Pool5 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv13)
# Fully Convolutional Layer
FC_Layer = tf.keras.layers.Conv2D(4096,kernel_size=7,activation="relu")(Pool5)
FC_Drop = tf.keras.layers.Dropout(rate=0.5)(FC_Layer)
FC_Layer2 = tf.keras.layers.Conv2D(4096,kernel_size=1,activation="relu")(FC_Drop)
FC_Drop2 = tf.keras.layers.Dropout(rate=0.5)(FC_Layer2)
# Classification Score Layer
Score = tf.keras.layers.Conv2D(num_classes,kernel_size=1,activation="relu")(FC_Drop2)
#Upsample Pool4
Upscore = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=4,strides=2,kernel_initializer="zeros")(Score)
Conv_Scale = tf.keras.layers.Conv2D(num_classes,kernel_size=1)(Pool4)
Cropped = tf.keras.layers.Cropping2D(cropping=(5,5))(Conv_Scale)
Fused = tf.keras.layers.add([Cropped,Upscore])
Upsampled_Pool4 = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=4,strides=2,kernel_initializer="zeros")(Fused)
# Upsample Pool3
Conv_Scale2 = tf.keras.layers.Conv2D(num_classes,kernel_size=1)(Pool3)
Cropped2 = tf.keras.layers.Cropping2D(cropping=(9,9))(Conv_Scale2)
Fused2 = tf.keras.layers.add([Cropped2,Upsampled_Pool4])
Upsampled_Pool3 = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=128,strides=16,kernel_initializer="zeros")(Fused2)
# Score per Pixel
Score = tf.keras.layers.Cropping2D(cropping=(24,24))(Upsampled_Pool3)
Score = tf.keras.layers.Softmax()(Score)
return tf.keras.Model(inputs = Input,outputs = Score)
model = FCN_VGG8()
VGG16 = tf.keras.applications.vgg16.VGG16(weights='imagenet')
# I used transfer learning to use pretrained VGG16 network by freezing first 19 layers
for layers in model.layers[:19]:
layers.trainable = False
EarlyStop = tf.keras.callbacks.EarlyStopping(patience = 10,restore_best_weights=True)
checkpoint_path = os.path.join(os.curdir,"checkpoint")
Checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,save_best_only=True)
Tensorboard = tf.keras.callbacks.TensorBoard(board_log_path)
MeanIou = tf.keras.metrics.MeanIoU(num_classes=21)
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-4,momentum=0.9),loss=tf.keras.losses.categorical_crossentropy,metrics=[MeanIou])
Epochs = 100
Batchsize = 20
Val_Batch = 8
history = model.fit(Train,validation_data=Val,batch_size=Batchsize,epochs=Epochs,callbacks=[EarlyStop,Checkpoint,Tensorboard],validation_batch_size=Val_Batch)
Epoch 1/100
366/366 [==============================] - ETA: 0s - loss: 2.1287 - mean_io_u: 0.4776
我意识到了一些事情:当训练批量大小为20的模型时,我不认为内存会耗尽,但在预测该历元的验证集时,内存曲线峰值非常高,超出了限制。我该怎么办?