Deep learning 如何避免在训练深度神经网络时耗尽内存?

Deep learning 如何避免在训练深度神经网络时耗尽内存?,deep-learning,conv-neural-network,tensorflow2.0,data-augmentation,semantic-segmentation,Deep Learning,Conv Neural Network,Tensorflow2.0,Data Augmentation,Semantic Segmentation,在研究了FCN的基础上,我正在尝试在Pascal VOC 2012上从头开始实现FCN。但每次我试着训练它时,记忆就会消失。我试过: 将批量大小减少到最小值4 减少数据扩充 我不知道我还应该做些什么,到底是什么导致了这个问题。 我该怎么办 我的数据加载器如下所示: import tensorflow as tf import matplotlib.pyplot as plt from PIL import Image import numpy as np import os from alb

在研究了FCN的基础上,我正在尝试在Pascal VOC 2012上从头开始实现FCN。但每次我试着训练它时,记忆就会消失。我试过:

  • 将批量大小减少到最小值4
  • 减少数据扩充
我不知道我还应该做些什么,到底是什么导致了这个问题。 我该怎么办

我的数据加载器如下所示:

import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
from albumentations import RandomRotate90

# Train and Val contains the respective paths for [TrainImage,Labels] and [ValImage,Labels] respectively.

num_classes = 21
Img_Width,Img_Height = 224,224
Train = tf.data.Dataset.from_tensor_slices(Train)
Val = tf.data.Dataset.from_tensor_slices(Val)


def Create_Mask(Img):
  Seg_Labels = np.zeros((Img.shape[0],Img.shape[1],num_classes),dtype=np.float32)
  for class_ in range(num_classes):
    Seg_Labels[:,:,class_] = (Img == class_)
  return tf.cast(Seg_Labels,dtype=tf.float32)

    
def Create_PreProcess_Mask_Img(Instance):
 
  Img = Image.open(Instance[0].numpy())
  Img = Img.resize((Img_Width,Img_Height),resample = Image.BILINEAR)
  Img = np.asarray(Img)


  Mask = Image.open(Instance[1].numpy())
  Mask = Mask.resize((Img_Width,Img_Height),resample = Image.BILINEAR)
  Mask = np.asarray(Mask)   
   
  # Since Mask is in 'P' mode it will automatically convert to labels using Color Palette 

  Normalization = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

  if tf.random.uniform(()) > 0.5:  # Applying data Augmentation
    aug = RandomRotate90(p=1)
    Augmented = aug(image = Img,mask = Mask)

    Img = Augmented["image"]
    Mask = Augmented["mask"]
  
  return Normalization(Img),Create_Mask(Mask)



def Preprocess(Instance):
  Img,Mask = tf.py_function(Create_PreProcess_Mask_Img,[Instance],[tf.float32,tf.float32])
  return tf.ensure_shape(Img,[None,None,3]),tf.ensure_shape(Mask,[None,None,num_classes])  
  #tf.ensure_shape returns the matrix if shape matches else error

def DataLoader(dataset,BATCH_SIZE = 4,BUFFER_SIZE = 256):
  data = dataset.map(Preprocess,num_parallel_calls = tf.data.AUTOTUNE)
  data = data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat(1)
  data = data.prefetch(buffer_size = tf.data.AUTOTUNE)
  return data

Train = DataLoader(Train,BATCH_SIZE = 20)
Val = DataLoader(Val,BATCH_SIZE = 8)
我的模型:

# Lets Create Our FCN Model
def FCN_VGG8():

  Input = tf.keras.layers.Input(shape = [Img_Width,Img_Height,3])
  Conv1 = tf.keras.layers.Conv2D(64,kernel_size=3,strides = 1,padding="same",activation="relu")(Input)
  Conv2 = tf.keras.layers.Conv2D(64,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv1)
  Pool1 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv2)

  Conv3 = tf.keras.layers.Conv2D(128,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool1)
  Conv4 = tf.keras.layers.Conv2D(128,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv3)
  Pool2 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv4)

  Conv5 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool2)
  Conv6 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv5)
  Conv7 = tf.keras.layers.Conv2D(256,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv6)
  Pool3 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv7)

  Conv8 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool3)
  Conv9 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv8)
  Conv10 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv9)
  Pool4 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv10)

  Conv11 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Pool4)
  Conv12 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv11)
  Conv13 = tf.keras.layers.Conv2D(512,kernel_size=3,strides = 1,padding="same",activation="relu")(Conv12)
  Pool5 = tf.keras.layers.MaxPool2D(pool_size=2,strides=2)(Conv13)

  # Fully Convolutional Layer

  FC_Layer = tf.keras.layers.Conv2D(4096,kernel_size=7,activation="relu")(Pool5)
  FC_Drop = tf.keras.layers.Dropout(rate=0.5)(FC_Layer)
  FC_Layer2 = tf.keras.layers.Conv2D(4096,kernel_size=1,activation="relu")(FC_Drop)
  FC_Drop2 = tf.keras.layers.Dropout(rate=0.5)(FC_Layer2)

  # Classification Score Layer
  Score = tf.keras.layers.Conv2D(num_classes,kernel_size=1,activation="relu")(FC_Drop2)
 

  #Upsample Pool4

  Upscore = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=4,strides=2,kernel_initializer="zeros")(Score)
  
  Conv_Scale = tf.keras.layers.Conv2D(num_classes,kernel_size=1)(Pool4)
  Cropped = tf.keras.layers.Cropping2D(cropping=(5,5))(Conv_Scale)

  Fused = tf.keras.layers.add([Cropped,Upscore])
  
  Upsampled_Pool4 = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=4,strides=2,kernel_initializer="zeros")(Fused)

  
  # Upsample Pool3

  Conv_Scale2 = tf.keras.layers.Conv2D(num_classes,kernel_size=1)(Pool3)
  Cropped2 = tf.keras.layers.Cropping2D(cropping=(9,9))(Conv_Scale2)
  Fused2 = tf.keras.layers.add([Cropped2,Upsampled_Pool4])

  Upsampled_Pool3 = tf.keras.layers.Conv2DTranspose(num_classes,kernel_size=128,strides=16,kernel_initializer="zeros")(Fused2)

  # Score per Pixel

  Score = tf.keras.layers.Cropping2D(cropping=(24,24))(Upsampled_Pool3)
  Score = tf.keras.layers.Softmax()(Score)

  return tf.keras.Model(inputs = Input,outputs = Score)



model = FCN_VGG8()
VGG16 = tf.keras.applications.vgg16.VGG16(weights='imagenet')

# I used transfer learning to use pretrained VGG16 network by freezing first 19 layers

for layers in model.layers[:19]:
  layers.trainable = False


EarlyStop = tf.keras.callbacks.EarlyStopping(patience = 10,restore_best_weights=True)
checkpoint_path = os.path.join(os.curdir,"checkpoint")
Checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,save_best_only=True)
Tensorboard = tf.keras.callbacks.TensorBoard(board_log_path)

MeanIou = tf.keras.metrics.MeanIoU(num_classes=21)

model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-4,momentum=0.9),loss=tf.keras.losses.categorical_crossentropy,metrics=[MeanIou])



Epochs = 100
Batchsize = 20
Val_Batch = 8

history = model.fit(Train,validation_data=Val,batch_size=Batchsize,epochs=Epochs,callbacks=[EarlyStop,Checkpoint,Tensorboard],validation_batch_size=Val_Batch)

Epoch 1/100
366/366 [==============================] - ETA: 0s - loss: 2.1287 - mean_io_u: 0.4776

我意识到了一些事情:当训练批量大小为20的模型时,我不认为内存会耗尽,但在预测该历元的验证集时,内存曲线峰值非常高,超出了限制。我该怎么办?