Validation Tensorflow/Keras:剧烈波动(volatile)的验证损失
我一直在训练一个单级小病灶分割的U-Net,并且一直在不断地丢失验证。我有大约20k的图像在训练集和验证集之间分为70/30,所以我认为问题不在于数据太少。我已经尝试过几次洗牌和重新洗牌,波动性没有变化,所以我不认为验证集没有代表性。我尝试过降低学习率,但对波动性没有影响。我尝试了一些损失函数(骰子系数,焦点tversky,加权二元交叉熵)。我正在使用相当数量的增强,以避免过度安装。我还运行了我所有的数据(512x512 float64s和相应的512x512 int64掩码——都存储为numpy数组),并仔细检查值范围、数据类型等是否有问题……我甚至删除了该区域35像素以下的掩码中的任何ROI,我认为这可能是伪影和丢失 我使用的是来自目录的keras ImageDataGen.flow_…我最初使用的是zca_增白和亮度_范围增强,但我认为这会导致来自目录的flow_出现问题,并且会丢失遮罩和图像之间的链接。。所以我跳过了这个 我尝试过使用和不使用shuffle=True的验证生成器。批量大小为8 下面是我的一些代码,如果有帮助的话,我很乐意包含更多代码:Validation Tensorflow/Keras:挥发性验证损失,validation,tensorflow,keras,deep-learning,neural-network,Validation,Tensorflow,Keras,Deep Learning,Neural Network,我一直在训练一个单级小病灶分割的U-Net,并且一直在不断地丢失验证。我有大约20k的图像在训练集和验证集之间分为70/30,所以我认为问题不在于数据太少。我已经尝试过几次洗牌和重新洗牌,波动性没有变化,所以我不认为验证集没有代表性。我尝试过降低学习率,但对波动性没有影响。我尝试了一些损失函数(骰子系数,焦点tversky,加权二元交叉熵)。我正在使用相当数量的增强,以避免过度安装。我还运行了我所有的数据(512x512 float64s和相应的512x512 int64掩码——都存储为nump
# loss
from keras.losses import binary_crossentropy
import keras.backend as K
import tensorflow as tf
epsilon = 1e-5
smooth = 1
def dsc(y_true, y_pred):
    """Soft Dice coefficient between flattened masks, smoothed by +1."""
    eps = 1.  # additive smoothing; keeps the ratio finite on empty masks
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    return (2. * overlap + eps) / (K.sum(truth) + K.sum(pred) + eps)
def dice_loss(y_true, y_pred):
    """Dice loss: one minus the soft Dice coefficient."""
    return 1 - dsc(y_true, y_pred)
def bce_dice_loss(y_true, y_pred):
    """Combined loss: binary cross-entropy plus Dice loss."""
    bce = binary_crossentropy(y_true, y_pred)
    return bce + dice_loss(y_true, y_pred)
def confusion(y_true, y_pred):
    """Return a smoothed (precision, recall) pair from soft confusion counts.

    Predictions are clipped to [0, 1] but NOT thresholded, so the counts are
    "soft". NOTE(review): this returns a tuple, which a single Keras metric
    slot cannot consume directly — confirm how callers use it.
    """
    eps = 1  # smoothing to avoid division by zero
    pred_pos = K.clip(y_pred, 0, 1)
    true_mask = K.clip(y_true, 0, 1)
    tp_soft = K.sum(true_mask * pred_pos)
    fp_soft = K.sum((1 - true_mask) * pred_pos)
    fn_soft = K.sum(true_mask * (1 - pred_pos))
    precision = (tp_soft + eps) / (tp_soft + fp_soft + eps)
    recall = (tp_soft + eps) / (tp_soft + fn_soft + eps)
    return precision, recall
def tp(y_true, y_pred):
    """Smoothed true-positive rate (recall) on rounded, clipped masks."""
    eps = 1
    pred_bin = K.round(K.clip(y_pred, 0, 1))
    true_bin = K.round(K.clip(y_true, 0, 1))
    return (K.sum(true_bin * pred_bin) + eps) / (K.sum(true_bin) + eps)
def tn(y_true, y_pred):
    """Smoothed true-negative rate (specificity) on rounded, clipped masks."""
    eps = 1
    pred_neg = 1 - K.round(K.clip(y_pred, 0, 1))
    true_neg = 1 - K.round(K.clip(y_true, 0, 1))
    return (K.sum(true_neg * pred_neg) + eps) / (K.sum(true_neg) + eps)
def tversky(y_true, y_pred):
    """Tversky index with alpha=0.7: false negatives penalized more than
    false positives. Reads the module-level `smooth` constant.
    """
    alpha = 0.7
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    true_pos = K.sum(truth * pred)
    false_neg = K.sum(truth * (1 - pred))
    false_pos = K.sum((1 - truth) * pred)
    denom = true_pos + alpha * false_neg + (1 - alpha) * false_pos + smooth
    return (true_pos + smooth) / denom
def tversky_loss(y_true, y_pred):
    """Tversky loss: one minus the Tversky index."""
    return 1 - tversky(y_true, y_pred)
def focal_tversky(y_true, y_pred):
    """Focal Tversky loss: (1 - tversky)^gamma with gamma=0.75, which
    up-weights hard examples relative to plain Tversky loss."""
    gamma = 0.75
    return K.pow(1 - tversky(y_true, y_pred), gamma)
# Build the segmentation model and compile with the focal Tversky loss.
# NOTE(review): the first element of the input tuple is the NUMBER OF FILES
# in the training directory, not an image dimension — confirm BlockModel's
# expected input_shape; for 512x512 grayscale this would normally be
# (512, 512, 1) with no leading file count.
model = BlockModel((len(os.listdir(os.path.join(imageroot,'train_ct','train'))), 512, 512, 1),filt_num=16,numBlocks=4)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=weighted_cross_entropy)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=dice_coef_loss)
model.compile(optimizer=Adam(learning_rate=0.001), loss=focal_tversky)
# Mask directories for the paired mask generators defined below.
train_mask = os.path.join(imageroot,'train_masks')
val_mask = os.path.join(imageroot,'val_masks')
model.load_weights(model_weights_path) #I'm initializing with some pre-trained weights from a similar model
# Geometric augmentation settings. The SAME dict is used for image and mask
# generators (plus a shared seed below) so spatial transforms stay aligned
# between each image and its mask.
data_gen_args_mask = dict(
rotation_range=10,
shear_range=20,
width_shift_range=0.1,
height_shift_range=0.1,
zoom_range=[0.8,1.2],
horizontal_flip=True,
#vertical_flip=True,
fill_mode='nearest',
data_format='channels_last'
)
# Currently an exact copy of the mask args; kept separate so image-only
# transforms (e.g. brightness) could be added without touching the masks.
data_gen_args = dict(
**data_gen_args_mask
)
# Train-time generators are augmented; validation generators are not.
image_datagen_train = ImageDataGenerator(**data_gen_args)
mask_datagen_train = ImageDataGenerator(**data_gen_args)#_mask)
image_datagen_val = ImageDataGenerator()
mask_datagen_val = ImageDataGenerator()
# One shared seed keeps image and mask flows shuffled in lockstep.
seed = 1
BS = 8
# Steps per epoch = floor(#files / batch_size), for train and validation.
steps = int(np.floor((len(os.listdir(os.path.join(train_ct,'train'))))/BS))
print(steps)
val_steps = int(np.floor((len(os.listdir(os.path.join(val_ct,'val'))))/BS))
print(val_steps)
# Directory-backed flows. class_mode=None yields raw batches with no labels
# (labels come from the separate mask flow). Identical seed + shuffle settings
# across the image/mask pair are what keep the batches paired.
train_image_generator = image_datagen_train.flow_from_directory(
train_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
train_mask_generator = mask_datagen_train.flow_from_directory(
train_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
# Validation flows: no augmentation, but still shuffled with the same seed.
# NOTE(review): shuffle=True on validation is harmless only if val_steps
# covers the whole set each epoch; otherwise each epoch scores a different
# subset, which by itself makes val_loss look volatile — confirm coverage.
val_image_generator = image_datagen_val.flow_from_directory(
val_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
val_mask_generator = mask_datagen_val.flow_from_directory(
val_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
# Pair image and mask batches. Plain zip() only stays correct if both flows
# advance in lockstep (same seed, same shuffle, same file ordering) —
# NOTE(review): spot-check a few (image, mask) pairs visually before training.
train_generator = zip(train_image_generator, train_mask_generator)
val_generator = zip(val_image_generator, val_mask_generator)
# make callback for checkpointing
plot_losses = PlotLossesCallback(skip_first=0,plot_extrema=False)
%matplotlib inline
filepath = os.path.join(versionPath, model_version + "_saved-model-{epoch:02d}-{val_loss:.2f}.hdf5")
# NOTE(review): `%matplotlib inline` above is IPython magic — this cell only
# runs in a notebook. Also, the bodies of the if/else below appear to have
# been flattened to column 0 by the paste; restore their indentation before
# running this as a script.
if reduce:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
reduce_lr,
plot_losses]
else:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
plot_losses]
# train model
# NOTE(review): fit_generator (and ModelCheckpoint's `period` argument) are
# deprecated in TF2 Keras in favor of model.fit(...) and `save_freq` —
# works on older versions, but emits warnings or errors on recent ones.
history = model.fit_generator(train_generator, epochs=numEp,
steps_per_epoch=steps,
validation_data=val_generator,
validation_steps=val_steps,
verbose=1,
callbacks=cb_check,
use_multiprocessing = False
)
我的损失是这样的:
另一件可能相关的事情:我稍微调整了来自目录代码的流(将npy添加到白名单中)。但是训练损失看起来很好,所以假设问题不在这里有两个建议:
猜测1:虽然您使用的是同一个种子,但使用
shuffle=True
可能会使您受到拖累。我会尝试False
以防万一。---猜测2:在其中一种情况下,您的模型中有BatchNormalization
层:a)存在预训练段,您为这些段设置了trainable=False
;b) BN层在一个退出层之后。我最初对val生成器使用shuffle=False,得到了相同的问题。打印出layer.trainable用于model.layers中的layer,除第一层外,其余均为真。model.summary/model.layers中没有退出层,BN仅在conv或deconv之后。我没有发现您的代码中有任何奇怪的地方,所以我所能做的就是继续尝试猜测问题。-即使您说您的BNs是正常的,您也可以尝试删除BN层,并在编译之前确保它们是“trainable=True”我可以想象的另一件事是,您的图像名称与val目录中的掩码名称不一致。---另一种猜测是,可能你在验证中有太多的空掩码(而你的损失似乎取决于分母是否正确)提示,你可以将len(train\u image\u generator)
和 len(val_image_generator)
作为步数,并比较其他生成器的长度这对你或任何人来说都会容易得多,要调试,请先一步一步地完成此步骤,并确认批处理方式符合预期。外部型号。安装。首先:查看图像加载程序生成的对:它们是否正确配对?