Neural network 为什么这个 Keras 孪生(Siamese)图像匹配网络没有学到任何东西?
对于第一个健全性检查,我正在尝试建立一个网络,该网络学习为相同的图像对输出Neural network 为什么这个Keras暹罗图像匹配网络没有学到任何东西?,neural-network,computer-vision,deep-learning,keras,conv-neural-network,Neural Network,Computer Vision,Deep Learning,Keras,Conv Neural Network,对于第一个健全性检查,我正在尝试建立一个网络,该网络学习为相同的图像对输出1,为不相同的图像对输出0,希望它能很快适应 损失减少,但无论我怎么尝试,精度都会在0.5左右反弹 使用ResNet50作为共享的暹罗分支对,我将它们与元素减法合并,并将结果“差分层”馈送到单个乙状结肠单元中,如中所述。我还尝试了其他几种建议的变体;例如softmax输出、串联而非减法等 以下示例可以由任何人运行,前提是您提供一个目录的路径,该目录至少包含两个图像,以便作为命令行参数进行匹配 from keras.appl
作为第一个健全性检查,我正在尝试构建一个网络,让它学习对相同的图像对输出 1,对不相同的图像对输出 0,希望它能很快拟合。
损失在下降,但无论我怎么尝试,准确率始终在 0.5 左右波动。
使用 ResNet50 作为一对权重共享的孪生(Siamese)分支,我通过逐元素减法将两个分支的输出合并,并将得到的“差值层”送入单个 sigmoid 单元(做法参照了原帖所引用的资料)。我还尝试了其他几种建议的变体,例如 softmax 输出、用拼接代替减法等。
以下示例可以由任何人运行,前提是您提供一个目录的路径,该目录至少包含两个图像,以便作为命令行参数进行匹配
from keras.applications.resnet50 import ResNet50
from keras.models import Model
# from keras.utils.visualize_util import plot
from keras.layers import merge, \
Dense, \
Dropout, \
Input, \
GlobalAveragePooling2D, \
Lambda, \
BatchNormalization, \
Activation
from keras.layers.merge import Add, Multiply, Concatenate
from keras.optimizers import Adam, SGD, RMSprop
from keras.engine import Layer
import keras.backend as K
from keras import regularizers
import os
import random
from PIL import Image
import numpy as np
import cv2
def manhattan_distance(pair):
    """Return the L1 (Manhattan) distance between the two tensors in `pair`.

    `pair` is indexed as ``pair[0]`` and ``pair[1]``. The absolute
    element-wise difference is summed along axis 1 and the reduced axis is
    kept (``keepdims=True``).
    """
    lhs = pair[0]
    rhs = pair[1]
    absolute_difference = K.abs(lhs - rhs)
    return K.sum(absolute_difference, axis=1, keepdims=True)
def _build_base_dense(input_shape):
    """Build the frozen ResNet50 embedding model used for each siamese leg.

    An ImageNet-weighted ResNet50 (without its classification top) is created
    on a fresh input of shape `input_shape`, every one of its layers is marked
    non-trainable, and the output of the layer named 'activation_49' is
    globally average-pooled into the embedding.

    Returns a Keras `Model` mapping the image input to the pooled embedding.
    """
    image_input = Input(shape=input_shape)
    backbone = ResNet50(weights='imagenet', include_top=False, input_tensor=image_input)

    # Freeze the whole backbone so only layers added on top of it are trained.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # NOTE(review): 'activation_49' is an auto-numbered layer name; it is
    # presumably the last ReLU of ResNet50 (1/32 of the input resolution) and
    # may not exist if other activation layers were created earlier in the
    # same session -- confirm before building more than one model.
    last_feature_map = backbone.get_layer('activation_49').output
    embedding = GlobalAveragePooling2D()(last_feature_map)

    return Model(inputs=image_input, outputs=embedding)
def build_siamese_dense(input_shape):
    """Build and compile the siamese image-matching model.

    Two inputs of shape `input_shape` (query and reference) are passed through
    one shared embedding network from `_build_base_dense`. The absolute
    element-wise difference of the two embeddings feeds a single sigmoid unit
    that predicts 1 for a matching pair and 0 otherwise.

    Fix over the previous head: it applied ``Dense(1, use_bias=False)`` to the
    *signed* difference. For an identical pair that difference is all zeros,
    so the unit always produced sigmoid(0) == 0.5 and could never predict a
    match; the signed difference also made the score flip when the inputs
    were swapped. Using |query - reference| with a bias term removes both
    problems: the bias sets the score for a zero distance and the weights
    lower it as the distance grows.

    Returns the compiled Keras `Model` (binary cross-entropy, accuracy metric).
    """
    input_query = Input(shape=input_shape)
    input_reference = Input(shape=input_shape)

    # The same model instance is applied to both inputs, so weights are shared.
    base_network = _build_base_dense(input_shape=input_shape)
    embed_query = base_network(input_query)
    embed_reference = base_network(input_reference)

    # Element-wise subtraction expressed as query + (-reference) ...
    negative_embed_reference = Lambda(lambda x: x * -1)(embed_reference)
    elementwise_diff = Add()([embed_query, negative_embed_reference])
    # ... followed by abs() so the features are symmetric in the two inputs.
    elementwise_dist = Lambda(lambda x: K.abs(x))(elementwise_diff)

    # Bias is required: without it a zero distance always maps to 0.5.
    classify = Dense(1, activation='sigmoid')(elementwise_dist)

    model = Model(inputs=[input_query, input_reference], outputs=classify)
    model.compile(
        optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    return model
def preprocess_cv2_batch(images, dim_ordering='default'):
    """Zero-center a batch of BGR images by fixed per-channel means.

    Args:
        images: array indexed as (batch, height, width, channel) with the
            channels in BGR order (no RGB->BGR swap is performed here).
        dim_ordering: 'tf' keeps channels last, 'th' transposes the batch to
            (batch, channel, height, width); 'default' asks the Keras backend
            via ``K.image_dim_ordering()``.

    Returns:
        A new floating-point array with 103.939 / 116.779 / 123.68 subtracted
        from channels 0 / 1 / 2 respectively.

    Raises:
        AssertionError: if the resolved ordering is neither 'tf' nor 'th'.

    The input is copied to a floating dtype first. Previously the subtraction
    ran in place on the argument, which raised on integer arrays (e.g. the
    uint8 arrays returned by cv2.imread) and silently modified the caller's
    data. Float inputs keep their dtype; integer inputs are promoted.
    """
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    images = np.asarray(images)
    # astype() always copies here, so the caller's array is left untouched.
    images = images.astype(np.result_type(images.dtype, np.float32))

    channel_means = (103.939, 116.779, 123.68)  # B, G, R

    if dim_ordering == 'th':
        # need to transpose axes to make (batch, channels, height, width)
        print('Image batch arrived with shape: {}'.format(str(images.shape)))
        images = np.transpose(images, (0, 3, 1, 2))
        print('Image batch axes were transposed to shape: {} for THEANO dim-ordering convention'.format(
            str(images.shape)))
        for channel, mean in enumerate(channel_means):
            images[:, channel, :, :] -= mean
    else:
        for channel, mean in enumerate(channel_means):
            images[:, :, :, channel] -= mean
    return images
class DataGenerator(object):
    """Iterate over a directory of images, creating training pairs on the fly.

    Every entry returned by ``os.listdir(image_dir)`` is treated as a
    candidate image path; entries that OpenCV fails on are skipped at batch
    generation time.
    """

    def __init__(self, image_dir, input_shape, prob_positive=0.5):
        """Record the settings and list the candidate image files.

        Args:
            image_dir: directory whose entries are used as images.
            input_shape: (height, width, channels) of each network input.
            prob_positive: probability that a generated pair is a match.

        Raises:
            AssertionError: if the directory holds fewer than two entries.
        """
        self.input_shape = input_shape
        self.image_dir = image_dir
        self.prob_positive = prob_positive
        self.image_file_list = [os.path.join(self.image_dir, item) for item in os.listdir(self.image_dir)]
        assert len(self.image_file_list) >= 2, 'You need at least 2 images in the dir to do matching.'

    def _load_resized(self, image_path):
        """Read `image_path` with OpenCV and resize it to the input size."""
        image = cv2.imread(image_path)
        # cv2.resize takes (width, height), hence the swapped indices.
        return cv2.resize(image, (self.input_shape[1], self.input_shape[0]))

    def generate_batch(self, batch_size, debug=False):
        """Yield ``([queries, references], labels)`` batches forever.

        Each pair holds a randomly chosen reference image. With probability
        `prob_positive` the query is a copy of that same image (label 1);
        otherwise it is a different randomly chosen file (label 0). Both
        images go through `preprocess_cv2_batch`. Samples that raise
        ``cv2.error`` are printed and retried, so every yielded batch holds
        exactly `batch_size` pairs.

        `debug` is accepted but currently unused.
        """
        while True:
            batch_query_inputs = []
            batch_reference_inputs = []
            batch_labels = []
            num_successful = 0
            while num_successful < batch_size:
                try:
                    input_pair = np.zeros(
                        (2, self.input_shape[0], self.input_shape[1], self.input_shape[2]),
                        dtype=np.float32)

                    # Sample the reference without replacement. list() is
                    # needed because a Python 3 range object has no pop().
                    allowed_indices = list(range(len(self.image_file_list)))
                    reference_index = random.choice(allowed_indices)
                    allowed_indices.pop(reference_index)
                    reference_image = self._load_resized(self.image_file_list[reference_index])
                    input_pair[1] = reference_image

                    # flip a coin to decide whether the pair is a match or not
                    if random.random() < self.prob_positive:  # match
                        input_pair[0] = np.array(reference_image)
                        is_match = 1
                    else:  # no match - choose a different image
                        query_index = random.choice(allowed_indices)
                        input_pair[0] = self._load_resized(self.image_file_list[query_index])
                        is_match = 0

                    input_pair = preprocess_cv2_batch(input_pair)
                    batch_query_inputs.append(input_pair[0])
                    batch_reference_inputs.append(input_pair[1])
                    batch_labels.append(is_match)
                    num_successful += 1
                except cv2.error as cv2e:
                    # Best effort: report the failing sample and draw another.
                    print(cv2e)
            batch_query_inputs = np.array(batch_query_inputs)
            batch_reference_inputs = np.array(batch_reference_inputs)
            batch_labels = np.array(batch_labels)
            yield [batch_query_inputs, batch_reference_inputs], batch_labels
if __name__ == '__main__':
    # Script entry point: train the siamese network on pairs drawn from the
    # image directory given on the command line, validating on one batch
    # every VAL_INTERVAL iterations and appending the training loss/accuracy
    # of every iteration to '<MODEL_NAME>.losshistory' / '.acchistory'.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('imagedir', help='path to directory where training images are found')
    args = parser.parse_args()

    IMAGE_DIR = args.imagedir
    INPUT_SHAPE = (320, 320, 3)
    BATCH_SIZE = 32
    NUM_ITERATIONS = 500
    VAL_INTERVAL = 50
    MODEL_NAME = 'siamese_experiment'

    # Both generators read the same directory, so validation pairs come from
    # the same images as the training pairs.
    data_train = DataGenerator(IMAGE_DIR, INPUT_SHAPE)
    data_val = DataGenerator(IMAGE_DIR, INPUT_SHAPE)
    gen_train = data_train.generate_batch(batch_size=BATCH_SIZE)
    gen_val = data_val.generate_batch(batch_size=BATCH_SIZE)

    net = build_siamese_dense(input_shape=INPUT_SHAPE)
    net.summary()

    loss_history_path = '{}.losshistory'.format(MODEL_NAME)
    acc_history_path = '{}.acchistory'.format(MODEL_NAME)

    # Start every run with empty history files (opening in 'w' truncates).
    for history_path in (loss_history_path, acc_history_path):
        with open(history_path, 'w'):
            pass

    for iteration in range(NUM_ITERATIONS):
        # do validation
        if iteration % VAL_INTERVAL == 0:
            print('============\nIteration: {}'.format(iteration))
            # Builtin next() works on Python 2 and 3; generator.next() is
            # Python 2 only.
            batch_X, batch_y = next(gen_val)
            metrics_val = net.evaluate(batch_X, batch_y, batch_size=BATCH_SIZE, verbose=1)
            print('VALIDATION: Loss={}, Acc={}'.format(metrics_val[0], metrics_val[1]))

        batch_X, batch_y = next(gen_train)
        metrics_train = net.train_on_batch(batch_X, batch_y)
        print('============\nIteration: {}'.format(iteration))
        print('TRAIN: Loss={}, Acc={}'.format(metrics_train[0], metrics_train[1]))
        print('============')

        with open(loss_history_path, 'a') as f:
            f.write('{}\n'.format(metrics_train[0]))
        with open(acc_history_path, 'a') as f:
            f.write('{}\n'.format(metrics_train[1]))
从keras.applications.resnet50导入resnet50
从keras.models导入模型
#从keras.utils.visualize\u util导入绘图
从keras.layers导入合并\
浓密的\
辍学\
输入\
全球平均池2D\
兰姆达\
批量标准化\
激活
从keras.layers.merge导入添加、乘法、连接
从keras.optimizers导入Adam、SGD、RMSprop
从keras.engine导入层
将keras.backend作为K导入
从keras导入正则化器
导入操作系统
随机输入
从PIL导入图像
将numpy作为np导入
进口cv2
def曼哈顿_距离(对):
返回K.sum(K.abs(对[0]-对[1]),轴=1,keepdims=True)
定义构建基础密度(输入形状):
输入张量=输入(形状=输入形状)
基本模型=ResNet50(权重='imagenet',包括顶部=假,输入张量=输入张量)
对于基本模型层中的层:
layer.trainable=错误
一三十齿分辨率=基本模型。获取图层(“激活49”)。输出
pooled=GlobalAveragePoolig2D()(一个三十齿的分辨率)
#稠密_1=稠密(1024,activation='relu',kernel_正则化子=正则化子.l2(0.01))(合并)
#稠密的_1=BatchNormalization()(稠密的_1)
#嵌入模型=模型(输入=输入张量,输出=密集张量1)
嵌入模型=模型(输入=输入张量,输出=合并)
返回嵌入模型
def build_暹罗密度(输入形状):
输入\查询=输入(形状=输入\形状)
输入\参考=输入(形状=输入\形状)
基本网络=基本密集(输入形状=输入形状)
嵌入查询=基本网络(输入查询)
嵌入\参考=基本\网络(输入\参考)
#dist=Lambda(曼哈顿距离)([嵌入查询,嵌入引用])
负嵌入引用=λ(λx:x*-1)(嵌入引用)
elementwise_dist=Add()([embed_query,negative_embed_reference])#每个暹罗腿的元素减法
#merged=Concatenate()([embed\u query,embed\u reference])
#分类=密集(2,激活='softmax')(距离)
分类=密集(1,激活=乙状结肠,使用偏差=假)(按元素划分)
#分类=密集(1,激活=乙状结肠,使用偏差=假)(合并)
模型=模型(输入=[输入\查询,输入\参考],输出=分类)
model.compile(
优化器=Adam(lr=0.0001,β1=0.9,β2=0.999,ε=1e-08,衰减=0.0),
#优化器=新加坡元(lr=0.001,动量=0.5),
#损失=‘分类的’
损失='binary_crossentropy',度量=['Accurance'])
回归模型
def预处理_cv2_批处理(图像,尺寸排序为默认值):
#images=images.astype(np.float64)
如果dim_排序==‘默认值’:
尺寸排序=K.图像尺寸排序()
在{tf',th'}中断言dim_顺序
如果dim_排序='th':
#需要转换轴来制作(批次、通道、高度、宽度)
打印('图像批处理以形状:{}到达。格式(str(images.shape)))
图像=np.转置(图像,(0,3,1,2))
打印('图像批处理轴已转换为形状:{},用于无暗序约定'。格式(
str(images.shape)))
##“RGB”->“BGR”
#x=x[:,:-1,:,:]
#平均像素零中心
图像[:,0,:,:]-=103.939
图像[:,1,:,:]-=116.779
图像[:,2,:,:]-=123.68
其他:
#“RGB”->“BGR”
#x=x[:,:,:,::-1]
##平均像素零中心
图像[:,:,:,0]-=103.939
图像[:,:,:,1]-=116.779
图像[:,:,:,2]=123.68
返回图像
类数据生成器(对象):
'''
类,用于迭代图像目录,动态创建训练对
'''
定义初始值(自、图像方向、输入形状、概率正=0.5):
self.input\u shape=input\u shape
self.image\u dir=image\u dir
self.prob_positive=prob_positive
self.image\u file\u list=[os.path.join(self.image\u dir,item)用于os.listdir(self.image\u dir)中的项]
断言len(self.image\u file\u list)>=2,“目录中至少需要2个图像才能进行匹配。”
def生成批处理(自身、批处理大小、调试=False):
尽管如此:
批处理查询输入=[]
批次\参考\输入=[]
批次标签=[]
num_successful=0
当num\u成功<批大小:
尝试:
#随机选择参考图像
输入对=np.0((2,self.input\u形状[0],self.input\u形状[1],self.input\u形状[2]),dtype=np.float32)
#在不替换的情况下对图像进行采样
允许的索引=范围(len(self.image文件列表))
随机图像索引=随机选择(允许的索引)
允许的索引.pop(随机图像索引)
随机图像路径=self.image文件列表[随机图像索引]
随机图像参考=cv2.imread(随机图像路径)
随机图像参考=cv2。调整大小(随机图像参考)