Neural network: Why isn't this Keras Siamese image-matching network learning anything?

Tags: neural-network, computer-vision, deep-learning, keras, conv-neural-network

As a first sanity check, I'm trying to build a network that learns to output 1 for identical image pairs and 0 for non-identical pairs, in the hope that it would fit quickly.

The loss decreases, but no matter what I try, the accuracy bounces around 0.5.

Using ResNet50 as the pair of shared Siamese branches, I merge them with element-wise subtraction and feed the resulting "difference layer" into a single sigmoid unit, as described in [link]. I've also tried several other suggested variants, e.g. a softmax output, concatenation instead of subtraction, etc.
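For reference, here is a minimal sketch of just that difference-layer head, assuming 2048-dimensional embeddings (the size of ResNet50's globally pooled output); the negate-then-Add trick mirrors the element-wise subtraction used in the full code below:

from keras.layers import Input, Dense, Lambda
from keras.layers.merge import Add
from keras.models import Model

embed_query = Input(shape=(2048,))      # pooled embedding of the query image
embed_reference = Input(shape=(2048,))  # pooled embedding of the reference image

# element-wise subtraction: negate one embedding, then add
negated_reference = Lambda(lambda x: -x)(embed_reference)
difference = Add()([embed_query, negated_reference])

# single sigmoid unit on top of the "difference layer"
match_probability = Dense(1, activation='sigmoid')(difference)

head = Model(inputs=[embed_query, embed_reference], outputs=match_probability)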

The example below can be run by anyone, provided you supply, as a command-line argument, the path to a directory containing at least two images to match.

from keras.applications.resnet50 import ResNet50
from keras.models import Model
# from keras.utils.visualize_util import plot
from keras.layers import merge, \
    Dense, \
    Dropout, \
    Input, \
    GlobalAveragePooling2D, \
    Lambda, \
    BatchNormalization, \
    Activation
from keras.layers.merge import Add, Multiply, Concatenate
from keras.optimizers import Adam, SGD, RMSprop
from keras.engine import Layer
import keras.backend as K
from keras import regularizers
import os
import random
from PIL import Image
import numpy as np
import cv2


def manhattan_distance(pair):
    return K.sum(K.abs(pair[0] - pair[1]), axis=1, keepdims=True)

def _build_base_dense(input_shape):
    input_tensor = Input(shape=input_shape)

    base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=input_tensor)

    for layer in base_model.layers:
        layer.trainable = False

    # final ResNet50 activation (1/32 of the input resolution); the name
    # 'activation_49' assumes this is the first model built in the Keras session
    one_thirty_tooth_resolution = base_model.get_layer('activation_49').output

    pooled = GlobalAveragePooling2D()(one_thirty_tooth_resolution)
    # dense_1 = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(pooled)
    # dense_1 = BatchNormalization()(dense_1)

    # embedding_model = Model(inputs=input_tensor, outputs=dense_1)
    embedding_model = Model(inputs=input_tensor, outputs=pooled)

    return embedding_model

def build_siamese_dense(input_shape):
    input_query = Input(shape=input_shape)
    input_reference = Input(shape=input_shape)

    base_network = _build_base_dense(input_shape=input_shape)

    embed_query = base_network(input_query)
    embed_reference = base_network(input_reference)

    # dist = Lambda(manhattan_distance)([embed_query, embed_reference])
    negative_embed_reference = Lambda(lambda x: x * -1)(embed_reference)
    elementwise_dist = Add()([embed_query, negative_embed_reference]) #elementwise subtraction of each siamese leg
    # merged = Concatenate()([embed_query, embed_reference])

    # classify = Dense(2, activation='softmax')(dist)
    classify = Dense(1, activation='sigmoid', use_bias=False)(elementwise_dist)
    # classify = Dense(1, activation='sigmoid', use_bias=False)(merged)

    model = Model(inputs=[input_query, input_reference], outputs=classify)

    model.compile(
        optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
        # optimizer=SGD(lr=0.001, momentum=0.5),
        # loss='categorical_crossentropy', metrics=['accuracy'])
        loss='binary_crossentropy', metrics=['accuracy'])

    return model

def preprocess_cv2_batch(images, dim_ordering='default'):
    # images = images.astype(np.float64)
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if dim_ordering == 'th':
        # need to transpose axes to make (batch, channels, height, width)
        print('Image batch arrived with shape: {}'.format(str(images.shape)))
        images = np.transpose(images, (0, 3, 1, 2))
        print('Image batch axes were transposed to shape: {} for THEANO dim-ordering convention'.format(
            str(images.shape)))
        # no 'RGB'->'BGR' swap needed here: cv2.imread already returns BGR
        # zero-center by the ImageNet mean pixel (BGR order)
        images[:, 0, :, :] -= 103.939
        images[:, 1, :, :] -= 116.779
        images[:, 2, :, :] -= 123.68
    else:
        # no 'RGB'->'BGR' swap needed here: cv2.imread already returns BGR
        # zero-center by the ImageNet mean pixel (BGR order)
        images[:, :, :, 0] -= 103.939
        images[:, :, :, 1] -= 116.779
        images[:, :, :, 2] -= 123.68
    return images

class DataGenerator(object):
    '''
    Class for iterating through a directory of images, creating training pairs on the fly
    '''
    def __init__(self, image_dir, input_shape, prob_positive=0.5):
        self.input_shape = input_shape
        self.image_dir = image_dir
        self.prob_positive = prob_positive
        self.image_file_list = [os.path.join(self.image_dir, item) for item in os.listdir(self.image_dir)]
        assert len(self.image_file_list) >= 2, 'You need at least 2 images in the dir to do matching.'

    def generate_batch(self, batch_size, debug=False):
        while True:
            batch_query_inputs = []
            batch_reference_inputs = []
            batch_labels = []
            num_successful = 0
            while num_successful < batch_size:
                try:
                    # randomly choose a reference image
                    input_pair = np.zeros((2, self.input_shape[0], self.input_shape[1], self.input_shape[2]), dtype=np.float32)
                    # sample an image without replacement
                    allowed_indices = list(range(len(self.image_file_list)))
                    random_image_index = random.choice(allowed_indices)
                    allowed_indices.remove(random_image_index)
                    random_image_path = self.image_file_list[random_image_index]
                    random_image_reference = cv2.imread(random_image_path)
                    random_image_reference = cv2.resize(random_image_reference,(self.input_shape[1], self.input_shape[0]))
                    input_pair[1] = random_image_reference

                    # flip a coin to decide whether the training example is a match or not
                    if random.random() < self.prob_positive: # match
                        input_pair[0] = np.array(random_image_reference)
                        is_match = 1
                    else: # no match - choose a different image
                        random_image_index = random.choice(allowed_indices)
                        random_image_path = self.image_file_list[random_image_index]
                        random_image_query = cv2.imread(random_image_path)
                        random_image_query = cv2.resize(random_image_query,(self.input_shape[1], self.input_shape[0]))
                        input_pair[0] = random_image_query
                        is_match = 0

                    input_pair = preprocess_cv2_batch(input_pair)
                    batch_query_inputs.append(input_pair[0])
                    batch_reference_inputs.append(input_pair[1])
                    batch_labels.append(is_match)

                    # DEBUG
                    # cv2.namedWindow('query match={}'.format(is_match))
                    # cv2.imshow('query match={}'.format(is_match), input_pair[0])
                    # cv2.namedWindow('reference match={}'.format(is_match))
                    # cv2.imshow('reference match={}'.format(is_match), input_pair[1])
                    # cv2.waitKey()
                    # cv2.destroyAllWindows()

                    num_successful += 1
                except cv2.error as cv2e:
                    print(cv2e)
                # except Exception as e:
                #     print('There was some kind of exception...')
                #     print(e)
            batch_query_inputs = np.array(batch_query_inputs)
            batch_reference_inputs = np.array(batch_reference_inputs)
            batch_labels = np.array(batch_labels)

            yield [batch_query_inputs, batch_reference_inputs], batch_labels
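
# A minimal usage sketch (the directory path is hypothetical): draw a single
# batch of (query, reference) pairs and their 0/1 match labels:
#   gen = DataGenerator('/path/to/images', input_shape=(320, 320, 3)).generate_batch(batch_size=4)
#   (queries, references), labels = next(gen)
#   # queries.shape == (4, 320, 320, 3); labels[i] == 1 iff pair i is a match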


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('imagedir', help='path to directory where training images are found')
    args = parser.parse_args()

    IMAGE_DIR = args.imagedir
    INPUT_SHAPE = (320, 320, 3)
    BATCH_SIZE = 32
    NUM_ITERATIONS = 500
    VAL_INTERVAL = 50
    MODEL_NAME = 'siamese_experiment'

    data_train = DataGenerator(IMAGE_DIR, INPUT_SHAPE)
    data_val = DataGenerator(IMAGE_DIR, INPUT_SHAPE)
    gen_train = data_train.generate_batch(batch_size=BATCH_SIZE)
    gen_val = data_val.generate_batch(batch_size=BATCH_SIZE)

    net = build_siamese_dense(input_shape=INPUT_SHAPE)
    net.summary()

    with open('{}.losshistory'.format(MODEL_NAME), 'wb') as f:
        f.truncate()
    with open('{}.acchistory'.format(MODEL_NAME), 'wb') as f:
        f.truncate()
    for iteration in range(NUM_ITERATIONS):
        # do validation
        if iteration % VAL_INTERVAL == 0:
            print('============\nIteration: {}'.format(iteration))
            batch_X, batch_y = next(gen_val)
            metrics_val = net.evaluate(batch_X, batch_y, batch_size=BATCH_SIZE, verbose=1)
            print('VALIDATION: Loss={}, Acc={}'.format(metrics_val[0], metrics_val[1]))

        batch_X, batch_y = next(gen_train)
        metrics_train = net.train_on_batch(batch_X, batch_y)

        print('============\nIteration: {}'.format(iteration))
        print('TRAIN: Loss={}, Acc={}'.format(metrics_train[0], metrics_train[1]))
        print('============')
        with open('{}.losshistory'.format(MODEL_NAME), 'a') as f:
            f.write('{}\n'.format(metrics_train[0]))
        with open('{}.acchistory'.format(MODEL_NAME), 'a') as f:
            f.write('{}\n'.format(metrics_train[1]))
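
Assuming the script above is saved as siamese_experiment.py (the filename is illustrative), it can be run against any directory of images like so:

python siamese_experiment.py /path/to/image_dir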