Python 不正确的facenet识别

Python 不正确的facenet识别,python,opencv,face-detection,face-recognition,Python,Opencv,Face Detection,Face Recognition,我一直在开发一个人脸识别考勤管理系统。我从头开始构建了管道,但最终,脚本在10个类中识别出了错误的面孔。 我已经使用Tensorflow和Python实现了以下管道 使用dlib的shape predictor捕获图像、调整大小、对齐它们,并将它们存储在命名文件夹中,以便在执行识别时进行比较 将图像Pickle到data.Pickle文件中,以便以后反序列化 使用OpenCV实现MTCNN算法检测网络摄像头捕获的帧中的人脸 将这些帧传递到facenet网络以创建128-D嵌入,并相应地与pick

我一直在开发一个人脸识别考勤管理系统。我从头开始构建了管道,但最终,脚本在10个类中识别出了错误的面孔。 我已经使用Tensorflow和Python实现了以下管道

  • 使用dlib的shape predictor捕获图像、调整大小、对齐它们,并将它们存储在命名文件夹中,以便在执行识别时进行比较
  • 将图像序列化(pickle)到
    data.pickle
    文件中,以便以后反序列化

  • 使用OpenCV实现MTCNN算法检测网络摄像头捕获的帧中的人脸

  • 将这些帧传递到facenet网络以创建128-D嵌入,并相应地与pickle数据库中的嵌入进行比较
  • 以下是运行步骤3和4的主文件:

    from keras import backend as K
    import time
    from multiprocessing.dummy import Pool
    K.set_image_data_format('channels_first')
    import cv2
    import os
    import glob
    import numpy as np
    from numpy import genfromtxt
    import tensorflow as tf
    from keras.models import load_model
    from fr_utils import *
    from inception_blocks_v2 import *
    from mtcnn.mtcnn import MTCNN
    import dlib
    from imutils import face_utils
    import imutils
    import pickle
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import train_test_split
    
    
    # Keras model loaded once at import time; find_identity passes it to
    # who_is_it, which hands it to img_to_encoding.
    FRmodel = load_model('face-rec_Google.h5')
    # detector = dlib.get_frontal_face_detector()
    # MTCNN face detector; process_frame calls detector.detect_faces() on it.
    detector = MTCNN()
    # FRmodel = faceRecoModel(input_shape=(3, 96, 96))
    #
    # # detector = dlib.get_frontal_face_detector()
    # # predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
    # def triplet_loss(y_true, y_pred, alpha = 0.3):
    #     """
    #     Implementation of the triplet loss as defined by formula (3)
    #
    #     Arguments:
    #     y_pred -- python list containing three objects:
    #             anchor -- the encodings for the anchor images, of shape (None, 128)
    #             positive -- the encodings for the positive images, of shape (None, 128)
    #             negative -- the encodings for the negative images, of shape (None, 128)
    #
    #     Returns:
    #     loss -- real number, value of the loss
    #     """
    #
    #     anchor, positive, negative = y_pred[0], y_pred[1], y_pred[2]
    #
    #     pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)
    #     neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)
    #     basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)
    #     loss = tf.reduce_sum(tf.maximum(basic_loss, 0.0))
    #
    #     return loss
    #
    # FRmodel.compile(optimizer = 'adam', loss = triplet_loss, metrics = ['accuracy'])
    # load_weights_from_FaceNet(FRmodel)
    def ret_model():
        """Return the module-level model object ``FRmodel``."""
        model = FRmodel
        return model
    
    def prepare_database():
        """
        Load the face database from ``data.pickle`` in the current directory.

        Returns:
        database -- the object stored in the pickle file (who_is_it iterates
                    it with ``.items()`` as name -> encoding pairs)

        Raises:
        OSError -- if ``data.pickle`` cannot be opened
        """
        # SECURITY: pickle.load can execute arbitrary code; only load a
        # data.pickle that was produced by this project.
        # The context manager closes the handle, which was previously leaked.
        with open("data.pickle", "rb") as pickle_in:
            database = pickle.load(pickle_in)
        return database
    
    def unpickle_something(pickle_file):
        """
        Deserialize and return the object stored in ``pickle_file``.

        Arguments:
        pickle_file -- path of the pickle file to read

        Returns:
        unpickled_file -- the object stored in the file

        Raises:
        OSError -- if the file cannot be opened
        """
        # SECURITY: pickle.load can execute arbitrary code; only use this on
        # trusted files.
        # The context manager closes the handle, which was previously leaked.
        with open(pickle_file, "rb") as pickle_in:
            unpickled_file = pickle.load(pickle_in)
        return unpickled_file
    
    
    def webcam_face_recognizer(database):
        """
        Run face recognition on the default webcam until the user quits.

        Every captured frame is handed to process_frame and then displayed in a
        single "preview" window. Press ``q`` or ``Esc`` to stop.

        Arguments:
        database -- the loaded face database. It is not read in this function;
                    find_identity uses the module-level ``database`` instead.
                    The parameter is kept so existing callers keep working.
        """
        # One name for both namedWindow and imshow; the previous mix of
        # "preview" and "Preview" opened two separate windows.
        window_name = "preview"
        cv2.namedWindow(window_name)
        vc = cv2.VideoCapture(0)

        try:
            while vc.isOpened():
                ret, frame = vc.read()
                if not ret or frame is None:
                    # No frame was delivered (camera unplugged / stream ended).
                    break

                # OpenCV delivers BGR frames, while the MTCNN detector expects
                # RGB input, so detection runs on a converted copy.
                img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # process_frame draws its annotations on `frame` in place, so
                # the BGR frame is what gets displayed.
                process_frame(img_rgb, frame)

                cv2.imshow(window_name, frame)
                key = cv2.waitKey(1) & 0xFF
                if key in (ord('q'), 27):
                    break
        finally:
            # Always free the camera and the window, even on an exception.
            vc.release()
            cv2.destroyAllWindows()
    
    def process_frame(img, frame):
        """
        Determine whether the current frame contains the faces of people from our database

        Arguments:
        img -- image handed to ``detector.detect_faces``
        frame -- image the face crops are taken from and on which boxes and
                 names are drawn (modified in place); assumed to have the same
                 size as ``img``

        Returns:
        frame -- the annotated frame. It is returned on every path, including
                 when no face is detected.
        """
        padding = 50  # pixels added on every side of the detected box

        # rects = detector(img)
        rects = detector.detect_faces(img)

        # Crops are taken from an untouched copy so that rectangles and labels
        # drawn for this or earlier faces never end up inside a face crop.
        clean = frame.copy()

        # Loop through all the faces detected and determine whether or not they are in the database
        identities = []
        for rect in rects:
            x, y, w, h = rect['box'][:4]

            # find_identity takes (x, y, width, height). The previous call
            # passed the bottom-right corner in the width/height slots, which
            # cropped the wrong region. The origin is clamped at 0 because a
            # negative start index would wrap around in numpy slicing.
            left = max(x - padding, 0)
            top = max(y - padding, 0)
            right = x + w + padding
            bottom = y + h + padding
            identity = find_identity(clean, left, top, right - left, bottom - top)

            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            if identity is not None:
                # Label each face just above its own box instead of stacking
                # every name at one fixed point of the image.
                cv2.putText(frame, identity, (x, max(y - 10, 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 255, 255), 2,
                            cv2.LINE_AA)
                identities.append(identity)

        if identities:
            cv2.imwrite('example.png', frame)

        return frame
    
    def find_identity(frame, x, y, w, h):
        """
        Determine whether the face contained within the bounding box exists in our database

        x,y ______________
        |                 |
        |                 |
        |_________________| x+w,y+h

        Arguments:
        frame -- image array indexed as frame[row, column]
        x, y -- top-left corner of the box (may lie outside the image)
        w, h -- width and height of the box

        Returns:
        Whatever who_is_it returns for the cropped region.

        NOTE: reads the module-level names ``database`` and ``FRmodel``; in this
        file ``database`` is only assigned inside the ``__main__`` guard.
        """
        height, width = frame.shape[:2]

        # Clamp the box to the image. A negative start index would otherwise
        # wrap around in numpy slicing and yield an empty or unrelated crop.
        x1 = max(x, 0)
        y1 = max(y, 0)
        x2 = min(max(x + w, 0), width)
        y2 = min(max(y + h, 0), height)
        part_image = frame[y1:y2, x1:x2]

        return who_is_it(part_image, database, FRmodel)
    
    def who_is_it(image, database, model, threshold=0.1):
        """
        Find the database entry whose encoding is closest to the face in ``image``.

        Arguments:
        image -- face crop passed to ``img_to_encoding``
        database -- mapping of name -> stored encoding (objects with ``flatten()``)
        model -- model passed to ``img_to_encoding``
        threshold -- L2 distance above which the match is reported as
                     'Unknown person' on stdout (default 0.1, the value that
                     was previously hard-coded)

        Returns:
        identity -- name of the nearest database entry, or None when the
                    database is empty.

        NOTE(review): as before, the nearest name is returned even when its
        distance exceeds ``threshold``; the threshold only changes the printed
        message. Callers that must reject unknown faces need their own check.
        """
        encoding = img_to_encoding(image, model).flatten()

        # Start from "no match" so an empty database returns None instead of
        # raising UnboundLocalError.
        identity = None
        min_dist = float('inf')

        # Loop over the database dictionary's names and encodings.
        for (name, db_enc) in database.items():

            # Compute L2 distance between the target "encoding" and the current "emb" from the database.
            dist = np.linalg.norm(db_enc.flatten() - encoding)

            print('distance for %s is %s' %(name, dist))

            # If this distance is less than the min_dist, then set min_dist to dist, and identity to name
            if dist < min_dist:
                min_dist = dist
                identity = name

        # Report once, after every entry has been compared (this check used to
        # run inside the loop and printed a verdict per database entry).
        if identity is None or min_dist > threshold:
            print('Unknown person')
        else:
            print(identity)
        return identity
    
    
    if __name__ == "__main__":
        # `database` must remain a module-level name: find_identity reads it as
        # a global instead of receiving it as an argument.
        database = prepare_database()
        webcam_face_recognizer(database)
    
    从keras导入后端为K
    导入时间
    来自multiprocessing.dummy导入池
    K.设置图像数据格式(“通道优先”)
    进口cv2
    导入操作系统
    导入glob
    将numpy作为np导入
    从numpy导入genfromtxt
    导入tensorflow作为tf
    从keras.models导入负载_模型
    从fr_utils导入*
    从开始\u块\u v2导入*
    从mtcnn.mtcnn导入mtcnn
    导入dlib
    从imutils导入面\u utils
    导入imutils
    进口泡菜
    从sklearn.neighbors导入KNeighborsClassifier
    从sklearn.model\u选择导入列车\u测试\u拆分
    FRmodel=load_model('face-rec_Google.h5'))
    #探测器=dlib.获取正面探测器()
    检测器=MTCNN()
    #FRmodel=faceRecoModel(输入_形状=(3,96,96))
    #
    ##探测器=dlib.获取_正面_面部_探测器()
    ##predictor=dlib.shape_predictor(“shape_predictor_68_face_landmarks.dat”)
    #def三重态损耗(y_真,y_pred,α=0.3):
    #     """
    #实现公式(3)定义的三重态损耗
    #
    #论据:
    #y_pred——包含三个对象的python列表:
    #锚定——锚定图像的编码,形状(无,128)
    #正片——正片图像的编码,形状(无,128)
    #负片——负片图像的编码,形状(无,128)
    #
    #返回:
    #损失——实数,损失值
    #     """
    #
    #锚定,正,负=y_pred[0],y_pred[1],y_pred[2]
    #
    #pos_dist=tf.reduce_sum(tf.square(tf.subtract(锚定,正)),轴=-1)
    #负距离=tf.减和(tf.平方(tf.减(锚定,负)),轴=-1)
    #基本损耗=tf.加(tf.减(正差,负差),α)
    #损失=tf.减少总和(tf.最大值(基本损失,0.0))
    #
    #回波损耗
    #
    #compile(优化器='adam',loss=triplet\u loss,metrics=['accurity'])
    #从面网(FRmodel)加载重量
    def ret_模型():
    回归模型
    def prepare_数据库():
    pickle_in=open(“data.pickle”、“rb”)
    数据库=pickle.load(pickle\u in)
    返回数据库
    def unpickle_某物(pickle_文件):
    pickle\u in=open(pickle\u文件,“rb”)
    unpickled_file=pickle.load(pickle_in)
    返回未勾选的_文件
    def网络摄像头面部识别器(数据库):
    cv2.namedWindow(“预览”)
    vc=cv2.视频捕获(0)
    而vc.isopend():
    ret,frame=vc.read()
    img_rgb=cv2.cvt颜色(帧,cv2.COLOR_BGR2RGB)
    img=帧
    #我们不希望在程序识别另一个人的过程中检测到新身份
    img=过程框架(img,img)
    cv2.imshow(“预览”,img)
    cv2.等待键(1)
    vc.release()
    def过程框架(img,框架):
    """
    确定当前帧是否包含数据库中的人脸
    """
    #rects=探测器(img)
    rects=检测器。检测面(img)
    #循环遍历所有检测到的面,并确定它们是否在数据库中
    身份=[]
    对于枚举(rects)中的(i,rect):
    (x,y,w,h)=rect['box'][0],rect['box'][1],rect['box'][2],rect['box'][3]
    img=cv2.矩形(框架,(x,y),(x+w,y+h),(255,0,0),2)
    标识=查找标识(帧,x-50,y-50,x+w+50,y+h+50)
    cv2.putText(img,identity,(10500),cv2.FONT\u HERSHEY\u SIMPLEX,4,(255255255),2,cv2.LINE\u AA)
    如果标识不是无:
    identifies.append(identity)
    如果身份!=[]:
    cv2.imwrite('example.png',img)
    返回img
    def查找标识(帧,x,y,w,h):
    """
    确定边界框中包含的面是否存在于数据库中
    x1,y1_____________
    |                 |
    |                 |
    |_________________x2,y2
    """
    高度、宽度、通道=frame.shape
    #填充是必要的,因为OpenCV人脸检测器会围绕人脸而不是头部创建边界框
    部分图像=帧[y:y+h,x:x+w]
    返回谁是它(部分图像、数据库、模型)
    定义谁是它(图像、数据库、模型):
    编码=img_到_编码(图像、模型)
    最小距离=100
    #循环数据库字典的名称和编码。
    对于数据库中的(名称,db_enc)。项()
    #计算目标“编码”和数据库中当前“emb”之间的L2距离。
    dist=np.linalg.norm(db_enc.flatte()-encoding.flatte())
    打印(“%s”的距离为“%s%”(名称,距离))
    #如果此距离小于“最小距离”,则将“最小距离”设置为“距离”,将“标识”设置为“名称”
    如果距离小于最小距离:
    最小距离=距离
    标识=名称
    如果最小距离>0.1:
    打印('未知人员')
    其他:
    印刷品(身份)
    返回标识
    如果名称=“\uuuuu main\uuuuuuuu”:
    数据库=准备_数据库()
    网络摄像头面部识别器(数据库)
    
    我做错了什么?这里的FRmodel是使用facenet训练好的模型

    几点:

    • 我没有看到对输入网络的人脸图像进行大小调整、对齐和白化(whitening)处理

    • 不能给大小不一的人脸添加固定为50的边距。必须进行缩放,以使人脸区域填充