Tensorflow 无法同时检测多个面_Tensorflow_Face Recognition_Face

Tensorflow 无法同时检测多个面

tensorflow

Tensorflow 无法同时检测多个面,tensorflow,face-recognition,face,Tensorflow,Face Recognition,Face,由于某些原因，我无法同时检测多张脸。它一次只能检测一张脸。我如何解决这个问题？我在下面添加了代码。我使用谷歌的facenet进行实时人脸识别在视频输出中，它一次仅在一个面上创建边界框。但在控制台输出中，可以计算出存在的面数为两个或两个以上 from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow a

由于某些原因，我无法同时检测多张脸。它一次只能检测一张脸。我如何解决这个问题？我在下面添加了代码。我使用谷歌的facenet进行实时人脸识别

在视频输出中，它一次仅在一个面上创建边界框。但在控制台输出中，可以计算出存在的面数为两个或两个以上

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from scipy import misc
import cv2
import matplotlib.pyplot as plt
import numpy as np
import argparse
import facenet
import detect_face
import os
from os.path import join as pjoin
import sys
import time
import copy
import math
import pickle
from sklearn.svm import SVC
from sklearn.externals import joblib

#addded
#import reload
#reload(sys)
#sys.setdefaultencoding('utf8')

print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, 
 log_device_placement=False))
    with sess.as_default():
    pnet, rnet, onet = detect_face.create_mtcnn(sess, './') #face detection

    minsize = 20  # minimum size of face                    #minsize, threshold, factor used for detection
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    frame_interval = 3
    batch_size = 1000
    image_size = 182
    input_image_size = 160


    items = os.listdir("/Aryabhatta Robotics Internship/facenet-master/Real_time_face/ids/aligned")
    #HumanNames = []
    #for names in items:
        #HumanNames.append(names)
    #print(HumanNames)
    #HumanNames = ['Alok','Siddhant','tesra','s01','s02','s03','s04','s05','s06','s07','s08','s09','s10','s11','s12','s13','s14','s15','s16','s17','s18','s19','s20']    #train human name, known face names

    print('Loading feature extraction model')
    modeldir = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/20180402-114759/20180402-114759.pb' #feature extraction mmodel
    facenet.load_model(modeldir)

    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
    embedding_size = embeddings.get_shape()[1]

    classifier_filename = '/Aryabhatta Robotics Internship/facenet-master/Real_time_face/models/my_classifier/my_classifier.pkl' #out own classifier
    classifier_filename_exp = os.path.expanduser(classifier_filename)
    with open(classifier_filename_exp, 'rb') as infile:
        (model, class_names) = pickle.load(infile)#, encoding='latin1')
        print('load classifier file-> %s' % classifier_filename_exp)

    video_capture = cv2.VideoCapture(0)
    c = 0

    # #video writer
    # fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    # out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480))

    print('Start Recognition!')
    prevTime = 0
    while True: #infinite loop
        ret, frame = video_capture.read() #video capture from webcam

        frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)

        curTime = time.time()    # calc fps
        timeF = frame_interval

        if (c % timeF == 0):
            find_results = []

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Detected_FaceNum: %d' % nrof_faces)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]

                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces,4), dtype=np.int32)

                for i in range(nrof_faces):
                    print("faceno:" + str(i))
                    emb_array = np.zeros((1, embedding_size))

                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    # inner exception
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('face is inner of range!')
                        continue

                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[0] = facenet.flip(cropped[0], False)
                    scaled.append(misc.imresize(cropped[0], (image_size, image_size), interp='bilinear'))
                    scaled[0] = cv2.resize(scaled[0], (input_image_size,input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[0] = facenet.prewhiten(scaled[0])
                    scaled_reshape.append(scaled[0].reshape(-1,input_image_size,input_image_size,3))
                    feed_dict = {images_placeholder: scaled_reshape[0], phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    #print(emb_array)


                    threshold_accuracy = 155
                    predictions = model.predict_proba(emb_array)
                    #print(predictions)

                    for i in range(len(predictions[0])):
                        predictions[0][i] = np.exp(18*predictions[0][i])
                        #print(predictions)

                    best_class_indices = np.argmax(predictions, axis=1)
                    print(best_class_indices)
                    print("next")
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)
                    for i in range(len(best_class_indices)):
                        print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))


                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face

                        #plot result idx under box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    # print('result: ', best_class_indices[0])

                    if best_class_probabilities[i] > threshold_accuracy :
                                    #result_names = HumanNames[best_class_indices[0]]
                        cv2.putText(frame, class_names[best_class_indices[i]], (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255), thickness=1, lineType=2)
                    else:
                        cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                    1, (0, 0, 255), thickness=1, lineType=2)

                    #for H_i in HumanNames:
                        #if HumanNames[best_class_indices[0]] == H_i and best_class_probabilities[0] > threshold_accuracy :
                            #flag = 1
                            #result_names = HumanNames[best_class_indices[0]]
                            #cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        #1, (0, 0, 255), thickness=1, lineType=2)


                        #else:
                            #cv2.putText(frame, 'Unknown', (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                       # 1, (0, 0, 255), thickness=1, lineType=2)
            else:
                print('Unable to align')

        sec = curTime - prevTime
        prevTime = curTime
        fps = 1 / (sec)
        str1 = 'FPS: %2.3f' % fps
        text_fps_x = len(frame[0]) - 150
        text_fps_y = 20
        cv2.putText(frame, str1, (text_fps_x, text_fps_y),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2)
        # c+=1
        cv2.imshow('Video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    video_capture.release()
    # #video writer
    # out.release()
    cv2.destroyAllWindows()

来自未来导入绝对导入
来自未来进口部
来自未来导入打印功能
导入tensorflow作为tf
从scipy导入杂项
进口cv2
将matplotlib.pyplot作为plt导入
将numpy作为np导入
导入argparse
导入facenet
导入检测人脸
导入操作系统
从os.path导入join作为pjoin
导入系统
导入时间
导入副本
输入数学
进口泡菜
从sklearn.svm导入SVC
从sklearn.externals导入作业库
#添加
#导入重新加载
#重新加载（系统）
#sys.setdefaultencoding（'utf8'）
打印（'创建网络和加载参数'）
使用tf.Graph（）作为默认值（）：
gpu\U选项=tf.gpu选项（每个进程\u gpu\u内存\u分数=0.6）
sess=tf.Session（config=tf.ConfigProto（gpu\U选项=gpu\U选项，
日志（设备位置=假））
使用sess.as_default（）：
pnet，rnet，onet=检测面部。创建面部检测
minsize=20#面部最小尺寸#minsize，阈值，用于检测的因子
阈值=[0.6,0.7,0.7]#三步阈值
系数=0.709#比例系数
保证金=44
帧间隔=3
批量大小=1000
图像大小=182
输入图像大小=160
items=os.listdir（“/Aryabhatta机器人实习/facenet主机/Real\u time\u face/ids/aligned”）
#人名=[]
#对于项目中的名称：
#人名。附加（人名）
#打印（人名）
#人名=['Alok'、'Siddhant'、'tesra'、's01'、's02'、's03'、's04'、's05'、's06'、's07'、's08'、's09'、's10'、's11'、's12'、's13'、's15'、's16'、's17'、's18'、's19'、's20']；训练人名、已知的脸名
打印（'加载特征提取模型'）
modeldir='/Aryabhatta机器人技术实习/facenet master/Real_time_face/models/20180402-114759/20180402-114759.pb'#特征提取mmodel
facenet.load_模型（modeldir）
images\u placeholder=tf.get\u default\u graph（）.get\u tensor\u by\u name（“输入：0”）
embeddings=tf.get_default_graph（）.get_tensor_by_name（“embeddings:0”）
phase\u train\u placeholder=tf.get\u default\u graph（）.get\u tensor\u by\u name（“phase\u train:0”）
嵌入大小=嵌入。获取形状（）[1]
classifier_filename='/Aryabhatta Robotics实习生/facenet master/Real_time_face/models/my_classifier/my_classifier.pkl'#我们自己的分类器
分类器\u文件名\u exp=os.path.expanduser（分类器\u文件名）
以open（分类器\文件名\ exp，'rb'）作为填充：
（模型，类名）=pickle.load（infle）#，编码='latin1'）
打印（'加载分类器文件->%s'%classifier\u filename\u exp）
视频捕获=cv2。视频捕获（0）
c=0
##录像机
#fourcc=cv2.VideoWriter\u fourcc（*“DIVX”）
#out=cv2.VideoWriter（'3F_0726.avi'，fourcc，fps=30，frameSize=（640480））
打印（'开始识别！'）
时间=0
如果为True:#无限循环
ret，frame=video_capture.read（）#从网络摄像头捕获视频
frame=cv2.调整大小（frame，（0,0），fx=0.5，fy=0.5）#调整框架大小（可选）
curTime=time.time（）#计算fps
timeF=帧间隔
如果（c%timeF==0）：
查找结果=[]
如果frame.ndim==2：
frame=facenet.to_rgb（frame）
帧=帧[：，：，0:3]
边界框，检测面。检测面（帧、最小尺寸、pnet、rnet、onet、阈值、因子）
nrof_面=边界_框。形状[0]
打印（'检测到的\u面数：%d'%n个\u面）
如果nrof_面>0：
det=边界框[：，0:4]
img_size=np.asarray（frame.shape）[0:2]
裁剪=[]
缩放=[]
缩放的形状=[]
bb=np.zero（（nrof_面，4），dtype=np.int32）
对于范围内的i（nrof_面）：
打印（“面号：+str（i））
emb_数组=np.0（（1，嵌入_大小））
bb[i][0]=det[i][0]
bb[i][1]=det[i][1]
bb[i][2]=det[i][2]
bb[i][3]=det[i][3]
#内部异常
如果bb[i][0]=len（帧）：
打印（'面在范围内！'）
持续
剪切。追加（帧[bb[i][1]：bb[i][3]，bb[i][0]：bb[i][2]，：）
裁剪[0]=facenet.flip（裁剪[0]，False）
scaled.append（杂项imresize（裁剪[0]，（图像大小，图像大小），interp='双线性'））
缩放[0]=cv2.调整大小（缩放[0]，（输入图像大小，输入图像大小），
插值=cv2.INTER_立方）
已缩放[0]=facenet.prewhiten（已缩放[0]）
缩放图像整形。追加（缩放[0]。整形（-1，输入图像大小，输入图像大小，3））
feed_dict={images_占位符：缩放_重塑[0]，phase_train_占位符：False}
emb_数组[0，：]=sess.run（嵌入，feed_dict=feed_dict）
#打印（emb_阵列）
阈值_精度=155
预测=模型。预测概率（emb\U数组）
#打印（预测）
对于范围内的i（len（预测[0]）：
预测[0][i]=np.exp（18*预测[0][i]）
#打印（预测）
最佳类指数=np.argmax（预测，轴=1）
打印（最佳类索引）
打印（“下一页”）
最佳类概率=预测_
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from scipy import misc
from skimage.transform import resize
import cv2
import numpy as np
import facenet
import detect_face
import os
import time
import pickle
import sys

img_path='download.jpeg'
modeldir = './model/20170511-185253.pb'
classifier_filename = './class/classifier.pkl'
npy='./npy'
train_img="./train_img"

with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

        minsize = 10  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps's threshold
        factor = 1   # scale factor
        margin = 44
        frame_interval = 3
        batch_size = 1000
        image_size = 182
        input_image_size = 160
        
        HumanNames = os.listdir(train_img)
        HumanNames.sort()

        print('Loading feature extraction model')
        facenet.load_model(modeldir)

        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]


        classifier_filename_exp = os.path.expanduser(classifier_filename)
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)

        # video_capture = cv2.VideoCapture("akshay_mov.mp4")
        c = 0


        print('Start Recognition!')
        prevTime = 0
        # ret, frame = video_capture.read()
        frame = cv2.imread(img_path,0)

        frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5)    #resize frame (optional)

        curTime = time.time()+1    # calc fps
        timeF = frame_interval

        if (c % timeF == 0):
            find_results = []

            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]
            print(1)
            bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('Face Detected: %d' % nrof_faces)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(frame.shape)[0:2]

                cropped = []
                scaled = []
                scaled_reshape = []
                bb = np.zeros((nrof_faces,4), dtype=np.int32)

                for i in range(nrof_faces):
                    emb_array = np.zeros((1, embedding_size))

                    bb[i][0] = det[i][0]
                    bb[i][1] = det[i][1]
                    bb[i][2] = det[i][2]
                    bb[i][3] = det[i][3]

                    # inner exception
                    if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame):
                        print('face is too close')
                        continue

                    cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :])
                    cropped[i] = facenet.flip(cropped[i], False)
                    scaled.append(resize(cropped[i], (image_size, image_size), anti_aliasing=True))
                    scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size),
                                           interpolation=cv2.INTER_CUBIC)
                    scaled[i] = facenet.prewhiten(scaled[i])
                    scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3))
                    feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False}
                    emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)
                    predictions = model.predict_proba(emb_array)
                    print(predictions)
                    best_class_indices = np.argmax(predictions, axis=1)
                    # print(best_class_indices)
                    best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]
                    print(best_class_probabilities)
                    cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2)    #boxing face

                    #plot result idx under box
                    text_x = bb[i][0]
                    text_y = bb[i][3] + 20
                    print('Result Indices: ', best_class_indices[0])
                    print(HumanNames)
                    for H_i in HumanNames:
                        # print(H_i)
                        if HumanNames[best_class_indices[0]] == H_i:
                            result_names = HumanNames[best_class_indices[0]]
                            
            else:
                print('Unable to align')
        cv2.imshow('Image', frame)

        if cv2.waitKey(100) & 0xFF == ord('q'):
            sys.exit("Thanks")
        cv2.destroyAllWindows()