Proper face mask detection in Python

Tags: python, tensorflow, opencv, machine-learning, keras

So I have been trying to use a Python script to detect a mask on a face. There are plenty of repositories with the same code, but they only detect whether a person is wearing a mask. I want my script to also detect whether the mask is worn properly, e.g. a mask that is on but does not cover the mouth, or one that does not cover the nose. Below is the code I use for training on the data -

# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import os

# initialize the initial learning rate, number of epochs to train for,
# and batch size
INIT_LR = 1e-4
EPOCHS = 20
BS = 32

DIRECTORY = r"D:\mask\dataset"
CATEGORIES = ["with_mask", "without_mask"]

# grab the list of images in our dataset directory, then initialize
# the list of data (i.e., images) and class labels
print("[INFO] loading images...")

data = []
labels = []

for category in CATEGORIES:
    path = os.path.join(DIRECTORY, category)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        image = load_img(img_path, target_size=(224, 224))
        image = img_to_array(image)
        image = preprocess_input(image)

        data.append(image)
        labels.append(category)

# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels)

data = np.array(data, dtype="float32")
labels = np.array(labels)

(trainX, testX, trainY, testY) = train_test_split(data, labels,
    test_size=0.20, stratify=labels, random_state=42)

# construct the training image generator for data augmentation
aug = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest")

# load the MobileNetV2 network, ensuring the head FC layer sets are
# left off
baseModel = MobileNetV2(weights="imagenet", include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))

# construct the head of the model that will be placed on top of the
# the base model
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(128, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(2, activation="softmax")(headModel)

# place the head FC model on top of the base model (this will become
# the actual model we will train)
model = Model(inputs=baseModel.input, outputs=headModel)

# loop over all layers in the base model and freeze them so they will
# *not* be updated during the first training process
for layer in baseModel.layers:
    layer.trainable = False

# compile our model
print("[INFO] compiling model...")
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# train the head of the network
print("[INFO] training head...")
H = model.fit(
    aug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    validation_steps=len(testX) // BS,
    epochs=EPOCHS)

# make predictions on the testing set
print("[INFO] evaluating network...")
predIdxs = model.predict(testX, batch_size=BS)

# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predIdxs, axis=1)

# show a nicely formatted classification report
print(classification_report(testY.argmax(axis=1), predIdxs,
    target_names=lb.classes_))

# serialize the model to disk
print("[INFO] saving mask detector model...")
model.save("mask_detector.model", save_format="h5")

# plot the training loss and accuracy
N = EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
plt.savefig("plot.png")
Below is the code for detecting the mask -

from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from imutils.video import VideoStream
import numpy as np
import imutils
import time
import cv2
import os
def detect_and_predict_mask(frame, faceNet, maskNet):
    # grab the dimensions of the frame and then construct a blob
    # from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224),
        (104.0, 177.0, 123.0))

    # pass the blob through the network and obtain the face detections
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)

    # initialize our list of faces, their corresponding locations,
    # and the list of predictions from our face mask network
    faces = []
    locs = []
    preds = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > 0.5:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # ensure the bounding boxes fall within the dimensions of
            # the frame
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            # extract the face ROI, convert it from BGR to RGB channel
            # ordering, resize it to 224x224, and preprocess it
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)

            # add the face and bounding boxes to their respective
            # lists
            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # only make predictions if at least one face was detected
    if len(faces) > 0:
        # for faster inference we'll make batch predictions on *all*
        # faces at the same time rather than one-by-one predictions
        # in the above `for` loop
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # return a 2-tuple of the face locations and their corresponding
    # predictions
    return (locs, preds)

# load our serialized face detector model from disk
prototxtPath = r"face_detector\deploy.prototxt"
weightsPath = r"face_detector\res10_300x300_ssd_iter_140000.caffemodel"
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the face mask detector model from disk
maskNet = load_model("mask_detector.model")

# initialize the video stream
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()

# loop over the frames from the video stream
while True:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=400)

    # detect faces in the frame and determine if they are wearing a
    # face mask or not
    (locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)

    # loop over the detected face locations and their corresponding
    # predictions
    for (box, pred) in zip(locs, preds):
        # unpack the bounding box and predictions
        (startX, startY, endX, endY) = box
        (mask, withoutMask) = pred

        # determine the class label and color we'll use to draw
        # the bounding box and text
        label = "Mask" if mask > withoutMask else "No Mask"
        color = (0, 255, 0) if label == "Mask" else (0, 0, 255)

        # include the probability in the label
        label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

        # display the label and bounding box rectangle on the output
        # frame
        cv2.putText(frame, label, (startX, startY - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
I am using the trained model, and it can successfully detect masks in a live stream with multiple people. All I need is to figure out how to detect whether the mask is worn properly. What would be the solution for this, and what changes need to be made in the code?

P.S. - I'm a newbie when it comes to ML.


Any suggestions or help would be greatly appreciated.

I don't think there is a workaround here. You simply have to collect more data of masks being worn improperly and increase the number of classes in the model: from the two classes ["with_mask", "without_mask"] to three, e.g. ["with_mask", "without_mask", "incorrectly_worn_mask"].
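
Since the training script builds its labels from the folder names listed in CATEGORIES, adding a class mostly means adding a matching folder of images; the third folder name below is only illustrative:

DIRECTORY = r"D:\mask\dataset"
CATEGORIES = ["with_mask", "without_mask", "incorrectly_worn_mask"]

# expected layout on disk:
#   D:\mask\dataset\with_mask\...
#   D:\mask\dataset\without_mask\...
#   D:\mask\dataset\incorrectly_worn_mask\...   <- new class folder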

Collect data for the third class, then retrain the model. Change this line:

headModel = Dense(2, activation="softmax")(headModel)
to this:

headModel = Dense(3, activation="softmax")(headModel)

because you now have 3 classes instead of 2.
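
Two more changes would likely be needed beyond the extra Dense unit; this is a sketch under that assumption, and the label strings and colors below are illustrative:

# in the training script: with 3 classes, use categorical cross-entropy
model.compile(loss="categorical_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# in the detection loop: pred now holds three probabilities, ordered as
# in lb.classes_ (alphabetical by folder name), so select the winner by
# index instead of unpacking (mask, withoutMask)
labels = ["Incorrect Mask", "Mask", "No Mask"]      # illustrative names
colors = [(0, 255, 255), (0, 255, 0), (0, 0, 255)]  # yellow/green/red
i = np.argmax(pred)
label = "{}: {:.2f}%".format(labels[i], pred[i] * 100)
color = colors[i]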

Hi, I tried what Minh suggested, but I ran into some errors. This is what I got -
Warning (from warnings module):
  File "C:\Users\Ayus\AppData\Roaming\Python\Python37\site-packages\PIL\Image.py", line 952
    "Palette images with Transparency expressed in bytes should be "
UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images
Traceback (most recent call last):
  File "D:\mask\train_mask_detector.py", line 59, in <module>
    test_size=0.20, stratify=labels, random_state=42)
  File "C:\Users\Ayush\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_split.py", line 2152, in train_test_split
    train, test = next(cv.split(X=arrays[0], y=stratify))
  File "C:\Users\Ayush\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_split.py", line 1746, in split
    y = check_array(y, ensure_2d=False, dtype=None)
  File "C:\Users\Ayush\AppData\Roaming\Python\Python37\site-packages\sklearn\utils\validation.py", line 72, in inner_f
    return f(**kwargs)
  File "C:\Users\Ayush\AppData\Roaming\Python\Python37\site-packages\sklearn\utils\validation.py", line 641, in check_array
    % (array.ndim, estimator_name))
ValueError: Found array with dim 3. Estimator expected <= 2.
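
A likely cause of that ValueError, judging from the training script: with three classes, LabelBinarizer.fit_transform already returns one-hot rows of shape (n_samples, 3), and calling to_categorical on that output adds a third axis, producing the 3-D array that the stratify check in train_test_split rejects. A minimal sketch of the fix, assuming that diagnosis:

# perform one-hot encoding on the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)   # already (n_samples, 3) for 3 classes
# labels = to_categorical(labels)   # drop this line for 3+ classes; it
#                                   # would turn the one-hot matrix into
#                                   # a 3-D array

The two-class version needed to_categorical because LabelBinarizer returns a single 0/1 column for binary labels. The PIL UserWarning above is separate: it only flags a palette image with transparency in the dataset and does not stop training.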