OpenCV: dense optical flow and object detection on video with Python

I am trying to use dense optical flow together with object detection on every frame of a video. Basically, if I only use the Yolo model for object detection, it does not give me an object bounding box for every frame of the video; sometimes a detection is missed. To smooth this out, I want to use dense optical flow, so that if Yolo does not detect an object in some frames, the optical flow output can be used to create the object's bounding box. Below is my code integrating dense optical flow with Yolo object detection.

import numpy as np
import argparse
import imutils
import time
import cv2
import os
import sys
%matplotlib inline
from matplotlib import pyplot as plt
confidence = 0.5
threshold= 0.3
labelsPath = "C:/Users/Akash Jain/Downloads/yolo-object-detection/yolo-coco/coco.names"
LABELS = open(labelsPath).read().strip().split("\n")
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),dtype="uint8")
weightsPath = "C:/Users/Akash Jain/Downloads/yolo-object-detection/yolo-coco/yolov3.weights"
configPath = "C:/Users/Akash Jain/Downloads/yolo-object-detection/yolo-coco/yolov3.cfg"
# load our YOLO object detector trained on COCO dataset (80 classes)
print("[INFO] loading YOLO from disk...")
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
ln = net.getLayerNames()
# get the names of the YOLO output layers; older OpenCV builds return an Nx1 array
# from getUnconnectedOutLayers(), newer ones return a flat array and need ln[i - 1]
ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
np.set_printoptions(threshold=sys.maxsize)
writer = None
(W, H) = (None, None)
cap = cv2.VideoCapture("C:/Users/Akash Jain/Downloads/yolo-object-detection/videos/overpass.mp4")
try:
    prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT if imutils.is_cv2() else cv2.CAP_PROP_FRAME_COUNT
    total = int(cap.get(prop))    
    print("[INFO] {} total frames in video".format(total))
except:
    print("[INFO] could not determine # of frames in video")
    total = -1
# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()
# Converts frame to grayscale because we only need the luminance channel for detecting edges - less computationally expensive
prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)
# Sets image saturation to maximum
mask[..., 1] = 255
dir_name='C:/Users/Akash Jain/Documents/ZED/Split'
base_filename='output'
filename_suffix = 'png'
fno=1
while(cap.isOpened()):    
    ret, frame = cap.read()
    if not ret:
        break
    if W is None or H is None:
        (H, W) = frame.shape[:2]
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Calculates dense optical flow by Farneback method
    # https://docs.opencv.org/3.0-beta/modules/video/doc/motion_analysis_and_object_tracking.html#calcopticalflowfarneback
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    # Computes the magnitude and angle of the 2D vectors
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # Sets image hue according to the optical flow direction
    mask[..., 0] = angle * 180 / np.pi / 2
    # Sets image value according to the optical flow magnitude (normalized)
    mask[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    # Converts HSV to RGB (BGR) color representation
    rgb = cv2.cvtColor(mask, cv2.COLOR_HSV2BGR) 
    # Construct a blob from the frame and run a forward pass of the YOLO network
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    start = time.time()
    layerOutputs = net.forward(ln)
    end = time.time()
    boxes = []
    confidences = []
    classIDs = []    
    for output in layerOutputs:       
        for detection in output:            
            scores = detection[5:]
            classID = np.argmax(scores)           
            confidence = scores[classID]
            if confidence > 0.5:
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))
                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)
    # Run non-maxima suppression once per frame, after all detections have been collected
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)
    if len(idxs) > 0:
        for i in idxs.flatten():
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    print("frame",fno) 
    #print(text)
    print("boxes",boxes)
    print("confidence",confidences)
    print("classes",classIDs)
    print("object detection") 
    print(frame.shape)
    #print(frame)
    print("optical")
    #print(rgb)
    print(rgb.shape)
    # Blend the detection frame with the optical-flow visualization;
    # cv2.addWeighted saturates at 255 instead of wrapping around
    output = cv2.addWeighted(frame, 1.0, rgb, 0.5, 0)
    # Save the blended frame to disk
    newpath = os.path.join(dir_name, str(fno) + "." + filename_suffix)
    cv2.imwrite(newpath, output)
    figure = plt.figure(figsize=(15, 15))
    # Convert BGR to RGB so matplotlib shows the correct colors
    plt.imshow(cv2.cvtColor(output, cv2.COLOR_BGR2RGB))
    plt.show()
    prev_gray = gray
    fno += 1
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
If I run this code, I get the following output:

[INFO] loading YOLO from disk...
[INFO] 812 total frames in video
frame 1
boxes [[622, 264, 14, 12], [675, 275, 20, 20], [574, 293, 22, 18], [495, 334, 49, 29], [525, 374, 49, 50]]
confidence [0.8653988242149353, 0.6596917510032654, 0.9037992358207703, 0.9345515966415405, 0.8292896747589111]
classes [2, 2, 2, 2, 2]
object detection
(720, 1280, 3)
optical
(720, 1280, 3)


frame 2
boxes [[622, 264, 14, 12], [674, 275, 21, 20], [573, 293, 23, 18], [495, 334, 50, 29], [518, 378, 57, 52]]
confidence [0.8665716648101807, 0.6463424563407898, 0.9210211038589478, 0.8402170538902283, 0.5718783140182495]
classes [2, 2, 2, 2, 2]
object detection
(720, 1280, 3)
optical
(720, 1280, 3)

Any input that would help me use the dense optical flow output to create a bounding box for an object when the detection model misses it, so that the object is detected in every frame, would be appreciated. The reason I am doing this is to build a scene description of the road: there are three cars on the road, one parked and two moving.
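One way to use the dense flow for this is sketched below, under the assumption that the previous frame's detections are kept around; the helper propagate_box_with_flow and the prev_boxes bookkeeping are names introduced here, not part of the code above. The idea is to shift each previous box by the median flow vector inside it whenever Yolo returns nothing for the current frame:

import numpy as np

def propagate_box_with_flow(box, flow, frame_shape):
    """Shift a previous-frame box [x, y, w, h] by the median Farneback flow
    vector inside that box; the result is a rough estimate of where the
    object has moved to in the current frame."""
    x, y, w, h = box
    H, W = frame_shape[:2]
    # Clip the region of interest to the frame before indexing the flow field
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, W), min(y + h, H)
    if x1 <= x0 or y1 <= y0:
        return box
    roi = flow[y0:y1, x0:x1]            # (h, w, 2) array of (dx, dy) vectors
    dx = float(np.median(roi[..., 0]))  # median is robust to background pixels
    dy = float(np.median(roi[..., 1]))
    return [int(round(x + dx)), int(round(y + dy)), w, h]

# Inside the main loop, after `flow` and the Yolo boxes for this frame exist:
# if len(boxes) == 0 and prev_boxes:
#     boxes = [propagate_box_with_flow(b, flow, frame.shape) for b in prev_boxes]
#     confidences, classIDs = prev_confidences, prev_classIDs
# prev_boxes, prev_confidences, prev_classIDs = boxes, confidences, classIDs

This only papers over short gaps: an object that Yolo never detects in the first place, or one that stops moving, still will not get a box from the flow, which matches the concern raised in the comments below.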

Hi @Akash, have you made any progress since asking this question? I am not sure optical flow information fits your problem, because if a vehicle stays still, the flow over the stationary vehicle does not change. Combining optical flow with a detection system does look useful, since it provides motion data for the detected objects. That said, I did some research and this LearnOpenCV material may help you: . Best regards.

@ThiagoRTK Not much progress so far. I have seen that blog; the problem is that it only detects objects in the first frame and then tracks them, so if a new object appears in between it cannot be tracked. Optical flow alone does not help either, because we also want to identify the object, i.e. whether it is a car or a person. That is why we want to do both object detection and optical flow.

Hi @Akash, I understand the idea of using Yolo and optical flow (OF) together, i.e. doing the detection with Yolo and extracting motion features from the detected objects. So let me check that I understand your problem: when Yolo fails, you want to use OF to detect the object and place a bounding box around it, right? If I understood correctly, even if OF can build a bounding box around a moving object, it will still fail when the object is stationary... I just want to clarify so I can see whether I can help you. About the LearnOpenCV material (which takes a tracking approach): I have not looked at it in detail, but wouldn't it be possible to adapt it so that every Yolo detection becomes a starting point for tracking? That way it would be an extra layer to avoid losing objects when Yolo fails...
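Following that last suggestion, a rough sketch of such an extra layer is shown below; the helpers seed_trackers and update_trackers are hypothetical names, and CSRT trackers require the opencv-contrib build (in some OpenCV versions the constructor lives under cv2.legacy instead). Every Yolo detection seeds one tracker, and the trackers' boxes are used on frames where Yolo returns nothing:

import cv2

def seed_trackers(frame, boxes):
    """Start one CSRT tracker per Yolo box [x, y, w, h]."""
    trackers = []
    for (x, y, w, h) in boxes:
        t = cv2.TrackerCSRT_create()   # needs opencv-contrib-python
        t.init(frame, (x, y, w, h))
        trackers.append(t)
    return trackers

def update_trackers(frame, trackers):
    """Ask every tracker for its box in the current frame."""
    tracked = []
    for t in trackers:
        ok, (x, y, w, h) = t.update(frame)
        if ok:
            tracked.append([int(x), int(y), int(w), int(h)])
    return tracked

# Per frame: re-seed whenever Yolo produced detections, otherwise fall back
# if len(boxes) > 0:
#     trackers = seed_trackers(frame, boxes)
# elif trackers:
#     boxes = update_trackers(frame, trackers)

Re-seeding on every successful Yolo frame keeps newly appearing objects from being missed, which was the main objection to tracking only the first frame's detections.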