Python OpenCV-基于特定对象条件保存视频时,并非所有这样的帧都被保存
我在Python中使用opencv,并试图在帧中出现特定类型的对象/标签(例如“伞”)时仅录制/保存视频中的帧 问题:Python OpenCV-基于特定对象条件保存视频时,并非所有这样的帧都被保存,python,python-3.x,opencv,artificial-intelligence,object-detection,Python,Python 3.x,Opencv,Artificial Intelligence,Object Detection,我在Python中使用opencv,并试图在帧中出现特定类型的对象/标签(例如“伞”)时仅录制/保存视频中的帧 问题: def vid_objects_detection(type=0, confidence_threshold=0.5, image_quality=416): classes = [] # reading category names from coco text file and inserting in classes list with ope
def vid_objects_detection(type=0, confidence_threshold=0.5, image_quality=416):
    """Run YOLOv3-tiny object detection on a video source and save only the
    frames that contain an 'umbrella' detection to an output .mp4 file.

    Args:
        type: video source — a filename (str) or 0 for the webcam.
        confidence_threshold: minimum confidence for a detection to be kept.
        image_quality: side length (px) of the square blob fed to the network.

    Raises:
        IOError: if the first frame cannot be read from the source.
    """
    # reading category names from coco text file and inserting in classes list
    with open("coco.names", "r") as f:
        classes = [line.strip() for line in f]

    # using tiny versions of weights & config file
    net = cv2.dnn.readNet("yolov3-tiny.weights", "yolov3-tiny.cfg")
    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers() returns 1-based indices; older OpenCV wraps
    # each index in a length-1 array, newer returns plain ints — handle both.
    output_layers = []
    for i in net.getUnconnectedOutLayers():
        idx = i[0] if hasattr(i, '__len__') else i
        output_layers.append(layer_names[int(idx) - 1])

    # Loading video
    cap = cv2.VideoCapture(type)  # use 0 for webcam
    ok, frame = cap.read()
    if not ok:  # BUG FIX: fail loudly instead of crashing on frame.shape
        cap.release()
        raise IOError("cannot read from video source %r" % (type,))
    height, width, channels = frame.shape

    # providing codec for writing frames to video
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    # BUG FIX: with the webcam (type=0) the old name 'obj_detect4_0' had no
    # extension, which can produce an unwritable/unplayable file — always
    # end the output name in .mp4.
    out_name = 'obj_detect4_' + str(type)
    if not out_name.lower().endswith('.mp4'):
        out_name += '.mp4'
    # Output must be the same size (width, height) as the original video.
    out_vid = cv2.VideoWriter(out_name, fourcc, 20.0, (width, height))

    font = cv2.FONT_HERSHEY_COMPLEX_SMALL
    starting_time = time.time()
    frame_id = 0
    while True:
        ok, frame = cap.read()
        if not ok:  # BUG FIX: stop cleanly at end of file / camera loss
            break
        frame_id += 1
        height, width, channels = frame.shape

        blob = cv2.dnn.blobFromImage(frame, 0.00392,
                                     (image_quality, image_quality),
                                     (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Collect raw detections above the confidence threshold.
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                # detection = [cx, cy, w, h, objectness, class scores...]
                # (standard YOLO output layout; restores the computation the
                # original post elided behind its "calculated ..." comments)
                scores = detection[5:]
                class_id = int(scores.argmax())
                confidence = float(scores[class_id])
                if confidence > confidence_threshold:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(confidence)
                    class_ids.append(class_id)

        # Non-maximum suppression drops overlapping duplicate boxes.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)

        # BUG FIX: the original kept a single `label` variable that was
        # overwritten by every box, so only the LAST detection per frame was
        # tested for 'umbrella' and most umbrella frames were skipped.
        # Collect ALL surviving labels instead (also removes the dead
        # `if i == ''` guard — `i` is an int and never equals '').
        labels = []
        for i in range(len(boxes)):
            if i in indexes:
                labels.append(str(classes[class_ids[i]]))

        elapsed_time = time.time() - starting_time
        fps = frame_id / elapsed_time
        cv2.putText(frame, "|FPS: " + str(round(fps, 3)), (10, 40),
                    font, 1, (0, 255, 0), 1)

        # saving video frame by frame: write once per frame when ANY detected
        # label mentions an umbrella (not once per surviving box, as before).
        if any('umbrella' in label for label in labels):
            out_vid.write(frame)

        key = cv2.waitKey(5)
        if key == 27:  # ESC quits
            break

    cap.release()
    out_vid.release()
    cv2.destroyAllWindows()

# calling function
vid_objects_detection("walking.mp4")
它正确地从第一次在帧中找到提到的对象/标签的实例开始保存帧,但如果该对象/标签在接下来的几帧中不存在,并且仅在几帧之后出现,则这些帧不会保存到我正在保存它的mp4文件中
它只保存具有所述对象的第一个连续帧,不保存后续帧
阅读了此链接中的建议后,我按照该建议把写帧的步骤放进 for 循环,修改后的代码如下:
写帧部分——我尝试改写的代码片段:
# NOTE(review): fragment quoted from the question — `i`, `label`, `frame`,
# `total_frames` and `out_vid` come from the surrounding (elided) detection
# loop; indentation was lost when the post was pasted.
# saving video frame by frame
for frame_numb in range(total_frames):
if i == '':
pass
else:
if "umbrella" in label:
print("umbrella in labels")
# Issue causing part where I may need some change
# NOTE(review): `frame` is a single image (an H x W x 3 array), so
# `frame[frame_numb]` selects one ROW of pixels, not a video frame —
# this is why the writer produced an unusable file.
out_vid.write(frame[frame_numb])
上述代码更改的结果:
# NOTE(review): duplicate of the function shown earlier, quoted to show the
# failing attempt. All indentation was lost in the paste, and the detection
# math was elided, so this copy is not runnable as-is.
def vid_objects_detection(type=0, confidence_threshold=0.5, image_quality=416):
classes = []
# reading category names from coco text file and inserting in classes list
with open("coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
net = cv2.dnn.readNet("yolov3-tiny.weights", "yolov3-tiny.cfg") # using tiny versions of weights & config file
layer_names = net.getLayerNames()
# NOTE(review): newer OpenCV returns plain ints from getUnconnectedOutLayers(),
# so `i[0]` raises there — works only on older versions that wrap the index.
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# Loading video
cap = cv2.VideoCapture(type) # use 0 for webcam
_, frame = cap.read()
height, width, channels = frame.shape
# providing codec for writing frames to video
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
# Write video with name & size. Should be of same size(width, height) as original video
# NOTE(review): for type=0 (webcam) this name has no .mp4 extension — likely
# the cause of the small unplayable output file reported below.
out_vid = cv2.VideoWriter('obj_detect4_'+str(type), fourcc, 20.0, (width,height))
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
starting_time = time.time()
frame_id = 0
while True:
# NOTE(review): the read flag is discarded — crashes on frame.shape at EOF.
_, frame = cap.read()
frame_id +=1
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
height, width, channels = frame.shape
blob = cv2.dnn.blobFromImage(frame, 0.00392, (image_quality, image_quality), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# For showing informations on screen
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# claculated scores, class_id, confidence
# NOTE(review): the score/class/box computation was elided from the post, so
# `confidence`, `class_id`, `x`, `y`, `w`, `h` are undefined at this point.
if confidence > confidence_threshold:
# claculatedd center_x, center_y, w,h,x,y
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
print("confidences:", confidences)
print(class_ids)
print("boxes", boxes)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
# NOTE(review): `label` is reassigned for every surviving box, so after the
# loop it holds only the LAST detection — the root cause of frames with an
# umbrella (not detected last) never being written.
label = str(classes[class_ids[i]])
elapsed_time = time.time() - starting_time
fps = frame_id / elapsed_time
time_display = time.strftime("%a, %d%b%Y %H:%M:%S", time.localtime())
cv2.putText(frame,"|FPS: " + str(round(fps,3)), (10, 40), font, 1, (0,255,0), 1)
print(fps)
# saving video frame by frame
# NOTE(review): `i` is an int and never equals '' — this guard is a no-op.
if i == '':
pass
else:
if 'umbrella' in label:
out_vid.write(frame)
key = cv2.waitKey(5)
if key == 27:
break
cap.release()
out_vid.release()
cv2.destroyAllWindows()
# calling function
vid_objects_detection("walking.mp4")
它只创建256kb的文件,文件无法打开/无法写入任何内容
如果我在代码中做了以下更改,那么它只保存满足该条件的视频的第一帧,并在整个时间内运行相同的帧
# NOTE(review): fragment quoted from the question — `i`, `label`, `frame`,
# `total_frames` and `out_vid` come from the surrounding (elided) loop.
# saving video frame by frame
for frame_numb in range(total_frames):
if i == '':
pass
else:
if "umbrella" in label:
print("umbrella in labels")
# Issue causing part where I may need some change
# NOTE(review): `frame` never changes inside this loop, so the SAME current
# frame is written total_frames times — matching the reported symptom of one
# frame repeating for the whole output video.
out_vid.write(frame)
共享下面更大的代码块以供参考:
# NOTE(review): third verbatim copy of the function, quoted "for reference".
# Indentation was lost in the paste and the detection math was elided, so
# this copy is not runnable as-is.
def vid_objects_detection(type=0, confidence_threshold=0.5, image_quality=416):
classes = []
# reading category names from coco text file and inserting in classes list
with open("coco.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
net = cv2.dnn.readNet("yolov3-tiny.weights", "yolov3-tiny.cfg") # using tiny versions of weights & config file
layer_names = net.getLayerNames()
# NOTE(review): `i[0]` assumes the older OpenCV API that wraps each index in
# a length-1 array; newer versions return plain ints.
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# Loading video
cap = cv2.VideoCapture(type) # use 0 for webcam
_, frame = cap.read()
height, width, channels = frame.shape
# providing codec for writing frames to video
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
# Write video with name & size. Should be of same size(width, height) as original video
# NOTE(review): for type=0 (webcam) the output name lacks a .mp4 extension.
out_vid = cv2.VideoWriter('obj_detect4_'+str(type), fourcc, 20.0, (width,height))
font = cv2.FONT_HERSHEY_COMPLEX_SMALL
starting_time = time.time()
frame_id = 0
while True:
# NOTE(review): read flag ignored — frame is None at end of file.
_, frame = cap.read()
frame_id +=1
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
height, width, channels = frame.shape
blob = cv2.dnn.blobFromImage(frame, 0.00392, (image_quality, image_quality), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
# For showing informations on screen
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# claculated scores, class_id, confidence
# NOTE(review): computation elided from the post — `confidence`, `class_id`,
# `x`, `y`, `w`, `h` are undefined here.
if confidence > confidence_threshold:
# claculatedd center_x, center_y, w,h,x,y
boxes.append([x, y, w, h])
confidences.append(float(confidence))
class_ids.append(class_id)
print("confidences:", confidences)
print(class_ids)
print("boxes", boxes)
indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)
for i in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
# NOTE(review): `label` keeps only the LAST surviving detection, which is
# the bug the accepted answer identifies (fix: collect labels into a list).
label = str(classes[class_ids[i]])
elapsed_time = time.time() - starting_time
fps = frame_id / elapsed_time
time_display = time.strftime("%a, %d%b%Y %H:%M:%S", time.localtime())
cv2.putText(frame,"|FPS: " + str(round(fps,3)), (10, 40), font, 1, (0,255,0), 1)
print(fps)
# saving video frame by frame
# NOTE(review): `i == ''` can never be true for an int — dead guard.
if i == '':
pass
else:
if 'umbrella' in label:
out_vid.write(frame)
key = cv2.waitKey(5)
if key == 27:
break
cap.release()
out_vid.release()
cv2.destroyAllWindows()
# calling function
vid_objects_detection("walking.mp4")
我在代码中删减了一些小的计算并插入了注释,以减少代码的长度有时视频编解码器会执行所谓的关键帧压缩。这意味着,一个帧被完全存储,比如每10帧存储一次,中间的所有其他帧被存储为更改或增量。在这种情况下,当您尝试仅在帧之间保存这些时,它们可能无法保存。但是,在这些情况下,如果按顺序迭代每个帧,则保存帧是有效的
也许你可以把这一行注释掉:
out_vid = cv2.VideoWriter('obj_detect4_' + str(type), fourcc, 20.0, (width, height))
,然后根据你的情况尝试保存网络摄像头流中的帧。有时视频编解码器会执行所谓的关键帧压缩。这意味着,一个帧被完全存储,比如每10帧存储一次,中间的所有其他帧被存储为更改或增量。在这种情况下,当您尝试仅在帧之间保存这些时,它们可能无法保存。但是,在这些情况下,如果按顺序迭代每个帧,则保存帧是有效的
也许您可以把这一行注释掉:
out_vid = cv2.VideoWriter('obj_detect4_' + str(type), fourcc, 20.0, (width, height))
,然后根据您的条件尝试保存网络摄像头流中的帧。您设置了if条件out for循环@因为我是蝙蝠侠谢谢你的建议。实际上,“标签”被帧中出现的最后一个对象/标签覆盖。所以它很难捕捉到“伞”的框架。所以我创建了一个列表,开始在其中插入标签名,并在if条件下使用该列表,而不仅仅是标签,即使不使用for循环,它也可以正常工作。@ViSa您可以共享更新的代码吗?我正在做一个类似的项目,谢谢你设置了if条件out for循环@因为我是蝙蝠侠谢谢你的建议。实际上,“标签”被帧中出现的最后一个对象/标签覆盖。所以它很难捕捉到“伞”的框架。所以我创建了一个列表,开始在其中插入标签名,并在if条件下使用该列表,而不仅仅是标签,即使不使用for循环,它也可以正常工作。@ViSa您可以共享更新的代码吗?我正在做一个类似的项目,谢谢Hanks提供的信息。我肯定会尝试你的建议,但现在我已经做到了。问题是,每个帧中的标签都被该帧的最后一个标签覆盖。所以大多数时候我搜索的标签都不能作为帧中的最后一个标签使用。所以我创建了一个列表来保存框架中出现的所有标签,并在列表中进行匹配。但是你给了我一个很好的建议,我一定会尝试的。谢谢你提供的信息。我肯定会尝试你的建议,但现在我已经做到了。问题是,每个帧中的标签都被该帧的最后一个标签覆盖。所以大多数时候我搜索的标签都不能作为帧中的最后一个标签使用。所以我创建了一个列表来保存框架中出现的所有标签,并在列表中进行匹配。但是你给了我一个很好的建议,我一定会尝试的。