Python 在google Colab中使用cv2.imshow（）_Python_Google Colaboratory_Cv2

Python 在google Colab中使用cv2.imshow（）

python google-colaboratory

Python 在google Colab中使用cv2.imshow（）,python,google-colaboratory,cv2,Python,Google Colaboratory,Cv2,我试图通过输入视频来对视频进行目标检测 cap=cv2.VideoCapture（“video3.mp4”）在处理部分完成后，我想通过使用 while True: ret, image_np = cap.read() # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) #

我试图通过输入视频来对视频进行目标检测

cap = cv2.VideoCapture("video3.mp4")

处理部分完成后，我想用下面的代码把视频显示出来：

# Read the video frame by frame, run detection on each frame and display the
# annotated result in an OpenCV window until the video ends or 'q' is pressed.
while True:
    ret, image_np = cap.read()
    # `ret` is falsy when no frame could be read (for example at the end of
    # the video); stop instead of passing a None frame to the model.
    if not ret:
        break

    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
    # Visualization of the results of a detection (drawn onto image_np in place).
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)

    cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
    # Wait up to 25 ms for a key press; 'q' closes the windows and stops.
    if cv2.waitKey(25) & 0XFF == ord('q'):
        cv2.destroyAllWindows()
        break

# Free the video file handle once the loop is done.
cap.release()

但是 Colab 提示 cv2.imshow() 已被禁用，建议改用 cv2_imshow()。可是 cv2_imshow() 只能渲染图像（一帧接一帧地输出）。我希望像使用 cv2.imshow() 那样把结果作为视频输出。请帮我解决这个问题。提前谢谢。

我的完整代码附呈

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

import cv2
from google.colab.patches import cv2_imshow

# Open the input video; frames are read from this capture in the loop below.
cap = cv2.VideoCapture("video3.mp4")

# Presumably needed so that the `object_detection` package in the parent
# directory can be imported -- confirm against the notebook's working directory.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Fail early when the installed TensorFlow is older than 1.12.0.
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')

from object_detection.utils import label_map_util

from object_detection.utils import visualization_utils as vis_util

# Load the serialized frozen graph from disk into `detection_graph`.
# NOTE(review): PATH_TO_FROZEN_GRAPH and PATH_TO_LABELS are not defined in the
# code shown here; they must be assigned before this point.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


# Category index built from the label map; it is handed to the visualization
# call when boxes and labels are drawn.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)


def load_image_into_numpy_array(image):
    """Convert an image object into a ``(height, width, 3)`` uint8 array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` returning one 3-value pixel per position
            (presumably a PIL RGB image -- confirm with callers).

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with shape
        ``(height, width, 3)``.

    Raises:
        ValueError: If the pixel data cannot be reshaped to
            ``(height, width, 3)``, e.g. for a non-3-channel image.
    """
    (im_width, im_height) = image.size
    # getdata() is a flat pixel sequence, so rows come first when reshaping.
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# Directory with the sample images; the list below names image3.jpg through
# image7.jpg inside it.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, file_name)
    for file_name in map('image{}.jpg'.format, range(3, 8))
]

# Not referenced by the video loop in this snippet.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
    """Run the detection graph on one batched image and return its outputs.

    Args:
        image: Array fed to the ``image_tensor:0`` placeholder. The caller
            passes a frame with a leading axis added by ``np.expand_dims``
            (the model expects shape ``[1, None, None, 3]``).
        graph: ``tf.Graph`` that holds the imported detection model.

    Returns:
        dict holding the first batch entry of every fetched output:
        ``num_detections`` as ``int``, ``detection_classes`` cast to
        ``np.int64``, ``detection_boxes``, ``detection_scores`` and, only
        when the graph exposes it, ``detection_masks``.
    """
    with graph.as_default():
        # NOTE: a new Session is created on every call; when this function is
        # called once per video frame that setup cost is paid for each frame.
        with tf.Session() as sess:
            # Collect the names of all tensors in the graph so that only
            # outputs that actually exist are fetched.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # Drop the batch axis; this processing handles a single image.
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Keep only the first `num_detections` boxes and masks.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                # image.shape[1] / image.shape[2] are presumably the frame
                # height and width given the [1, None, None, 3] input -- confirm.
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1], image.shape[2])
                # Binarize the masks at 0.5.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Add the batch axis back so all outputs are indexed alike below.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference.
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image})

            # Take batch entry 0 of every output and convert types where needed.
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

# Run detection on every frame of the video and render each annotated frame
# in the notebook output.
while True:
    ret, image_np = cap.read()
    # `ret` is falsy when no frame could be read (for example at the end of
    # the video); stop instead of passing a None frame to the model.
    if not ret:
        break

    # Add a leading axis: the model takes a batch of images.
    image_np_expanded = np.expand_dims(image_np, axis=0)

    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)

    # Draw boxes, class labels and (if present) masks onto image_np.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)

    cv2_imshow(image_np)
    # Wait up to 25 ms for a key press; 'q' closes the windows and stops.
    if cv2.waitKey(25) & 0XFF == ord('q'):
        cv2.destroyAllWindows()
        break

# Free the video file handle once the loop is done.
cap.release()

将numpy导入为np
导入操作系统
将six.moves.urllib作为urllib导入
导入系统
导入tarfile
导入tensorflow作为tf
进口拉链
从distutils.version导入StrictVersion
从集合导入defaultdict
从io导入StringIO
从matplotlib导入pyplot作为plt
从PIL导入图像
进口cv2
从google.colab.patches导入cv2_imshow
cap=cv2.VideoCapture（“video3.mp4”）
sys.path.append（“..”）
从object_detection.utils导入操作作为utils_操作
如果StrictVersion（tf.\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
raise ImportError（'请将TensorFlow安装升级到v1.12.*.'）
从object\u detection.utils导入标签\u映射\u util
从object_detection.utils导入可视化_utils作为vis_util
检测图=tf.graph（）
使用检测图。作为默认值（）：
od_graph_def=tf.GraphDef（）
将tf.gfile.gfile（路径到冻结图，'rb'）作为fid：
序列化的_图=fid.read（）
od_图形_def.ParseFromString（序列化_图形）
tf.import_graph_def（od_graph_def，name=''）
类别索引=标签映射工具。从标签映射创建类别索引（路径到标签，使用显示名称=真）
def将图像加载到图像阵列（图像）：
（im_宽度，im_高度）=image.size
返回np.array（image.getdata（））。重塑(
（im_高度，im_宽度，3））.aType（np.uint8）
路径\到\测试\图像\目录='测试\图像'
TEST_IMAGE_path=[os.path.join（path_TO_TEST_IMAGES_DIR，'IMAGE{}.jpg'.format（i）），用于范围（3,8）中的i）
图像大小=（12,8）
为单个图像（图像、图形）定义运行推理：
使用graph.as_default（）：
使用tf.Session（）作为sess：
ops=tf.get_default_graph（）.get_operations（）
all_tensor_names={output.name for op in op for output in op.outputs}
张量_dict={}
输入[
“检测次数”、“检测框”、“检测分数”，
“检测类”、“检测掩码”
]:
张量_name=key+'：0
如果所有张量名称中的张量名称：
tensor_dict[key]=tf.get_default_graph（）.get_tensor_by_name(
张量（名称）
如果张量中的“检测屏蔽”：
检测盒=tf.挤压（张量[detection\u boxes'，[0]）
检测屏蔽=tf.挤压（张量dict[‘检测屏蔽’，[0]）
real_num_detection=tf.cast（张量dict['num_detections'][0]，tf.int32）
检测盒=tf.slice（检测盒[0,0]，[real\u num\u detection，-1]）
检测掩码=tf.slice（检测掩码，[0,0,0]，[real\u num\u detection，-1，-1]）
检测\u掩码\u重构=utils\u ops.reframe\u框\u掩码\u到图像\u掩码(
检测遮罩，检测框，image.shape[1]，image.shape[2]）
检测\u掩模\u重构=tf.cast(
tf.更大（检测屏蔽重新格式化，0.5），tf.uint8）
张量dict['detection\u masks']=tf.expand\u dims(
检测\u掩码\u重新格式化，0）
image\u tensor=tf.get\u default\u graph（）.get\u tensor\u by\u name（'image\u tensor:0'））
输出dict=sess.run（张量dict，
feed_dict={image_tensor:image}）
输出dict['num\u detections']=int（输出dict['num\u detections'][0]）
输出指令['detection\u classes']=输出指令[
“检测类”][0]。aType（np.int64）
输出指令['detection\u Box']=输出指令['detection\u Box'][0]
输出记录[“检测记录”]=输出记录[“检测记录”][0]
如果输出指令中有“检测屏蔽”：
output_dict['detection_masks']=output_dict['detection_masks'][0]
返回输出命令
尽管如此：
ret，image\u np=cap.read（）
图像\u np\u展开=np。展开\u dims（图像\u np，轴=0）
输出\u dict=运行\u推断\u用于单个图像（图像\u np\u展开，检测\u图形）
vis_util.visualize_box_和_标签_在_image_数组上(
图像_np，
输出指令[‘检测盒’]，
输出dict[“检测类”]，
输出记录[“检测分数”]，
类别索引，
实例\u掩码=输出\u dict.get（'detection\u masks'），
使用标准化坐标=真，
线（厚度=8）
cv2\U imshow（图像\U np）
如果cv2.waitKey（25）&0XFF==ord（'q'）：
cv2.destroyAllWindows（）
打破

Colab notebook提供了一种在笔记本上观看视频的方法：

import io
import base64
from IPython.display import HTML

def playvideo(filename):
    """Return an HTML ``<video>`` element with *filename* embedded inline.

    The whole file is read into memory and placed in the page as a base64
    ``data:video/mp4`` URL, so no separate file needs to be served.

    Args:
        filename: Path of the video file to embed.

    Returns:
        ``IPython.display.HTML`` object wrapping the ``<video>`` markup.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # Read-only binary mode is sufficient (the previous 'r+b' also required
    # write permission); the context manager closes the handle after reading.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)
    return HTML(data='''<video alt="test" controls>
                    <source src="data:video/mp4;base64,{0}" type="video/mp4"/>
                 </video>'''.format(encoded.decode('ascii')))

导入io
导入base64
从IPython.display导入HTML
def播放视频（文件名）：
video=io.open（文件名'r+b'）.read（）
encoded=base64.b64编码（视频）
返回HTML（数据=“”）
''。格式（编码。解码（'ascii'））

然后使用

playvideo('./Megamind.mp4')

观看视频

无论如何，请记住将

%pylab notebook

放在笔记本的开头，这有助于解决此类问题。

要在google colab中使用

cv2.imshow

，您可以使用以下导入：

# Colab-provided stand-in for cv2.imshow (which Colab disables); it renders
# the given image in the notebook output. `img` is the image to display.
from google.colab.patches import cv2_imshow
cv2_imshow(img)

演示如何在Colab中处理视频的示例： #定义辅助功能以显示视频输入io 从IPython.display导入HTML 从base64导入b64encode def显示视频（文件名，宽度=640）：

mp4=open（文件名'rb'）.read（）
data_url=“data:video/mp4；base64，”+b64编码（mp4）.decode（）
返回HTML（“”）
“”。格式（宽度、数据\ url））
#将结果视频从avi转换为mp4文件格式
导入操作系统
path_video=os.path.join（“outputs”，“tracker.avi”）
%cd输出/
!ffmpeg-y-loglevel panic-i tracker.avi output.mp4
%cd。。
#输出目标跟踪视频
path\u output=os.path.join（“outputs”、“output.mp4”）
显示视频（路径输出，宽度=960）

可能重复的功能是否有相关文档？图像应该是浮点还是整数？您可能可以使用opencv文档。尽我所能

  # NOTE(review): these lines are the body of a function whose `def` line is
  # missing from this listing; the later call show_video(path_output, width=960)
  # suggests the signature show_video(file_name, width=...) -- confirm.
  # Read the whole video file as bytes and wrap it in a base64 data URL.
  mp4 = open(file_name,'rb').read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  # Return a <video> element of the requested width that plays the data URL.
  return HTML("""
  <video width="{0}" controls>
        <source src="{1}" type="video/mp4">
  </video>
  """.format(width, data_url))
#convert resulting video from avi to mp4 file format
import os
# NOTE(review): path_video is assigned but not used in the lines shown here.
path_video = os.path.join("outputs","tracker.avi")
# The next three lines are IPython magics / a shell escape (notebook only, not
# plain Python): enter outputs/, convert tracker.avi to output.mp4 with ffmpeg
# (-y overwrites an existing output file, -loglevel panic silences logging),
# then return to the parent directory.
%cd outputs/
!ffmpeg -y -loglevel panic -i tracker.avi output.mp4
%cd ..

#output object tracking video
path_output = os.path.join("outputs","output.mp4")
# Embed the converted file in the notebook at a width of 960.
show_video(path_output, width=960)