JavaScript:在 Colab 上使用 JS 网络摄像头代码进行 Darknet YOLOv3 预测,速度非常慢
我已经训练了一个 YOLOv3 模型,但在我的计算机上进行预测时速度非常慢:即使调整了输入的高度和宽度,帧率仍然不到 1 fps。于是我转而使用 Colab 进行预测,但遇到了几个问题。首先,在 Colab 上捕获网络摄像头需要用到 JavaScript,而我目前并不熟悉 JavaScript。我花了一个多星期反复折腾才让它运行起来。现在它已经可以对网络摄像头画面进行预测,但是在屏幕上画出一个检测框大约需要 30 秒,而且画出的框不会消失。以下是我找到的运行网络摄像头的脚本,以及我编写的、把网络摄像头代码和检测代码结合起来的代码(标签:javascript, python, google-colaboratory, yolo):
import base64
import html
import io
import time
from IPython.display import display, Javascript
from google.colab.output import eval_js
import numpy as np
from PIL import Image
import cv2
def start_input(): #webcam
    """Inject the webcam capture JavaScript into the notebook output.

    The script is handed to ``display(Javascript(...))`` and defines, on the
    browser side:

    * ``createDom()`` -- on first use builds a bordered container holding a
      status label, a ``<video>`` element fed by
      ``navigator.mediaDevices.getUserMedia`` (``facingMode: "environment"``),
      an absolutely positioned ``<img>`` used as the overlay, and a
      stop-instruction line, plus an off-screen 512 x 512 capture canvas.
      Clicking the video, the overlay or the instruction sets the
      ``shutdown`` flag.
    * ``onAnimationFrame()`` -- re-schedules itself until ``shutdown`` is
      set; when a capture is pending it draws the current video frame onto
      the canvas and resolves the pending promise with a JPEG data URL
      (quality 0.8), or with ``""`` once ``shutdown`` is set.
    * ``removeDom()`` -- stops the video track and removes the elements.
    * ``takePhoto(label, imgData)`` -- if ``shutdown`` is set, tears the DOM
      down and returns ``''``; otherwise updates the status label (when
      ``label`` is non-empty), stretches ``imgData`` over the video (when
      non-empty), waits for the next captured frame and returns an object
      with the timing fields ``create``, ``show``, ``capture`` (milliseconds)
      and the captured frame in ``img``.

    The JavaScript text itself is left flush-left inside the literal so the
    string sent to the browser is unchanged.
    """
    js = Javascript('''
var video;
var div = null;
var stream;
var captureCanvas;
var imgElement;
var labelElement;
var pendingResolve = null;
var shutdown = false;
function removeDom() {
stream.getVideoTracks()[0].stop();
video.remove();
div.remove();
video = null;
div = null;
stream = null;
imgElement = null;
captureCanvas = null;
labelElement = null;
}
function onAnimationFrame() {
if (!shutdown) {
window.requestAnimationFrame(onAnimationFrame);
}
if (pendingResolve) {
var result = "";
if (!shutdown) {
captureCanvas.getContext('2d').drawImage(video, 0, 0, 512, 512);
result = captureCanvas.toDataURL('image/jpeg', 0.8)
}
var lp = pendingResolve;
pendingResolve = null;
lp(result);
}
}
async function createDom() {
if (div !== null) {
return stream;
}
div = document.createElement('div');
div.style.border = '2px solid black';
div.style.padding = '3px';
div.style.width = '100%';
div.style.maxWidth = '600px';
document.body.appendChild(div);
const modelOut = document.createElement('div');
modelOut.innerHTML = "<span>Status:</span>";
labelElement = document.createElement('span');
labelElement.innerText = 'No data';
labelElement.style.fontWeight = 'bold';
modelOut.appendChild(labelElement);
div.appendChild(modelOut);
video = document.createElement('video');
video.style.display = 'block';
video.width = div.clientWidth - 6;
video.setAttribute('playsinline', '');
video.onclick = () => { shutdown = true; };
stream = await navigator.mediaDevices.getUserMedia(
{video: { facingMode: "environment"}});
div.appendChild(video);
imgElement = document.createElement('img');
imgElement.style.position = 'absolute';
imgElement.style.zIndex = 1;
imgElement.onclick = () => { shutdown = true; };
div.appendChild(imgElement);
const instruction = document.createElement('div');
instruction.innerHTML =
'<span style="color: red; font-weight: bold;">' +
'When finished, click here or on the video to stop this demo</span>';
div.appendChild(instruction);
instruction.onclick = () => { shutdown = true; };
video.srcObject = stream;
await video.play();
captureCanvas = document.createElement('canvas');
captureCanvas.width = 512; //video.videoWidth;
captureCanvas.height = 512; //video.videoHeight;
window.requestAnimationFrame(onAnimationFrame);
return stream;
}
async function takePhoto(label, imgData) {
if (shutdown) {
removeDom();
shutdown = false;
return '';
}
var preCreate = Date.now();
stream = await createDom();
var preShow = Date.now();
if (label != "") {
labelElement.innerHTML = label;
}
if (imgData != "") {
var videoRect = video.getClientRects()[0];
imgElement.style.top = videoRect.top + "px";
imgElement.style.left = videoRect.left + "px";
imgElement.style.width = videoRect.width + "px";
imgElement.style.height = videoRect.height + "px";
imgElement.src = imgData;
}
var preCapture = Date.now();
var result = await new Promise(function(resolve, reject) {
pendingResolve = resolve;
});
shutdown = false;
return {'create': preShow - preCreate,
'show': preCapture - preShow,
'capture': Date.now() - preCapture,
'img': result};
}
''')
    display(js)
def take_photo(label, img_data):
    """Run the browser-side ``takePhoto`` and return its result.

    Args:
        label: text/HTML for the status label; an empty string leaves the
            label unchanged on the JavaScript side.
        img_data: data URL drawn over the video; an empty string leaves the
            current overlay unchanged on the JavaScript side.

    Returns:
        Whatever ``eval_js`` yields for the ``takePhoto`` call. The
        JavaScript returns ``''`` once the user has clicked to stop, and
        otherwise an object with ``create``, ``show``, ``capture`` and
        ``img`` entries.
    """
    # Local import keeps this block self-contained.
    import json

    # json.dumps emits a quoted, fully escaped string literal that is also a
    # valid JavaScript literal, so quotes, backslashes or newlines inside
    # either argument can no longer break (or inject into) the evaluated call.
    call = 'takePhoto({}, {})'.format(json.dumps(label), json.dumps(img_data))
    data = eval_js(call)
    return data
import cv2
import numpy as np
import argparse
import imutils
import matplotlib.pyplot as plt
from sys import platform
# Location of the trained Darknet weights file.
weights = '/content/gdrive/My Drive/yolov3_recent/yolov3_training_2000.weights'

# Inference options. An explicit empty argument list is parsed, so sys.argv
# is not consulted and every option takes its declared default.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--img-size',
    type=int,
    default=416,
    help='inference size (pixels)',
)
opt = parser.parse_args([])
from model import *
from models import *
from utils.datasets import *
from utils.utils import *
# Load the network through OpenCV's DNN module from the weights file and the
# network configuration file.
net = cv2.dnn.readNet(weights,'/content/Weightlifting_project/yolov3_cfg')

# Class labels, one per line of classes.txt. (The former `classes = []`
# pre-initialisation was dead code: the name is always rebound here.)
with open("/content/Weightlifting_project/classes.txt", "r") as f:
    classes = f.read().splitlines()

# Drawing resources handed to plot().
font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(100, 3))

# Module-level detection state shared with outputs()/plot().
# boxes, confidences and class_ids are passed to outputs() and then read by
# cv2.dnn.NMSBoxes / plot() in get_drawing_array.
boxes = []
confidences = []
class_ids = []
box = []
# NOTE(review): the *_loc lists are not referenced anywhere in the visible
# part of this file -- presumably used by the project helpers; confirm.
knee_loc = []
hip_loc = []
foot_loc = []
knee_loc_2 = []
hip_loc_2 = []
foot_loc_2 = []
box_2 = []
def js_reply_to_image(js_reply):
    """Decode the webcam frame carried in a ``takePhoto`` reply.

    Args:
        js_reply: reply object from the browser; its ``'img'`` entry is a
            data URL of the form ``<header>,<base64 payload>``.

    Returns:
        The decoded frame as a numpy array (expected to be a 512 x 512 RGB
        image, given the 512 x 512 JPEG capture canvas set up in
        ``start_input``).
    """
    data_url = js_reply['img']
    # Everything after the first comma is the base64-encoded JPEG.
    payload = data_url.split(',')[1]
    frame = Image.open(io.BytesIO(base64.b64decode(payload)))
    return np.array(frame)
def get_drawing_array(image_array):
    """Run the detector on one webcam frame and render its overlay.

    Args:
        image_array: frame from the webcam as an image array (RGB,
            512 x 512 as produced by ``js_reply_to_image``).

    Returns:
        A ``(512, 512, 4)`` uint8 RGBA array that contains only what
        ``plot`` drew (boxes, text). The alpha channel is 255 wherever any
        colour channel is non-zero and 0 elsewhere, so the overlay is
        transparent except for the drawings.
    """
    # Start every frame with empty detection lists. They are module-level
    # and handed to outputs(), whose return value is discarded before the
    # lists are read below, so it is presumed to fill them in place. Without
    # this reset, detections from all earlier frames stayed in the lists:
    # old boxes were redrawn forever and NMS got slower with every frame.
    boxes.clear()
    confidences.clear()
    class_ids.clear()
    # NOTE(review): `box` and `box_2` are also passed to outputs(); whether
    # they need a per-frame reset too cannot be told from here -- confirm.

    drawing_array = np.zeros([512,512,4], dtype=np.uint8)
    img = letterbox(image_array, new_shape=opt.img_size)[0]
    # NOTE(review): the frame is letterboxed to opt.img_size and then resized
    # again to 512 x 512 by blobFromImage; a smaller blob size would be
    # faster -- confirm the intended network input size.
    # NOTE(review): swapRB=True swaps the R and B channels; the docstring
    # above says the frame is already RGB -- confirm the channel order the
    # network expects.
    blob = cv2.dnn.blobFromImage(img, 1/255, (512, 512), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)
    output_layers_names = net.getUnconnectedOutLayersNames()
    layerOutputs = net.forward(output_layers_names)
    # NOTE(review): 480, 480 is presumably the width/height used to scale
    # the boxes, while the overlay is 512 x 512 -- confirm against outputs().
    outputs(layerOutputs,480,480,boxes,confidences,class_ids,box,box_2)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
    # NMSBoxes yields an empty result when nothing survives; skip drawing
    # instead of handing plot() an empty selection.
    if indexes is not None and len(indexes) > 0:
        plot(indexes,boxes,classes,class_ids,confidences,colors,drawing_array,font)
    # Opaque where something was drawn, fully transparent elsewhere.
    drawing_array[:,:,3] = (drawing_array.max(axis = 2) > 0 ).astype(int) * 255
    return drawing_array
def drawing_array_to_bytes(drawing_array):
    """Encode the RGBA overlay as a PNG data URL for the browser.

    Args:
        drawing_array: RGBA image array (512 x 512 x 4) holding the boxes
            and text from the prediction; alpha is 255 on drawn pixels and
            0 elsewhere.

    Returns:
        A string of the form ``data:image/png;base64,<payload>`` suitable
        for an ``<img>`` element's ``src``.
    """
    # The array has four channels, so it must be read as RGBA. Reading it as
    # 'RGB' misinterprets the buffer and drops the alpha channel, which
    # makes the overlay opaque and garbled.
    drawing_PIL = Image.fromarray(drawing_array, 'RGBA')
    iobuf = io.BytesIO()
    drawing_PIL.save(iobuf, format='png')
    encoded = base64.b64encode(iobuf.getvalue()).decode('utf-8')
    drawing_bytes = 'data:image/png;base64,{}'.format(encoded)
    return drawing_bytes
# Inject the webcam JavaScript, then alternate between grabbing a frame in
# the browser and sending back the overlay computed for the previous frame.
start_input()
label_html = 'Capturing...'
img_data = ''  # no overlay until the first prediction is available
count = 0  # NOTE(review): never updated or read in the visible code
while True:
    js_reply = take_photo(label_html, img_data)
    # takePhoto returns '' once the user clicked to stop the demo.
    if not js_reply:
        break
    image = js_reply_to_image(js_reply)
    drawing_array = get_drawing_array(image)
    drawing_bytes = drawing_array_to_bytes(drawing_array)
    # Shown over the video on the next take_photo call.
    img_data = drawing_bytes
导入base64
导入html
输入io
导入时间
从IPython.display导入显示,Javascript
从google.colab.output导入eval_js
将numpy作为np导入
从PIL导入图像
进口cv2
def start_input():#网络摄像头
js=Javascript(“”)
var视频;
var-div=null;
var流;
var captureCanvas;
var激励;
var标签元素;
var pendingResolve=null;
var关闭=错误;
函数removeDom(){
stream.getVideoTracks()[0]。停止();
video.remove();
div.remove();
视频=空;
div=null;
流=空;
imgElement=null;
captureCanvas=null;
labelement=null;
}
函数onAnimationFrame(){
如果(!关机){
window.requestAnimationFrame(onAnimationFrame);
}
如果(pendingResolve){
var结果=”;
如果(!关机){
getContext('2d').drawImage(视频,0,0,512,512);
结果=captureCanvas.toDataURL('image/jpeg',0.8)
}
var lp=pendingResolve;
pendingResolve=null;
lp(结果);
}
}
异步函数createDom(){
如果(div!==null){
回流;
}
div=document.createElement('div');
div.style.border='2px纯黑';
div.style.padding='3px';
div.style.width='100%';
div.style.maxWidth='600px';
文件.正文.附件(div);
const modelOut=document.createElement('div');
modelOut.innerHTML=“状态:”;
labelement=document.createElement('span');
labelement.innerText='无数据';
labelement.style.fontWeight='bold';
appendChild(labelement);
附加子类(模型化);
video=document.createElement('video');
video.style.display='block';
video.width=div.clientWidth-6;
video.setAttribute('playsinline','';
video.onclick=()=>{shutdown=true;};
stream=await navigator.mediaDevices.getUserMedia(
{视频:{facingMode:“环境”});
儿童部(录像);
imgElement=document.createElement('img');
imgElement.style.position='绝对';
imgElement.style.zIndex=1;
imgElement.onclick=()=>{shutdown=true;};
儿童部(教育部);
const指令=document.createElement('div');
instruction.innerHTML=
'' +
'完成后,单击此处或视频停止此演示';
儿童部(教学);
指令.onclick=()=>{shutdown=true;};
video.srcObject=流;
等待视频。播放();
captureCanvas=document.createElement('canvas');
captureCanvas.width=512;//video.videoWidth;
captureCanvas.height=512;//video.videoHeight;
window.requestAnimationFrame(onAnimationFrame);
回流;
}
异步函数takePhoto(标签,imgData){
如果(关闭){
移除对象();
关机=假;
返回“”;
}
var preCreate=Date.now();
stream=wait createDom();
var presshow=Date.now();
如果(标签!=“”){
labelement.innerHTML=标签;
}
如果(imgData!=“”){
var videoRect=video.getClientRects()[0];
imgElement.style.top=videoRect.top+“px”;
imgElement.style.left=videoRect.left+“px”;
imgElement.style.width=videoRect.width+“px”;
imgElement.style.height=videoRect.height+“px”;
imgElement.src=imgData;
}
var preCapture=Date.now();
var结果=等待新承诺(函数(解析、拒绝){
pendingResolve=解决;
});
关机=假;
返回{'create':预显示-预创建,
“展示”:预制-展示前,
“捕获”:Date.now()-preCapture,
'img':结果};
}
''')
显示(js)
def拍摄照片(标签、img_数据):
data=eval_js('takePhoto({},“{}”)')。格式(标签,img_数据))
返回数据
进口cv2
将numpy作为np导入
导入argparse
导入imutils
将matplotlib.pyplot作为plt导入
从sys导入平台
parser=argparse.ArgumentParser()
add_参数('--img size',type=int,default=416,help='推断大小(像素)'
opt=parser.parse_args(args=[])
权重='/content/gdrive/My Drive/yolov3_recent/yolov3_training_2000.weights'
从模型导入*
从模型导入*
从utils.dataset导入*
从utils.utils导入*
net=cv2.dnn.readNet(重量,'/content/weighting\u project/yolov3\u cfg')
类别=[]
以open(“/content/weightighting_project/classes.txt”,“r”)作为f:
class=f.read().splitlines()
font=cv2.font\u好时\u普通
颜色=np.随机.均匀(0,255,大小=(100,3))
框=[]
信任=[]
类_id=[]
框=[]
膝盖位置=[]
hip_loc=[]
英尺位置=[]
膝盖位置2=[]
hip_loc_2=[]
英尺位置2=[]
框2=[]
def js_reply_to_image(js_reply):
"""
输入:
js_回复:JavaScript对象,包含来自网络摄像头的图像
输出