Python Tensorflow和OpenCV实时分类_Python_Opencv_Machine Learning_Tensorflow_Classification

Python Tensorflow和OpenCV实时分类

python opencv machine-learning tensorflow

Python Tensorflow和OpenCV实时分类,python,opencv,machine-learning,tensorflow,classification,Python,Opencv,Machine Learning,Tensorflow,Classification,我正在测试机器学习软件，并使用该模型对网络进行再训练，以对所需对象进行分类最初，我的预测是在本地存储的图像上运行的，我意识到从文件中取消图形的持久性需要2-5秒，而运行实际的预测大约需要2-5秒此后，我修改了我的代码，以纳入OpenCV的摄像头提要，但在上述情况下，视频延迟是不可避免的在初始图形加载期间，预计会出现时间命中；这就是为什么要事先运行initialSetup（），但2-5秒是荒谬的。我喜欢我目前的申请；实时分类，这不是最好的加载方式。还有别的办法吗？我知道，对于手机版本，TS

我正在测试机器学习软件，并使用该模型对网络进行再训练，以对所需对象进行分类

最初，我的预测是在本地存储的图像上运行的，我发现从文件中加载（反序列化）计算图需要2-5秒，而运行实际的预测也需要大约同样长的时间。

此后，我修改了我的代码，以接入OpenCV的摄像头视频流，但在上述耗时下，视频延迟是不可避免的。

初次加载计算图时会有一定的耗时，这在预料之中；这就是为什么要事先运行

initialSetup()

，但2-5秒实在太离谱了。我觉得对于我目前的应用场景（实时分类）来说，这不是最好的加载方式。还有别的办法吗？我知道，对于移动端版本，TensorFlow建议精简计算图。在这里精简计算图会是可行的办法吗？如果有关系的话，我的计算图目前是87.4MB。

除此之外，有没有办法加快预测过程？

import os
import cv2
import timeit
import numpy as np
import tensorflow as tf

# Open capture device index 0; frames are pulled from it by grabVideoFeed().
camera = cv2.VideoCapture(0)

# Load the class labels, one per line, with trailing whitespace (newline /
# carriage return included) stripped. The ``with`` block closes the label
# file as soon as it has been read instead of leaving the handle open.
with tf.gfile.GFile('retrained_labels.txt') as label_file:
    label_lines = [line.rstrip() for line in label_file]

def grabVideoFeed():
    """Read a single frame from the module-level ``camera``.

    Returns:
        The frame produced by ``camera.read()``, or ``None`` when the read
        reports that no frame was grabbed.
    """
    ok, image = camera.read()
    if not ok:
        return None
    return image

def initialSetup():
    """Load ``retrained_graph.pb`` and import it into TensorFlow.

    Sets ``TF_CPP_MIN_LOG_LEVEL`` to ``'2'``, parses the serialized graph
    file into a ``GraphDef``, imports it with an empty name prefix, and
    prints how long the whole load took.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    began = timeit.default_timer()

    # Deserializing the graph is the slow step (reported as 2-5 seconds).
    with tf.gfile.FastGFile('retrained_graph.pb', 'rb') as graph_file:
        serialized = graph_file.read()
        definition = tf.GraphDef()
        definition.ParseFromString(serialized)
        tf.import_graph_def(definition, name='')

    elapsed = timeit.default_timer() - began
    print('Took {} seconds to unpersist the graph'.format(elapsed))

def _classify_with_session(sess, image_data):
    """Run one prediction in ``sess`` and print timings and ranked labels."""
    start_time = timeit.default_timer()

    # Look up the output tensor of the retrained graph.
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

    print('Tensor {}'.format(softmax_tensor))
    print('Took {} seconds to feed data to graph'.format(
        timeit.default_timer() - start_time))

    start_time = timeit.default_timer()

    # Feed the image to the graph input and fetch the class scores.
    predictions = sess.run(softmax_tensor, {'Mul:0': image_data})

    print('Took {} seconds to perform prediction'.format(
        timeit.default_timer() - start_time))

    start_time = timeit.default_timer()

    # Class indices of the first (only) prediction, highest score first.
    scores = predictions[0]
    top_k = scores.argsort()[::-1]

    print('Took {} seconds to sort the predictions'.format(
        timeit.default_timer() - start_time))

    for node_id in top_k:
        print('%s (score = %.5f)' % (label_lines[node_id], scores[node_id]))


def classify(image_data, sess=None):
    """Classify one image with the imported graph and print the results.

    Args:
        image_data: Value fed to the graph's ``Mul:0`` tensor. The caller in
            this script passes a float array with a leading batch axis of
            size 1 holding a 299x299 frame.
        sess: Optional ``tf.Session`` to run the prediction in. Creating a
            session is expensive, so callers that classify many frames
            should create one session up front and pass it on every call.
            When omitted, a temporary session is created and closed for
            this single call (the original behaviour).

    The session passed in by the caller is never closed here; its owner is
    responsible for that.
    """
    print('********* Session Start *********')

    if sess is not None:
        _classify_with_session(sess, image_data)
    else:
        with tf.Session() as owned_session:
            _classify_with_session(owned_session, image_data)

    print('********* Session Ended *********')

# try/finally guarantees the camera and the preview window are released even
# when graph loading fails or a frame grab raises SystemError.
try:
    initialSetup()

    while True:
        frame = grabVideoFeed()

        if frame is None:
            raise SystemError('Issue grabbing the frame')

        frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)

        # Build the graph input: float pixels min-max scaled to [-0.5, 0.5]
        # with a leading batch axis added.
        # NOTE(review): OpenCV delivers frames in BGR channel order;
        # presumably the retrained graph expects RGB -- confirm whether a
        # cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) is needed before feeding.
        numpy_frame = np.asarray(frame)
        numpy_frame = cv2.normalize(numpy_frame.astype('float'), None, -0.5, .5, cv2.NORM_MINMAX)
        numpy_final = np.expand_dims(numpy_frame, axis=0)

        classify(numpy_final)

        cv2.imshow('Main', frame)

        # Quit when the user presses 'q' in the preview window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    camera.release()
    cv2.destroyAllWindows()

编辑1

调试完代码后，我意识到创建会话是一项既耗费资源又耗费时间的操作

在前面的代码中，除了运行预测之外，每处理一帧OpenCV视频画面都会创建一个新会话。将OpenCV操作包装在单个会话中可以大大缩短耗时，但初次运行时仍然会有很大的开销；第一次预测需要2-3秒。之后，预测时间约为0.5秒，这使得摄像头画面仍然滞后。

import os
import cv2
import timeit
import numpy as np
import tensorflow as tf

# Open capture device index 0; frames are pulled from it by grabVideoFeed().
camera = cv2.VideoCapture(0)

# Load the class labels, one per line, with trailing whitespace (newline /
# carriage return included) stripped. The ``with`` block closes the label
# file as soon as it has been read instead of leaving the handle open.
with tf.gfile.GFile('retrained_labels.txt') as label_file:
    label_lines = [line.rstrip() for line in label_file]

def grabVideoFeed():
    """Read a single frame from the module-level ``camera``.

    Returns:
        The frame produced by ``camera.read()``, or ``None`` when the read
        reports that no frame was grabbed.
    """
    ok, image = camera.read()
    if not ok:
        return None
    return image

def initialSetup():
    """Load ``retrained_graph.pb`` and import it into TensorFlow.

    Sets ``TF_CPP_MIN_LOG_LEVEL`` to ``'2'``, parses the serialized graph
    file into a ``GraphDef``, imports it with an empty name prefix, and
    prints how long the whole load took.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    began = timeit.default_timer()

    # Deserializing the graph is the slow step (reported as 2-5 seconds).
    with tf.gfile.FastGFile('retrained_graph.pb', 'rb') as graph_file:
        serialized = graph_file.read()
        definition = tf.GraphDef()
        definition.ParseFromString(serialized)
        tf.import_graph_def(definition, name='')

    elapsed = timeit.default_timer() - began
    print('Took {} seconds to unpersist the graph'.format(elapsed))

# try/finally guarantees the camera and the preview window are released even
# when graph loading fails or a frame grab raises SystemError.
try:
    initialSetup()

    # One session is shared by every frame; creating a session per frame was
    # the dominant cost in the earlier version of this script.
    with tf.Session() as sess:
        start_time = timeit.default_timer()

        # Look up the output tensor of the retrained graph once, up front.
        softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')

        print('Took {} seconds to feed data to graph'.format(
            timeit.default_timer() - start_time))

        while True:
            frame = grabVideoFeed()

            if frame is None:
                raise SystemError('Issue grabbing the frame')

            frame = cv2.resize(frame, (299, 299), interpolation=cv2.INTER_CUBIC)

            cv2.imshow('Main', frame)

            # Build the graph input: float pixels min-max scaled to
            # [-0.5, 0.5] with a leading batch axis added.
            # NOTE(review): OpenCV delivers frames in BGR channel order;
            # presumably the retrained graph expects RGB -- confirm whether
            # a cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) is needed here.
            numpy_frame = np.asarray(frame)
            numpy_frame = cv2.normalize(numpy_frame.astype('float'), None, -0.5, .5, cv2.NORM_MINMAX)
            numpy_final = np.expand_dims(numpy_frame, axis=0)

            start_time = timeit.default_timer()

            # Reported timings: 2-3 seconds for the first run, roughly 0.5
            # seconds for each run after that.
            predictions = sess.run(softmax_tensor, {'Mul:0': numpy_final})

            print('Took {} seconds to perform prediction'.format(
                timeit.default_timer() - start_time))

            start_time = timeit.default_timer()

            # Class indices of the first (only) prediction, best score first.
            scores = predictions[0]
            top_k = scores.argsort()[::-1]

            print('Took {} seconds to sort the predictions'.format(
                timeit.default_timer() - start_time))

            for node_id in top_k:
                print('%s (score = %.5f)' % (label_lines[node_id], scores[node_id]))

            print('********* Session Ended *********')

            # Quit when the user presses 'q'; leaving the ``with`` block
            # closes the session, so no explicit sess.close() is needed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    camera.release()
    cv2.destroyAllWindows()

编辑2

在胡乱摆弄之后，我无意中发现了计算图量化和计算图转换这两种方法，以下是我得到的结果：

原始图形：87.4MB

量化图形：87.5MB

转换图：87.1MB

8位计算：22MB，但在使用时遇到问题。

我最近添加了一个选项，使用TensorFlow for Poets来训练较小的Mobilenet模型：

这可能有助于加快分类速度，但要牺牲一定的准确性。

请参阅：@Ruut我在6月23日读过这篇文章；事实上，第二天它就出来了。他使用多线程来加速I/O操作。我一直想在我的项目中尝试一下，但实际预测仍然需要0.4-0.8秒，我认为这是因为我的巨大模型。我仍在寻找缩小规模的方法Hello，我正试图实现与您希望的几乎相同的目标：通过OpenCV

Videocapture

实现实时Tensorflow分类。预测持续0.4秒。我安装了支持GPU的Tensorflow。请问您是否正在使用CPU/GPU构建

（2）

您是否有一个解决方案可以实现更快的速度？谢谢你的建议。谢谢。@KeithOYS我没有用GPU。这背后的原因是因为我要把我的模型移植到手机上。2：我在尝试运行量化图时仍然会遇到上面提到的错误。I/O当前在主线程上运行，所以考虑将它移到单独的线程上。这将有助于加快速度。Kethoys关于加速实际模型，考虑重新训练它，以感谢美国。我会把它转一转，让你知道。。还有一件事，我在玩弄对象检测api，并设法在自己的数据集上对其进行本地训练。既然你是一名谷歌工程师，我想让你接受我的观点，以及我下一次可以做些什么来改进它。非常感谢你的帮助！你做到实时了吗？即使在美国，我也面临着同样的问题desktop@HaraHaraMahadevaki我已经有一段时间没做这件事了。我建议你在移动网络上重新训练它。你能给我这样做的链接吗？顺便说一句，试着用这种方法。它大大提高了性能