Warning: file_get_contents(/data/phpspider/zhask/data//catemap/6/cplusplus/162.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
从Python访问OpenCV CUDA函数(无PyCUDA)_Python_C++_Opencv_Cuda - Fatal编程技术网

从Python访问OpenCV CUDA函数(无PyCUDA)

从Python访问OpenCV CUDA函数(无PyCUDA),python,c++,opencv,cuda,Python,C++,Opencv,Cuda,我正在编写一个Python应用程序,它使用OpenCV的Python绑定来进行标记检测和其他图像处理。我想使用OpenCV的CUDA模块来加速我的应用程序的某些部分,并在它们的.hpp文件中注意到,它们似乎在使用针对Python和Java的OpenCV导出宏。然而,我似乎无法访问这些CUDA函数,即使我正在使用_CUDA=ON构建OpenCV 为了访问GPU函数(如cudaarithm中的阈值),是否需要使用PyCUDA之类的包装器?或者,如果我在Python代码中调用cv2.threshold

我正在编写一个Python应用程序,它使用OpenCV的Python绑定来进行标记检测和其他图像处理。我想使用OpenCV的CUDA模块来加速我的应用程序的某些部分,并在它们的
.hpp
文件中注意到,它们似乎在使用针对Python和Java的OpenCV导出宏。然而,我似乎无法访问这些CUDA函数,即使我在构建OpenCV时使用了WITH_CUDA=ON

为了访问GPU函数(如cudaarithm中的threshold),是否需要使用PyCUDA之类的包装器?或者,如果我在Python代码中调用cv2.threshold(),使用的是否已经是这些CUDA加速函数(而不是常规的基于CPU的实现)?

我看到的cv2的子模块如下所示:

  • Error
  • aruco
  • detail
  • fisheye
  • flann
  • instr
  • ml
  • ocl
  • ogl
  • videostab
cv2.cuda
cv2.gpu
cv2.cudaarithm
都返回一个
AttributeError

我为构建OpenCV而运行的CMake指令如下所示:

# Configure an OpenCV build with CUDA enabled and the listed opencv_contrib
# modules, tests, examples and Java bindings disabled.
# Every option is kept on an explicitly continued line so that no -D token is
# split in the middle by a hard line wrap.
cmake -DOPENCV_EXTRA_MODULES_PATH=/usr/local/lib/opencv_contrib/modules/ \
    -D WITH_CUDA=ON -D CUDA_FAST_MATH=1 \
    -D ENABLE_PRECOMPILED_HEADERS=OFF \
    -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D BUILD_EXAMPLES=OFF \
    -D BUILD_opencv_java=OFF \
    -DBUILD_opencv_bgsegm=OFF -DBUILD_opencv_bioinspired=OFF -DBUILD_opencv_ccalib=OFF \
    -DBUILD_opencv_cnn_3dobj=OFF -DBUILD_opencv_contrib_world=OFF -DBUILD_opencv_cvv=OFF \
    -DBUILD_opencv_datasets=OFF -DBUILD_opencv_dnn=OFF -DBUILD_opencv_dnns_easily_fooled=OFF \
    -DBUILD_opencv_dpm=OFF -DBUILD_opencv_face=OFF -DBUILD_opencv_fuzzy=OFF \
    -DBUILD_opencv_hdf=OFF -DBUILD_opencv_line_descriptor=OFF -DBUILD_opencv_matlab=OFF \
    -DBUILD_opencv_optflow=OFF -DBUILD_opencv_plot=OFF -DBUILD_opencv_README.md=OFF \
    -DBUILD_opencv_reg=OFF -DBUILD_opencv_rgbd=OFF -DBUILD_opencv_saliency=OFF \
    -DBUILD_opencv_sfm=OFF -DBUILD_opencv_stereo=OFF -DBUILD_opencv_structured_light=OFF \
    -DBUILD_opencv_surface_matching=OFF -DBUILD_opencv_text=OFF -DBUILD_opencv_tracking=OFF \
    -DBUILD_opencv_viz=OFF -DBUILD_opencv_xfeatures2d=OFF -DBUILD_opencv_ximgproc=OFF \
    -DBUILD_opencv_xobjdetect=OFF -DBUILD_opencv_xphoto=OFF \
    ..
或者,如果我在Python代码中调用cv2.threshold()(而不是常规的基于CPU的实现),这些CUDA加速函数是否已经被使用

不,您必须从GPU加速模块显式调用它们。调用cv2.threshold()将只运行CPU版本

由于OpenCV的Python API是对C++函数的封装,查看C++ API通常能提供关于函数/模块所在位置的有用线索。例如,通过transition guide,您可以看到从OpenCV 2.X到3.X所做的API更改。其中,OpenCV 3.X上的GPU模块可以通过cv2.cuda访问,而在以前的版本中则通过cv2.gpu访问。3.X中的cuda模块被拆分为几个较小的部分:

  • cuda-cuda加速计算机视觉
  • cudaarithm-矩阵上的运算
  • cudabgsegm-背景分割
  • cudacodec-视频编码/解码
  • cudafeatures2d-特征检测和描述
  • cudafilters-图像过滤
  • cudaimgproc-图像处理
  • cudalegacy-传统支持
  • cudaoptflow-光流
  • cudastereo-立体对应
  • cudawarping-图像扭曲
  • cudev-设备层

您应该在cv2中搜索这些模块。

因此在@NAmorim的回答和评论线程中确认,没有可访问的Python绑定到OpenCV的各种CUDA模块

通过使用Cython访问所需的CUDA函数,并实现必要的逻辑,将Python对象(主要是NumPy数组)转换为OpenCV C/C++对象,然后再转换回来,我能够绕过这个限制

工作代码 我首先编写了一个Cython定义文件,
GpuWrapper.pxd
。这个文件的目的是引用外部C/C++类和方法,比如我感兴趣的CUDA方法

from libcpp cimport bool
from cpython.ref cimport PyObject

# References PyObject to OpenCV object conversion code borrowed from OpenCV's own conversion file, cv2.cpp
# Naming the .cpp file in `cdef extern from` makes the generated C++ source
# #include it directly.
# Both declarations refer to `Mat`, which this .pxd declares further down.
cdef extern from 'pyopencv_converter.cpp':
    # cv::Mat -> Python object
    cdef PyObject* pyopencv_from(const Mat& m)
    # Python object -> cv::Mat (filled in place); the bool return reports success
    cdef bool pyopencv_to(PyObject* o, Mat& m)

# Enum values from opencv2/imgproc.hpp (namespace cv) that the wrapper needs.
# Only the members listed here are visible to Cython code.
cdef extern from 'opencv2/imgproc.hpp' namespace 'cv':
    cdef enum InterpolationFlags:
        INTER_NEAREST = 0
    cdef enum ColorConversionCodes:
        COLOR_BGR2GRAY

# Matrix element-type constants from opencv2/core/core.hpp, declared without a
# namespace and exposed to Cython as plain ints.
cdef extern from 'opencv2/core/core.hpp':
    cdef int CV_8UC1
    cdef int CV_32FC1

# Size and Scalar helper types from namespace cv.
cdef extern from 'opencv2/core/core.hpp' namespace 'cv':
    # Templated 2-D size with public width/height members.
    cdef cppclass Size_[T]:
        Size_() except +
        Size_(T width, T height) except +
        T width
        T height
    # `Size` is the int instantiation, reached through the Size2i alias.
    ctypedef Size_[int] Size2i
    ctypedef Size2i Size
    # NOTE(review): Scalar is declared here as a template class, yet the
    # warpPerspective declaration in this file uses it without a type
    # argument -- confirm this matches the OpenCV headers being built against.
    cdef cppclass Scalar[T]:
        Scalar() except +
        Scalar(T v0) except +

# Minimal view of cv::Mat: a default constructor, create(), and the raw data
# pointer plus row/column counts. Nothing else of the class is exposed.
cdef extern from 'opencv2/core/core.hpp' namespace 'cv':
    cdef cppclass Mat:
        Mat() except +
        void create(int, int, int) except +
        void* data
        int rows
        int cols

# CUDA types from namespace cv::cuda used by the wrapper.
cdef extern from 'opencv2/core/cuda.hpp' namespace 'cv::cuda':
    cdef cppclass GpuMat:
        GpuMat() except +
        # Host Mat -> device GpuMat
        void upload(Mat arr) except +
        # Device GpuMat -> host Mat.
        # NOTE(review): unlike upload(), this is declared without `except +`,
        # so a C++ exception raised here would not be translated into a Python
        # exception -- confirm whether that is intended.
        void download(Mat dst) const
    cdef cppclass Stream:
        Stream() except +

# cv::cuda::warpPerspective from opencv2/cudawarping.hpp, declared twice so
# Cython callers can use either the full argument list or the short form that
# relies on the C++ default arguments.
# NOTE(review): neither overload is declared `except +`, so C++ exceptions
# would not be converted to Python exceptions -- confirm.
cdef extern from 'opencv2/cudawarping.hpp' namespace 'cv::cuda':
    cdef void warpPerspective(GpuMat src, GpuMat dst, Mat M, Size dsize, int flags, int borderMode, Scalar borderValue, Stream& stream)
    # Function using default values
    cdef void warpPerspective(GpuMat src, GpuMat dst, Mat M, Size dsize, int flags)
我们还需要将Python对象转换为OpenCV对象的能力。我从OpenCV的
modules/python/src2/cv2.cpp
复制了前几百行代码。您可以在下面的附录中找到该代码

我们终于可以编写Cython包装器方法来调用OpenCV的CUDA函数了!这是Cython实现文件的一部分,
GpuWrapper.pyx

import numpy as np  # Import Python functions, attributes, submodules of numpy
cimport numpy as np  # Import numpy C/C++ API

def cudaWarpPerspectiveWrapper(np.ndarray[np.uint8_t, ndim=2] _src,
                               np.ndarray[np.float32_t, ndim=2] _M,
                               _size_tuple,
                               int _flags=INTER_NEAREST):
    """Warp a single-channel 8-bit image on the GPU with cv::cuda::warpPerspective.

    Args:
        _src: 2-D uint8 source image (host memory); uploaded to a GpuMat.
        _M: 2-D float32 transformation matrix, passed to OpenCV as a host Mat.
        _size_tuple: output size in NumPy (rows, cols) order, i.e.
            (height, width); index 1 is used as width and index 0 as height.
        _flags: interpolation flags forwarded to warpPerspective
            (defaults to INTER_NEAREST).

    Returns:
        The warped image, downloaded from the GPU and converted back to a
        Python object by pyopencv_from.

    Raises:
        TypeError: if either array cannot be converted to a cv::Mat.
    """
    # Create GPU/device InputArray for src
    cdef Mat src_mat
    cdef GpuMat src_gpu
    # pyopencv_to reports failure through its return value; do not ignore it,
    # otherwise an empty Mat would be uploaded silently.
    if not pyopencv_to(<PyObject*> _src, src_mat):
        raise TypeError("_src could not be converted to cv::Mat")
    src_gpu.upload(src_mat)

    # Create CPU/host InputArray for M
    cdef Mat M_mat = Mat()
    if not pyopencv_to(<PyObject*> _M, M_mat):
        raise TypeError("_M could not be converted to cv::Mat")

    # Create Size object from size tuple
    # Note that size/shape in Python is handled in row-major-order -- therefore, width is [1] and height is [0]
    cdef Size size = Size(<int> _size_tuple[1], <int> _size_tuple[0])

    # Create empty GPU/device OutputArray for dst
    cdef GpuMat dst_gpu = GpuMat()
    # Forward the caller's interpolation flags (previously INTER_NEAREST was
    # hard-coded here and the _flags argument had no effect).
    warpPerspective(src_gpu, dst_gpu, M_mat, size, _flags)

    # Get result of dst
    cdef Mat dst_host
    dst_gpu.download(dst_host)
    # NOTE(review): pyopencv_from appears to hand back an already-incremented
    # reference and the <np.ndarray> cast takes another one -- confirm whether
    # this leaks one reference per call.
    cdef np.ndarray out = <np.ndarray> pyopencv_from(dst_host)
    return out
setupGpuWrapper.py

import subprocess
import os
import numpy as np
from distutils.core import setup, Extension
from Cython.Build import cythonize
from Cython.Distutils import build_ext

"""
Run setup with the following command:
```
python setupGpuWrapper.py build_ext --inplace
```
"""

# Determine current directory of this setup file to find our module
CUR_DIR = os.path.dirname(__file__)
# Use pkg-config to determine library locations and include locations
opencv_libs_str = subprocess.check_output(["pkg-config", "--libs", "opencv"]).decode()
opencv_incs_str = subprocess.check_output(["pkg-config", "--cflags", "opencv"]).decode()
# Parse into usable format for Extension call
opencv_libs = [str(lib) for lib in opencv_libs_str.split()]
opencv_cflags = [str(flag) for flag in opencv_incs_str.split()]
# `include_dirs` expects bare directory paths (the build adds "-I" itself), so
# passing pkg-config's "-I/path" tokens through unchanged would hand the
# compiler "-I-I/path". Strip the prefix, and route any remaining compiler
# flags through `extra_compile_args` instead.
opencv_incs = [flag[2:] for flag in opencv_cflags if flag.startswith("-I")]
opencv_extra_cflags = [flag for flag in opencv_cflags if not flag.startswith("-I")]

extensions = [
    Extension('GpuWrapper',
              sources=[os.path.join(CUR_DIR, 'GpuWrapper.pyx')],
              language='c++',
              include_dirs=[np.get_include()] + opencv_incs,
              extra_compile_args=opencv_extra_cflags,
              extra_link_args=opencv_libs)
]

setup(
    cmdclass={'build_ext': build_ext},
    name="GpuWrapper",
    ext_modules=cythonize(extensions)
)

我使用以下方法访问OpenCV在Python中的C++ CUDA方法:

  • 创建自定义opencv_contrib模块
  • 编写C++代码来封装OpenCV CUDA方法
  • 使用OpenCV python绑定,公开自定义方法
  • 使用opencv_contrib构建opencv
  • 运行python代码进行测试

  • 我创建了一个小程序来演示相同的

    我使用OpenCV 4.0.0对此进行了一些测试@nchaumont提到,从OpenCV 4开始,就包含了CUDA的Python绑定

    至少从OpenCV 4.1.0(可能更早)开始,默认Python绑定包括CUDA,前提是OpenCV是使用CUDA支持构建的

    大多数功能都显示为
    cv2.cuda.thing
    (例如,
    cv2.cuda.cvtColor()


    目前,它们缺乏任何在线文档,例如,文档中没有提到Python。不过,你可以在Python的REPL中使用`help`函数来查看对应的C++文档,二者应该是等价的。

    正如你注意到的,OpenCV有自己的针对C++函数的Python绑定。据我所知,你不需要PyCUDA。您使用的是哪个版本的OpenCV?访问OpenCV CUDA函数应该是很直接的。嘿@NAmorim,感谢您的评论!我正在使用OpenCV 3.2.0-dev。但是,当我查看cv2可用的模块时,我没有看到CUDA的子模块(请参阅更新的问题)。Python中的函数是否已经被替换为对应的CUDA加速版本?那么,从OpenCV 4开始,CUDA加速代码的Python绑定应该可以工作。下面是一篇关于如何实现它的帖子:不幸的是,我找不到这些模块中任何一个看起来应该存在的模块:
    >>cv2.cuda回溯(最近一次调用):文件“”,第1行,在AttributeError中:“module”对象没有属性“cuda”>>cv2.gpu回溯(最近一次调用):文件“”,第1行,在AttributeError中:“模块”对象没有属性“gpu”>>cv2.cudaarithm回溯(最近一次调用):文件“”,第1行,在AttributeError中:“模块”对象没有属性“cudaarithm”
    您是否同时让库工作?您是否尝试过检查构建OpenCV+Cuda是否成功?例如,如果在python中运行print cv2.getBuildInformation(),则应该获得所有cmake
    #include <Python.h>
    #include "numpy/ndarrayobject.h"
    #include "opencv2/core/core.hpp"
    
    static PyObject* opencv_error = 0;
    
    // === FAIL MESSAGE ====================================================================================================
    
    // Formats a printf-style message into a fixed 1000-byte buffer and sets it
    // as the pending Python TypeError. Always returns 0 so callers can write
    // `return failmsg(...)` from functions that signal failure with 0/false.
    static int failmsg(const char *fmt, ...)
    {
        char message[1000];
    
        va_list args;
        va_start(args, fmt);
        // vsnprintf is given the buffer size, so over-long messages are truncated
        vsnprintf(message, sizeof(message), fmt, args);
        va_end(args);
    
        PyErr_SetString(PyExc_TypeError, message);
    
        return 0;
    }
    
    // Describes one argument handed to a converter: its display name (used in
    // error messages) and whether it is an output argument.
    struct ArgInfo
    {
        const char * name;
        bool outputarg;
        // more fields may be added if necessary
    
        ArgInfo(const char * name_, bool outputarg_)
        {
            name = name_;
            outputarg = outputarg_;
        }
    
        // to match with older pyopencv_to function signature
        operator const char *() const
        {
            return name;
        }
    };
    
    // === THREADING =======================================================================================================
    
    class PyAllowThreads
    {
    public:
        PyAllowThreads() : _state(PyEval_SaveThread()) {}
        ~PyAllowThreads()
        {
            PyEval_RestoreThread(_state);
        }
    private:
        PyThreadState* _state;
    };
    
    class PyEnsureGIL
    {
    public:
        PyEnsureGIL() : _state(PyGILState_Ensure()) {}
        ~PyEnsureGIL()
        {
            PyGILState_Release(_state);
        }
    private:
        PyGILState_STATE _state;
    };
    
    // === ERROR HANDLING ==================================================================================================
    
    // Evaluates `expr` inside a try block while a PyAllowThreads guard is alive.
    // If `expr` throws cv::Exception, the exception text is set as a Python
    // error of type `opencv_error` and the *enclosing function* returns 0.
    // Because the macro contains a `return 0`, it can only be used in functions
    // whose return type accepts 0 (pointer, int, bool).
    // NOTE(review): `opencv_error` is initialised to 0 in this file and no
    // assignment is visible here -- confirm it is set before this path runs.
    #define ERRWRAP2(expr) \
    try \
    { \
        PyAllowThreads allowThreads; \
        expr; \
    } \
    catch (const cv::Exception &e) \
    { \
        PyErr_SetString(opencv_error, e.what()); \
        return 0; \
    }
    
    // === USING NAMESPACE CV ==============================================================================================
    
    using namespace cv;
    
    // === NUMPY ALLOCATOR =================================================================================================
    
    // MatAllocator implementation that backs cv::Mat buffers with NumPy arrays.
    // Each UMatData it creates stores the owning PyObject* in `userdata`, which
    // is how pyopencv_from later recovers the array for a Mat.
    class NumpyAllocator : public MatAllocator
    {
    public:
        // Remember OpenCV's standard allocator so some requests can be delegated.
        NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }
        ~NumpyAllocator() {}
    
        // Wrap an existing NumPy array `o` in a new UMatData without copying.
        // Fills `step` from the array's strides (the last entry is set to the
        // element size for `type`) and records `o` in `userdata`.
        // This function does not change the reference count of `o`.
        UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const
        {
            UMatData* u = new UMatData(this);
            u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);
            npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
            for( int i = 0; i < dims - 1; i++ )
                step[i] = (size_t)_strides[i];
            step[dims-1] = CV_ELEM_SIZE(type);
            u->size = sizes[0]*step[0];
            u->userdata = o;
            return u;
        }
    
        // Allocate a fresh NumPy array matching the requested Mat geometry and
        // wrap it via the overload above. Takes the GIL (PyEnsureGIL) before
        // touching the NumPy API.
        UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
        {
            if( data != 0 )
            {
                CV_Error(Error::StsAssert, "The data should normally be NULL!");
                // probably this is safe to do in such extreme case
                // NOTE(review): CV_Error presumably throws, which would make the
                // return below unreachable -- confirm.
                return stdAllocator->allocate(dims0, sizes, type, data, step, flags, usageFlags);
            }
            PyEnsureGIL gil;
    
            int depth = CV_MAT_DEPTH(type);
            int cn = CV_MAT_CN(type);
            // f is 1 when size_t is 8 bytes and 0 when it is 4 bytes; it selects
            // the fallback typenum below (NPY_ULONGLONG vs NPY_UINT).
            const int f = (int)(sizeof(size_t)/8);
            // Map the OpenCV depth onto the corresponding NumPy type number.
            int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :
                          depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
                          depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
                          depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
            int i, dims = dims0;
            cv::AutoBuffer<npy_intp> _sizes(dims + 1);
            for( i = 0; i < dims; i++ )
                _sizes[i] = sizes[i];
            // Multi-channel Mats get the channel count as an extra trailing
            // NumPy dimension.
            if( cn > 1 )
                _sizes[dims++] = cn;
            PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum);
            if(!o)
                CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
            // Note: wrapped using the Mat dimensionality (dims0), not the NumPy one.
            return allocate(o, dims0, sizes, type, step);
        }
    
        // Allocation for an already existing UMatData is delegated to the
        // standard allocator.
        bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
        {
            return stdAllocator->allocate(u, accessFlags, usageFlags);
        }
    
        // Release a UMatData once its refcount has reached zero: drop the
        // reference to the NumPy array stored in `userdata`, then delete `u`.
        // A null `u` is ignored. Takes the GIL before touching Python objects.
        void deallocate(UMatData* u) const
        {
            if(!u)
                return;
            PyEnsureGIL gil;
            CV_Assert(u->urefcount >= 0);
            CV_Assert(u->refcount >= 0);
            if(u->refcount == 0)
            {
                PyObject* o = (PyObject*)u->userdata;
                Py_XDECREF(o);
                delete u;
            }
        }
    
        // Standard OpenCV allocator obtained in the constructor.
        const MatAllocator* stdAllocator;
    };
    
    // === ALLOCATOR INITIALIZATION ========================================================================================
    
    NumpyAllocator g_numpyAllocator;
    
    // === CONVERTOR FUNCTIONS =============================================================================================
    
    template<typename T> static
    bool pyopencv_to(PyObject* obj, T& p, const char* name = "<unknown>");
    
    template<typename T> static
    PyObject* pyopencv_from(const T& src);
    
    enum { ARG_NONE = 0, ARG_MAT = 1, ARG_SCALAR = 2 };
    
    // special case, when the convertor needs full ArgInfo structure
    // Convert a Python object into a cv::Mat.
    //
    // Accepted inputs, checked in this order:
    //   - NULL / None : `m` is left as is (its allocator is switched to the NumPy
    //                   allocator when it holds no data); returns true.
    //   - int / float : becomes a 4x1 CV_64F Mat {value, 0, 0, 0}.
    //   - tuple       : becomes an Nx1 CV_64F Mat; every item must be int or float.
    //   - NumPy array : wrapped without copying when its layout allows it,
    //                   otherwise a cast/contiguous copy is wrapped instead.
    //
    // `info.name` is used in error messages; `info.outputarg` forbids the
    // copying path. Returns false after setting a Python TypeError through
    // failmsg() when the object cannot be converted.
    //
    // NOTE(review): PyInt_Check / PyInt_AsLong are used below but no definition
    // or compatibility macro for them is visible in this file -- confirm they
    // are available for the targeted Python version.
    // special case, when the convertor needs full ArgInfo structure
    static bool pyopencv_to(PyObject* o, Mat& m, const ArgInfo info)
    {
        // Always true here, so the "more than 2 dimensions" check further down
        // never fires.
        bool allowND = true;
        if(!o || o == Py_None)
        {
            if( !m.data )
                m.allocator = &g_numpyAllocator;
            return true;
        }
    
        if( PyInt_Check(o) )
        {
            double v[] = {static_cast<double>(PyInt_AsLong((PyObject*)o)), 0., 0., 0.};
            // clone() so the Mat owns its data instead of pointing at the local array
            m = Mat(4, 1, CV_64F, v).clone();
            return true;
        }
        if( PyFloat_Check(o) )
        {
            double v[] = {PyFloat_AsDouble((PyObject*)o), 0., 0., 0.};
            m = Mat(4, 1, CV_64F, v).clone();
            return true;
        }
        if( PyTuple_Check(o) )
        {
            int i, sz = (int)PyTuple_Size((PyObject*)o);
            m = Mat(sz, 1, CV_64F);
            for( i = 0; i < sz; i++ )
            {
                PyObject* oi = PyTuple_GET_ITEM(o, i);
                if( PyInt_Check(oi) )
                    m.at<double>(i) = (double)PyInt_AsLong(oi);
                else if( PyFloat_Check(oi) )
                    m.at<double>(i) = (double)PyFloat_AsDouble(oi);
                else
                {
                    failmsg("%s is not a numerical tuple", info.name);
                    m.release();
                    return false;
                }
            }
            return true;
        }
    
        if( !PyArray_Check(o) )
        {
            failmsg("%s is not a numpy array, neither a scalar", info.name);
            return false;
        }
    
        PyArrayObject* oarr = (PyArrayObject*) o;
    
        // needcopy: the array layout cannot be wrapped directly.
        // needcast: the element type must be converted first (implies needcopy).
        bool needcopy = false, needcast = false;
        int typenum = PyArray_TYPE(oarr), new_typenum = typenum;
        // Map the NumPy type number onto an OpenCV depth; -1 means "no direct match".
        int type = typenum == NPY_UBYTE ? CV_8U :
                   typenum == NPY_BYTE ? CV_8S :
                   typenum == NPY_USHORT ? CV_16U :
                   typenum == NPY_SHORT ? CV_16S :
                   typenum == NPY_INT ? CV_32S :
                   typenum == NPY_INT32 ? CV_32S :
                   typenum == NPY_FLOAT ? CV_32F :
                   typenum == NPY_DOUBLE ? CV_64F : -1;
    
        if( type < 0 )
        {
            // 64-bit / long integer arrays are cast to NPY_INT and treated as CV_32S.
            if( typenum == NPY_INT64 || typenum == NPY_UINT64 || typenum == NPY_LONG )
            {
                needcopy = needcast = true;
                new_typenum = NPY_INT;
                type = CV_32S;
            }
            else
            {
                failmsg("%s data type = %d is not supported", info.name, typenum);
                return false;
            }
        }
    
    #ifndef CV_MAX_DIM
        const int CV_MAX_DIM = 32;
    #endif
    
        int ndims = PyArray_NDIM(oarr);
        if(ndims >= CV_MAX_DIM)
        {
            failmsg("%s dimensionality (=%d) is too high", info.name, ndims);
            return false;
        }
    
        int size[CV_MAX_DIM+1];
        size_t step[CV_MAX_DIM+1];
        size_t elemsize = CV_ELEM_SIZE1(type);
        const npy_intp* _sizes = PyArray_DIMS(oarr);
        const npy_intp* _strides = PyArray_STRIDES(oarr);
        // A 3-D array whose last dimension fits in CV_CN_MAX is treated as a
        // 2-D multi-channel image.
        bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;
    
        for( int i = ndims-1; i >= 0 && !needcopy; i-- )
        {
            // these checks handle cases of
            //  a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases
            //  b) transposed arrays, where _strides[] elements go in non-descending order
            //  c) flipped arrays, where some of _strides[] elements are negative
            // the _sizes[i] > 1 is needed to avoid spurious copies when NPY_RELAXED_STRIDES is set
            if( (i == ndims-1 && _sizes[i] > 1 && (size_t)_strides[i] != elemsize) ||
                (i < ndims-1 && _sizes[i] > 1 && _strides[i] < _strides[i+1]) )
                needcopy = true;
        }
    
        // For multi-channel data the channels of one pixel must be packed
        // back-to-back (column stride == elemsize * channels).
        if( ismultichannel && _strides[1] != (npy_intp)elemsize*_sizes[2] )
            needcopy = true;
    
        if (needcopy)
        {
            // An output argument must be written in place, so a copy is not acceptable.
            if (info.outputarg)
            {
                failmsg("Layout of the output array %s is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)", info.name);
                return false;
            }
    
            // From here on `o` / `oarr` refer to the converted array, not the
            // caller's original object.
            // NOTE(review): the results of PyArray_Cast / PyArray_GETCONTIGUOUS
            // are not checked for NULL before use -- confirm.
            if( needcast ) {
                o = PyArray_Cast(oarr, new_typenum);
                oarr = (PyArrayObject*) o;
            }
            else {
                oarr = PyArray_GETCONTIGUOUS(oarr);
                o = (PyObject*) oarr;
            }
    
            _strides = PyArray_STRIDES(oarr);
        }
    
        // Normalize strides in case NPY_RELAXED_STRIDES is set
        // Walks from the innermost dimension outwards; dimensions of size <= 1
        // get a step computed from the inner dimensions instead of NumPy's value.
        size_t default_step = elemsize;
        for ( int i = ndims - 1; i >= 0; --i )
        {
            size[i] = (int)_sizes[i];
            if ( size[i] > 1 )
            {
                step[i] = (size_t)_strides[i];
                default_step = step[i] * size[i];
            }
            else
            {
                step[i] = default_step;
                default_step *= size[i];
            }
        }
    
        // handle degenerate case
        // (a 0-dimensional array is presented as a single 1-element dimension)
        if( ndims == 0) {
            size[ndims] = 1;
            step[ndims] = elemsize;
            ndims++;
        }
    
        // Fold the trailing channel dimension into the Mat type.
        if( ismultichannel )
        {
            ndims--;
            type |= CV_MAKETYPE(0, size[2]);
        }
    
        if( ndims > 2 && !allowND )
        {
            failmsg("%s has more than 2 dimensions", info.name);
            return false;
        }
    
        // Build a Mat header over the NumPy buffer, then attach a UMatData that
        // records `o` so the array's lifetime is tied to the Mat.
        m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
        m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
        m.addref();
    
        // Only the no-copy path takes an extra reference on `o`.
        // Presumably the cast/contiguous result is already a new reference owned
        // by this function -- TODO confirm against the NumPy C API.
        if( !needcopy )
        {
            Py_INCREF(o);
        }
        m.allocator = &g_numpyAllocator;
    
        return true;
    }
    
    // cv::Mat specialisation of the name-only converter: wraps `name` in an
    // ArgInfo marked as a non-output argument and defers to the ArgInfo overload.
    template<>
    bool pyopencv_to(PyObject* o, Mat& m, const char* name)
    {
        const ArgInfo inputInfo(name, false);
        return pyopencv_to(o, m, inputInfo);
    }
    
    // cv::Mat specialisation: return the Python object backing `m`.
    // An empty Mat (no data) yields None. A Mat that has no UMatData or was not
    // allocated by g_numpyAllocator is first copied into a temporary that uses
    // that allocator. The returned object has its reference count incremented.
    template<>
    PyObject* pyopencv_from(const Mat& m)
    {
        if( !m.data )
            Py_RETURN_NONE;
    
        Mat numpyBacked;
        Mat* source = const_cast<Mat*>(&m);
        const bool ownedByNumpy = source->u && source->allocator == &g_numpyAllocator;
        if( !ownedByNumpy )
        {
            numpyBacked.allocator = &g_numpyAllocator;
            // ERRWRAP2 returns 0 from this function if copyTo throws cv::Exception
            ERRWRAP2(m.copyTo(numpyBacked));
            source = &numpyBacked;
        }
    
        PyObject* result = (PyObject*)source->u->userdata;
        Py_INCREF(result);
        return result;
    }
    
    import subprocess
    import os
    import numpy as np
    from distutils.core import setup, Extension
    from Cython.Build import cythonize
    from Cython.Distutils import build_ext
    
    """
    Run setup with the following command:
    ```
    python setupGpuWrapper.py build_ext --inplace
    ```
    """
    
    # Determine current directory of this setup file to find our module
    CUR_DIR = os.path.dirname(__file__)
    # Use pkg-config to determine library locations and include locations
    opencv_libs_str = subprocess.check_output(["pkg-config", "--libs", "opencv"]).decode()
    opencv_incs_str = subprocess.check_output(["pkg-config", "--cflags", "opencv"]).decode()
    # Parse into usable format for Extension call
    opencv_libs = [str(lib) for lib in opencv_libs_str.split()]
    opencv_cflags = [str(flag) for flag in opencv_incs_str.split()]
    # `include_dirs` expects bare directory paths (the build adds "-I" itself), so
    # passing pkg-config's "-I/path" tokens through unchanged would hand the
    # compiler "-I-I/path". Strip the prefix, and route any remaining compiler
    # flags through `extra_compile_args` instead.
    opencv_incs = [flag[2:] for flag in opencv_cflags if flag.startswith("-I")]
    opencv_extra_cflags = [flag for flag in opencv_cflags if not flag.startswith("-I")]
    
    extensions = [
        Extension('GpuWrapper',
                  sources=[os.path.join(CUR_DIR, 'GpuWrapper.pyx')],
                  language='c++',
                  include_dirs=[np.get_include()] + opencv_incs,
                  extra_compile_args=opencv_extra_cflags,
                  extra_link_args=opencv_libs)
    ]
    
    setup(
        cmdclass={'build_ext': build_ext},
        name="GpuWrapper",
        ext_modules=cythonize(extensions)
    )