Python Numbapro是否支持Maxwell体系结构?

Python Numbapro是否支持Maxwell体系结构?（标签：python、cuda、numba-pro、maxwell）

我想使用Numbapro API在python中执行CUDA内核。我有以下代码:

import math
import numpy
from numbapro import jit, cuda, int32, float32
from matplotlib import pyplot

@cuda.jit('void(float32[:], float32[:], float32[:], float32[:], float32, float32, float32, int32)')
def calculate_velocity_field(X, Y, u_source, v_source, x_source, y_source, strength_source, N):
    """CUDA kernel: velocity field (u, v) induced by a 2D potential-flow point source.

    For each of the N points, writes
        u = S/(2*pi) * (x - xs) / r^2
        v = S/(2*pi) * (y - ys) / r^2
    where r^2 = (x - xs)^2 + (y - ys)^2.

    Parameters:
        X, Y               -- device arrays of point coordinates, length >= N
        u_source, v_source -- device output arrays, length >= N
        x_source, y_source -- scalar source location (xs, ys)
        strength_source    -- scalar source strength S
        N                  -- number of points to process
    """
    start  = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    stride = cuda.gridDim.x * cuda.blockDim.x
    # Grid-stride loop: every launch configuration covers all N elements.
    for i in range(start, N, stride):
        dx = X[i] - x_source
        dy = Y[i] - y_source
        r2 = dx * dx + dy * dy                     # squared distance to the source
        coeff = strength_source / (2 * math.pi)
        u_source[i] = coeff * dx / r2
        # BUG FIX: the original computed the v-component with (Y[i] - x_source);
        # the y-velocity numerator must use the y-offset dy = Y[i] - y_source.
        v_source[i] = coeff * dy / r2


def main():
    """Build a mesh grid, launch the CUDA source-velocity kernel, and time it.

    Computes the potential-flow velocity field of a single point source on
    the GPU and copies the result arrays back to the host.
    """
    # BUG FIX: `timer` was used below but never imported anywhere in the
    # file, raising NameError at runtime.
    from timeit import default_timer as timer

    N = 200                                  # number of points in each direction
    x_start, x_end = -4.0, 4.0               # boundaries in the x-direction
    y_start, y_end = -2.0, 2.0               # boundaries in the y-direction
    x = numpy.linspace(x_start, x_end, N)    # 1D array of x-coordinates
    y = numpy.linspace(y_start, y_end, N)    # 1D array of y-coordinates

    X, Y = numpy.meshgrid(x, y)              # 2D mesh grid, shape (N, N)

    strength_source = 5.0                    # source strength
    x_source, y_source = -1.0, 0.0           # location of the source

    start = timer()

    # Calculate launch dimensions.
    # NOTE(review): X and Y contain N*N points, but only N are processed by
    # the kernel below — confirm whether the intent was to pass X.size here.
    blockSize = 1024
    gridSize = int(math.ceil(float(N) / blockSize))

    # Transfer inputs to the device; allocate device output buffers.
    X_d = cuda.to_device(X)
    Y_d = cuda.to_device(Y)
    u_source_d = cuda.device_array_like(X)
    v_source_d = cuda.device_array_like(Y)

    # Launch the kernel.
    calculate_velocity_field[gridSize, blockSize](
        X_d, Y_d, u_source_d, v_source_d,
        x_source, y_source, strength_source, N)

    # Copy the results back to host memory.
    u_source = numpy.empty_like(X)
    v_source = numpy.empty_like(Y)
    u_source_d.to_host(u_source)
    v_source_d.to_host(v_source)

    elapsed_time = timer() - start
    print("Exec time with GPU %f s" % elapsed_time)

if __name__ == "__main__":
    main()
给我这个错误:

NvvmError                                 Traceback (most recent call last)
<ipython-input-17-85e4a6e56a14> in <module>()
----> 1 @cuda.jit('void(float32[:], float32[:], float32[:], float32[:], float32, float32, float32, int32)')
      2 def calculate_velocity_field(X, Y, u_source, v_source, x_source, y_source, strength_source, N):
      3     start  = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
      4     end    = N
      5     stride = cuda.gridDim.x * cuda.blockDim.x

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/decorators.py in kernel_jit(func)
     89             # Force compilation for the current context
     90             if bind:
---> 91                 kernel.bind()
     92 
     93             return kernel

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in bind(self)
    319         Force binding to current CUDA context
    320         """
--> 321         self._func.get()
    322 
    323     @property

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in get(self)
    254         cufunc = self.cache.get(device.id)
    255         if cufunc is None:
--> 256             ptx = self.ptx.get()
    257 
    258             # Link

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in get(self)
    226             arch = nvvm.get_arch_option(*cc)
    227             ptx = nvvm.llvm_to_ptx(self.llvmir, opt=3, arch=arch,
--> 228                                    **self._extra_options)
    229             self.cache[cc] = ptx
    230             if config.DUMP_ASSEMBLY:

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in llvm_to_ptx(llvmir, **opts)
    420     cu.add_module(llvmir.encode('utf8'))
    421     cu.add_module(libdevice.get())
--> 422     ptx = cu.compile(**opts)
    423     return ptx
    424 

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in compile(self, **options)
    211                                           for x in opts])
    212         err = self.driver.nvvmCompileProgram(self._handle, len(opts), c_opts)
--> 213         self._try_error(err, 'Failed to compile\n')
    214 
    215         # get result

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in _try_error(self, err, msg)
    229 
    230     def _try_error(self, err, msg):
--> 231         self.driver.check_error(err, "%s\n%s" % (msg, self.get_log()))
    232 
    233     def get_log(self):

~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in check_error(self, error, msg, exit)
    118                 sys.exit(1)
    119             else:
--> 120                 raise exc
    121 
    122 

NvvmError: Failed to compile

libnvvm : error: -arch=compute_52 is an unsupported option
NVVM_ERROR_INVALID_OPTION
NvvmError回溯(最近一次调用)
在()
----> 1 @cuda.jit('void(float32[:], float32[:], float32[:], float32[:], float32, float32, float32, int32)')
      2 def calculate_velocity_field(X, Y, u_source, v_source, x_source, y_source, strength_source, N):
      3     start  = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
      4     end    = N
      5     stride = cuda.gridDim.x * cuda.blockDim.x
内核中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/decorators.py(func)
89#针对当前环境的强制汇编
90如果绑定:
--->91 kernel.bind()
92
93返回内核
绑定中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py(self)
319强制绑定到当前CUDA上下文
320         """
-->321自我功能获取()
322
323@property
get(self)中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py
254 cufunc=self.cache.get(device.id)
255如果cufunc为无:
-->256 ptx=self.ptx.get()
257
258链接
get(self)中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py
226 arch=nvvm.get_arch_选项(*cc)
227 ptx=nvvm.llvm_to_ptx(self.llvmir,opt=3,arch=arch,
-->228**self.\u extra\u选项)
229自缓存[cc]=ptx
230如果config.DUMP_程序集:
llvm_to_ptx中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py(llvmir,**选项)
420 cu.add_模块(llvmir.encode('utf8'))
421 cu.add_模块(libdevice.get())
-->422 ptx=cu.compile(**选项)
423返回ptx
424
编译中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvm.py(self,**选项)
211用于选择中的x])
212 err=self.driver.nvvmCompileProgram(self.\u句柄、len(选项)、c\u选项)
-->213 self.\u try\u错误(错误“编译失败\n”)
214
215#获得结果
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in\u try\u error(self,err,msg)
229
230 def重试错误(self、err、msg):
-->231 self.driver.check_错误(错误,“%s\n%s”%(消息,self.get_log())
232
233 def获取日志(自身):
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py检查错误(self,error,msg,exit)
118系统出口(1)
119其他:
-->120升exc
121
122
NvvmError:未能编译
libnvvm : error: -arch=compute_52 is an unsupported option
NVVM_ERROR_INVALID_OPTION
我尝试了另一个numbapro示例，发生了同样的错误。我不知道这是Numbapro不支持5.2计算能力（compute capability）的bug，还是Nvidia NVVM的问题……有什么建议吗？

理论上,但我不知道发生了什么


我在Linux上使用CUDA 7.0，驱动程序版本为346.29。——终于找到了解决方案：

  • 解决方案1:
conda update cudatoolkit

Fetching package metadata: ....
# All requested packages already installed.
# packages in environment at ~/.anaconda3:
#
cudatoolkit               6.0                          p0
Fetching package metadata: ...... 
Solving package specifications: .
Package plan for installation in environment ~/.anaconda3:

The following packages will be downloaded:
     package                    |            build
    ---------------------------|-----------------
    cudatoolkit-7.0            |                1       190.8 MB

The following packages will be UPDATED:

    cudatoolkit: 6.0-p0 --> 7.0-1

Proceed ([y]/n)? y
看起来我更新CUDA工具包时没有更新到CUDA 7.0。可以执行第二个解决方案:

  • 解决方案2
conda install -c numba cudatoolkit

Fetching package metadata: ....
# All requested packages already installed.
# packages in environment at ~/.anaconda3:
#
cudatoolkit               6.0                          p0
Fetching package metadata: ...... 
Solving package specifications: .
Package plan for installation in environment ~/.anaconda3:

The following packages will be downloaded:
     package                    |            build
    ---------------------------|-----------------
    cudatoolkit-7.0            |                1       190.8 MB

The following packages will be UPDATED:

    cudatoolkit: 6.0-p0 --> 7.0-1

Proceed ([y]/n)? y
之前:

In [4]: check_cuda()
------------------------------libraries detection-------------------------------
Finding cublas
    located at ~/.anaconda3/lib/libcublas.so.6.0.37
    trying to open library...   ok
Finding cusparse
    located at ~/.anaconda3/lib/libcusparse.so.6.0.37
    trying to open library...   ok
Finding cufft
    located at ~/.anaconda3/lib/libcufft.so.6.0.37
    trying to open library...   ok
Finding curand
    located at ~/.anaconda3/lib/libcurand.so.6.0.37
    trying to open library...   ok
Finding nvvm
    located at ~/.anaconda3/lib/libnvvm.so.2.0.0
    trying to open library...   ok
    finding libdevice for compute_20... ok
    finding libdevice for compute_30... ok
    finding libdevice for compute_35... ok
-------------------------------hardware detection-------------------------------
Found 1 CUDA devices
id 0      b'GeForce GTX 970'                              [SUPPORTED]
                      compute capability: 5.2
                           pci device id: 0
                              pci bus id: 7
Summary:
    1/1 devices are supported
PASSED
Out[4]: True
之后:

In [6]:  check_cuda()
------------------------------libraries detection-------------------------------
Finding cublas
    located at ~/.anaconda3/lib/libcublas.so.7.0.28
    trying to open library...   ok
Finding cusparse
    located at ~/.anaconda3/lib/libcusparse.so.7.0.28
    trying to open library...   ok
Finding cufft
    located at ~/.anaconda3/lib/libcufft.so.7.0.35
    trying to open library...   ok
Finding curand
    located at ~/.anaconda3/lib/libcurand.so.7.0.28
    trying to open library...   ok
Finding nvvm
    located at ~/.anaconda3/lib/libnvvm.so.3.0.0
    trying to open library...   ok
    finding libdevice for compute_20... ok
    finding libdevice for compute_30... ok
    finding libdevice for compute_35... ok
-------------------------------hardware detection-------------------------------
Found 1 CUDA devices
id 0      b'GeForce GTX 970'                              [SUPPORTED]
                      compute capability: 5.2
                           pci device id: 0
                              pci bus id: 7
Summary:
    1/1 devices are supported
PASSED
Out[6]:  True

您确定只安装了CUDA 7.0而没有安装其他CUDA工具包吗?或者Anaconda也没有安装自己版本的CUDA。CUDA 7.0发行版工具包支持Compute capability 5.2设备,但早期CUDA版本不支持。该错误表明您使用的是早期工具包。您可能需要与Continuum对话是的,我安装了CUDA 7.0。我怀疑Anaconda提供了他自己的CUDA版本。如果我执行“check_CUDA()“它显示的路径如下:~/.anaconda3/lib/libcublas.so.6.0.37好的,那么您实际上使用的是不支持Maxwell卡的CUDA 6。您可能需要与供应商讨论如何在产品中使用CUDA 7感谢您添加了一个解决方案。