Python Numbapro是否支持Maxwell体系结构?
我想使用 NumbaPro API 在 Python 中执行 CUDA 内核。我有以下代码:
import math
import numpy
from numbapro import jit, cuda, int32, float32
from matplotlib import pyplot
@cuda.jit('void(float32[:], float32[:], float32[:], float32[:], float32, float32, float32, int32)')
def calculate_velocity_field(X, Y, u_source, v_source, x_source, y_source, strength_source, N):
    """CUDA kernel: velocity field induced by a point source of strength
    ``strength_source`` located at ``(x_source, y_source)``.

    For each point i (with dx = X[i]-x_source, dy = Y[i]-y_source,
    r2 = dx**2 + dy**2):

        u_source[i] = strength_source/(2*pi) * dx / r2
        v_source[i] = strength_source/(2*pi) * dy / r2

    A grid-stride loop (start = block*blockDim + thread, stride =
    gridDim*blockDim) lets any launch configuration cover all N elements.
    """
    start = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
    stride = cuda.gridDim.x * cuda.blockDim.x
    # Loop-invariant coefficient hoisted out of the loop.
    coef = strength_source / (2.0 * math.pi)
    for i in range(start, N, stride):
        dx = X[i] - x_source
        dy = Y[i] - y_source
        r2 = dx * dx + dy * dy
        u_source[i] = coef * dx / r2
        # BUG FIX: the original numerator was (Y[i] - x_source); the
        # v-component must use the y-offset (Y[i] - y_source), matching
        # the u-component's use of the x-offset above.
        v_source[i] = coef * dy / r2
def main():
    """Build an N x N mesh, run the source-velocity kernel on the GPU,
    copy the results back to the host, and print the elapsed time."""
    # BUG FIX: `timer` was used below but never imported anywhere in the
    # file, raising NameError at runtime.
    from timeit import default_timer as timer

    N = 200                          # number of points in each direction
    x_start, x_end = -4.0, 4.0       # boundaries in the x-direction
    y_start, y_end = -2.0, 2.0       # boundaries in the y-direction

    x = numpy.linspace(x_start, x_end, N)  # 1D array of x-coordinates
    y = numpy.linspace(y_start, y_end, N)  # 1D array of y-coordinates
    X, Y = numpy.meshgrid(x, y)            # N x N mesh grid

    strength_source = 5.0            # source strength
    x_source, y_source = -1.0, 0.0   # location of the source

    start = timer()

    # Calculate grid dimensions for the kernel launch.
    # NOTE(review): the mesh holds N*N points but only N is passed to the
    # kernel, so at most the first N elements are computed — confirm intent.
    blockSize = 1024
    gridSize = int(math.ceil(float(N) / blockSize))

    # Transfer input data to the device.
    # NOTE(review): meshgrid yields float64 2-D arrays while the kernel
    # signature declares float32 1-D arrays — verify this matches the API.
    X_d = cuda.to_device(X)
    Y_d = cuda.to_device(Y)
    u_source_d = cuda.device_array_like(X)
    v_source_d = cuda.device_array_like(Y)

    # Launch the kernel.
    calculate_velocity_field[gridSize, blockSize](
        X_d, Y_d, u_source_d, v_source_d,
        x_source, y_source, strength_source, N)

    # Transfer the results back to the host.
    u_source = numpy.empty_like(X)
    v_source = numpy.empty_like(Y)
    u_source_d.to_host(u_source)
    v_source_d.to_host(v_source)

    elapsed_time = timer() - start
    print("Exec time with GPU %f s" % elapsed_time)


if __name__ == "__main__":
    main()
给我这个错误:
NvvmError Traceback (most recent call last)
<ipython-input-17-85e4a6e56a14> in <module>()
----> 1 @cuda.jit('void(float32[:], float32[:], float32[:], float32[:], float32, float32, float32, int32)')
2 def calculate_velocity_field(X, Y, u_source, v_source, x_source, y_source, strength_source, N):
3 start = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
4 end = N
5 stride = cuda.gridDim.x * cuda.blockDim.x
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/decorators.py in kernel_jit(func)
89 # Force compilation for the current context
90 if bind:
---> 91 kernel.bind()
92
93 return kernel
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in bind(self)
319 Force binding to current CUDA context
320 """
--> 321 self._func.get()
322
323 @property
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in get(self)
254 cufunc = self.cache.get(device.id)
255 if cufunc is None:
--> 256 ptx = self.ptx.get()
257
258 # Link
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py in get(self)
226 arch = nvvm.get_arch_option(*cc)
227 ptx = nvvm.llvm_to_ptx(self.llvmir, opt=3, arch=arch,
--> 228 **self._extra_options)
229 self.cache[cc] = ptx
230 if config.DUMP_ASSEMBLY:
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in llvm_to_ptx(llvmir, **opts)
420 cu.add_module(llvmir.encode('utf8'))
421 cu.add_module(libdevice.get())
--> 422 ptx = cu.compile(**opts)
423 return ptx
424
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in compile(self, **options)
211 for x in opts])
212 err = self.driver.nvvmCompileProgram(self._handle, len(opts), c_opts)
--> 213 self._try_error(err, 'Failed to compile\n')
214
215 # get result
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in _try_error(self, err, msg)
229
230 def _try_error(self, err, msg):
--> 231 self.driver.check_error(err, "%s\n%s" % (msg, self.get_log()))
232
233 def get_log(self):
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in check_error(self, error, msg, exit)
118 sys.exit(1)
119 else:
--> 120 raise exc
121
122
NvvmError: Failed to compile
libnvvm : error: -arch=compute_52 is an unsupported option
NVVM_ERROR_INVALID_OPTION
NvvmError回溯(最近一次调用)
在()
---->1@cuda.jit('void(float32[:],float32[:],float32[:],float32[:],float32,float32,float32,int32'))
2 def计算速度场(X,Y,u_源,v_源,X_源,Y_源,强度_源,N):
3 start=cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x
4端=N
5步长=cuda.gridDim.x*cuda.blockDim.x
内核中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/decorators.py(func)
89#针对当前环境的强制汇编
90如果绑定:
--->91 kernel.bind()
92
93返回内核
绑定中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py(self)
319强制绑定到当前CUDA上下文
320 """
-->321自我功能获取()
322
323@property
get(self)中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py
254 cufunc=self.cache.get(device.id)
255如果cufunc为无:
-->256 ptx=self.ptx.get()
257
258链接
get(self)中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/compiler.py
226 arch=nvvm.get_arch_选项(*cc)
227 ptx=nvvm.llvm_to_ptx(self.llvmir,opt=3,arch=arch,
-->228**self.\u extra\u选项)
229自缓存[cc]=ptx
230如果config.DUMP_程序集:
llvm_to_ptx中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py(llvmir,**选项)
420 cu.add_模块(llvmir.encode('utf8'))
421 cu.add_模块(libdevice.get())
-->422 ptx=cu.compile(**选项)
423返回ptx
424
编译中的~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvm.py(self,**选项)
211用于选择中的x])
212 err=self.driver.nvvmCompileProgram(self.\u句柄、len(选项)、c\u选项)
-->213 self.\u try\u错误(错误“编译失败\n”)
214
215#获得结果
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py in\u try\u error(self,err,msg)
229
230 def重试错误(self、err、msg):
-->231 self.driver.check_错误(错误,“%s\n%s”%(消息,self.get_log())
232
233 def获取日志(自身):
~/.anaconda3/lib/python3.4/site-packages/numba/cuda/cudadrv/nvvm.py检查错误(self,error,msg,exit)
118系统出口(1)
119其他:
-->120升exc
121
122
NvvmError:未能编译
libnvvm:错误:-arch=compute_52是不受支持的选项
NVVM\u错误\u无效\u选项
我尝试了另一个numbapro示例,同样的错误也发生了。
我不知道是 NumbaPro 的 bug 导致其不支持 5.2 计算能力,还是 Nvidia NVVM 的问题……虽然有人在理论上给出了一些建议,但我当时并不清楚到底发生了什么。我使用的是 Linux,CUDA 7.0,驱动程序版本 346.29。

终于找到了解决方案:
- 解决方案1:
Fetching package metadata: ....
# All requested packages already installed.
# packages in environment at ~/.anaconda3:
#
cudatoolkit 6.0 p0
Fetching package metadata: ......
Solving package specifications: .
Package plan for installation in environment ~/.anaconda3:
The following packages will be downloaded:
package | build
---------------------------|-----------------
cudatoolkit-7.0 | 1 190.8 MB
The following packages will be UPDATED:
cudatoolkit: 6.0-p0 --> 7.0-1
Proceed ([y]/n)? y
看起来我更新CUDA工具包时没有更新到CUDA 7.0。可以执行第二个解决方案:
- 解决方案2
Fetching package metadata: ....
# All requested packages already installed.
# packages in environment at ~/.anaconda3:
#
cudatoolkit 6.0 p0
Fetching package metadata: ......
Solving package specifications: .
Package plan for installation in environment ~/.anaconda3:
The following packages will be downloaded:
package | build
---------------------------|-----------------
cudatoolkit-7.0 | 1 190.8 MB
The following packages will be UPDATED:
cudatoolkit: 6.0-p0 --> 7.0-1
Proceed ([y]/n)? y
之前:
In [4]: check_cuda()
------------------------------libraries detection-------------------------------
Finding cublas
located at ~/.anaconda3/lib/libcublas.so.6.0.37
trying to open library... ok
Finding cusparse
located at ~/.anaconda3/lib/libcusparse.so.6.0.37
trying to open library... ok
Finding cufft
located at ~/.anaconda3/lib/libcufft.so.6.0.37
trying to open library... ok
Finding curand
located at ~/.anaconda3/lib/libcurand.so.6.0.37
trying to open library... ok
Finding nvvm
located at ~/.anaconda3/lib/libnvvm.so.2.0.0
trying to open library... ok
finding libdevice for compute_20... ok
finding libdevice for compute_30... ok
finding libdevice for compute_35... ok
-------------------------------hardware detection-------------------------------
Found 1 CUDA devices
id 0 b'GeForce GTX 970' [SUPPORTED]
compute capability: 5.2
pci device id: 0
pci bus id: 7
Summary:
1/1 devices are supported
PASSED
Out[4]: True
之后:
In [6]: check_cuda()
------------------------------libraries detection-------------------------------
Finding cublas
located at ~/.anaconda3/lib/libcublas.so.7.0.28
trying to open library... ok
Finding cusparse
located at ~/.anaconda3/lib/libcusparse.so.7.0.28
trying to open library... ok
Finding cufft
located at ~/.anaconda3/lib/libcufft.so.7.0.35
trying to open library... ok
Finding curand
located at ~/.anaconda3/lib/libcurand.so.7.0.28
trying to open library... ok
Finding nvvm
located at ~/.anaconda3/lib/libnvvm.so.3.0.0
trying to open library... ok
finding libdevice for compute_20... ok
finding libdevice for compute_30... ok
finding libdevice for compute_35... ok
-------------------------------hardware detection-------------------------------
Found 1 CUDA devices
id 0 b'GeForce GTX 970' [SUPPORTED]
compute capability: 5.2
pci device id: 0
pci bus id: 7
Summary:
1/1 devices are supported
PASSED
Out[6]: True
评论交流:“您确定只安装了 CUDA 7.0 而没有安装其他 CUDA 工具包吗?或者 Anaconda 是否也安装了它自己版本的 CUDA?CUDA 7.0 发行版工具包支持计算能力 5.2 的设备,但更早的 CUDA 版本不支持。该错误表明您使用的是较早的工具包,您可能需要联系 Continuum。”——“是的,我安装了 CUDA 7.0。我怀疑 Anaconda 自带了它自己的 CUDA 版本:如果我执行 check_cuda(),它显示的路径类似 ~/.anaconda3/lib/libcublas.so.6.0.37。”——“好的,那么您实际上使用的是不支持 Maxwell 显卡的 CUDA 6。您可能需要与供应商讨论如何在该产品中使用 CUDA 7。”——“谢谢,我已经添加了一个解决方案。”