C++ 调试CUDA-CUDAUNKNOWN错误

C++ 调试CUDA-CUDAUNKNOWN错误,c++,error-handling,cuda,C++,Error Handling,Cuda,我正在尝试使用CUDA创建mandlebrot集合的位图图像。我看了一些教程,在这里已经得到了一些帮助,帮助我将非托管CUDA dll与托管C#gui集成。我现在遇到的问题是,我的CUDA dll没有正确地形成位图-当我在内核启动后在cudaDeviceSynchronize()上使用错误检查宏时,我得到了CUDAUnknowError 以下是相关代码: #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline

我正在尝试使用CUDA创建mandlebrot集合的位图图像。我看了一些教程,在这里已经得到了一些帮助,帮助我将非托管CUDA dll与托管C#gui集成。我现在遇到的问题是,我的CUDA dll没有正确地形成位图-当我在内核启动后在cudaDeviceSynchronize()上使用错误检查宏时,我得到了CUDAUnknowError

以下是相关代码:

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
   if (code != cudaSuccess) 
   {
      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}



struct complex
{
    float r, i;

    __device__ complex(float _r, float _i) : r(_r), i(_i) {}

    __device__ float magnitudeSquared(){ return (r*r + i*i) ; }

    __device__ complex& operator*=(const complex& rhs)
    {
        r = (r * rhs.r - i * rhs.i);
        i = (r * rhs.i + i * rhs.r);
        return *this;
    }

    __device__ complex& operator+=(const complex& rhs)
    {
        r = (r + rhs.r);
        i = (i + rhs.i);
        return *this;
    }
};

__device__ int mandlebrotDiverge(complex *z)
{
    complex c(*z);
    int i = 0;
    for(i = 0; i < MAX_ITERATIONS; i++)
    {
        *z *= *z;
        *z += c;
        if(z->magnitudeSquared() >= 2)
        {
            return 1;
        }
    }
    return 0;

}

__global__ void kernel(int *ptr, int width, int height)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    float scale = 1.5f;
    complex z(scale*(float)(width/2 - x)/(width/2), scale*(float)(height/2 - y)/(height/2));

    if(offset < (1920*1080))
    {
        int mValue = mandlebrotDiverge(&z);
        ptr[offset*3 + (uint8_t)0] = (uint8_t)(mValue*255);
        ptr[offset*3 + (uint8_t)1] = (uint8_t)(mValue*255);
        ptr[offset*3 + (uint8_t)2] = (uint8_t)(mValue*255);
    }
}




extern "C" __declspec(dllexport) void __cdecl generateBitmap(void *bitmap)
{
    int width = 1920;
    int height = 1080;
    int *dev_bmp;

    dim3 blocks(width/16, height/16);
    dim3 threads(16, 16);

    gpuErrchk(cudaMalloc((void**)&dev_bmp, (3*width*height)));

    kernel<<<blocks, threads>>>(dev_bmp, width, height);
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    gpuErrchk(cudaMemcpy(bitmap, dev_bmp, (width*height*3), cudaMemcpyDeviceToHost));
    cudaFree(dev_bmp);
}
所以我从*int改为*unsigned char,因为我试图分配单个字节的数组,而不是int。清除了很多错误,但现在我明白了:

========= CUDA-MEMCHECK
========= Program hit error 6 on CUDA API call to cudaDeviceSynchronize 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:C:\Windows\system32\nvcuda.dll (cuD3D11CtxCreate + 0x102459) [0x11e4b9]
=========     Host Frame:C:\...\cudart32_55.dll (cudaDeviceSynchronize + 0xdd) [0x1149d]
=========     Host Frame:C:\...\FractalMaxUnmanaged.dll (generateBitmap + 0xf0) [0x97c0]
=========
========= ERROR SUMMARY: 1 error
好的,我正在取得进展,但现在当我逐步完成c#应用程序时,字节缓冲区中每个字节的值都是255,这没有意义。以下是c#代码:


这里的问题是内存分配和拷贝错误,您忘记了/应该是以字节为单位的大小。由于
int
使用4个字节,因此实际分配的内存比内核所需的内存少。改为使用此字符(或使用只需要1字节的
无符号字符):


还要确保正确分配了
位图
。正如@Eugene所说,使用
cuda memcheck
是找到此类错误来源的好方法。

Check,关于未指定启动失败的部分。你能指定你正在使用的cuda版本吗?使用cuda 5.5-当我更改一些注册表值以阻止其超时时,消除了错误6,但是仍然无法正确生成位图。我使用了cuda memcheck,并更改了一些注册表值来停止我得到的超时错误,但是现在我没有看到任何错误,当我逐步通过c#应用程序时,我也没有得到任何错误。但是,显示的位图仅为白色,字节缓冲区中的位图数据仅为每个字节255。
========= CUDA-MEMCHECK
========= Program hit error 6 on CUDA API call to cudaDeviceSynchronize 
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:C:\Windows\system32\nvcuda.dll (cuD3D11CtxCreate + 0x102459) [0x11e4b9]
=========     Host Frame:C:\...\cudart32_55.dll (cudaDeviceSynchronize + 0xdd) [0x1149d]
=========     Host Frame:C:\...\FractalMaxUnmanaged.dll (generateBitmap + 0xf0) [0x97c0]
=========
========= ERROR SUMMARY: 1 error
public unsafe class NativeMethods
{

    [DllImport(@"C:\Users\Bill\Documents\Visual Studio 2012\Projects\FractalMaxUnmanaged\Debug\FractalMaxUnmanaged.dll", CallingConvention=CallingConvention.Cdecl)]
    public static extern void generateBitmap(void *bitmap);

    public static Bitmap create()
    {
        byte[] buf = new byte[1920 * 1080 * 3];
        fixed (void* pBuffer = buf)
        {
            generateBitmap(pBuffer);

        }
        IntPtr unmanagedPtr = Marshal.AllocHGlobal(buf.Length);
        Marshal.Copy(buf, 0, unmanagedPtr, buf.Length);
        Bitmap img = new Bitmap(1920, 1080, 1920*3, PixelFormat.Format24bppRgb, unmanagedPtr);

        Marshal.FreeHGlobal(unmanagedPtr);

        return img;
    }
}
cudaMalloc((void**)&dev_bmp, (3*width*height)*sizeof(int));
cudaMemcpy(bitmap, dev_bmp, (3*width*height)*sizeof(int), cudaMemcpyDeviceToHost);