无法在cuda内核函数中使用printf_Cuda_Printf

无法在cuda内核函数中使用printf

cuda

无法在cuda内核函数中使用printf,cuda,printf,Cuda,Printf,似乎printf在cuda代码的内核中不起作用 #include "Common.h" #include<cuda.h> #include <stdio.h> __device__ __global__ void Kernel(float *a_d , float *b_d ,int size) { int idx = threadIdx.x ; int idy = threadIdx.y ; //Allocating memory in the

似乎 printf 在 CUDA 代码的内核函数中不起作用：

#include "Common.h"
#include<cuda.h>
#include <stdio.h>

/*
 * Kernel: row-reduces a small augmented matrix held in shared memory.
 *
 * Layout: a_d and b_d are row-major with a row stride of (size + 1), i.e.
 * `size` rows by `size + 1` columns. For every step i = 1 .. size-1, each row
 * r >= i is replaced by  row[i-1] - (temp[i-1][i-1] / temp[r][i-1]) * row[r],
 * which cancels column i-1 of that row. The reduced matrix is written to b_d.
 *
 * Launch expectations:
 *   - a single thread block, indexed by threadIdx.x (column) and
 *     threadIdx.y (row); blockIdx is not used;
 *   - blockDim.x >= size + 1 and blockDim.y >= size so every element has a
 *     thread; surplus threads are masked out by the bounds guard below;
 *   - size + 1 <= 16, because the shared tile is a fixed 16x16 array.
 *
 * No pivoting is performed: a zero in temp[r][i-1] produces inf/NaN.
 *
 * Device-side printf needs a device of compute capability 2.0 or higher and
 * a matching build flag (e.g. -arch=sm_20); otherwise nvcc reports that a
 * host function is being called from a __global__ function.
 *
 * Note: a kernel is declared with __global__ alone; combining it with
 * __device__ is not allowed.
 */
__global__ void Kernel(float *a_d , float *b_d ,int size)
{
    int idx = threadIdx.x ;   // column handled by this thread
    int idy = threadIdx.y ;   // row handled by this thread

    // Tile of the matrix in the shared memory of the device.
    __shared__ float temp[16][16];

    // Threads outside the size x (size+1) matrix must not touch memory, but
    // they still have to reach every __syncthreads() below.
    const bool inMatrix = (idx <= size) && (idy < size);

    // Copy the data to shared memory.
    if (inMatrix) {
        temp[idy][idx] = a_d[(idy * (size+1)) + idx] ;
    }
    // Every thread reads elements loaded by other threads, so the whole tile
    // must be visible before the elimination starts.
    __syncthreads();

    printf("idx=%d, idy=%d, size=%d\n", idx, idy, size);

    for(int i =1 ; i<size ;i++) {
        // `size` and `i` are uniform across the block, so all threads run the
        // same number of iterations and hit the same barriers.
        const bool active = inMatrix && ((idy + i) < size);
        float updated = 0.0f;

        if (active) {
            // Read phase: the multiplier uses temp[i+idy][i-1], which the
            // thread with idx == i-1 is about to overwrite. Compute into a
            // register first so no thread sees a half-updated row.
            float var1 = -(temp[i-1][i-1] / temp[i+idy][i-1]);
            updated = temp[i-1][idx] + (var1 * temp[i+idy][idx]);
        }
        __syncthreads(); // all reads of this step are done

        if (active) {
            // Write phase.
            temp[i+idy][idx] = updated;
        }
        __syncthreads(); // all writes are visible before the next iteration
    }

    if (inMatrix) {
        b_d[idy*(size+1) + idx] = temp[idy][idx];
    }
}

如果您的 CUDA 版本是 4，则需要使用 cuPrintf（原文此处附有示例链接，现已缺失）。请注意，printf 是一种非常有限的调试手段，使用 Nsight 或 Nsight Eclipse Edition 等 IDE 调试会更好。

引用 CUDA 编程指南：“格式化输出仅受计算能力 2.x 及更高的设备支持”。有关更多信息，请参阅编程指南中关于格式化输出的章节。

计算能力<2.x的设备可以使用cuPrintf

如果您使用的是计算能力 2.x 及以上的设备，并且想使用 printf，请确保编译时指定了 -arch=sm_20（或更高）。默认目标为 sm_10，它不具备支持 printf 所需的功能。

NVIDIA 为 CUDA 提供了三个源代码级调试器。在检查变量时，您可能会发现这些工具比 printf 更有用：

- Nsight Visual Studio Edition CUDA 调试器
- Nsight Eclipse Edition CUDA 调试器
- cuda-gdb

我的理解对吗：您是想从 GPU 打印一些文本？它不被支持，我并不奇怪。

是的，我想检查一些变量。

恐怕不行。使用模拟器也许能成功，但我不指望在真实设备上能做到这一点。

CC 2.0 及更高版本支持在内核中使用 printf。如果您的 GPU 是 CC 2.0 或更高版本，请使用 -arch=sm_20 编译，错误应该就会消失。

作为对 Robert 所说内容的补充：设备端 printf 需要使用 ABI，因此包含 printf() 的代码不能使用 -abi=no 编译，并且您必须包含 stdio 头文件。

既然说到这里：不能在 xorg 正在使用的同一设备上进行调试，这真是太愚蠢了。只是说说，无意冒犯。（编辑：好吧，不是愚蠢，而是烦人。）

抱歉——许多 cuPrintf() 示例中的任何一个都可以使用（原文此处的示例链接已缺失），尽管现在计算能力 1.x 的设备越来越难找到了。

 error: calling a host function("printf") from a __device__/__global__ function("Kernel") is not allowed