提供垃圾值的CUDA程序_Cuda - Fatal编程技术网

提供垃圾值的CUDA程序

cuda

提供垃圾值的CUDA程序,cuda,Cuda,我真的不明白为什么下面代码的输出不是a和b #include<cutil.h> #include<iostream> __global__ void p(unsigned char **a){ unsigned char temp[2]; temp[0] = 'a'; temp[1] = 'b'; a[0] = temp; } void main(){ unsigned char **a ; cudaMalloc((void**)&a,

我真的不明白为什么下面代码的输出不是a和b

#include<cutil.h>
#include<iostream>
// Kernel from the question: fills a two-byte local array with 'a' and 'b' and
// stores that array's address into a[0].
//
// NOTE(review): `temp` is an automatic local variable of this kernel, so the
// address written to a[0] is no longer valid once the kernel has returned.
// Copying from that address on the host afterwards is what produces the
// garbage output the question asks about. The code is intentionally kept as
// posted; copy the bytes into caller-provided device memory instead of
// publishing the address of a local.
__global__ void p(unsigned char **a){


unsigned char temp[2];   // automatic local storage, gone after the kernel exits
temp[0] = 'a';
temp[1] = 'b';
a[0] = temp;             // stores the ADDRESS of the local array, not its contents


}

// Host driver from the question, kept exactly as posted. It launches the
// kernel, tries to read back the pointer the kernel stored and then the two
// bytes behind that pointer, and prints them.
//
// NOTE(review): none of the CUDA calls below have their return value checked,
// so any failure goes unnoticed. `void main` is also not a standard signature.
void main(){

    unsigned char **a ;
    // Device allocation large enough for exactly one pointer (the a[0] slot).
    cudaMalloc((void**)&a, sizeof(unsigned char*));
    // Single block, single thread: the kernel writes one pointer into a[0].
    p<<<1,1>>>(a);
    // NOTE(review): `c` is declared but never initialized, yet it is used as
    // the destination of the first copy below and then dereferenced as c[0].
    unsigned char **c;
    unsigned char b[2];
    // Intended to fetch the pointer value stored in a[0] back to the host.
    cudaMemcpy(c, a, sizeof(unsigned char *), cudaMemcpyDeviceToHost);
    // Intended to fetch the two bytes; the source address is whatever the
    // kernel stored, i.e. the address of its already-dead local array.
    cudaMemcpy(b, c[0], 2*sizeof(unsigned char), cudaMemcpyDeviceToHost);
    // Print both bytes, one per line.
    for( int i=0 ; i < 2; i++){
        printf("%c\n", b[i]);
    }


    // Blocks until a character is read from stdin before the program exits.
    getchar();


}

#include<cutil.h>
#include<iostream>
__global__ void p(unsigned char **a){
unsigned char temp[2];
temp[0] = 'a';
temp[1] = 'b';
a[0] = temp;
}
void main(){
unsigned char **a ;
cudaMalloc((void**)&a, sizeof(unsigned char*));
p<<<1,1>>>(a);
unsigned char **c;
unsigned char b[2];
cudaMemcpy(c, a, sizeof(unsigned char *), cudaMemcpyDeviceToHost);
cudaMemcpy(b, c[0], 2*sizeof(unsigned char), cudaMemcpyDeviceToHost);
for( int i=0 ; i < 2; i++){
printf("%c\n", b[i]);
}
getchar();
}

我的逻辑出了什么问题？

我们暂时不谈CUDA。让我们制作一个函数，将数据写入用户提供的数组。用户通过指针传递数组：

/*
 * Writes two sample values into the buffer supplied by the caller.
 *
 * dst: start of a caller-owned area with room for at least two ints. The
 *      function cannot check this; providing enough space is the caller's job.
 */
void fill_me_up(int * dst)
{
  int * out = dst;

  *out++ = 28; /* first element  */
  *out   = 75; /* second element */
}

/* Defined further down; declared here so that the call in main() does not
   depend on an implicit function declaration. */
void fill_me_up_again(int * dst);

/*
 * Shows the two ways a caller can provide destination memory to the fill
 * functions: an automatic local array and a dynamically allocated buffer.
 *
 * Returns 0 on success, 1 if the dynamic allocation fails.
 */
int main()
{
  int my_memory[2]; // here's our memory -- automatic local storage

  fill_me_up(my_memory);     // OK, array decays to pointer-to-beginning
  fill_me_up(&my_memory[0]); // A bit more explicit

  int * your_memory = malloc(sizeof(int) * 2); // more memory, this time dynamic
  if (!your_memory)
    return 1; // allocation failed: never pass a null destination to the filler

  fill_me_up_again(your_memory);
  /* ... */
  free(your_memory); // the caller allocated the buffer, so the caller releases it
  return 0;
}

现在，您对局部变量所做的操作没有意义：您想要使用局部变量的地址，而在离开函数作用域之后，该地址就会失效。退而求其次，您可以使用 `memcpy()`，或者某个等价的 C++ 算法：

/*
 * Copies two sample values from a local array into the caller's buffer.
 *
 * The local array only lives for the duration of the call, so its CONTENTS
 * are copied out; its address is never handed back to the caller.
 *
 * dst: start of a caller-owned area with room for at least two ints.
 */
void fill_me_up_again(int * dst)
{
  const int source[] = { 28, 75 };
  const size_t byte_count = sizeof source;

  memcpy(dst, source, byte_count);
}

好，现在开始调用该函数：我们首先必须提供目标内存，然后传递一个指针：

/*
 * Writes two sample values into the buffer supplied by the caller.
 *
 * dst: start of a caller-owned area with room for at least two ints. The
 *      function cannot check this; providing enough space is the caller's job.
 */
void fill_me_up(int * dst)
{
  int * out = dst;

  *out++ = 28; /* first element  */
  *out   = 75; /* second element */
}

/*
 * Shows the two ways a caller can provide destination memory to the fill
 * functions: an automatic local array and a dynamically allocated buffer.
 *
 * Returns 0 on success, 1 if the dynamic allocation fails.
 */
int main()
{
  int my_memory[2]; // here's our memory -- automatic local storage

  fill_me_up(my_memory);     // OK, array decays to pointer-to-beginning
  fill_me_up(&my_memory[0]); // A bit more explicit

  int * your_memory = malloc(sizeof(int) * 2); // more memory, this time dynamic
  if (!your_memory)
    return 1; // allocation failed: never pass a null destination to the filler

  fill_me_up_again(your_memory);
  /* ... */
  free(your_memory); // the caller allocated the buffer, so the caller releases it
  return 0;
}

（在 C++ 中，您可能会使用 `new int[2]` 和 `delete[] your_memory`，但这里使用 C 的 `malloc()`，希望能让它与 CUDA 的联系变得更清晰。）

当你移动到CUDA设备时，你必须给它一个设备指针，而不是主机指针，所以你必须先设置一个，然后将结果复制回来，但这是唯一的改变。

从哪里开始——这在普通程序中甚至没有意义。我们必须谈谈指针、局部变量、数组和字符串，似乎：-）@kerrek：有没有办法在cuda的全局函数中声明数组，使其在函数结束后存在？设备函数（AFAIK）只能在设备上分配内存。是的，原则上，您可以在设备函数中分配动态设备内存，并通过参数返回指向该内存的指针，然后从主机访问该设备内存以复制数据。（这听起来设计很糟糕！）你真的想这么做吗？因为这是一个质量很差的问题，除了你之外，对任何人都没有帮助。它是。这就是为什么我否决了它，并投票关闭了它。