cudaMemcpyToSymbol vs cudaMemcpy

cudaMemcpyToSymbol vs cudaMemcpy,cuda,Cuda,我正在试图弄清楚为什么cudaMemcpyToSymbol不适合我。(但cudaMemcpy确实如此。) 第二个副本正在破坏第一个副本(flt)的内容,而第二个副本不会发生。(如果删除第二个副本,第一个副本可以正常工作。) 结果: GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320) TotThrds=1 ** Source of 1st copy 0x500500500: float[320]= 1.0

我正在试图弄清楚为什么cudaMemcpyToSymbol不适合我。(但cudaMemcpy确实如此。)

第二个副本正在破坏第一个副本(flt)的内容,而第二个副本不会发生。(如果删除第二个副本,第一个副本可以正常工作。)

结果:

GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320)  TotThrds=1   ** Source of 1st copy
  0x500500500: float[320]= 1.000
  0x500500504: float[321]= 0.866
  0x500500508: float[322]= 0.500
  0x50050050c: float[323]= -0.000
  0x500500510: float[324]= -0.500
  0x500500514: float[325]= -0.866
  0x500500518: float[326]= -1.000
  0x50050051c: float[327]= -0.866
  0x500500520: float[328]= -0.500
  0x500500524: float[329]= 0.000
  0x500500528: float[330]= 0.500
  0x50050052c: float[331]= 0.866
  0x500500530: float[332]= 1.000
  GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320)  TotThrds=1     ** Dest of 1st copy
  0x500100f98: float[320]= 0.000
  0x500100f9c: float[321]= 0.500
  0x500100fa0: float[322]= 0.866
  0x500100fa4: float[323]= 1.000
  0x500100fa8: float[324]= 0.866
  0x500100fac: float[325]= 0.500
  0x500100fb0: float[326]= -0.000
  0x500100fb4: float[327]= -0.500
  0x500100fb8: float[328]= -0.866
  0x500100fbc: float[329]= -1.000
  0x500100fc0: float[330]= -0.866
  0x500100fc4: float[331]= -0.500
  0x500100fc8: float[332]= 0.000
  GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0)  TotThrds=1      ** Source of 2nd copy
  0x500500780: int[0]= 1
  0x500500784: int[1]= 1
  0x500500788: int[2]= 1
  0x50050078c: int[3]= 1
  0x500500790: int[4]= 1
  0x500500794: int[5]= 1
  0x500500798: int[6]= 1
  0x50050079c: int[7]= 1
  0x5005007a0: int[8]= 1
  0x5005007a4: int[9]= 1
  0x5005007a8: int[10]= 1
  0x5005007ac: int[11]= 1
  0x5005007b0: int[12]= 0
  GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0)  TotThrds=1      ** Dest of 2nd copy
  0x500100818: int[0]= 0
  0x50010081c: int[1]= 0
  0x500100820: int[2]= 0
  0x500100824: int[3]= 0
  0x500100828: int[4]= 0
  0x50010082c: int[5]= 0
  0x500100830: int[6]= 0
  0x500100834: int[7]= 0
  0x500100838: int[8]= 0
  0x50010083c: int[9]= 0
  0x500100840: int[10]= 0
  0x500100844: int[11]= 0
  0x500100848: int[12]= 0
  GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320)  TotThrds=1   ** Source of first copy
  0x500500500: float[320]= 1.000
  0x500500504: float[321]= 0.866
  0x500500508: float[322]= 0.500
  0x50050050c: float[323]= -0.000
  0x500500510: float[324]= -0.500
  0x500500514: float[325]= -0.866
  0x500500518: float[326]= -1.000
  0x50050051c: float[327]= -0.866
  0x500500520: float[328]= -0.500
  0x500500524: float[329]= 0.000
  0x500500528: float[330]= 0.500
  0x50050052c: float[331]= 0.866
  0x500500530: float[332]= 1.000
  GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320)  TotThrds=1    ** Dest of first copy
  0x500100f98: float[320]= 1.000
  0x500100f9c: float[321]= 0.866
  0x500100fa0: float[322]= 0.500
  0x500100fa4: float[323]= -0.000
  0x500100fa8: float[324]= -0.500
  0x500100fac: float[325]= -0.866
  0x500100fb0: float[326]= -1.000
  0x500100fb4: float[327]= -0.866
  0x500100fb8: float[328]= -0.500
  0x500100fbc: float[329]= 0.000
  0x500100fc0: float[330]= 0.500
  0x500100fc4: float[331]= 0.866
  0x500100fc8: float[332]= 1.000
  GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0)  TotThrds=1    ** Source of 2nd copy
  0x500500780: int[0]= 1
  0x500500784: int[1]= 1
  0x500500788: int[2]= 1
  0x50050078c: int[3]= 1
  0x500500790: int[4]= 1
  0x500500794: int[5]= 1
  0x500500798: int[6]= 1
  0x50050079c: int[7]= 1
  0x5005007a0: int[8]= 1
  0x5005007a4: int[9]= 1
  0x5005007a8: int[10]= 1
  0x5005007ac: int[11]= 1
  0x5005007b0: int[12]= 0
  GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0)  TotThrds=1    ** Destination of 2nd copy
  0x500100818: int[0]= 1
  0x50010081c: int[1]= 1
  0x500100820: int[2]= 1
  0x500100824: int[3]= 1
  0x500100828: int[4]= 1
  0x50010082c: int[5]= 1
  0x500100830: int[6]= 1
  0x500100834: int[7]= 1
  0x500100838: int[8]= 1
  0x50010083c: int[9]= 1
  0x500100840: int[10]= 1
  0x500100844: int[11]= 1
  0x500100848: int[12]= 0
结果:

GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320)  TotThrds=1   ** Source of 1st copy
  0x500500500: float[320]= 1.000
  0x500500504: float[321]= 0.866
  0x500500508: float[322]= 0.500
  0x50050050c: float[323]= -0.000
  0x500500510: float[324]= -0.500
  0x500500514: float[325]= -0.866
  0x500500518: float[326]= -1.000
  0x50050051c: float[327]= -0.866
  0x500500520: float[328]= -0.500
  0x500500524: float[329]= 0.000
  0x500500528: float[330]= 0.500
  0x50050052c: float[331]= 0.866
  0x500500530: float[332]= 1.000
  GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320)  TotThrds=1     ** Dest of 1st copy
  0x500100f98: float[320]= 0.000
  0x500100f9c: float[321]= 0.500
  0x500100fa0: float[322]= 0.866
  0x500100fa4: float[323]= 1.000
  0x500100fa8: float[324]= 0.866
  0x500100fac: float[325]= 0.500
  0x500100fb0: float[326]= -0.000
  0x500100fb4: float[327]= -0.500
  0x500100fb8: float[328]= -0.866
  0x500100fbc: float[329]= -1.000
  0x500100fc0: float[330]= -0.866
  0x500100fc4: float[331]= -0.500
  0x500100fc8: float[332]= 0.000
  GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0)  TotThrds=1      ** Source of 2nd copy
  0x500500780: int[0]= 1
  0x500500784: int[1]= 1
  0x500500788: int[2]= 1
  0x50050078c: int[3]= 1
  0x500500790: int[4]= 1
  0x500500794: int[5]= 1
  0x500500798: int[6]= 1
  0x50050079c: int[7]= 1
  0x5005007a0: int[8]= 1
  0x5005007a4: int[9]= 1
  0x5005007a8: int[10]= 1
  0x5005007ac: int[11]= 1
  0x5005007b0: int[12]= 0
  GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0)  TotThrds=1      ** Dest of 2nd copy
  0x500100818: int[0]= 0
  0x50010081c: int[1]= 0
  0x500100820: int[2]= 0
  0x500100824: int[3]= 0
  0x500100828: int[4]= 0
  0x50010082c: int[5]= 0
  0x500100830: int[6]= 0
  0x500100834: int[7]= 0
  0x500100838: int[8]= 0
  0x50010083c: int[9]= 0
  0x500100840: int[10]= 0
  0x500100844: int[11]= 0
  0x500100848: int[12]= 0
  GpuDumpFloatMemory<<<1,1>>>(0x500500000, 13, 320)  TotThrds=1   ** Source of first copy
  0x500500500: float[320]= 1.000
  0x500500504: float[321]= 0.866
  0x500500508: float[322]= 0.500
  0x50050050c: float[323]= -0.000
  0x500500510: float[324]= -0.500
  0x500500514: float[325]= -0.866
  0x500500518: float[326]= -1.000
  0x50050051c: float[327]= -0.866
  0x500500520: float[328]= -0.500
  0x500500524: float[329]= 0.000
  0x500500528: float[330]= 0.500
  0x50050052c: float[331]= 0.866
  0x500500530: float[332]= 1.000
  GpuDumpFloatMemory<<<1,1>>>(0x500100a98, 13, 320)  TotThrds=1    ** Dest of first copy
  0x500100f98: float[320]= 1.000
  0x500100f9c: float[321]= 0.866
  0x500100fa0: float[322]= 0.500
  0x500100fa4: float[323]= -0.000
  0x500100fa8: float[324]= -0.500
  0x500100fac: float[325]= -0.866
  0x500100fb0: float[326]= -1.000
  0x500100fb4: float[327]= -0.866
  0x500100fb8: float[328]= -0.500
  0x500100fbc: float[329]= 0.000
  0x500100fc0: float[330]= 0.500
  0x500100fc4: float[331]= 0.866
  0x500100fc8: float[332]= 1.000
  GpuDumpIntMemory<<<1,1>>>(0x500500780, 13, 0)  TotThrds=1    ** Source of 2nd copy
  0x500500780: int[0]= 1
  0x500500784: int[1]= 1
  0x500500788: int[2]= 1
  0x50050078c: int[3]= 1
  0x500500790: int[4]= 1
  0x500500794: int[5]= 1
  0x500500798: int[6]= 1
  0x50050079c: int[7]= 1
  0x5005007a0: int[8]= 1
  0x5005007a4: int[9]= 1
  0x5005007a8: int[10]= 1
  0x5005007ac: int[11]= 1
  0x5005007b0: int[12]= 0
  GpuDumpIntMemory<<<1,1>>>(0x500100818, 13, 0)  TotThrds=1    ** Destination of 2nd copy
  0x500100818: int[0]= 1
  0x50010081c: int[1]= 1
  0x500100820: int[2]= 1
  0x500100824: int[3]= 1
  0x500100828: int[4]= 1
  0x50010082c: int[5]= 1
  0x500100830: int[6]= 1
  0x500100834: int[7]= 1
  0x500100838: int[8]= 1
  0x50010083c: int[9]= 1
  0x500100840: int[10]= 1
  0x500100844: int[11]= 1
  0x500100848: int[12]= 0
GpuDumpFloatMemory(0x500500000,13320)TotThrds=1**第一份拷贝的来源
0x500500:浮点[320]=1.000
0x500504:浮点[321]=0.866
0x500508:浮点[322]=0.500
0x50050C:浮点[323]=-0.000
0x500500510:浮点[324]=-0.500
0x500514:浮点[325]=-0.866
0x500518:浮点[326]=-1.000
0x50051C:浮点[327]=-0.866
0x500520:浮点[328]=-0.500
0x500524:浮点[329]=0.000
0x500528:浮点[330]=0.500
0x50052C:浮点[331]=0.866
0x500530:浮点[332]=1.000
GpuDumpFloatMemory(0x500100a98,13320)总计=1**第一份副本的目的地
0x500100f98:浮点[320]=1.000
0x500100f9c:浮点[321]=0.866
0x500100fa0:浮点[322]=0.500
0x500100fa4:浮点[323]=-0.000
0x500100fa8:浮点[324]=-0.500
0x500100fac:浮点[325]=-0.866
0x500100fb0:浮点[326]=-1.000
0x500100fb4:浮点[327]=-0.866
0x500100fb8:浮点[328]=-0.500
0x500100fbc:浮点[329]=0.000
0x500100fc0:浮点[330]=0.500
0x500100fc4:浮点[331]=0.866
0x500100fc8:浮点[332]=1.000
GpuDumpIntMemory(0x500500780,13,0)TotThrds=1**第二个副本的源
0x500780:int[0]=1
0x500784:int[1]=1
0x500788:int[2]=1
0x50078C:int[3]=1
0x500790:int[4]=1
0x500794:int[5]=1
0x500798:int[6]=1
0x50079C:int[7]=1
0x5005007a0:int[8]=1
0x5005007a4:int[9]=1
0x5005007a8:int[10]=1
0x5005007ac:int[11]=1
0x5007B0:int[12]=0
GpuDumpIntMemory(0x500100818,13,0)TotThrds=1**第二次拷贝的目标
0x500100818:int[0]=1
0x50010081c:int[1]=1
0x500100820:int[2]=1
0x500100824:int[3]=1
0x500100828:int[4]=1
0x50010082c:int[5]=1
0x500100830:int[6]=1
0x500100834:int[7]=1
0x500100838:int[8]=1
0x50010083c:int[9]=1
0x500100840:int[10]=1
0x500100844:int[11]=1
0x500100848:int[12]=0

当我看到坏的情况时,似乎符号表发生了什么事情。与中一样,第一个复制目标的数据非常熟悉。不是被覆盖了,只是被移动了。好像指针是错的

我觉得第二本书好像坏了。您已定义此数组:

__constant__ int   ints[160];  // 640 bytes
正如正确指出的,它的长度为640字节

您的第二份副本如下所示:

cudaMemcpyToSymbol(ints,pFlts,640,1920,cudaMemcpyDeviceToDevice);  // second copy
这表示,“将总共640个字节从
pFlts
数组复制到
ints
数组,其中
ints
数组中的存储位置从数组开始的1920个字节开始。”

这行不通。
ints
数组只有640字节长。您不能选择插入1920字节的位置作为目标

从以下文件中:

偏移量-从符号开始的偏移量(字节)

在这种情况下,符号是
ints

也许你想要的是:

cudaMemcpyToSymbol(ints,pFlts+480,640,0,cudaMemcpyDeviceToDevice);  // second copy
编辑: 为了回答有关错误检查的评论中的问题,我编制了以下简单的测试程序:

#include <stdio.h>

#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

__constant__ int ints[160];

int main(){

  int *d_ints;
  cudaError_t mystatus;

  cudaMalloc((void **)&d_ints, sizeof(int)*160);
  cudaCheckErrors("cudamalloc fail");
  mystatus = cudaMemcpyToSymbol(ints, d_ints, 160*sizeof(int), 1920, cudaMemcpyDeviceToDevice);
  if (mystatus != cudaSuccess) printf("returned value was not cudaSuccess\n");
  cudaCheckErrors("cudamemcpytosymbol fail");

  printf("OK!\n");
  return 0;
}

这表示在这种情况下,cudaMemcpyToSymbol函数调用的错误返回值和
cudaGetLastError()
方法都返回错误。如果我在这个测试用例中将1920参数改为零,错误就会消失。

您正在对cuda调用进行错误检查吗?我们给了你一个例子。是的,我没有包括进行检查的宏。没有报告任何错误。(cudaSuccess)偏移量应用于符号,而不是源。那是你的问题。你的错误检查失败了。cuda运行时肯定会针对这种情况抛出错误。我只是使用上面链接的错误检查示例对它进行了测试。如果计数超过目标大小,我会得到“无效参数”。但如果偏移量超过目标大小,则不会出错。是的,我将偏移量用作源的偏移量,而不是目标的偏移量。(巴萨克沃兹)谢谢
returned value was not cudaSuccess
Fatal error: cudamemcpytosymbol fail (invalid argument at t94.cu:26)
*** FAILED - ABORTING