Cuda 传递给设备函数的共享内存地址仍然是共享内存？_Cuda_Cuda Gdb

Cuda 传递给设备函数的共享内存地址仍然是共享内存？

cuda

Cuda 传递给设备函数的共享内存地址仍然是共享内存？,cuda,cuda-gdb,Cuda,Cuda Gdb,假设我有这个 __device__ 函数： __device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){ return array_sh + params; } 在 __global__ 内核中，我以如下方式使用它： uarray = dev_kernel (uarray, params); 其中，uarray 是位于共享内存中的数组。但是当我使用 cuda-gdb 查看 __global__ 内核中 uarray 的地址时……

假设我有这个

__device__

函数：

// Returns array_sh advanced by params elements (bytes, because the element
// type is unsigned char). This is pure pointer arithmetic: nothing is
// dereferenced or copied, so the result addresses the same underlying
// storage that the caller's pointer addressed.
__device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){
    return array_sh + params;
}

在

__global__

内核中，我以如下方式使用它：

uarray = dev_kernel (uarray, params);

其中，

uarray

是位于共享内存中的数组

但是当我使用cuda gdb查看

__global__

内核中

uarray

的地址时，我得到：

(@generic unsigned char * @shared) 0x1000010 "z\377*"

在

__device__

内核中，我得到：

(unsigned char * @generic) 0x1000010 <Error reading address 0x1000010: Operation not permitted>

尽管有错误，程序运行正常（可能是cuda gdb的一些限制）

所以，我想知道：在

__device__

内核中，

uarray

是共享的吗？我正在将数组从全局内存更改为共享内存，时间几乎相同（使用共享内存时，时间稍差）

所以，我想知道：在

__device__

内核中，

uarray

是共享的吗

是的，当您以这种方式将指向共享内存的指针传递给设备函数时，它仍然指向共享内存中的相同位置

为了回答下面让我困惑的问题，我选择展示一个简单的例子：

$ cat t249.cu
#include <stdio.h>

#define SSIZE 256

__device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){  // returns array_sh offset by params bytes; pure pointer arithmetic, nothing is dereferenced
    return array_sh + params;
}

__global__ void mykernel(){
  __shared__ unsigned char myshared[SSIZE];  // demo data: shared-memory byte array whose address is handed to dev_kernel below
  __shared__ unsigned char *u_array;         // the pointer variable itself is placed in shared memory; its target is assigned below
  for (int i = 0; i< SSIZE; i++)
    myshared[i] = (unsigned char) i;         // element i holds the value i, so the printed values are easy to check
  unsigned char *loc = dev_kernel(myshared, 5);
  u_array = loc;
  printf("val = %d\n", *loc);
  printf("val = %d\n", *u_array);            // u_array holds the same address as loc, so both printfs read myshared[5]
}

// Launches mykernel with one block of one thread, waits for it to finish and
// reports any CUDA error on stderr. Returns 0 on success, 1 on failure.
int main(){

  mykernel<<<1,1>>>();
  // A kernel launch has no return value; launch failures (for example a
  // binary built for an architecture the installed GPU cannot run) are only
  // visible through cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err == cudaSuccess) {
    // Synchronizing surfaces faults raised while the kernel was running and
    // flushes the device-side printf buffer before the process exits.
    err = cudaDeviceSynchronize();
  }
  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    return 1;
  }
  return 0;
}
$ nvcc -arch=sm_20 -g -G -o t249 t249.cu
$ cuda-gdb ./t249
NVIDIA (R) CUDA Debugger
5.5 release
....
Reading symbols from /home/user2/misc/t249...done.
(cuda-gdb) break mykernel
Breakpoint 1 at 0x4025dc: file t249.cu, line 9.
(cuda-gdb) run
Starting program: /home/user2/misc/t249
[Thread debugging using libthread_db enabled]

Breakpoint 1, mykernel () at t249.cu:9
9       __global__ void mykernel(){
(cuda-gdb) break 14
Breakpoint 2 at 0x4025e1: file t249.cu, line 14.
(cuda-gdb) continue
Continuing.
[New Thread 0x7ffff725a700 (LWP 26184)]
[Context Create of context 0x67e360 on Device 0]
[Launch of CUDA Kernel 0 (mykernel<<<(1,1,1),(1,1,1)>>>) on Device 0]
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 2, warp 0, lane 0]

Breakpoint 1, mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:12
12        for (int i = 0; i< SSIZE; i++)
(cuda-gdb) continue
Continuing.

Breakpoint 2, mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:14
14        unsigned char *loc = dev_kernel(myshared, 5);
(cuda-gdb) print &(myshared[0])
$1 = (@shared unsigned char *) 0x8 ""
       ^
       |
     cuda-gdb is telling you that this pointer is defined in a __shared__ statement, and therefore its storage is implicit and it is unmodifiable.

(cuda-gdb) print &(u_array)
$2 = (@generic unsigned char * @shared *) 0x0
       ^                          ^
       |                          u_array is stored in shared memory.
      u_array is a generic pointer, meaning it can point to anything.  

(cuda-gdb) step
dev_kernel(unsigned char * @generic, int) (array_sh=0x1000008 "", params=5)
    at t249.cu:6
6           return array_sh + params;
(cuda-gdb) print array_sh
$3 = (@generic unsigned char * @register) 0x1000008 ""
          ^                      ^
          |                    array_sh is stored in a register.
         array_sh is a generic pointer, it can point to anything.

(cuda-gdb) print u_array
No symbol "u_array" in current context.
 (note that I can't access u_array from inside the __device__ function, so I don't understand your comment there.)

(cuda-gdb) step
mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:15
15        u_array = loc;
(cuda-gdb) step
16        printf("val = %d\n", *loc);
(cuda-gdb) print u_array
$4 = (
    @generic unsigned char * @shared) 0x100000d ......
       ^                         ^
       |                       u_array is stored in shared memory
     u_array is a generic pointer, it can point to anything
(cuda-gdb)

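$ cat t249.cu
#include <stdio.h>
#define SSIZE 256
__device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){
    return array_sh + params;
}
__global__ void mykernel(){
  __shared__ unsigned char myshared[SSIZE];
  __shared__ unsigned char *u_array;
  for (int i = 0; i< SSIZE; i++)
  ……（此处被截断，完整代码见上文）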


虽然您没有提供，但根据您得到的 cuda-gdb 输出，我假设您对 u_array 的定义与我的类似。
请注意，像 @shared
这样的指示符并不是告诉您指针所指向的内存类型，而是告诉您它是哪一种指针（在 __shared__
语句中隐式定义），或者它本身存储在哪里（在共享内存中）。
如果这仍不能解决您的问题，请像我一样提供一个完整的示例，以及完整的 cuda-gdb 会话输出。
所以，我想知道：在 __device__
内核中，uarray
是共享的吗？
是的，当您以这种方式将指向共享内存的指针传递给设备函数时，它仍然指向共享内存中的相同位置
为了回答下面让我困惑的问题，我选择展示一个简单的例子：
$ cat t249.cu
#include <stdio.h>

#define SSIZE 256

__device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){  // returns array_sh offset by params bytes; pure pointer arithmetic, nothing is dereferenced
    return array_sh + params;
}

__global__ void mykernel(){
  __shared__ unsigned char myshared[SSIZE];  // demo data: shared-memory byte array whose address is handed to dev_kernel below
  __shared__ unsigned char *u_array;         // the pointer variable itself is placed in shared memory; its target is assigned below
  for (int i = 0; i< SSIZE; i++)
    myshared[i] = (unsigned char) i;         // element i holds the value i, so the printed values are easy to check
  unsigned char *loc = dev_kernel(myshared, 5);
  u_array = loc;
  printf("val = %d\n", *loc);
  printf("val = %d\n", *u_array);            // u_array holds the same address as loc, so both printfs read myshared[5]
}

// Launches mykernel with one block of one thread, waits for it to finish and
// reports any CUDA error on stderr. Returns 0 on success, 1 on failure.
int main(){

  mykernel<<<1,1>>>();
  // A kernel launch has no return value; launch failures (for example a
  // binary built for an architecture the installed GPU cannot run) are only
  // visible through cudaGetLastError().
  cudaError_t err = cudaGetLastError();
  if (err == cudaSuccess) {
    // Synchronizing surfaces faults raised while the kernel was running and
    // flushes the device-side printf buffer before the process exits.
    err = cudaDeviceSynchronize();
  }
  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    return 1;
  }
  return 0;
}
$ nvcc -arch=sm_20 -g -G -o t249 t249.cu
$ cuda-gdb ./t249
NVIDIA (R) CUDA Debugger
5.5 release
....
Reading symbols from /home/user2/misc/t249...done.
(cuda-gdb) break mykernel
Breakpoint 1 at 0x4025dc: file t249.cu, line 9.
(cuda-gdb) run
Starting program: /home/user2/misc/t249
[Thread debugging using libthread_db enabled]

Breakpoint 1, mykernel () at t249.cu:9
9       __global__ void mykernel(){
(cuda-gdb) break 14
Breakpoint 2 at 0x4025e1: file t249.cu, line 14.
(cuda-gdb) continue
Continuing.
[New Thread 0x7ffff725a700 (LWP 26184)]
[Context Create of context 0x67e360 on Device 0]
[Launch of CUDA Kernel 0 (mykernel<<<(1,1,1),(1,1,1)>>>) on Device 0]
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 2, warp 0, lane 0]

Breakpoint 1, mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:12
12        for (int i = 0; i< SSIZE; i++)
(cuda-gdb) continue
Continuing.

Breakpoint 2, mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:14
14        unsigned char *loc = dev_kernel(myshared, 5);
(cuda-gdb) print &(myshared[0])
$1 = (@shared unsigned char *) 0x8 ""
       ^
       |
     cuda-gdb is telling you that this pointer is defined in a __shared__ statement, and therefore its storage is implicit and it is unmodifiable.

(cuda-gdb) print &(u_array)
$2 = (@generic unsigned char * @shared *) 0x0
       ^                          ^
       |                          u_array is stored in shared memory.
      u_array is a generic pointer, meaning it can point to anything.  

(cuda-gdb) step
dev_kernel(unsigned char * @generic, int) (array_sh=0x1000008 "", params=5)
    at t249.cu:6
6           return array_sh + params;
(cuda-gdb) print array_sh
$3 = (@generic unsigned char * @register) 0x1000008 ""
          ^                      ^
          |                    array_sh is stored in a register.
         array_sh is a generic pointer, it can point to anything.

(cuda-gdb) print u_array
No symbol "u_array" in current context.
 (note that I can't access u_array from inside the __device__ function, so I don't understand your comment there.)

(cuda-gdb) step
mykernel<<<(1,1,1),(1,1,1)>>> () at t249.cu:15
15        u_array = loc;
(cuda-gdb) step
16        printf("val = %d\n", *loc);
(cuda-gdb) print u_array
$4 = (
    @generic unsigned char * @shared) 0x100000d ......
       ^                         ^
       |                       u_array is stored in shared memory
     u_array is a generic pointer, it can point to anything
(cuda-gdb)

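$ cat t249.cu
#include <stdio.h>
#define SSIZE 256
__device__ unsigned char* dev_kernel(unsigned char* array_sh, int params){
    return array_sh + params;
}
__global__ void mykernel(){
  __shared__ unsigned char myshared[SSIZE];
  __shared__ unsigned char *u_array;
  for (int i = 0; i< SSIZE; i++)
  ……（此处被截断，完整代码见上文）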