C++ 从 CUDA 统一内存打印在设备上生成的数组不起作用
我尝试在 CUDA 设备上生成一些哈希值,并在主机上打印它们。但是,在主机上调用 printf 时,我在地址 0x000000000120002f 处遇到了读取访问错误。相关代码如下所示:C++ 从 CUDA 统一内存打印在设备上生成的数组不起作用,c++,arrays,cuda,unified-memory,C++,Arrays,Cuda,Unified Memory,我尝试在 CUDA 设备上生成一些哈希值,并在主机上打印它们。但是,在主机上调用 printf 时,我在地址 0x000000000120002f 处遇到了读取访问错误。相关代码如下所示: int main() { const int block_size = 2; const int num_blocks = 256; const int N = block_size * num_blocks; unsigned char** hashes; cudaMallocManaged(&hashes, N *
// Host entry point. Only the setup part is shown here; the kernel launch and
// the printing loop appear further down in the host snippet.
int main() {
// Launch configuration: 256 blocks of 2 threads each, N = 512 threads in total.
const int block_size = 2;
const int num_blocks = 256;
const int N = block_size * num_blocks;
// Declared as a pointer-to-pointer, i.e. a table of N `unsigned char*` entries.
unsigned char** hashes;
// NOTE(review): the managed allocation is sized as N * 32 bytes of raw hash
// data, but it is accessed through `unsigned char**`. No separate storage is
// allocated for the 32-byte hashes themselves, and no hashes[i] is pointed at
// host-accessible memory in this snippet.
// NOTE(review): the return values of the CUDA calls below are not checked.
cudaMallocManaged(&hashes, N * (32 * sizeof(unsigned char)));
// Fills the whole allocation with zero bytes.
cudaMemset(hashes, 0, N * (32 * sizeof(unsigned char)));
在设备端:
// Kernel: each thread finalizes a hash into `sha` and stores the pointer `sha`
// in its own slot of the pointer table `dhashes`.
//
// dhashes: table of `unsigned char*`, indexed by the global thread index.
//
// NOTE(review): `ctx` and `sha` are not declared in the visible snippet; the
// code that sets them up has been elided.
// NOTE(review): there is no bounds guard on `idx`, so the launch must not
// create more threads than `dhashes` has entries.
__global__ void sha256_kernel(unsigned char **dhashes){
// Flat global thread index for a 1D launch.
int idx = blockIdx.x * blockDim.x + threadIdx.x;
sha256_final(&ctx, sha);
// Stores the address of `sha`, not a copy of its bytes.
// NOTE(review): if `sha` is a per-thread local array, this address is not
// meaningful once the kernel has finished or when read from the host -- TODO confirm.
dhashes[idx] = sha;
// printf("%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x\n", dhashes[idx][0], dhashes[idx][1], dhashes[idx][2], dhashes[idx][3], dhashes[idx][4], dhashes[idx][5], dhashes[idx][6], dhashes[idx][7], dhashes[idx][8], dhashes[idx][9], dhashes[idx][10], dhashes[idx][11], dhashes[idx][12], dhashes[idx][13], dhashes[idx][14], dhashes[idx][15],
// dhashes[idx][16], dhashes[idx][17], dhashes[idx][18], dhashes[idx][19], dhashes[idx][20], dhashes[idx][21], dhashes[idx][22], dhashes[idx][23], dhashes[idx][24], dhashes[idx][25], dhashes[idx][26], dhashes[idx][27], dhashes[idx][28], dhashes[idx][29], dhashes[idx][30], dhashes[idx][31]);
// printing here is correct
}
回到主机端:
// Launch one thread per hash: num_blocks blocks of block_size threads.
// NOTE(review): no cudaGetLastError() check follows the launch, and the return
// value of cudaDeviceSynchronize() is ignored.
sha256_kernel << < num_blocks, block_size>> > (hashes);
// Wait for the kernel to finish before the host reads `hashes`.
cudaDeviceSynchronize();
// Print each hash as 32 two-digit hex bytes. hashes[i] is dereferenced on the
// host, so it must hold an address the host can read.
for (int i = 0; i < N; i++) {
printf("%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x\n", hashes[i][0], hashes[i][1], hashes[i][2], hashes[i][3], hashes[i][4], hashes[i][5], hashes[i][6], hashes[i][7], hashes[i][8], hashes[i][9], hashes[i][10], hashes[i][11], hashes[i][12], hashes[i][13], hashes[i][14], hashes[i][15],
hashes[i][16], hashes[i][17], hashes[i][18], hashes[i][19], hashes[i][20], hashes[i][21], hashes[i][22], hashes[i][23], hashes[i][24], hashes[i][25], hashes[i][26], hashes[i][27], hashes[i][28], hashes[i][29], hashes[i][30], hashes[i][31]);
}//printing here doesn't work
sha256_kernel<<<num_blocks, block_size>>>(hashes);
cudaDeviceSynchronize();
for (int i = 0; i < N; i++) { /* 与上面相同的 printf 调用 */ }
设备端的打印结果似乎是正确的,但当我尝试在主机上打印哈希值时,却出现读取错误,这是为什么?回答:用于保存哈希值的内存分配不正确。如果要使用一个指针数组,让其中每个指针指向对应哈希值所在的内存,就需要分别为指针数组和哈希数据本身分配内存,如下所示:
// Pointer table: hashes[i] points at the 32-byte slot reserved for hash i.
unsigned char** hashes;
// Flat backing store holding all N hashes contiguously, 32 bytes each.
unsigned char* buff;
// Two separate managed allocations: one for the N pointers, one for the bytes.
cudaMallocManaged(&hashes, N * sizeof(unsigned char*));
cudaMallocManaged(&buff, N * (32 * sizeof(unsigned char)));
cudaMemset(buff, 0, N * (32 * sizeof(unsigned char)));
// cudaMemset may still be in flight on the device; wait for it before the host
// touches managed memory (required on devices without concurrent managed access).
cudaDeviceSynchronize();
// The loop index is declared locally; the original snippet used an undeclared `i`.
// NOTE(review): a kernel using this table must write its result bytes INTO
// hashes[idx] rather than overwrite the pointer stored there.
for (int i = 0; i < N; i++) hashes[i] = &buff[i * 32];
unsigned char** hashes;
unsigned char* buff;
cudaMallocManaged(&hashes, N * sizeof(unsigned char*));
cudaMallocManaged(&buff, N * (32 * sizeof(unsigned char)));
cudaMemset(buff, 0, N * (32 * sizeof(unsigned char)));
for(i=0; i<N; i++) hashes[i] = &buff[i*32]; 评论:sha 是什么?
__device__ void sha256_final(SHA256_CTX *ctx, BYTE hash[])。指向指针的指针不是二维数组,不能像二维数组那样进行分配。您的内存分配是对拼写错误的断章取义。在这种情况下,我应该补充一句标准免责声明:“本文是在刚睡醒不久、还没喝早晨的浓缩咖啡时,没戴眼镜用手机写的”。