Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/redis/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Arrays CUDA矩阵问题_Arrays_Matrix_Cuda - Fatal编程技术网

Arrays CUDA矩阵问题

Arrays CUDA矩阵问题,arrays,matrix,cuda,Arrays,Matrix,Cuda,我是CUDA的新手,我在尝试创建一个东西时遇到了很多问题。问题如下:我有一个方阵(现在它是5x5,但它会更大,比如1k x 1k),这个矩阵充满了随机数,然后我把这个矩阵传递给设备,它将在那里做一些工作(现在它只应用一些阈值)。代码如下: #define N 3 #define MINTHRESHOLD 100 #define MAXTHRESHOLD 200 #define THREADS 128 __global__ void applyThresh(int *d_base, int *d

我是CUDA的新手,我在尝试创建一个东西时遇到了很多问题。问题如下:我有一个方阵(现在它是5x5,但它会更大,比如1k x 1k),这个矩阵充满了随机数,然后我把这个矩阵传递给设备,它将在那里做一些工作(现在它只应用一些阈值)。代码如下:

#define N 3
#define MINTHRESHOLD 100
#define MAXTHRESHOLD 200
#define THREADS 128

__global__ void applyThresh(int *d_base, int *d_thresh) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    int stride = blockDim.x * gridDim.x;

    while(tid < N) {
        if(d_base[tid] > MAXTHRESHOLD) {
            d_thresh[tid] = MAXTHRESHOLD;
        } else if(d_base[tid] < MINTHRESHOLD) {
            d_thresh[tid] = MINTHRESHOLD;
        } else {
            d_thresh[tid] = d_base[tid];
        }
        tid += stride;
    }
}

int main( void ) {
    cudaError_t err;
        int *base, *thresh, *d_base, *d_thresh, i;

    base = (int*)malloc((N * N) * sizeof(int));
    thresh = (int*)malloc((N * N) * sizeof(int));

    err = cudaMalloc((void**)&d_base, (N * N) * sizeof(int));
    if(err != cudaSuccess) {printf("ERROR 1"); return -1;}
    err = cudaMalloc((void**)&d_thresh, (N * N) * sizeof(int));
    if(err != cudaSuccess) {printf("ERROR 2"); return -1;}


    for(i = 0; i < N * N; i++) {
        base[i] = rand() % 256;
        thresh[i] = 0;
    }

    err = cudaMemcpy(d_base, base, (N * N) * sizeof(int), cudaMemcpyHostToDevice);
    if(err != cudaSuccess){printf("ERROR 3"); return -1;}

    applyThresh<<<(N + THREADS - 1)/THREADS , THREADS>>>(d_base, d_thresh);

    err = cudaMemcpy(thresh, d_thresh, (N * N) * sizeof(int), cudaMemcpyDeviceToHost);
    if(err != cudaSuccess) {printf("ERROR 4"); return -1;}

    for(i = 0; i < N *N; i++) {
        printf("%d -> ", base[i]);  
        printf("%d\n", thresh[i]);
    }

    free(base);
    free(thresh);
    cudaFree(d_base);
    cudaFree(d_thresh);

    return 0;
}

我真的无法理解这个问题…我想这可能是由我用来访问矩阵的索引引起的,但我真的找不到解决方案:(

在内核中,使用

while(tid < N) {
while(tid

您只处理数组的第一个
N
元素。将其更改为
N*N

简直不敢相信这是一件如此愚蠢的事情……谢谢,它成功了;)
while(tid < N) {