Linux CUDA编程:编译错误

Linux CUDA编程:编译错误,linux,cuda,parallel-processing,nvidia,Linux,Cuda,Parallel Processing,Nvidia,我正在制作一个CUDA程序,它实现了对N个数字进行的数据并行前缀和计算。我的代码还应该使用随机数生成器在主机上生成数字。然而,我似乎总是遇到一个无法识别的标记,并期望在试图编译时在int main的结束括号上出现声明错误。我正在Linux上运行代码 #include <stdio.h> #include <cuda.h> #include <stdlib.h> #include <math.h> __global__ void gpu_cal(

我正在制作一个CUDA程序,它实现了对N个数字进行的数据并行前缀和计算。我的代码还应该使用随机数生成器在主机上生成数字。然而,我似乎总是遇到一个无法识别的标记,并期望在试图编译时在int main的结束括号上出现声明错误。我正在Linux上运行代码

#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>
#include <math.h>


__global__ void gpu_cal(int *a,int i, int n) {
        int tid = blockIdx.x * blockDim.x + threadIdx.x;
        if(tid>=i && tid < n) {
                    a[tid] = a[tid]+a[tid-i];
        }
}


int main(void)
{         
        int key;
        int *dev_a;
        int N=10;//size of 1D array
        int B=1;//blocks in the grid
        int T=10;//threads in a block

        do{

                    printf ("Some limitations:\n");
                    printf ("            Maximum number of threads per block = 1024\n");
                    printf ("            Maximum sizes of x-dimension of thread block = 1024\n");
                    printf ("            Maximum size of each dimension of grid of thread blocks = 65535\n");
                    printf ("            N<=B*T\n");

                    do{
                                printf("Enter size of array in one dimension, currently %d\n",N);
                                scanf("%d",&N);
                                printf("Enter size of blocks in the grid, currently %d\n",B);
                                scanf("%d",&B);
                                printf("Enter size of threads in a block, currently %d\n",T);
                                scanf("%d",&T);

                                if(N>B*T)
                                            printf("N>B*T, this will result in an incorrect result generated by GPU, please try again\n");
                                if(T>1024)
                                            printf("T>1024, this will result in an incorrect result generated by GPU, please try again\n");
                      }while((N>B*T)||(T>1024));

                    cudaEvent_t start, stop;                       // using cuda events to measure time
                    float elapsed_time_ms1, elapsed_time_ms3;

                    int a[N],gpu_result[N];//for result generated by GPU
                    int cpu_result[N];//CPU result


                    cudaMalloc((void**)&dev_a,N * sizeof(int));//allocate memory on GPU
                    int i,j;

                    srand(1); //initialize random number generator
                    for (i=0; i < N; i++) // load array with some numbers
                    a[i] = (int)rand() ;

                    cudaMemcpy(dev_a, a , N*sizeof(int),cudaMemcpyHostToDevice);//load data from host to device

                    cudaEventCreate(&start);                    // instrument code to measure start time
                    cudaEventCreate(&stop);

                    cudaEventRecord(start, 0);

                    //GPU computation
                    for(j=0;j<log(N)/log(2);j++){

                    gpu_cal<<<B,T>>>(dev_a,pow(2,j),N);

                    cudaThreadSynchronize();

                    }

                    cudaMemcpy(gpu_result,dev_a,N*sizeof(int),cudaMemcpyDeviceToHost);

                    cudaEventRecord(stop, 0);     // instrument code to measue end time
                    cudaEventSynchronize(stop);
                    cudaEventElapsedTime(&elapsed_time_ms1, start, stop );

                    printf("\n\n\nTime to calculate results on GPU: %f ms.\n", elapsed_time_ms1);  // print out execution time

                    //CPU computation
                    cudaEventRecord(start, 0);     

                    for(i=0;i<N;i++)
                    {
                                cpu_result[i]=0;
                                for(j=0;j<=i;j++)
                                {
                                            cpu_result[i]=cpu_result[i]+a[j];
                                }
                    }

                    cudaEventRecord(stop, 0);     // instrument code to measue end time
                    cudaEventSynchronize(stop);
                    cudaEventElapsedTime(&elapsed_time_ms3, start, stop );

                    printf("Time to calculate results on CPU: %f ms.\n\n", elapsed_time_ms3);  // print out execution time


        //Error check
                    for(i=0;i < N;i++) {
                                if (gpu_result[i] != cpu_result[i] ) {
                                            printf("ERROR!!! CPU and GPU create different answers\n");
                                            break;
                                }
                    }
        //Calculate speedup

                    printf("Speedup on GPU compared to CPU= %f\n", (float) elapsed_time_ms3 / (float) elapsed_time_ms1);

                    printf("\nN=%d",N);
                    printf("\nB=%d",B);
                    printf("\nT=%d",T);

                    printf("\n\n\nEnter '1' to repeat, or other integer to terminate\n");
                    scanf("%d",&key);

        }while(key == 1);

        cudaFree(dev_a);//deallocation

        return 0;
}​

代码中最后一个}是Unicode字符。如果删除整行并重新键入},错误将消失。

代码中有两个编译错误

首先,最后一个结束括号是unicode字符,因此您应该将代码重新保存为unicode,或者删除并重写最后一个结束括号

第二,在此行使用的int类型变量N-int a[N],gpu_result[N]//对于GPU生成的结果

声明为int类型,但在C或C++编译器中不允许,因此您应该将N声明作为const int n/<