C线程分段错误

C线程分段错误,c,pthreads,segmentation-fault,C,Pthreads,Segmentation Fault,所以我试图用c&pthreads制作一个GPGPU仿真器,但是遇到了一个相当奇怪的问题,我不知道为什么会出现这个问题。代码如下: #include <stdlib.h> #include <stdio.h> #include <pthread.h> #include <assert.h> // simplifies malloc #define MALLOC(a) (a *)malloc(sizeof(a)) // Index of x/y c

所以我试图用c&pthreads制作一个GPGPU仿真器,但是遇到了一个相当奇怪的问题,我不知道为什么会出现这个问题。代码如下:

#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <assert.h>

// simplifies malloc
#define MALLOC(a) (a *)malloc(sizeof(a))

// Index of x/y coordinate
#define x (0)
#define y (1)

// Defines size of a block
#define BLOCK_DIM_X (3)
#define BLOCK_DIM_Y (2)

// Defines size of the grid, i.e., how many blocks
#define GRID_DIM_X (5)
#define GRID_DIM_Y (7)

// Defines the number of threads in the grid
#define GRID_SIZE (BLOCK_DIM_X * BLOCK_DIM_Y * GRID_DIM_X * GRID_DIM_Y)

// execution environment for the kernel
typedef struct exec_env {
   int threadIdx[2];  // thread location 
   int blockIdx[2];
   int blockDim[2];
   int gridDim[2];

   float *A,*B;       // parameters for the thread
   float *C;
} exec_env;

// kernel 
void *kernel(void *arg)
{
    exec_env *env = (exec_env *) arg;

    // compute number of threads in a block
    int sz = env->blockDim[x] * env->blockDim[y]; 

    // compute the index of the first thread in the block
    int k = sz * (env->blockIdx[y]*env->gridDim[x] + env->blockIdx[x]);

    // compute the index of a thread inside a block
    k = k + env->threadIdx[y]*env->blockDim[x] + env->threadIdx[x]; 

    // check whether it is in range
    assert(k >= 0 && k < GRID_SIZE && "Wrong index computation"); 

    // print coordinates in block and grid and computed index
    /*printf("tx:%d ty:%d bx:%d by:%d idx:%d\n",env->threadIdx[x],
                                              env->threadIdx[y],
                                              env->blockIdx[x],
                                              env->blockIdx[y], k);
    */

    // retrieve two operands 
    float *A = &env->A[k];
    float *B = &env->B[k]; 
    printf("%f %f \n",*A, *B);
    // retrieve pointer to result 
    float *C = &env->C[k]; 

    // do actual computation here !!!

    // For assignment replace the following line with 
    // the code to do matrix addition and multiplication.
    *C = *A + *B; 

    // free execution environment (not needed anymore)
    free(env);

    return NULL;
}


// main function
int main(int argc, char **argv)
{
   float A[GRID_SIZE] = {-1}; 
   float B[GRID_SIZE] = {-1};
   float C[GRID_SIZE] = {-1}; 

   pthread_t threads[GRID_SIZE]; 

   int i=0, bx, by, tx, ty; 
   //Error location
   /*for (i = 0; i < GRID_SIZE;i++){
        A[i] = i;
        B[i] = i+1;
        printf("%f %f\n ", A[i], B[i]);
   }*/

   // Step 1: create execution environment for threads and create thread
   for (bx=0;bx<GRID_DIM_X;bx++) {
      for (by=0;by<GRID_DIM_Y;by++) {
         for (tx=0;tx<BLOCK_DIM_X;tx++) {
            for (ty=0;ty<BLOCK_DIM_Y;ty++) { 

               exec_env *e = MALLOC(exec_env);
               assert(e != NULL && "memory exhausted"); 

               e->threadIdx[x]=tx;
               e->threadIdx[y]=ty;

               e->blockIdx[x]=bx;
               e->blockIdx[y]=by; 

               e->blockDim[x]=BLOCK_DIM_X;
               e->blockDim[y]=BLOCK_DIM_Y;

               e->gridDim[x]=GRID_DIM_X;
               e->gridDim[y]=GRID_DIM_Y;

               // set parameters
               e->A = A; 
               e->B = B; 
               e->C = C;

               // create thread
               pthread_create(&threads[i++],NULL,kernel,(void *)e); 
            }
         }
      }
   }

   // Step 2: wait for completion of all threads
   for (i=0;i<GRID_SIZE;i++) { 
      pthread_join(threads[i], NULL); 
   }

   // Step 3: print result       
   for (i=0;i<GRID_SIZE;i++) { 
      printf("%f ",C[i]); 
   }
   printf("\n");

   return 0;
}

好的,这里的代码运行得很好,但只要我取消对分配A[I]=I和B[I]=I+1的循环的错误位置的注释,我就会被unix中的分段错误以及cygwin中C中的这些随机0抓拍到。我必须承认我的C语言基础很差,所以我很可能错过了一些东西。如果有人能告诉我们出了什么问题,我们将不胜感激。谢谢。

如果您对此进行评论,它会起作用,因为当4个嵌套循环开始时,我仍然是0

你有这个:

for (i = 0; i < GRID_SIZE;i++){
    A[i] = i;
    B[i] = i+1;
    printf("%f %f\n ", A[i], B[i]);
}

/* What value is `i` now ? */
因此pthread_create将尝试访问一些有趣的索引

pthread_create(&threads[i++],NULL,kernel,(void *)e);
                        ^