Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/c/66.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ POSIX线程在C中不产生加速_C++_C_Multithreading_Pthreads - Fatal编程技术网

C++ POSIX线程在C中不产生加速

C++ POSIX线程在C中不产生加速,c++,c,multithreading,pthreads,C++,C,Multithreading,Pthreads,我正在学习使用Pthreads进行并行处理。我有一个四核处理器。不幸的是,以下代码的并行化部分的运行速度大约是非并行化代码的5倍。我做错了什么?提前谢谢你的帮助 #include <stdio.h> #include <time.h> #include <pthread.h> #include <stdlib.h> #define NTHREADS 4 #define SIZE NTHREADS*10000000 struct params {

我正在学习使用Pthreads进行并行处理。我有一个四核处理器。不幸的是,以下代码的并行化部分的运行速度大约是非并行化代码的5倍。我做错了什么?提前谢谢你的帮助

#include <stdio.h>
#include <time.h>
#include <pthread.h>
#include <stdlib.h>
#define NTHREADS 4
#define SIZE NTHREADS*10000000

struct params {
  int * arr;
  int sum;
};

/* The worker function for the pthreads */
void * myFun (void * x){
  int i;
  struct params * b = (struct params *) x;
  for (i = 0; i < (int)(SIZE/NTHREADS); ++i){
    b->sum += b->arr[i];
  }
  return NULL;
}

/* unparallelized summing function*/
int arrSum(int * arr, int size){
  int sum = 0;
  for (int i = 0; i != size; ++i){
    sum += arr[i];
  }
  return sum;
}

int main(int argc, char * argv[]){
  clock_t begin, end;
  double runTime;
  int rc, i;
  int sum1, sum2 = 0;
  pthread_t threads[NTHREADS];

  /* create array to sum over */
  int * myArr = NULL;
  myArr = (int *) calloc(SIZE, sizeof(int));
  if (myArr == NULL){
    printf("problem allocating memory\n");
    return 1; 
  }
  for (int i = 0; i < SIZE; ++i){
    myArr[i] = 1;
  }

  /* create array of params structs to feed to threads */
  struct params p;
  p.sum = 0;
  struct params inputs[NTHREADS];
  for(i = 0; i != NTHREADS; ++i){
    p.arr = myArr + i*(int)(SIZE/NTHREADS);
    inputs[i] = p;
  }

  /* spawn the threads */
  begin = clock();
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_create(&threads[i], NULL, myFun, (void *) &inputs[i]);
  }

  /* wait for threads to finish */
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_join(threads[i], NULL);
  }
  end = clock();
  runTime = (double)(end - begin)/CLOCKS_PER_SEC;
  printf("Parallelized code run time: %f\n", runTime);

  /* run the unparallelized code */
  begin = clock();
  sum2 = arrSum(myArr, SIZE);
  end = clock();
  runTime = (double)(end - begin)/CLOCKS_PER_SEC;
  printf("Unparallelized code run time: %f\n", runTime);

  /* consolidate and print results from threads */
  for(i = 0; i != NTHREADS; ++i){
    sum1 += inputs[i].sum;
  }
  printf("sum1, sum2: %d, %d \n", sum1, sum2);

  free(myArr);

  /* be disappointed when my parallelized code showed no speedup */
  return 1;
}
#包括
#包括
#包括
#包括
#定义第4行
#定义大小为*10000000
结构参数{
int*arr;
整数和;
};
/*pthreads的辅助函数*/
void*myFun(void*x){
int i;
结构参数*b=(结构参数*)x;
对于(i=0;i<(int)(大小/n读数);+i){
b->sum+=b->arr[i];
}
返回NULL;
}
/*非并行求和函数*/
int arrSum(int*arr,int size){
整数和=0;
对于(int i=0;i!=size;++i){
总和+=arr[i];
}
回报金额;
}
int main(int argc,char*argv[]){
时钟开始,结束;
双运行时;
int rc,i;
int sum1,sum2=0;
pthread_t threads[n线程];
/*创建要求和的数组*/
int*myArr=NULL;
myArr=(int*)calloc(SIZE,sizeof(int));
if(myArr==NULL){
printf(“内存分配问题”);
返回1;
}
对于(int i=0;i
您缺少并行编程的一个重要方面

工作线程需要为每个进程创建一次,而不是为每个任务创建一次

创建和销毁线程需要时间

解决方案是使用线程池并将任务发送到线程池

我的建议是使用OpenMP,这大大简化了这项任务,并可与许多编译器配合使用

例如:

int sum = 0
#pragma omp for shared(sum)
 for(int i=0; i<SIZE; ++i)
 {
   #pragma omp atomic
   sum += myArr[i]
 }
int和=0
#用于共享的pragma omp(总和)

对于(int i=0;i而言,主要问题在于您正在使用。这是OpenMP标记与SO之间最常见的错误(如果频率列表在SO上有用,则应显示此错误)

获取墙时间的最简单方法是使用OpenMP中的函数:
omp\u get\u wtime()
。这在Linux和Windows上与GCC、ICC和MSVC一起工作(我假设Clang现在支持OpenMP 3.1)

当我将其与您的代码一起使用时,我会使用我的四核/八超线程i7 IVB系统:

Parallelized code run time: 0.048492
Unparallelized code run time: 0.115124
sum1, sum2: 400000000, 400000000

其他一些注释。您的计划很容易出错。您将每个线程的数组设置为

p.arr = myArr + i*(int)(SIZE/NTHREADS);
然后让每个线程在
(SIZE/NTHREADS)
上运行。对于
SIZE
NTHREADS
的某些值,这可能会导致错误的舍入结果

你应该把每条线都检查一遍

int start = ithread*SIZE/NTHREADS;
int finish = (ithreads+1)*SIZE/NTHREADS;
然后让每个线程指向数组的开头并执行以下操作

int sum = 0;
for (i = start; i < finish; ++i){
    sum += b->arr[i];
}
这是我使用的代码

//gcc -O3 -std=gnu99 t.c -lpthread -fopenmp
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#include <stdlib.h>
#include <omp.h>

#define NTHREADS 4
#define SIZE NTHREADS*100000000

struct params {
  int * arr;
  int sum;
};

/* The worker function for the pthreads */
void * myFun (void * x){
  int i;
  struct params * b = (struct params *) x;
  int sum = 0;
  for (i = 0; i < (int)(SIZE/NTHREADS); ++i){
    sum += b->arr[i];
  }
  b->sum = sum;
  return NULL;
}

/* unparallelized summing function*/
int arrSum(int * arr, int size){
  int sum = 0;
  for (int i = 0; i < size; ++i){
    sum += arr[i];
  }
  return sum;
}

int main(int argc, char * argv[]) {
  double runTime;
  int rc, i;
  int sum1, sum2 = 0;
  pthread_t threads[NTHREADS];

  /* create array to sum over */
  int * myArr = NULL;
  myArr = (int *) calloc(SIZE, sizeof(int));
  if (myArr == NULL){
    printf("problem allocating memory\n");
    return 1; 
  }
  for (int i = 0; i < SIZE; ++i){
    myArr[i] = 1;
  }

  /* create array of params structs to feed to threads */
  struct params p;
  p.sum = 0;
  struct params inputs[NTHREADS];
  for(i = 0; i < NTHREADS; ++i){
    p.arr = myArr + i*(int)(SIZE/NTHREADS);
    inputs[i] = p;
  }

  /* spawn the threads */
  runTime = -omp_get_wtime();  
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_create(&threads[i], NULL, myFun, (void *) &inputs[i]);
  }

  /* wait for threads to finish */
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_join(threads[i], NULL);
  }

  runTime += omp_get_wtime();  
  printf("Parallelized code run time: %f\n", runTime);

  /* run the unparallelized code */
  runTime = -omp_get_wtime();
  sum2 = arrSum(myArr, SIZE);
  runTime += omp_get_wtime();
  printf("Unparallelized code run time: %f\n", runTime);

  /* consolidate and print results from threads */
  for(i = 0; i != NTHREADS; ++i){
    sum1 += inputs[i].sum;
  }
  printf("sum1, sum2: %d, %d \n", sum1, sum2);

  free(myArr);

  /* be disappointed when my parallelized code showed no speedup */
  return 1;
}
//gcc-O3-std=gnu99 t.c-lpthread-fopenmp
#包括
#包括
#包括
#包括
#包括
#定义第4行
#定义大小为*100000000
结构参数{
int*arr;
整数和;
};
/*pthreads的辅助函数*/
void*myFun(void*x){
int i;
结构参数*b=(结构参数*)x;
整数和=0;
对于(i=0;i<(int)(大小/n读数);+i){
总和+=b->arr[i];
}
b->sum=sum;
返回NULL;
}
/*非并行求和函数*/
int arrSum(int*arr,int size){
整数和=0;
对于(int i=0;i“/Cord>有什么理由添加C++标签吗?”Hi-Gooal:两种语言之间有很大的差异。例如,在C中,你不应该把 Vult*/Cube >,而C++中你必须这样做。一般来说,你不应该在C++中这样写代码,所以我假设它是C.代码> B->和+= B-> ARR[i];//gcc -O3 -std=gnu99 t.c -lpthread -fopenmp
#include <stdio.h>
#include <time.h>
#include <pthread.h>
#include <stdlib.h>
#include <omp.h>

#define NTHREADS 4
#define SIZE NTHREADS*100000000

struct params {
  int * arr;
  int sum;
};

/* The worker function for the pthreads */
void * myFun (void * x){
  int i;
  struct params * b = (struct params *) x;
  int sum = 0;
  for (i = 0; i < (int)(SIZE/NTHREADS); ++i){
    sum += b->arr[i];
  }
  b->sum = sum;
  return NULL;
}

/* unparallelized summing function*/
int arrSum(int * arr, int size){
  int sum = 0;
  for (int i = 0; i < size; ++i){
    sum += arr[i];
  }
  return sum;
}

int main(int argc, char * argv[]) {
  double runTime;
  int rc, i;
  int sum1, sum2 = 0;
  pthread_t threads[NTHREADS];

  /* create array to sum over */
  int * myArr = NULL;
  myArr = (int *) calloc(SIZE, sizeof(int));
  if (myArr == NULL){
    printf("problem allocating memory\n");
    return 1; 
  }
  for (int i = 0; i < SIZE; ++i){
    myArr[i] = 1;
  }

  /* create array of params structs to feed to threads */
  struct params p;
  p.sum = 0;
  struct params inputs[NTHREADS];
  for(i = 0; i < NTHREADS; ++i){
    p.arr = myArr + i*(int)(SIZE/NTHREADS);
    inputs[i] = p;
  }

  /* spawn the threads */
  runTime = -omp_get_wtime();  
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_create(&threads[i], NULL, myFun, (void *) &inputs[i]);
  }

  /* wait for threads to finish */
  for(i = 0; i != NTHREADS; ++i){
    rc = pthread_join(threads[i], NULL);
  }

  runTime += omp_get_wtime();  
  printf("Parallelized code run time: %f\n", runTime);

  /* run the unparallelized code */
  runTime = -omp_get_wtime();
  sum2 = arrSum(myArr, SIZE);
  runTime += omp_get_wtime();
  printf("Unparallelized code run time: %f\n", runTime);

  /* consolidate and print results from threads */
  for(i = 0; i != NTHREADS; ++i){
    sum1 += inputs[i].sum;
  }
  printf("sum1, sum2: %d, %d \n", sum1, sum2);

  free(myArr);

  /* be disappointed when my parallelized code showed no speedup */
  return 1;
}