C/OpenMP:澄清“区域不能紧密嵌套在 'parallel for' 区域内”这一错误
我试图理解OpenMP中的缩减是如何工作的。 我有一个简单的代码,涉及到减少C 澄清「;区域不能紧密嵌套在';与x27平行;地区“;,c,multithreading,performance,parallel-processing,openmp,C,Multithreading,Performance,Parallel Processing,Openmp,我试图理解OpenMP中的缩减是如何工作的。 我有一个简单的代码,涉及到减少 #include <stdio.h> #include <stdlib.h> #include <omp.h> int N = 100; int M = 200; int O = 300; double r2() { return ((double) rand() / (double) RAND_MAX); } int main(void) { double S
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
int N = 100;
int M = 200;
int O = 300;
/*
 * Draw one pseudo-random sample scaled into the closed interval [0, 1].
 * The value comes from rand(), so this helper shares rand()'s hidden state.
 */
double r2() {
    const double sample = (double) rand();
    const double upper = (double) RAND_MAX;
    return sample / upper;
}
/*
 * Fill K with the running sum S of (r2() - 0.25) samples.
 *
 * For every (m, n) cell, O further samples are added to S and the running
 * total is stored in K[m * N + n]. This mirrors the sequential loop nest,
 * up to floating-point summation order and which sample lands in which
 * iteration.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when K cannot be allocated.
 */
int main(void) {
    double S = 0;
    double *K = calloc((size_t) M * (size_t) N, sizeof *K);
    if (K == NULL) {
        perror("calloc");
        return EXIT_FAILURE;
    }

    /*
     * S carries over from one (m, n) cell to the next, so the two outer
     * loops stay sequential. Only the innermost loop is parallelised: a
     * worksharing `omp for` may not be closely nested inside another
     * worksharing region such as `parallel for`, so the combined
     * `parallel for reduction` construct is placed directly on that loop.
     */
    for (int m = 0; m < M; m++) {
        for (int n = 0; n < N; n++) {
            #pragma omp parallel for reduction(+:S)
            for (int o = 0; o < O; o++) {
                double sample;
                /*
                 * r2() wraps rand(), which is not thread-safe, so the call
                 * is serialised. A per-thread generator (e.g. POSIX rand_r
                 * with a private seed) would avoid this serialisation.
                 */
                #pragma omp critical(r2_rng)
                sample = r2();
                S += sample - 0.25;
            }
            K[m * N + n] = S;
        }
    }

    free(K);
    return EXIT_SUCCESS;
}
这是对嵌套循环进行并行化的正确方法吗?
编辑: 对原来的问题做一个改动。我希望并行代码和顺序代码具有相同的结果
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += o;
}
K[m * N + n] = S;
}
}
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) { … }
关于 rand():
摘自手册页:
函数rand()不是可重入的或线程安全的,因为它使用在每次调用时修改的隐藏状态
对于多线程代码,请使用(例如)rand_r
我试图理解OpenMP中的缩减是如何工作的
为了便于论证,让我们假设r2()
将始终产生相同的值。
当一个代码有多个线程同时修改某个变量时,代码如下所示:
double S = 0;
#pragma omp parallel
for (int o = 0; o < O; o++) {
S += r2() - 0.25;
}
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += r2() - 0.25;
}
但是,在您的完整代码中
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += r2() - 0.25;
}
K[m * N + n] = S;
}
}
没有竞争条件,因为变量 S 是私有的。此外,在这种情况下,由于两个最外层循环的迭代在线程之间划分,每个线程都有一组唯一的 (m, n) 迭代,因此每个线程在访问 K[m * N + n] 时,访问的都是数组 K 中唯一的位置。
但问题是,将两个外部循环并行化的版本不会产生与其顺序对应的版本相同的结果。这是因为
for (int o = 0; o < O; o++) {
S += r2() - 0.25;
}
K[m * N + n] = S;
如果您关心 S 的值,那么所有这些(当然)都很重要;不过也有人可能会认为,由于您使用的是生成随机值的函数,保持 S 值的累加顺序并不重要。
带有线程安全随机生成器的版本
第1版
#pragma omp parallel
{
unsigned int myseed = omp_get_thread_num();
#pragma omp for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
for (int o = 0; o < O; o++) {
double r = ((double) rand_r(&myseed) / (double) RAND_MAX);
S += r - 0.25;
}
K[m * N + n] = S;
}
}
#pragma omp parallel
{
unsigned int myseed = omp_get_thread_num();
#pragma omp for collapse(2)
for (int m = 0; m < M; m++) { … }
}
第2版
/* Version 2: the m and n loops run sequentially; a parallel region is
 * opened once per (m, n) pair and only the o loop is shared among threads. */
double *K = (double*) calloc(M * N, sizeof(double));
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp parallel
{
/* Per-thread rand_r state, seeded with the thread number. It is
 * re-initialised on every (m, n) pass because it is declared inside
 * the parallel region. */
unsigned int myseed = omp_get_thread_num();
/* Worksharing loop: the threads' partial sums are combined into S with +. */
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
double r = ((double) rand_r(&myseed) / (double) RAND_MAX);
S += r - 0.25;
}
}
/* Stored after the parallel region ends; S is not reset between passes
 * in this snippet, so K receives a running total. */
K[m * N + n] = S;
}
}
double *K = (double*) calloc(M * N, sizeof(double));
for (int m = 0; m < M; m++) { … }
编辑:
对原来的问题做一个改动。我希望并行代码和顺序代码具有相同的结果
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += o;
}
K[m * N + n] = S;
}
}
而不是:
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += o;
}
K[m * N + n] = S;
}
}
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) { … }
for (int m = 0; m < M; m++) { … }
double *K = (double*) calloc(M * N, sizeof(double));
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp parallel
{
unsigned int myseed = omp_get_thread_num();
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
double r = ((double) rand_r(&myseed) / (double) RAND_MAX);
S += r - 0.25;
}
}
K[m * N + n] = S;
}
}
#pragma omp parallel for collapse(2)
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
#pragma omp for reduction(+:S)
for (int o = 0; o < O; o++) {
S += o;
}
K[m * N + n] = S;
}
}
/* Only the innermost loop is parallelised; the m and n loops run sequentially. */
for (int m = 0; m < M; m++) {
for (int n = 0; n < N; n++) {
/* Combined parallel + worksharing construct with a + reduction on S. */
#pragma omp parallel for reduction(+:S)
for (int o = 0; o < O; o++) {
S += o;
}
/* S is not reset between (m, n) passes here, so K stores a running total. */
K[m * N + n] = S;
}
}