在C语言中使用OpenMP时，在while循环内使用parallel for_C_Loops_Parallel Processing_Openmp

在C语言中使用OpenMP时，在while循环内使用parallel for

c loops parallel-processing

在C上使用OpenMP时在a中使用并行,c,loops,parallel-processing,openmp,C,Loops,Parallel Processing,Openmp,我试着在一段时间内做一个类似的事情，比如： while(!End){ for(...;...;...) // the parallel for ... // serial code } for循环是while循环的唯一并行部分。如果我这样做，我会有很多开销： cycles = 0; while(!End){ // 1k Million iterations aprox #pragma omp parallel for for(i=0;i<N;i+

我想在 while 循环内部执行一个 parallel for，类似这样：

while(!End){
    for(...;...;...) // the parallel for

    ...
    // serial code
}

for循环是while循环的唯一并行部分。如果我这样做，我会有很多开销：

// Question's version: the OpenMP parallel region is entered and left on
// every pass of the outer while loop.
// NOTE(review): End, N, i, time, wbusy and wfinished are declared outside
// this fragment; their types are not shown here.
cycles = 0;
while(!End){ // roughly 1e9 iterations
    // Combined construct: opens a parallel region and shares the loop's
    // iterations among the threads; the region ends (with its implied
    // barrier) right after the loop.
    #pragma omp parallel for
    for(i=0;i<N;i++) // the parallel loop, roughly 256 iterations
        // Only entries whose time stamp equals the current cycle are touched.
        if(time[i] == cycles){
           // A busy entry is switched to "not busy" and flagged as finished.
           if (wbusy[i]){
               wbusy[i] = 0;
               wfinished[i] = 1;
           }
        }


    // serial code (runs outside the parallel region)
    ++cycles;    

}

cycles = 0;
while(!End){ // 1k Million iterations aprox
    #pragma omp parallel for
    for(i=0;i<N;i++) // ……（此处代码在抓取时被截断，完整代码见上文）

因此，通常不必太担心把并行区域放进循环里，因为现代 OpenMP 实现在复用线程组（thread team）等方面非常高效；只要循环中有足够多的工作，就没有问题。但是在这里，外层循环约 1e9 次，内层循环约 256 次，而且每次迭代完成的工作量非常小——开销可能与实际完成的工作量相当，甚至更大，性能将受到影响。
所以这两者之间会有明显的区别：
// First of the two variants being compared: the parallel region is
// entered and left on every pass of the outer while loop.
// NOTE(review): End, N, i, time, wbusy and wfinished are declared outside
// this fragment; their types are not shown here.
cycles = 0;
while(!End){ // roughly 1e9 iterations
    // Combined construct: opens a parallel region and shares the loop's
    // iterations among the threads; the region ends right after the loop.
    #pragma omp parallel for
    for(i=0;i<N;i++) // the parallel loop, roughly 256 iterations
        // Only entries whose time stamp equals the current cycle are touched.
        if(time[i] == cycles){
           // A busy entry is switched to "not busy" and flagged as finished.
           if (wbusy[i]){
               wbusy[i] = 0;
               wfinished[i] = 1;
           }
        } 

    // serial code (runs outside the parallel region)
    ++cycles;    
}

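cycles = 0;
while(!End){ // 1k Million iterations aprox
    #pragma omp parallel for
    for(i=0;i<N;i++) // ……（此处代码在抓取时被截断，完整代码见下文）

int main(int argc, char **argv) {
    const int n=256;
    const long int niters = 100000000l;
    long int time[n];
    int wbusy[n];
    int wfinished[n];
    for (int i=0; i<n; i++) // ……（此处代码在抓取时被截断，完整代码见下文）

评论：
- 这可能有很多原因。虽然人们可以提出各种可能性，但如果没有更多细节，任何人都无法给出明确的答案（例如：串行代码中做了什么、循环是什么样子、两者之间存在哪些数据依赖）。提供一个最小可复现示例（MCVE）会让人们更容易帮助您。
- 已编辑，补充了更多细节。
- 有时，把 while 循环替换为 for 循环是有意义的：以额外计算为代价，换取更多（更简单的）并行化，并减少总体计算时间。根据您向我们展示的内容，我无法判断这是否适用于您的程序。
- 我认为这对我的程序来说没有意义。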
// Second variant: the parallel region is opened once, around the whole
// while loop, so every thread of the team executes the while loop itself.
// NOTE(review): End, N, i, time, wbusy and wfinished are declared outside
// this fragment. End is read by every thread in the loop condition; where
// it is written is not shown -- confirm it is only updated in a
// synchronized place (e.g. inside the single section).
cycles = 0;
#pragma omp parallel
while(!End){ // roughly 1e9 iterations
    // Worksharing loop inside the already-open region: the iterations are
    // divided among the existing threads. Without a nowait clause the
    // construct ends with an implicit barrier.
    #pragma omp for
    for(i=0;i<N;i++) // the parallel loop, roughly 256 iterations
        // Only entries whose time stamp equals the current cycle are touched.
        if(time[i] == cycles){
           // A busy entry is switched to "not busy" and flagged as finished.
           if (wbusy[i]){
               wbusy[i] = 0;
               wfinished[i] = 1;
           }
        } 

    // serial code: executed by exactly one thread; the other threads wait
    // at the implicit barrier that closes the single construct, so cycles
    // is incremented once per pass.
    #pragma omp single 
    {
      ++cycles;    
    }
}

#include <stdio.h>
#include <stdlib.h>

/* Pairs the cycle at which a task is due with the task's array index. */
struct tasktime_t {
    long int time;  /* cycle number the task is compared against */
    int task;       /* index of the task in the per-task arrays */
};

/*
 * qsort() comparator: orders struct tasktime_t elements by ascending time.
 *
 * a, b: pointers to the two struct tasktime_t elements being compared.
 * Returns a negative value, zero, or a positive value when a's time is
 * less than, equal to, or greater than b's time.
 *
 * The result is built from two comparisons instead of a subtraction:
 * a long difference narrowed to int can change sign or collapse to 0 when
 * the times are far apart, and the subtraction itself can overflow.
 */
int stime_compare(const void *a, const void *b) {
    const struct tasktime_t *lhs = a;
    const struct tasktime_t *rhs = b;

    return (lhs->time > rhs->time) - (lhs->time < rhs->time);
}

/*
 * Serial demonstration of the event-driven variant: rather than scanning
 * every task on every cycle, the tasks are sorted by their due cycle and a
 * single cursor walks the sorted list while the cycle counter advances.
 *
 * argc, argv: unused.
 * Returns 0.
 */
int main(int argc, char **argv) {
    const int n=256;
    const long int niters = 100000000l;
    long int time[n];
    int wbusy[n];
    int wfinished[n];
    struct tasktime_t stimes[n];

    /* Give every task a random due cycle, mark it busy and not finished,
     * and record the (due cycle, task index) pair for sorting. */
    for (int idx = 0; idx < n; ++idx) {
        time[idx] = rand() % niters;
        wbusy[idx] = 1;
        wfinished[idx] = 0;

        stimes[idx].time = time[idx];
        stimes[idx].task = idx;
    }

    /* Ascending by due cycle, so tasks can be consumed front to back. */
    qsort(stimes, n, sizeof(struct tasktime_t), stime_compare);

    int pending = 0;  /* position of the first not-yet-handled sorted entry */
    for (long int tick = 0; tick < niters; ++tick) {
        /* Handle every entry whose due cycle is the current tick. */
        for (; pending < n && stimes[pending].time == tick; ++pending) {
            const int task = stimes[pending].task;

            if (!wbusy[task]) {
                continue;
            }
            wbusy[task] = 0;
            wfinished[task] = 1;
        }
    }

    return 0;
}