C 递归、全局变量和OpenMP_C_Recursion_Parallel Processing

C 递归、全局变量和OpenMP

c recursion parallel-processing

C 递归、全局变量和OpenMP,c,recursion,parallel-processing,C,Recursion,Parallel Processing,我遇到了一个调用自身的函数的问题在这种情况下，我不清楚openmp线程的正确方式 long *arrayofindex; int length, N; void gen(long index) { if(index == 0) { #pragma omp parallel for for(int a=0; a<N; ++a) { #pragma omp critical {

我遇到了一个与递归函数（即调用自身的函数）有关的问题：在这种情况下，我不清楚使用 OpenMP 线程的正确方式。

// Global scratch array shared by every call to gen(); main() allocates
// `length` elements for it.
long *arrayofindex; 
// length: number of index positions (gen() stops recursing at length-1).
// N: exclusive upper bound of the counter at each position.
int length, N;

// Recursively enumerates counter combinations, using the global
// `arrayofindex` as the per-position counter storage.
//
// index: the position handled by this call. Position 0 spreads its N
//        iterations over an OpenMP thread team; every deeper position runs
//        an ordinary loop. The deepest position (index == length-1) prints
//        its own counter value for each iteration.
//
// NOTE(review): `index` is never checked against `length` before
// arrayofindex[index] is accessed, so this appears to rely on length >= 2.
void gen(long index)
{
    if(index == 0)
    {
        // Top level: the N iterations are divided among the threads.
        #pragma omp parallel for
        for(int a=0; a<N; ++a)
        {
            // The entire iteration body sits in one unnamed critical region,
            // so only one thread at a time executes the recursion below.
            // All threads write to the same global arrayofindex.
            #pragma omp critical
            {
            gen(index+1);
            ++arrayofindex[index];
            }
        }
    }
    else
    {
        // The array slot itself is the loop counter for this position.
        for(arrayofindex[index]=0; arrayofindex[index]<N; ++arrayofindex[index])
        {
            if(index < length-1)
                gen(index+1);
            // Last position: print only this position's value, not the
            // whole combination.
            else printf("%ld\n", arrayofindex[index]);
        }
    }
}

// Entry point of the question's example: sets the problem size, allocates
// and zeroes the shared index array, then starts the recursion at position 0.
// Returns 0 on success, 1 if the array cannot be allocated.
int main(){
    length = 5;
    N = 4;

    // sizeof on the dereferenced pointer keeps the element size tied to the
    // declared type of arrayofindex.
    arrayofindex = malloc((size_t)length * sizeof *arrayofindex);
    if (!arrayofindex)
        return 1;   // gen() would otherwise dereference a null pointer

    // Zero every slot; the subscript must be the loop variable `i`.
    for(int i=0; i<length; ++i)
        arrayofindex[i] = 0;

    gen(0);

    free(arrayofindex);
    return 0;
}

long*arrayofindex；
int长度，N；
void gen（长索引）
{
如果（索引==0）
{
#pragma-omp并行
对于（int a=0；a

根据我对所给代码的理解，总体目标基本上是生成所有可以用含 `N` 个字母的字母表组成的、长度为 `length` 个字母的单词。对 `gen()` 的每次递归调用对应一个字母位置，因此每当控制流到达递归的底部时，`arrayofindex` 的前 `length` 个元素就代表一个单词的各个字母。

但应该很明显，并行运行的多个线程不能共用同一个 `arrayofindex`。每个线程都期望在到达递归底部时，能在 `arrayofindex` 中找到它沿着自己的递归路径设置的值——这正是该方法的基础。如果其他线程同时在修改 `arrayofindex`，那么各线程很可能得到由不同线程设置的值混杂在一起的结果。此外，您可能也得不到所期望的加速，因为线程需要同步对 `arrayofindex` 的访问。

注意：这个问题本身与递归无关。即使把代码改成迭代而不是递归，也会出现完全相同的问题——如果我想提高性能，我自己其实会改用迭代，不过这里没有演示这种做法。

有多种方法可以让每个 OMP 线程拥有自己的工作数组。如果必须继续动态分配空间，那么应当把分配放在并行区域内进行，使每个线程各自分配自己的工作数组。但是，如果您愿意并且能够依赖变长数组（VLA），那么您可能只需要再做一件事：在 `parallel for` 构造上附加一个 OpenMP `private` 子句。

例如，您代码的如下变体在我这里可以正常运行：
// Fills positions `position` .. `length - 1` of `arrayofindex` with every
// combination of letter indices in [0, num_letters). Each time the last
// position has been set, all `length` entries are printed as one line of
// letters, where index 0 maps to 'A'.
//
// length:       number of letters in a word
// num_letters:  size of the alphabet
// arrayofindex: caller-supplied work array with at least `length` entries
// position:     first position this call is responsible for
void gen_tail(int length, int num_letters, int arrayofindex[], int position) {
    for (int choice = 0; choice < num_letters; ++choice) {
        arrayofindex[position] = choice;

        // Not at the last position yet: let the next level fill the rest.
        if (position + 1 < length) {
            gen_tail(length, num_letters, arrayofindex, position + 1);
            continue;
        }

        // The word is complete. Emit it inside a critical region so that a
        // whole line is written by one thread at a time.
        #pragma omp critical
        {
            int i = 0;
            while (i < length) {
                putchar('A' + arrayofindex[i]);
                ++i;
            }
            putchar('\n');
        }
    }
}

// Prints every word of `length` letters over an alphabet of `num_letters`
// letters. The first letter is chosen here, in an OpenMP parallel loop, and
// gen_tail() fills in the remaining positions. `length` must be greater
// than 1 (asserted).
void gen(int length, int num_letters) {
    assert(length > 1);

    // A true array (VLA), not a pointer to malloc'd storage.
    int arrayofindex[length];

    // 'private' on an array gives every thread its own copy of the array.
    // On a pointer it would only copy the pointer, which is not enough.
    #pragma omp parallel for private(arrayofindex)
    for (int first = 0; first < num_letters; ++first) {
        arrayofindex[0] = first;
        gen_tail(length, num_letters, arrayofindex, 1);
    }
}

// Entry point: runs the generator with a word length of 5 and an alphabet
// of 4 letters.
int main(void) {
    enum { WORD_LENGTH = 5, ALPHABET_SIZE = 4 };

    gen(WORD_LENGTH, ALPHABET_SIZE);
    return 0;
}

void gen_tail（整数长度、整数个字母、整数数组索引[]、整数位置）{
for（int字母=0；字母1）；
int arrayofindex[length]；//注意：VLA，_非u动态分配
//将数组标记为“private”意味着每个线程都有自己的副本。
//如果“arrayofindex”是指针，则不会产生所需的效果。
#专用pragma omp并行（arrayofindex）
for（int字母=0；字母

在我这里，它产生了预期的 1024（$= 4^5$）个结果，而且各不相同——我有充分的理由预期它会如此。
根据我对所给代码的理解，总体目标基本上是生成所有可以用含 `N` 个字母的字母表组成的、长度为 `length` 个字母的单词。对 `gen()` 的每次递归调用对应一个字母位置，因此每当控制流到达递归的底部时，`arrayofindex` 的前 `length` 个元素就代表一个单词的各个字母。

但应该很明显，并行运行的多个线程不能共用同一个 `arrayofindex`。每个线程都期望在到达递归底部时，能在 `arrayofindex` 中找到它沿着自己的递归路径设置的值——这正是该方法的基础。如果其他线程同时在修改 `arrayofindex`，那么各线程很可能得到由不同线程设置的值混杂在一起的结果。此外，您可能也得不到所期望的加速，因为线程需要同步对 `arrayofindex` 的访问。

注意：这个问题本身与递归无关。即使把代码改成迭代而不是递归，也会出现完全相同的问题——如果我想提高性能，我自己其实会改用迭代，不过这里没有演示这种做法。

有多种方法可以让每个 OMP 线程拥有自己的工作数组。如果必须继续动态分配空间，那么应当把分配放在并行区域内进行，使每个线程各自分配自己的工作数组。但是，如果您愿意并且能够依赖变长数组（VLA），那么您可能只需要再做一件事：在 `parallel for` 构造上附加一个 OpenMP `private` 子句。

例如，您代码的如下变体在我这里可以正常运行：
// Fills positions `position` .. `length - 1` of `arrayofindex` with every
// combination of letter indices in [0, num_letters). Each time the last
// position has been set, all `length` entries are printed as one line of
// letters, where index 0 maps to 'A'.
//
// length:       number of letters in a word
// num_letters:  size of the alphabet
// arrayofindex: caller-supplied work array with at least `length` entries
// position:     first position this call is responsible for
void gen_tail(int length, int num_letters, int arrayofindex[], int position) {
    for (int choice = 0; choice < num_letters; ++choice) {
        arrayofindex[position] = choice;

        // Not at the last position yet: let the next level fill the rest.
        if (position + 1 < length) {
            gen_tail(length, num_letters, arrayofindex, position + 1);
            continue;
        }

        // The word is complete. Emit it inside a critical region so that a
        // whole line is written by one thread at a time.
        #pragma omp critical
        {
            int i = 0;
            while (i < length) {
                putchar('A' + arrayofindex[i]);
                ++i;
            }
            putchar('\n');
        }
    }
}

// Prints every word of `length` letters over an alphabet of `num_letters`
// letters. The first letter is chosen here, in an OpenMP parallel loop, and
// gen_tail() fills in the remaining positions. `length` must be greater
// than 1 (asserted).
void gen(int length, int num_letters) {
    assert(length > 1);

    // A true array (VLA), not a pointer to malloc'd storage.
    int arrayofindex[length];

    // 'private' on an array gives every thread its own copy of the array.
    // On a pointer it would only copy the pointer, which is not enough.
    #pragma omp parallel for private(arrayofindex)
    for (int first = 0; first < num_letters; ++first) {
        arrayofindex[0] = first;
        gen_tail(length, num_letters, arrayofindex, 1);
    }
}

// Entry point: runs the generator with a word length of 5 and an alphabet
// of 4 letters.
int main(void) {
    enum { WORD_LENGTH = 5, ALPHABET_SIZE = 4 };

    gen(WORD_LENGTH, ALPHABET_SIZE);
    return 0;
}

void gen_tail（整数长度、整数个字母、整数数组索引[]、整数位置）{
for（int字母=0；字母