C# 优化并行循环性能_C#_Multithreading_Parallel Processing

C# 优化并行循环性能

c# multithreading parallel-processing

C# 优化并行循环性能,c#,multithreading,parallel-processing,C#,Multithreading,Parallel Processing,我有一个场景，在这个场景中，我需要迭代6400万个组合，并为每个组合对64000个数据项执行相同的处理逻辑。我注意到，取决于我如何配置循环逻辑，即使在并行循环中，性能也会降低或提高。以下是3种情况：通用数据： int numberofSets = 3; int set1ElementCount = 5840; int set2ElementCount = 5840; int set3ElementCount = 2; int combinationsCount = 68211200; //

我有一个场景：我需要迭代6400万个组合，并为每个组合对64000个数据项执行相同的处理逻辑。

我注意到，根据循环逻辑组织方式的不同，即使使用并行循环，性能也会有所下降或提升。

以下是3种情况：

通用数据：

// Number of index sets that make up one combination.
int numberofSets = 3;

// Number of elements in each of the three sets.
int set1ElementCount = 5840;
int set2ElementCount = 5840;
int set3ElementCount = 2;

// Total number of combinations, derived from the set sizes so it cannot drift
// out of sync with them: 5840 * 5840 * 2 = 68211200 (fits comfortably in an int).
int combinationsCount = set1ElementCount * set2ElementCount * set3ElementCount;

// Number of data items processed for every combination.
int dataCount = 64000;

并行执行

代码：

结果:

约9分钟

好吧，有一点进步也算是进步。不过，我原本期望能有更大的提升。

粗略地看了一下这个问题，我认为你是受到了局部性（locality）的影响。你有一个二维数组，并且是按列访问的；而数组是按行存储并加载到缓存中的。如果没有其他人回答，等我有时间时会再仔细看看。也许可以试着把你的数组分成 n 个块，其中 n 是你拥有的核心数，然后启动 n 个线程，每个线程在 for 循环中计算一个数据块。这样，你将最大限度地利用 CPU，并且在创建线程上只花费很少的时间（仅 n 个线程）。你的第二种方法与（原文所链接页面）上的第二个示例类似。该页面建议对大数据集上的任何循环进行并行化，因此可以尝试并行化前两层循环。@arekzyla 我现在正在尝试这种方法。@arekzyla 你可以在编辑部分查看我的实现。改进在误差范围内，非常微小。

// Scenario 1: precompute every combination as a row of set indices, then
// process the rows in parallel.
int[,] combinations = new int[combinationsCount, numberofSets];
// combinations = generator.Generate(); // generate combinations
/* generated format is:
[0,0,0]
[0,0,1]
[1,0,0]
...
[5839, 5839, 1]
*/
// Iterate over all combinations; each parallel iteration handles one row.
Parallel.For(0, combinationsCount, (idx, state) =>
{
    // A bit of hardcoding here since we have 3 sets of data. The second
    // dimension has numberofSets columns, so the valid column indices are
    // 0, 1 and 2; reading column 3 would throw IndexOutOfRangeException.
    int idx1 = combinations[idx, 0];
    int idx2 = combinations[idx, 1];
    int idx3 = combinations[idx, 2];

    // Process the whole data set for this combination.
    for (int i = 0; i < dataCount; i++) {
        // do something
    }
});

// Scenario 2: only the outermost set is parallelized; sets 2 and 3 are walked
// sequentially inside each parallel iteration.

// Processes the whole data set for a single (idx1, idx2, idx3) combination.
void ProcessCombination(int idx1, int idx2, int idx3)
{
    for (int i = 0; i < dataCount; i++)
    {
        // do something
    }
}

// Iterate set 1 in parallel.
Parallel.For(0, set1ElementCount, (idx1, state) =>
{
    // Iterate set 2, then set 3, in the same order on every worker.
    for (int idx2 = 0; idx2 < set2ElementCount; idx2++)
    {
        for (int idx3 = 0; idx3 < set3ElementCount; idx3++)
        {
            ProcessCombination(idx1, idx2, idx3);
        }
    }
});

// Scenario 3: fully sequential baseline; no parallelism at any level.

// Walks the whole data set for a single (idx1, idx2, idx3) combination.
void ProcessDataSet(int idx1, int idx2, int idx3)
{
    for (int i = 0; i < dataCount; i++)
    {
        // do something
    }
}

// Visit every combination: set 1 outermost, then set 2, then set 3.
for (int idx1 = 0; idx1 < set1ElementCount; idx1++)
{
    for (int idx2 = 0; idx2 < set2ElementCount; idx2++)
    {
        for (int idx3 = 0; idx3 < set3ElementCount; idx3++)
        {
            ProcessDataSet(idx1, idx2, idx3);
        }
    }
}

// Split the combinations into coreCount contiguous chunks, one per worker.
// NOTE(review): Skip/Take/ToArray below produce an int[][], so `combinations`
// is presumably a jagged int[][] in this snippet (one int[] per combination)
// rather than a rectangular int[,] - confirm against the declaration.
var combinationsCount = combinations.Length;
int coreCount = 4;
int chunkSize = combinationsCount / coreCount;
List<int[][]> chunked = new List<int[][]>(coreCount);
for (int i = 0; i < coreCount; i++)
{
    int skip = i * chunkSize;

    // Every chunk holds chunkSize items except the last one, which takes
    // everything that is left. Deciding this by chunk index (instead of
    // comparing the leftover with chunkSize) keeps the trailing items even
    // when the division remainder is >= chunkSize, e.g. 7 items on 4 cores
    // or fewer items than cores.
    int take = (i == coreCount - 1) ? combinationsCount - skip : chunkSize;

    var sub = combinations.Skip(skip).Take(take).ToArray();

    chunked.Add(sub);
}

// Process the pre-built chunks in parallel, one loop iteration per chunk.
// MaxDegreeOfParallelism caps the number of concurrently running iterations at
// coreCount; it does not by itself pin an iteration to a particular core.
// NOTE(review): the closing `);` of this Parallel.For call is not visible in
// this excerpt - confirm the statement is terminated.
Parallel.For(0, coreCount, new ParallelOptions() { MaxDegreeOfParallelism = coreCount }, (chunkIndex, state) =>
{
    // Each iteration works only on its own chunk, looked up by index.
    var chunk = chunked[chunkIndex];
    int chunkLength = chunk.Length;

    // Iterate the combinations of this chunk sequentially.
    for (int idx = 0; idx < chunkLength; idx++)
    {
        // iterate data here
    }
}