C# 优化并行循环性能
C# 优化并行循环性能(标签:c#、multithreading、parallel-processing)。我有一个场景:需要迭代 6400 万个组合,并为每个组合对 64000 个数据项执行相同的处理逻辑。我注意到,取决于我如何配置循环逻辑,即使在并行循环中,性能也会降低或提高。以下是 3 种情况。通用数据:
int numberofSets = 3;
int set1ElementCount = 5840;
int set2ElementCount = 5840;
int set3ElementCount = 2;
int combinationsCount = 68211200; // = 5840 * 5840 * 2
int dataCount = 64000;
好吧,有一点进步仍然是进步。不过,我还希望有更大的提升。换个角度看这个问题,我想你是受到了缓存局部性(locality)的影响:您有二维数组,并且正在按列访问;而数组是按行存储并加载到缓存中的。当我有时间的时候,如果没有人回答,我会进一步研究。也许试着把你的数组分成 n 个块,其中 n 是你拥有的核心数,然后启动 n 个线程,每个线程在 for 循环中处理一个数据块。这样,您将最大限度地利用 CPU,并且在创建线程上花费很少的时间(仅 n 个线程)。您的第二种方法与上面的第二个示例类似。该页面建议对大数据集并行化外层循环,因此可以尝试并行化前两个循环。@arekzyla 我现在正在尝试这种方法。@arekzyla 您可以在编辑部分查看我的实现。改进在误差范围之内,非常微小。
int[,] combinations = new int[combinationsCount, numberofSets];
// combinations = generator.Generate(); // generate combinations
/* generated format is:
[0,0,0]
[0,0,1]
[1,0,0]
...
[5839, 5839, 1]
*/
//itterate combinations
Parallel.For(0, combinationsCount, (idx, state) =>
{
int idx1 = combinations[idx, 0]; // a bit of hardcoding here since we have 3 sets of data
int idx2 = combinations[idx, 1];
int idx3 = combinations[idx, 3];
// proccess data set for each combination
for (int i = 0; i < dataCount; i++) {
// do something
}
});
// itterate set 1 in parallel
Parallel.For(0, set1ElementCount, (idx1, state) =>
{
// itterate set 2
for (int idx2 = 0; idx2 < set2ElementCount; idx2 ++)
{
// itterate set 3
for (int idx3 = 0; idx3 < set3ElementCount; idx3 ++)
{
// proccess data set for each combination
for (int i = 0; i < dataCount; i++)
{
// do something
}
}
}
});
// itterate set 1
for (int idx1 = 0; idx1 < set1ElementCount; idx1 ++)
{
// itterate set 2
for (int idx2 = 0; idx2 < set2ElementCount; idx2 ++)
{
// itterate set 3
for (int idx3 = 0; idx3 < set3ElementCount; idx3 ++)
{
// proccess data set for each combination
for(int i = 0; i < dataCount; i++)
{
// do something
}
}
}
}
var combinationsCount = combinations.Length;
int coreCount = 4;
int chuncSize = combinationsCount / coreCount;
List<int[][]> chunked = new List<int[][]>();
for (int i = 0; i < coreCount; i++)
{
int skip = i * chuncSize;
int take = chuncSize;
int diff = (combinationsCount - skip) - take;
if (diff < chuncSize)
take = take + diff;
var sub = combinations.Skip(skip).Take(take).ToArray();
chunked.Add(sub);
}
// iterate chunks - each on a separate core
Parallel.For(0, coreCount, new ParallelOptions() { MaxDegreeOfParallelism = coreCount }, (chunkIndex, state) =>
{
var chunk = chunked[chunkIndex];
int chunkLength = chunk.Length;
// iterate combinations per-chunk
for (int idx = 0; idx < chunkLength; idx++)
{
// itterate data here
}
}