c++；多核应用的堆栈效率我试图在C++中编写multicode Markov Chain代码，而我试图利用许多CPU（最多24个）来运行每个不同的链，在选择一个正确的容器来收集每个CPU上的数值评估时，我遇到了一个问题。我试图测量的基本上是布尔变量数组的平均值。我尝试过在“std:：vector”对象周围编写一个包装器，如下所示： struct densityStack { vector<int> density; //will store the sum of boolean varaibles int card; //will store the amount of elements we summed over for normalizing at the end densityStack(int size){ //constructor taking as only parameter the size of the array, usually size = 30 density = vector<int> (size, 0); card = 0; } void push_back(vector<int> & toBeAdded){ //method summing a new array (of measurements) to our stack for(auto valStack = density.begin(), newVal = toBeAdded.begin(); valStack != density.end(); ++valStack, ++ newVal) *valStack += *newVal; card++; } void savef(const char * fname){ //method outputting into a file ofstream out(fname); out.precision(10); out << card << "\n"; //saving the cardinal in first line for(auto val = density.begin(); val != density.end(); ++val) out << << (double) *val/card << "\n"; out.close(); } }; struct densityStack{ 向量密度；//将存储布尔变量之和 int card；//将存储我们求和的元素数量，以便在最后进行规范化 densityStack（int-size）{//构造函数仅将数组的大小作为参数，通常大小为30 密度=矢量（大小，0）；卡片=0； } void push_back（vector&tobeaded）{//方法将一个新数组（测量值）求和到堆栈中对于（auto-valStack=density.begin（），newVal=tobeaded.begin（）；valStack！=density.end（）；++valStack，++newVal） *valStack+=*newVal；卡++； } void savef（const char*fname）{//输出到文件中的方法流出流量（fname）；精度（10）； out_C++_Parallel Processing_Openmp_Markov Chains

c++；多核应用的堆栈效率我试图在C++中编写multicode Markov Chain代码，而我试图利用许多CPU（最多24个）来运行每个不同的链，在选择一个正确的容器来收集每个CPU上的数值评估时，我遇到了一个问题。我试图测量的基本上是布尔变量数组的平均值。我尝试过在“std:：vector”对象周围编写一个包装器，如下所示： struct densityStack { vector<int> density; //will store the sum of boolean varaibles int card; //will store the amount of elements we summed over for normalizing at the end densityStack(int size){ //constructor taking as only parameter the size of the array, usually size = 30 density = vector<int> (size, 0); card = 0; } void push_back(vector<int> & toBeAdded){ //method summing a new array (of measurements) to our stack for(auto valStack = density.begin(), newVal = toBeAdded.begin(); valStack != density.end(); ++valStack, ++ newVal) *valStack += *newVal; card++; } void savef(const char * fname){ //method outputting into a file ofstream out(fname); out.precision(10); out << card << "\n"; //saving the cardinal in first line for(auto val = density.begin(); val != density.end(); ++val) out << << (double) *val/card << "\n"; out.close(); } }; struct densityStack{ 向量密度；//将存储布尔变量之和 int card；//将存储我们求和的元素数量，以便在最后进行规范化 densityStack（int-size）{//构造函数仅将数组的大小作为参数，通常大小为30 密度=矢量（大小，0）；卡片=0； } void push_back（vector&tobeaded）{//方法将一个新数组（测量值）求和到堆栈中对于（auto-valStack=density.begin（），newVal=tobeaded.begin（）；valStack！=density.end（）；++valStack，++newVal） *valStack+=*newVal；卡++； } void savef（const char*fname）{//输出到文件中的方法流出流量（fname）；精度（10）； out

c++ parallel-processing

c++；多核应用的堆栈效率我试图在C++中编写multicode Markov Chain代码，而我试图利用许多CPU（最多24个）来运行每个不同的链，在选择一个正确的容器来收集每个CPU上的数值评估时，我遇到了一个问题。我试图测量的基本上是布尔变量数组的平均值。我尝试过在“std:：vector”对象周围编写一个包装器，如下所示： struct densityStack { vector<int> density; //will store the sum of boolean varaibles int card; //will store the amount of elements we summed over for normalizing at the end densityStack(int size){ //constructor taking as only parameter the size of the array, usually size = 30 density = vector<int> (size, 0); card = 0; } void push_back(vector<int> & toBeAdded){ //method summing a new array (of measurements) to our stack for(auto valStack = density.begin(), newVal = toBeAdded.begin(); valStack != density.end(); ++valStack, ++ newVal) *valStack += *newVal; card++; } void savef(const char * fname){ //method outputting into a file ofstream out(fname); out.precision(10); out << card << "\n"; //saving the cardinal in first line for(auto val = density.begin(); val != density.end(); ++val) out << << (double) *val/card << "\n"; out.close(); } }; struct densityStack{ 向量密度；//将存储布尔变量之和 int card；//将存储我们求和的元素数量，以便在最后进行规范化 densityStack（int-size）{//构造函数仅将数组的大小作为参数，通常大小为30 密度=矢量（大小，0）；卡片=0； } void push_back（vector&tobeaded）{//方法将一个新数组（测量值）求和到堆栈中对于（auto-valStack=density.begin（），newVal=tobeaded.begin（）；valStack！=density.end（）；++valStack，++newVal） *valStack+=*newVal；卡++； } void savef（const char*fname）{//输出到文件中的方法流出流量（fname）；精度（10）； out,c++,parallel-processing,openmp,markov-chains,C++,Parallel Processing,Openmp,Markov Chains,如何同步您的共享densityStack实例从这里有限的信息来看，我的猜测是，每次CPU有一小块数据时，CPU都会被阻塞等待写入数据。如果这是问题所在，一种提高性能的简单技术就是减少写入次数。为每个CPU保留一个数据缓冲区，并减少写入densityStack的频率。如果代码工作正常，则is可能更适合于“每次CPU核心有数据时，它都会调用push_back”-互斥锁在哪里？忘记效率，从正确的代码开始。多核累积的正确方法是在每个线程中累积子线程，然后将它们组合起来。我继续删除了我的评论，因为这更多

您是如何同步共享的densityStack实例的？

从这里有限的信息来看，我的猜测是，每次CPU有一小块数据时，CPU都会被阻塞等待写入数据。如果这是问题所在，一种提高性能的简单技术就是减少写入次数。为每个CPU保留一个数据缓冲区，并减少写入densityStack的频率。

如果代码工作正常，则这个问题可能更适合发到其他地方。“每次CPU核心有数据时，它都会调用push_back”——互斥锁在哪里？先别管效率，从正确的代码开始。多核累积的正确做法是在每个线程中各自累积部分结果，然后再将它们合并起来。我已经把我的评论删掉了，因为它更像是一个“试试这个”的建议，而不是一个答案。不过，关于@MSalters提到的添加互斥锁：我看不出有使用互斥锁的必要——向量将保持固定大小，从不移动，只会对其执行加法操作。我看不出它有任何线程不安全的地方。出于我自己学习的目的：在这种情况下，有什么可能损坏或出错？（为了讨论这个问题，我有意忽略savef方法。）@Diniden——向量的大小不是固定的，它有通过push_back追加的元素。push_back不是原子的，也不是线程安全的，因此需要把它包在互斥锁中。@Diniden——*valStack += *newVal 这一行不是线程安全的。线程可能在计算出（隐式的）*valStack + *newVal 之后、但在把结果写回 *valStack 之前被中断。+= 语法在某种程度上掩盖了这不是一个原子操作的事实。