C++ 如何使用c++；11?_C++_C++11_Asynchronous_Matrix_Future

C++ 如何使用c++；11?

c++ c++11 asynchronous matrix

C++ 如何使用c++；11?,c++,c++11,asynchronous,matrix,future,C++,C++11,Asynchronous,Matrix,Future,我正试图为矩阵乘法编写一个并行分治算法，其中矩阵为2^N x 2^N。我的解决方案适用于较小的矩阵，但当我尝试将两个2^6 x 2^6或更大的矩阵相乘时，它开始写入随机0，并且计算结果很糟糕。我认为问题是太多的线程开始在这个大小上，但我不能想出，因为一天以来，如何解决它。谢谢你的帮助。以下是我迄今为止的尝试： bool ParallelMultiply(const std::vector<std::vector<int> > &m1, const std::v

我正在尝试为矩阵乘法编写一个并行分治算法，其中矩阵的大小为 2^N x 2^N。我的实现对较小的矩阵是正确的，但当我把两个 2^6 x 2^6 或更大的矩阵相乘时，结果中开始随机出现 0，计算结果也不正确。我猜问题在于这种规模下启动了太多线程，但我花了一整天也没想出该如何解决。谢谢你的帮助。以下是我目前的尝试：

bool ParallelMultiply(const std::vector<std::vector<int> > &m1, const std::vector<std::vector<int> > &m2, std::vector<std::vector<int> > &dest)
{
if((m1.size() == 1) && m1.size() == m1[0].size())
    dest[0][0] = m1[0][0]*m2[0][0];
else
{
    std::vector<std::vector<int> > temp(m1.size());
    for(int i=0; i < temp.size(); i++)
        temp[i].resize(temp.size());

    int subSize = m1.size()/2;

    std::vector<std::vector<int> > A11 = SubMatrix(m1, 0, 0);
    std::vector<std::vector<int> > A12 = SubMatrix(m1, 0, subSize);
    std::vector<std::vector<int> > A21 = SubMatrix(m1, subSize, 0);
    std::vector<std::vector<int> > A22 = SubMatrix(m1, subSize, subSize);
    std::vector<std::vector<int> > B11 = SubMatrix(m2, 0, 0);
    std::vector<std::vector<int> > B12 = SubMatrix(m2, 0, subSize);
    std::vector<std::vector<int> > B21 = SubMatrix(m2, subSize, 0);
    std::vector<std::vector<int> > B22 = SubMatrix(m2, subSize, subSize);
    std::vector<std::vector<int> > dest11 = SubMatrix(dest, 0, 0);
    std::vector<std::vector<int> > dest12 = SubMatrix(dest, 0, subSize);
    std::vector<std::vector<int> > dest21 = SubMatrix(dest, subSize, 0);
    std::vector<std::vector<int> > dest22 = SubMatrix(dest, subSize, subSize);
    std::vector<std::vector<int> > temp11 = SubMatrix(temp, 0, 0);
    std::vector<std::vector<int> > temp12 = SubMatrix(temp, 0, subSize);
    std::vector<std::vector<int> > temp21 = SubMatrix(temp, subSize, 0);
    std::vector<std::vector<int> > temp22 = SubMatrix(temp, subSize, subSize);


    std::future<bool> f1 = std::async(std::launch::async, ParallelMultiply, std::cref(A11), std::cref(B11), std::ref(dest11));
    std::future<bool> f2 = std::async(std::launch::async, ParallelMultiply, std::cref(A12), std::cref(B21), std::ref(temp11));

    std::future<bool> f3 = std::async(std::launch::async, ParallelMultiply, std::cref(A11), std::cref(B12), std::ref(dest12));
    std::future<bool> f4 = std::async(std::launch::async, ParallelMultiply, std::cref(A12), std::cref(B22), std::ref(temp12));


    std::future<bool> f5 = std::async(std::launch::async, ParallelMultiply, std::cref(A21), std::cref(B11), std::ref(dest21));
    std::future<bool> f6 = std::async(std::launch::async, ParallelMultiply, std::cref(A22), std::cref(B21), std::ref(temp21));

    std::future<bool> f7 = std::async(std::launch::async, ParallelMultiply, std::cref(A21), std::cref(B12), std::ref(dest22));
    std::future<bool> f8 = std::async(std::launch::async, ParallelMultiply, std::cref(A22), std::cref(B22), std::ref(temp22));

    f1.wait(); f2.wait(); f3.wait(); f4.wait(); f5.wait(); f6.wait(); f7.wait(); f8.wait();

    AddMatrices(temp11, dest11);
    AddMatrices(temp12, dest12);
    AddMatrices(temp21, dest21);
    AddMatrices(temp22, dest22);

    MergeMatrices(dest11, dest12, dest21, dest22, dest);

}

return true;
}

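bool ParallelMultiply(const std::vector<std::vector<int> > &m1, const std::vector<std::vector<int> > &m2, std::vector<std::vector<int> > &dest)
{
if((m1.size() == 1) && m1.size() == m1[0].size())
dest[0][0] = m1[0][0]*m2[0][0];
else
{
std::vector<std::vector<int> > temp(m1.size());
for(int i=0; i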


另外两个函数：
// Adds m into res element by element: res[i][j] += m[i][j].
//
// res must have at least as many rows as m, and each row of res must be at
// least as long as the matching row of m. The column bound is taken from the
// row itself, so non-square matrices are handled as well.
void AddMatrices(const std::vector<std::vector<int> > &m, std::vector<std::vector<int> > &res)
{
    typedef std::vector<int>::size_type Index;
    for (Index i = 0; i < m.size(); ++i)
    {
        const std::vector<int> &src = m[i];
        std::vector<int> &dst = res[i];
        for (Index j = 0; j < src.size(); ++j)
            dst[j] += src[j];
    }
}

// Assembles res from four quadrants: m1 goes top-left, m2 top-right,
// m3 bottom-left and m4 bottom-right. The quadrant edge length is taken as
// half the number of rows of res, and only that many rows and columns of
// each quadrant are read.
void MergeMatrices(const std::vector<std::vector<int> > &m1, const std::vector<std::vector<int> > &m2, const std::vector<std::vector<int> > &m3, const std::vector<std::vector<int> > &m4, std::vector<std::vector<int> > &res)
{
    const int half = res.size() / 2;
    for (int row = 0; row < half; ++row)
    {
        std::vector<int> &upper = res[row];
        std::vector<int> &lower = res[half + row];
        for (int col = 0; col < half; ++col)
        {
            upper[col] = m1[row][col];
            upper[half + col] = m2[row][col];
            lower[col] = m3[row][col];
            lower[half + col] = m4[row][col];
        }
    }
}

void AddMatrices(const std::vector<std::vector<int> > &m, std::vector<std::vector<int> > &res)
{
for(int i=0; i for(int i=0; i
您是否验证了 SubMatrix、AddMatrices 和 MergeMatrices 中的逻辑？您正在创建大量向量副本，因此这段代码效率极低（除了使用的是矩阵乘法的朴素定义之外），但我没有看到明显的错误。有点奇怪的是，在 N != N 的情况下，N=N 总是输入错误。我添加了另外两个函数，我认为它们都工作得很好。你能提供一个最小可运行示例吗？std::launch::async 策略可能会在递归中产生大量线程，你可能需要检查该函数的资源使用情况。你必须停止复制不可变数据。你需要对源矩阵的视图进行操作，而不是复制。一旦你有了 matrix_view
类型，你可以添加 drop_top(N)
和 keep_top(N)
和 drop_left(N)
和 keep_left(N)
方法，这样就不必再受限于 2 的幂次大小。不要每次都把矩阵分成 4 块，试着沿最长的轴把它分成 2 块。这样你的递归就简单多了。