C++ 非线性有限元程序中重复稀疏矩阵组装时 Eigen 的高效使用_C++_Sparse Matrix_Eigen3_Finite Element Analysis_Non Linear

C++ 非线性有限元程序中重复稀疏矩阵组装时 Eigen 的高效使用

c++

C++ 非线性有限元程序中重复稀疏矩阵组合的特征值有效利用,c++,sparse-matrix,eigen3,finite-element-analysis,non-linear,C++,Sparse Matrix,Eigen3,Finite Element Analysis,Non Linear,我正在尝试使用Eigen有效地组装非线性有限元计算的刚度矩阵从我的有限元离散，我可以准确地提取我的稀疏模式。因此，我只能使用： mat.reserve(nnz); mat.setFromTriplets(TripletList.begin(), TripletList.end()); 如中所提议的那样我在这里提出的问题是：由于非线性的性质，我不得不经常重新填充矩阵。因此，我应该将所有贡献再次存储在一个三元组中，并反复使用mat.setFromTriplets（…）如果我重用mat.se

我正在尝试使用Eigen有效地组装非线性有限元计算的刚度矩阵

从我的有限元离散中，我可以准确地提取稀疏模式。因此，我可以直接使用：

mat.reserve(nnz);
mat.setFromTriplets(TripletList.begin(), TripletList.end());

如中所提议的那样

我在这里提出的问题是：

由于非线性的性质，我不得不经常重新填充矩阵。因此，我应该将所有贡献再次存储在一个三元组中，并反复使用

mat.setFromTriplets(...)

如果我重用

mat.setFromTriplets(...)

，我可以利用这样一个事实，即我总是以相同的顺序计算程序集的元素矩阵，因此我在三元组中的索引永远不会更改，而只更改值。因此，可以绕过“内存中的搜索”，因为我可能可以将其存储在一个新数组中的位置

如果

mat.coeffRef(i, j)

更快，我可以利用上述事实吗

另外一个问题（优先级较低）：如果必须在循环中同时存储和组装 3 个具有相同稀疏模式的矩阵，是否有高效的做法？例如，一个矩阵包装器，其内部只保存一份稀疏模式，通过 M1=mat[0]，M2=mat[1]，M3=mat[2] 来获取各个矩阵，其中 mat[i] 返回第 i 个矩阵，M1、M2 和 M3 是例如

SparseMatrix<double> M1(1000, 1000)

一般设置如下（对于问题1-3，仅出现M1）：

如果我在我的i5-6600K 3.5Ghz和16GB ram上运行这个示例，我将得到以下结果。以秒为单位的时间

  Size Triplet   Nested LessNested  Sort_intIndex Sort_dum_pair findDuplica
    10   1e-06    1e-06      2e-06          1e-06         1e-06       1e-06 
   100 2.8e-05    4e-06    1.4e-05          5e-05       4.2e-05       1e-05 
  1000   0.003 0.000416   0.001489        0.01012       0.00627    0.000635 
 10000   0.426 0.093911    0.48912         1.5389      0.780676    0.061881 
100000 337.799  99.0801    37.3656        292.397       87.4488     0.79996

前三列表示不同方法的计算时间，第4至第6列表示不同预处理步骤的时间

对于100000行和列的大小，我的Ram相对较快地填满，因此应该小心最后一个表条目。这里最快的方法从2变为3

我的问题是，这种方法是否朝着提高效率的正确方向发展？这是一个完全错误的方向吗？例如，对于尺寸为10000的情况，0.48s的装配时间似乎有点高

此外，预处理步骤变得非常昂贵，有没有更好的方法来构造矩阵的顺序？最后一个问题是，基准测试是否以正确的方式进行

谢谢你抽出时间，

亚历克斯

我不太了解 Eigen 的内部结构，但是：1-3。如果稀疏模式没有改变，那么使用

setFromTriplets

几乎肯定比直接设置值要慢（但要注意关于重复的差异），4。我敢打赌，从一个已经设置了模式的地方复制构建稀疏矩阵比从头开始构建要快（在以后必须替换值时是否还要快？我不知道…）。最后，你最安全的选择是测试你的选项并对它们进行分析——我认为你的想法很有可能成功。但我也建议你在

Eigen

中阅读稀疏矩阵数据是如何组织的。如果你想要好的性能，你最终必须关心这一点。我们不能在不知道你如何迭代非零的情况下回答。请参阅以获取一些初步见解。如果结构不变，最快的方法是覆盖矩阵的所有非零，并使用

it.valueRef() = ... 逐个赋值。

谢谢你的回答。根据你的反馈，我改进了我的问题。
// Sketch of the intended assembly loop. This is pseudo-code and is not
// compilable as written (see the note on tripletList.value() below).
SparseMatrix<double> mat(rows,cols);
std::vector<double> valuevector(nnz);
// Initial construction
std::vector< Eigen::Triplet<double> > tripletList;

// naive fill of tripletList

// Sort the entries and identify duplicate entries in tripletList from their
// col and row values; from this information generate the operator P.

for (int i=0; i<1000; i++) 
{
  // naive refill of tripletList

  valuevector= P*tripletList.value(); // Build the value vector in storage order from the triplet values. std::vector has no value() member; the call stands for "the values of all triplets".

  // Overwrite every stored non-zero in place, walking the matrix in its storage order.
  for (int k=0; k<mat.outerSize(); ++k)
    for (SparseMatrix<double>::InnerIterator it(mat,k); it; ++it)
          it.valueRef() =valuevector(it);
}

#include <iostream>
#include <Eigen/Sparse>
#include <random>
#include <fstream>
#include <chrono>

// Pull the chrono, Eigen and std names into the global namespace so the
// benchmark below can use them unqualified.
using namespace std::chrono;
using namespace Eigen;
using namespace std;

// (row, col, value) triplet with a double value, used to fill the sparse matrix.
typedef Eigen::Triplet<double> T;


/**
 * Determines the run length of every group of equal pairs in a pair list.
 *
 * @param dummypair    Index pairs in which equal pairs are adjacent (main()
 *                     sorts the list before calling). The list is only read.
 * @param multiplicity In/out vector with one slot per group. Each slot is used
 *                     as the starting run length (main() initialises it with
 *                     setOnes()) and is incremented until it equals the number
 *                     of consecutive equal pairs in the corresponding group.
 *
 * Slots for which no pairs are left in dummypair are not modified.
 */
void findDuplicates(vector<pair<int, int> > &dummypair, Ref<VectorXi> multiplicity) {
  const int totalPairs = static_cast<int>(dummypair.size());
  int pairCount = 0;  // index of the first pair of the current group
  for (int i = 0; i < multiplicity.size(); ++i) {
    // More slots than groups: nothing left to count.
    if (pairCount >= totalPairs) {
      break;
    }
    const pair<int, int> &currentPair = dummypair[pairCount];
    // The bound check must come first: for the last group the look-ahead index
    // reaches dummypair.size(), which must not be dereferenced.
    while (pairCount + multiplicity[i] < totalPairs &&
           currentPair == dummypair[pairCount + multiplicity[i]]) {
      multiplicity[i]++;
    }
    pairCount += multiplicity[i];
  }
}

// Dynamically sized dense matrix whose entries are double-valued durations
// with a millisecond period; main() uses it as the table of benchmark timings.
typedef Matrix<duration<double, std::milli>, Dynamic, Dynamic> MatrixXtime;

int main() {


  //init random generators
  std::default_random_engine gen;
  std::uniform_real_distribution<double> dist(0.0, 1.0);

  int sizesForTest = 5;
  int measures = 6;
  MatrixXtime timeArray(sizesForTest, measures);
  cout << "TripletTime NestetTime LNestedTime " << endl;
  for (int m = 0; m < sizesForTest; ++m) {


    int rows = pow(10, m + 1);
    int cols = rows;
    std::uniform_int_distribution<int> distentryrow(0, rows - 1);
    std::uniform_int_distribution<int> distentrycol(0, cols - 1);

    std::vector<T> tripletList;
    SparseMatrix<double> mat1(rows, cols);
//  SparseMatrix<double> mat2(rows,cols);
//  SparseMatrix<double> mat3(rows,cols);

    //generate sparsity pattern of matrix with  10% fill-in
    tripletList.emplace_back(3, 0, 15);
    for (int i = 0; i < rows; ++i)
      for (int j = 0; j < cols; ++j) {
        auto value = dist(gen);                         //generate random number
        auto value2 = dist(gen);                         //generate random number
        auto value3 = dist(gen);                         //generate random number
        if (value < 0.05) {
          auto rowindex = distentryrow(gen);
          auto colindex = distentrycol(gen);
          tripletList.emplace_back(rowindex, colindex, value);      //if larger than treshold, insert it

          //dublicate every third entry to mimic entries which appear more then once
          if (value2 < 0.3333333333333333333333)
            tripletList.emplace_back(rowindex, colindex, value);

          //triple every forth entry to mimic entries which appear more then once
          if (value3 < 0.25)
            tripletList.emplace_back(rowindex, colindex, value);
        }
      }
    tripletList.emplace_back(3, 0, 9);

    int numberOfValues = tripletList.size();

    //initially set all matrices from triplet to allocate space and sparsity pattern
    mat1.setFromTriplets(tripletList.begin(), tripletList.end());
//  mat2.setFromTriplets(tripletList.begin(), tripletList.end());
//  mat3.setFromTriplets(tripletList.begin(), tripletList.end());

    int nnz = mat1.nonZeros();
    //reset all entries back to zero to fill in later
    mat1.coeffs().setZero();
//  mat2.coeffs().setZero();
//  mat3.coeffs().setZero();

    //document sorting of entries for repetative insertion
    VectorXi internalIndex(numberOfValues);
    vector<pair<int, int> > dummypair(numberOfValues);

    VectorXd valuelist(numberOfValues);
    for (int l = 0; l < numberOfValues; ++l) {
      valuelist(l) = tripletList[l].value();
    }

    //init internalindex and dummy pair
    internalIndex = Eigen::VectorXi::LinSpaced(numberOfValues, 0.0, numberOfValues - 1);
    for (int i = 0; i < numberOfValues; ++i) {

      dummypair[i].first = tripletList[i].col();
      dummypair[i].second = tripletList[i].row();
    }

    auto start = high_resolution_clock::now();


// sort the vector  internalIndex based on the dummypair
    sort(internalIndex.begin(), internalIndex.end(), [&](int i, int j) {
        return dummypair[i].first < dummypair[j].first ||
               (dummypair[i].first == dummypair[j].first && dummypair[i].second < dummypair[j].second);
    });

    auto stop = high_resolution_clock::now();
    timeArray(m, 3) = (stop - start) / 1000;


    start = high_resolution_clock::now();
    sort(dummypair.begin(), dummypair.end());
    stop = high_resolution_clock::now();
    timeArray(m, 4) = (stop - start) / 1000;


    start = high_resolution_clock::now();
    VectorXi dublicatecount(nnz);
    dublicatecount.setOnes();
    findDuplicates(dummypair, dublicatecount);
    stop = high_resolution_clock::now();
    timeArray(m, 5) = (stop - start) / 1000;

    dummypair.clear();




    //calculate vector containing all indices of triplet
    //therefore vector[k] is the vectorXi containing the entries of triples which should be written at dof k
    int indextriplet = 0;
    int multiplicity = 0;

    vector<VectorXi> listofentires(mat1.nonZeros());
    for (int k = 0; k < mat1.nonZeros(); ++k) {
      multiplicity = dublicatecount[k];
      listofentires[k] = internalIndex.segment(indextriplet, multiplicity);
      indextriplet += multiplicity;
    }


    //========================================
    //Here the nonlinear analysis should start and everything beforehand is prepocessing

    //Test1 from triplets
    start = high_resolution_clock::now();

    mat1.setFromTriplets(tripletList.begin(), tripletList.end());

    stop = high_resolution_clock::now();
    timeArray(m, 0) = (stop - start) / 1000;

    mat1.coeffs().setZero();


    //Test2 use internalIndex but calculate listofentires on the fly
    indextriplet = 0;
    start = high_resolution_clock::now();

    for (int k = 0; k < mat1.nonZeros(); ++k) {
      multiplicity = dublicatecount[k];
      mat1.coeffs()[k] += valuelist(internalIndex.segment(indextriplet, multiplicity)).sum();
      indextriplet += multiplicity;
    }

    stop = high_resolution_clock::now();
    timeArray(m, 1) = (stop - start) / 1000;
    mat1.coeffs().setZero();

    //Test3 directly use listofentires
    start = high_resolution_clock::now();
    for (int k = 0; k < mat1.nonZeros(); ++k)
      mat1.coeffs()[k] += valuelist(listofentires[k]).sum();

    stop = high_resolution_clock::now();
    timeArray(m, 2) = (stop - start) / 1000;


    std::ofstream file("test.txt");
    if (file.is_open()) {
      file << mat1 << '\n';
    }
    cout << "Size: " << rows << ": ";
    for (int n = 0; n < measures; ++n)
      cout << timeArray(m, n).count() << " ";
    cout << endl;
  }

  return 0;
}

  Size Triplet   Nested LessNested  Sort_intIndex Sort_dum_pair findDuplica
    10   1e-06    1e-06      2e-06          1e-06         1e-06       1e-06 
   100 2.8e-05    4e-06    1.4e-05          5e-05       4.2e-05       1e-05 
  1000   0.003 0.000416   0.001489        0.01012       0.00627    0.000635 
 10000   0.426 0.093911    0.48912         1.5389      0.780676    0.061881 
100000 337.799  99.0801    37.3656        292.397       87.4488     0.79996