C++ 非线性有限元程序中重复稀疏矩阵组合的特征值有效利用
我正在尝试使用Eigen有效地组装非线性有限元计算的刚度矩阵 从我的有限元离散,我可以准确地提取我的稀疏模式。因此,我只能使用:C++ 非线性有限元程序中重复稀疏矩阵组合的特征值有效利用,c++,sparse-matrix,eigen3,finite-element-analysis,non-linear,C++,Sparse Matrix,Eigen3,Finite Element Analysis,Non Linear,我正在尝试使用Eigen有效地组装非线性有限元计算的刚度矩阵 从我的有限元离散,我可以准确地提取我的稀疏模式。因此,我只能使用: mat.reserve(nnz); mat.setFromTriplets(TripletList.begin(), TripletList.end()); 如中所提议的那样 我在这里提出的问题是: 由于非线性的性质,我不得不经常重新填充矩阵。因此,我应该将所有贡献再次存储在一个三元组中,并反复使用mat.setFromTriplets(…) 如果我重用mat.se
mat.reserve(nnz);
mat.setFromTriplets(TripletList.begin(), TripletList.end());
如中所提议的那样
我在这里提出的问题是:
mat.setFromTriplets(…)
mat.setFromTriplets(…)
,我可以利用这样一个事实,即我总是以相同的顺序计算程序集的元素矩阵,因此我在三元组中的索引永远不会更改,而只更改值。因此,可以绕过“内存中的搜索”,因为我可能可以将其存储在一个新数组中的位置mat.coeffRef(i,j)
更快,我可以利用上述事实吗SparseMatrix M1(10001000)
- Size Triplet Nested LessNested Sort_intIndex Sort_dum_pair findDuplica
10 1e-06 1e-06 2e-06 1e-06 1e-06 1e-06
100 2.8e-05 4e-06 1.4e-05 5e-05 4.2e-05 1e-05
1000 0.003 0.000416 0.001489 0.01012 0.00627 0.000635
10000 0.426 0.093911 0.48912 1.5389 0.780676 0.061881
100000 337.799 99.0801 37.3656 292.397 87.4488 0.79996
前三列表示不同方法的计算时间,第4至第6列表示不同预处理步骤的时间
对于100000行和列的大小,我的Ram相对较快地填满,因此应该小心最后一个表条目。这里最快的方法从2变为3
我的问题是,这种方法是否朝着提高效率的正确方向发展?这是一个完全错误的方向吗?例如,对于尺寸为10000的情况,0.48s的装配时间似乎有点高
此外,预处理步骤变得非常昂贵,有没有更好的方法来构造矩阵的顺序?最后一个问题是,基准测试是否以正确的方式进行
谢谢你抽出时间,
亚历克斯我不太了解伊根的内部结构,但是:1-3。如果稀疏模式没有改变,那么使用
setFromTriplets
几乎肯定比直接设置值要慢(但要注意关于重复的差异),4。我敢打赌,从一个已经设置了模式的地方复制构建稀疏矩阵比从头开始构建要快(在以后必须替换值时是否还要快?我不知道…)。最后,你最安全的选择是测试你的选项并对它们进行分析——我认为你的想法很有可能成功。但我也建议你在Eigen
中阅读稀疏矩阵数据是如何组织的。如果你想要好的性能,你最终必须关心这一点。我们不能在不知道你如何迭代非零的情况下回答。请参阅以获取一些初步见解。如果结构不变,最快的方法是覆盖矩阵的所有非零,并使用it.valueRef()=代码>谢谢你的回答。根据你的反馈,我改进了我的问题。
SparseMatrix<double> mat(rows,cols);
std::vector<double> valuevector(nnz);
//Initially construction
std::vector< Eigen::Triplet<double> > tripletList;
//naive fill of tripletList
//Sorting of entries and identifying double entries in tripletList from col and row values
//generating from this information operator P
for (int i=0; i<1000; i++)
{
//naive refill of tripletList
valuevector= P*tripletList.value(); //constructing vector in efficient ordering from values of triplets (tripletList.value() call does not makes since for std::vector but i hope it is clear what i have in mind
for (int k=0; k<mat.outerSize(); ++k)
for (SparseMatrix<double>::InnerIterator it(mat,k); it; ++it)
it.valueRef() =valuevector(it);
}
#include <iostream>
#include <Eigen/Sparse>
#include <random>
#include <fstream>
#include <chrono>
using namespace std::chrono;
using namespace Eigen;
using namespace std;
typedef Eigen::Triplet<double> T;
void findDuplicates(vector<pair<int, int> > &dummypair, Ref<VectorXi> multiplicity) {
// Iterate over the vector and store the frequency of each element in map
int pairCount = 0;
pair<int, int> currentPair;
for (int i = 0; i < multiplicity.size(); ++i) {
currentPair = dummypair[pairCount];
while (currentPair == dummypair[pairCount + multiplicity[i]]) {
multiplicity[i]++;
}
pairCount += multiplicity[i];
}
}
typedef Matrix<duration<double, std::milli>, Dynamic, Dynamic> MatrixXtime;
int main() {
//init random generators
std::default_random_engine gen;
std::uniform_real_distribution<double> dist(0.0, 1.0);
int sizesForTest = 5;
int measures = 6;
MatrixXtime timeArray(sizesForTest, measures);
cout << "TripletTime NestetTime LNestedTime " << endl;
for (int m = 0; m < sizesForTest; ++m) {
int rows = pow(10, m + 1);
int cols = rows;
std::uniform_int_distribution<int> distentryrow(0, rows - 1);
std::uniform_int_distribution<int> distentrycol(0, cols - 1);
std::vector<T> tripletList;
SparseMatrix<double> mat1(rows, cols);
// SparseMatrix<double> mat2(rows,cols);
// SparseMatrix<double> mat3(rows,cols);
//generate sparsity pattern of matrix with 10% fill-in
tripletList.emplace_back(3, 0, 15);
for (int i = 0; i < rows; ++i)
for (int j = 0; j < cols; ++j) {
auto value = dist(gen); //generate random number
auto value2 = dist(gen); //generate random number
auto value3 = dist(gen); //generate random number
if (value < 0.05) {
auto rowindex = distentryrow(gen);
auto colindex = distentrycol(gen);
tripletList.emplace_back(rowindex, colindex, value); //if larger than treshold, insert it
//dublicate every third entry to mimic entries which appear more then once
if (value2 < 0.3333333333333333333333)
tripletList.emplace_back(rowindex, colindex, value);
//triple every forth entry to mimic entries which appear more then once
if (value3 < 0.25)
tripletList.emplace_back(rowindex, colindex, value);
}
}
tripletList.emplace_back(3, 0, 9);
int numberOfValues = tripletList.size();
//initially set all matrices from triplet to allocate space and sparsity pattern
mat1.setFromTriplets(tripletList.begin(), tripletList.end());
// mat2.setFromTriplets(tripletList.begin(), tripletList.end());
// mat3.setFromTriplets(tripletList.begin(), tripletList.end());
int nnz = mat1.nonZeros();
//reset all entries back to zero to fill in later
mat1.coeffs().setZero();
// mat2.coeffs().setZero();
// mat3.coeffs().setZero();
//document sorting of entries for repetative insertion
VectorXi internalIndex(numberOfValues);
vector<pair<int, int> > dummypair(numberOfValues);
VectorXd valuelist(numberOfValues);
for (int l = 0; l < numberOfValues; ++l) {
valuelist(l) = tripletList[l].value();
}
//init internalindex and dummy pair
internalIndex = Eigen::VectorXi::LinSpaced(numberOfValues, 0.0, numberOfValues - 1);
for (int i = 0; i < numberOfValues; ++i) {
dummypair[i].first = tripletList[i].col();
dummypair[i].second = tripletList[i].row();
}
auto start = high_resolution_clock::now();
// sort the vector internalIndex based on the dummypair
sort(internalIndex.begin(), internalIndex.end(), [&](int i, int j) {
return dummypair[i].first < dummypair[j].first ||
(dummypair[i].first == dummypair[j].first && dummypair[i].second < dummypair[j].second);
});
auto stop = high_resolution_clock::now();
timeArray(m, 3) = (stop - start) / 1000;
start = high_resolution_clock::now();
sort(dummypair.begin(), dummypair.end());
stop = high_resolution_clock::now();
timeArray(m, 4) = (stop - start) / 1000;
start = high_resolution_clock::now();
VectorXi dublicatecount(nnz);
dublicatecount.setOnes();
findDuplicates(dummypair, dublicatecount);
stop = high_resolution_clock::now();
timeArray(m, 5) = (stop - start) / 1000;
dummypair.clear();
//calculate vector containing all indices of triplet
//therefore vector[k] is the vectorXi containing the entries of triples which should be written at dof k
int indextriplet = 0;
int multiplicity = 0;
vector<VectorXi> listofentires(mat1.nonZeros());
for (int k = 0; k < mat1.nonZeros(); ++k) {
multiplicity = dublicatecount[k];
listofentires[k] = internalIndex.segment(indextriplet, multiplicity);
indextriplet += multiplicity;
}
//========================================
//Here the nonlinear analysis should start and everything beforehand is prepocessing
//Test1 from triplets
start = high_resolution_clock::now();
mat1.setFromTriplets(tripletList.begin(), tripletList.end());
stop = high_resolution_clock::now();
timeArray(m, 0) = (stop - start) / 1000;
mat1.coeffs().setZero();
//Test2 use internalIndex but calculate listofentires on the fly
indextriplet = 0;
start = high_resolution_clock::now();
for (int k = 0; k < mat1.nonZeros(); ++k) {
multiplicity = dublicatecount[k];
mat1.coeffs()[k] += valuelist(internalIndex.segment(indextriplet, multiplicity)).sum();
indextriplet += multiplicity;
}
stop = high_resolution_clock::now();
timeArray(m, 1) = (stop - start) / 1000;
mat1.coeffs().setZero();
//Test3 directly use listofentires
start = high_resolution_clock::now();
for (int k = 0; k < mat1.nonZeros(); ++k)
mat1.coeffs()[k] += valuelist(listofentires[k]).sum();
stop = high_resolution_clock::now();
timeArray(m, 2) = (stop - start) / 1000;
std::ofstream file("test.txt");
if (file.is_open()) {
file << mat1 << '\n';
}
cout << "Size: " << rows << ": ";
for (int n = 0; n < measures; ++n)
cout << timeArray(m, n).count() << " ";
cout << endl;
}
return 0;
}
Size Triplet Nested LessNested Sort_intIndex Sort_dum_pair findDuplica
10 1e-06 1e-06 2e-06 1e-06 1e-06 1e-06
100 2.8e-05 4e-06 1.4e-05 5e-05 4.2e-05 1e-05
1000 0.003 0.000416 0.001489 0.01012 0.00627 0.000635
10000 0.426 0.093911 0.48912 1.5389 0.780676 0.061881
100000 337.799 99.0801 37.3656 292.397 87.4488 0.79996