// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2022 Julian Kent // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_SPARSEINVERSE_H #define EIGEN_SPARSEINVERSE_H // IWYU pragma: private #include "./InternalHeaderCheck.h" #include "../../../../Eigen/Sparse" #include "../../../../Eigen/SparseLU" namespace Eigen { /** * @brief Kahan algorithm based accumulator * * The Kahan sum algorithm guarantees to bound the error from floating point * accumulation to a fixed value, regardless of the number of accumulations * performed. Naive accumulation accumulates errors O(N), and pairwise O(logN). * However pairwise also requires O(logN) memory while Kahan summation requires * O(1) memory, but 4x the operations / latency. * * NB! Do not enable associative math optimizations, they may cause the Kahan * summation to be optimized out leaving you with naive summation again. * */ template class KahanSum { // Straightforward Kahan summation for accurate accumulation of a sum of numbers Scalar _sum{}; Scalar _correction{}; public: Scalar value() { return _sum; } void operator+=(Scalar increment) { const Scalar correctedIncrement = increment + _correction; const Scalar previousSum = _sum; _sum += correctedIncrement; _correction = correctedIncrement - (_sum - previousSum); } }; template class FABSum { // https://epubs.siam.org/doi/pdf/10.1137/19M1257780 // Fast and Accurate Blocked Summation // Uses naive summation for the fast sum, and Kahan summation for the accurate sum // Theoretically SIMD sum could be changed to a tree sum which would improve accuracy // over naive summation KahanSum _totalSum; Matrix _block; Index _blockUsed{}; public: Scalar value() { return _block.topRows(_blockUsed).sum() + _totalSum.value(); } void operator+=(Scalar increment) { _block(_blockUsed++, 0) = increment; if (_blockUsed == Width) { _totalSum += _block.sum(); _blockUsed = 0; } } }; /** * @brief computes an accurate dot product on two sparse vectors * * Uses an accurate summation algorithm for the accumulator in order to * compute an accurate dot product for two sparse vectors. * */ template typename Derived::Scalar accurateDot(const SparseMatrixBase& A, const SparseMatrixBase& other) { typedef typename Derived::Scalar Scalar; EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived) static_assert(internal::is_same::value, "mismatched types"); internal::evaluator thisEval(A.derived()); typename Derived::ReverseInnerIterator i(thisEval, 0); internal::evaluator otherEval(other.derived()); typename OtherDerived::ReverseInnerIterator j(otherEval, 0); FABSum res; while (i && j) { if (i.index() == j.index()) { res += numext::conj(i.value()) * j.value(); --i; --j; } else if (i.index() > j.index()) --i; else --j; } return res.value(); } /** * @brief calculate sparse subset of inverse of sparse matrix * * This class returns a sparse subset of the inverse of the input matrix. * The nonzeros correspond to the nonzeros of the input, plus any additional * elements required due to fill-in of the internal LU factorization. This is * is minimized via a applying a fill-reducing permutation as part of the LU * factorization. * * If there are specific entries of the input matrix which you need inverse * values for, which are zero for the input, you need to insert entries into * the input sparse matrix for them to be calculated. * * Due to the sensitive nature of matrix inversion, particularly on large * matrices which are made possible via sparsity, high accuracy dot products * based on Kahan summation are used to reduce numerical error. If you still * encounter numerical errors you may with to equilibrate your matrix before * calculating the inverse, as well as making sure it is actually full rank. */ template class SparseInverse { public: typedef SparseMatrix MatrixType; typedef SparseMatrix RowMatrixType; SparseInverse() {} /** * @brief This Constructor is for if you already have a factored SparseLU and would like to use it to calculate a * sparse inverse. * * Just call this constructor with your already factored SparseLU class and you can directly call the .inverse() * method to get the result. */ SparseInverse(const SparseLU& slu) { _result = computeInverse(slu); } /** * @brief Calculate the sparse inverse from a given sparse input */ SparseInverse& compute(const SparseMatrix& A) { SparseLU slu; slu.compute(A); _result = computeInverse(slu); return *this; } /** * @brief return the already-calculated sparse inverse, or a 0x0 matrix if it could not be computed */ const MatrixType& inverse() const { return _result; } /** * @brief Internal function to calculate the sparse inverse in a functional way * @return A sparse inverse representation, or, if the decomposition didn't complete, a 0x0 matrix. */ static MatrixType computeInverse(const SparseLU& slu) { if (slu.info() != Success) { return MatrixType(0, 0); } // Extract from SparseLU and decompose into L, inverse D and U terms Matrix invD; RowMatrixType Upper; { RowMatrixType DU = slu.matrixU().toSparse(); invD = DU.diagonal().cwiseInverse(); Upper = (invD.asDiagonal() * DU).template triangularView(); } MatrixType Lower = slu.matrixL().toSparse().template triangularView(); // Compute the inverse and reapply the permutation matrix from the LU decomposition return slu.colsPermutation().transpose() * computeInverse(Upper, invD, Lower) * slu.rowsPermutation(); } /** * @brief Internal function to calculate the inverse from strictly upper, diagonal and strictly lower components */ static MatrixType computeInverse(const RowMatrixType& Upper, const Matrix& inverseDiagonal, const MatrixType& Lower) { // Calculate the 'minimal set', which is the nonzeros of (L+U).transpose() // It could be zeroed, but we will overwrite all non-zeros anyways. MatrixType colInv = Lower.transpose().template triangularView(); colInv += Upper.transpose(); // We also need rowmajor representation in order to do efficient row-wise dot products RowMatrixType rowInv = Upper.transpose().template triangularView(); rowInv += Lower.transpose(); // Use the Takahashi algorithm to build the supporting elements of the inverse // upwards and to the left, from the bottom right element, 1 col/row at a time for (Index recurseLevel = Upper.cols() - 1; recurseLevel >= 0; recurseLevel--) { const auto& col = Lower.col(recurseLevel); const auto& row = Upper.row(recurseLevel); // Calculate the inverse values for the nonzeros in this column typename MatrixType::ReverseInnerIterator colIter(colInv, recurseLevel); for (; recurseLevel < colIter.index(); --colIter) { const Scalar element = -accurateDot(col, rowInv.row(colIter.index())); colIter.valueRef() = element; rowInv.coeffRef(colIter.index(), recurseLevel) = element; } // Calculate the inverse values for the nonzeros in this row typename RowMatrixType::ReverseInnerIterator rowIter(rowInv, recurseLevel); for (; recurseLevel < rowIter.index(); --rowIter) { const Scalar element = -accurateDot(row, colInv.col(rowIter.index())); rowIter.valueRef() = element; colInv.coeffRef(recurseLevel, rowIter.index()) = element; } // And finally the diagonal, which corresponds to both row and col iterator now const Scalar diag = inverseDiagonal(recurseLevel) - accurateDot(row, colInv.col(recurseLevel)); rowIter.valueRef() = diag; colIter.valueRef() = diag; } return colInv; } private: MatrixType _result; }; } // namespace Eigen #endif