xref: /aosp_15_r20/external/eigen/Eigen/src/LU/PartialPivLU.h (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
1*bf2c3715SXin Li // This file is part of Eigen, a lightweight C++ template library
2*bf2c3715SXin Li // for linear algebra.
3*bf2c3715SXin Li //
4*bf2c3715SXin Li // Copyright (C) 2006-2009 Benoit Jacob <[email protected]>
5*bf2c3715SXin Li // Copyright (C) 2009 Gael Guennebaud <[email protected]>
6*bf2c3715SXin Li //
7*bf2c3715SXin Li // This Source Code Form is subject to the terms of the Mozilla
8*bf2c3715SXin Li // Public License v. 2.0. If a copy of the MPL was not distributed
9*bf2c3715SXin Li // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10*bf2c3715SXin Li 
11*bf2c3715SXin Li #ifndef EIGEN_PARTIALLU_H
12*bf2c3715SXin Li #define EIGEN_PARTIALLU_H
13*bf2c3715SXin Li 
14*bf2c3715SXin Li namespace Eigen {
15*bf2c3715SXin Li 
16*bf2c3715SXin Li namespace internal {
17*bf2c3715SXin Li template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
18*bf2c3715SXin Li  : traits<_MatrixType>
19*bf2c3715SXin Li {
20*bf2c3715SXin Li   typedef MatrixXpr XprKind;
21*bf2c3715SXin Li   typedef SolverStorage StorageKind;
22*bf2c3715SXin Li   typedef int StorageIndex;
23*bf2c3715SXin Li   typedef traits<_MatrixType> BaseTraits;
24*bf2c3715SXin Li   enum {
25*bf2c3715SXin Li     Flags = BaseTraits::Flags & RowMajorBit,
26*bf2c3715SXin Li     CoeffReadCost = Dynamic
27*bf2c3715SXin Li   };
28*bf2c3715SXin Li };
29*bf2c3715SXin Li 
30*bf2c3715SXin Li template<typename T,typename Derived>
31*bf2c3715SXin Li struct enable_if_ref;
32*bf2c3715SXin Li // {
33*bf2c3715SXin Li //   typedef Derived type;
34*bf2c3715SXin Li // };
35*bf2c3715SXin Li 
36*bf2c3715SXin Li template<typename T,typename Derived>
37*bf2c3715SXin Li struct enable_if_ref<Ref<T>,Derived> {
38*bf2c3715SXin Li   typedef Derived type;
39*bf2c3715SXin Li };
40*bf2c3715SXin Li 
41*bf2c3715SXin Li } // end namespace internal
42*bf2c3715SXin Li 
43*bf2c3715SXin Li /** \ingroup LU_Module
44*bf2c3715SXin Li   *
45*bf2c3715SXin Li   * \class PartialPivLU
46*bf2c3715SXin Li   *
47*bf2c3715SXin Li   * \brief LU decomposition of a matrix with partial pivoting, and related features
48*bf2c3715SXin Li   *
49*bf2c3715SXin Li   * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
50*bf2c3715SXin Li   *
51*bf2c3715SXin Li   * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
52*bf2c3715SXin Li   * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
53*bf2c3715SXin Li   * is a permutation matrix.
54*bf2c3715SXin Li   *
55*bf2c3715SXin Li   * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible
56*bf2c3715SXin Li   * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class
57*bf2c3715SXin Li   * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the
58*bf2c3715SXin Li   * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.
59*bf2c3715SXin Li   *
60*bf2c3715SXin Li   * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided
61*bf2c3715SXin Li   * by class FullPivLU.
62*bf2c3715SXin Li   *
63*bf2c3715SXin Li   * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
64*bf2c3715SXin Li   * such as rank computation. If you need these features, use class FullPivLU.
65*bf2c3715SXin Li   *
66*bf2c3715SXin Li   * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses
67*bf2c3715SXin Li   * in the general case.
68*bf2c3715SXin Li   * On the other hand, it is \b not suitable to determine whether a given matrix is invertible.
69*bf2c3715SXin Li   *
70*bf2c3715SXin Li   * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
71*bf2c3715SXin Li   *
72*bf2c3715SXin Li   * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
73*bf2c3715SXin Li   *
74*bf2c3715SXin Li   * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
75*bf2c3715SXin Li   */
76*bf2c3715SXin Li template<typename _MatrixType> class PartialPivLU
77*bf2c3715SXin Li   : public SolverBase<PartialPivLU<_MatrixType> >
78*bf2c3715SXin Li {
79*bf2c3715SXin Li   public:
80*bf2c3715SXin Li 
81*bf2c3715SXin Li     typedef _MatrixType MatrixType;
82*bf2c3715SXin Li     typedef SolverBase<PartialPivLU> Base;
83*bf2c3715SXin Li     friend class SolverBase<PartialPivLU>;
84*bf2c3715SXin Li 
85*bf2c3715SXin Li     EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
86*bf2c3715SXin Li     enum {
87*bf2c3715SXin Li       MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
88*bf2c3715SXin Li       MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
89*bf2c3715SXin Li     };
90*bf2c3715SXin Li     typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
91*bf2c3715SXin Li     typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
92*bf2c3715SXin Li     typedef typename MatrixType::PlainObject PlainObject;
93*bf2c3715SXin Li 
94*bf2c3715SXin Li     /**
95*bf2c3715SXin Li       * \brief Default Constructor.
96*bf2c3715SXin Li       *
97*bf2c3715SXin Li       * The default constructor is useful in cases in which the user intends to
98*bf2c3715SXin Li       * perform decompositions via PartialPivLU::compute(const MatrixType&).
99*bf2c3715SXin Li       */
100*bf2c3715SXin Li     PartialPivLU();
101*bf2c3715SXin Li 
102*bf2c3715SXin Li     /** \brief Default Constructor with memory preallocation
103*bf2c3715SXin Li       *
104*bf2c3715SXin Li       * Like the default constructor but with preallocation of the internal data
105*bf2c3715SXin Li       * according to the specified problem \a size.
106*bf2c3715SXin Li       * \sa PartialPivLU()
107*bf2c3715SXin Li       */
108*bf2c3715SXin Li     explicit PartialPivLU(Index size);
109*bf2c3715SXin Li 
110*bf2c3715SXin Li     /** Constructor.
111*bf2c3715SXin Li       *
112*bf2c3715SXin Li       * \param matrix the matrix of which to compute the LU decomposition.
113*bf2c3715SXin Li       *
114*bf2c3715SXin Li       * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
115*bf2c3715SXin Li       * If you need to deal with non-full rank, use class FullPivLU instead.
116*bf2c3715SXin Li       */
117*bf2c3715SXin Li     template<typename InputType>
118*bf2c3715SXin Li     explicit PartialPivLU(const EigenBase<InputType>& matrix);
119*bf2c3715SXin Li 
120*bf2c3715SXin Li     /** Constructor for \link InplaceDecomposition inplace decomposition \endlink
121*bf2c3715SXin Li       *
122*bf2c3715SXin Li       * \param matrix the matrix of which to compute the LU decomposition.
123*bf2c3715SXin Li       *
124*bf2c3715SXin Li       * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
125*bf2c3715SXin Li       * If you need to deal with non-full rank, use class FullPivLU instead.
126*bf2c3715SXin Li       */
127*bf2c3715SXin Li     template<typename InputType>
128*bf2c3715SXin Li     explicit PartialPivLU(EigenBase<InputType>& matrix);
129*bf2c3715SXin Li 
130*bf2c3715SXin Li     template<typename InputType>
131*bf2c3715SXin Li     PartialPivLU& compute(const EigenBase<InputType>& matrix) {
132*bf2c3715SXin Li       m_lu = matrix.derived();
133*bf2c3715SXin Li       compute();
134*bf2c3715SXin Li       return *this;
135*bf2c3715SXin Li     }
136*bf2c3715SXin Li 
137*bf2c3715SXin Li     /** \returns the LU decomposition matrix: the upper-triangular part is U, the
138*bf2c3715SXin Li       * unit-lower-triangular part is L (at least for square matrices; in the non-square
139*bf2c3715SXin Li       * case, special care is needed, see the documentation of class FullPivLU).
140*bf2c3715SXin Li       *
141*bf2c3715SXin Li       * \sa matrixL(), matrixU()
142*bf2c3715SXin Li       */
143*bf2c3715SXin Li     inline const MatrixType& matrixLU() const
144*bf2c3715SXin Li     {
145*bf2c3715SXin Li       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
146*bf2c3715SXin Li       return m_lu;
147*bf2c3715SXin Li     }
148*bf2c3715SXin Li 
149*bf2c3715SXin Li     /** \returns the permutation matrix P.
150*bf2c3715SXin Li       */
151*bf2c3715SXin Li     inline const PermutationType& permutationP() const
152*bf2c3715SXin Li     {
153*bf2c3715SXin Li       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
154*bf2c3715SXin Li       return m_p;
155*bf2c3715SXin Li     }
156*bf2c3715SXin Li 
157*bf2c3715SXin Li     #ifdef EIGEN_PARSED_BY_DOXYGEN
158*bf2c3715SXin Li     /** This method returns the solution x to the equation Ax=b, where A is the matrix of which
159*bf2c3715SXin Li       * *this is the LU decomposition.
160*bf2c3715SXin Li       *
161*bf2c3715SXin Li       * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
162*bf2c3715SXin Li       *          the only requirement in order for the equation to make sense is that
163*bf2c3715SXin Li       *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
164*bf2c3715SXin Li       *
165*bf2c3715SXin Li       * \returns the solution.
166*bf2c3715SXin Li       *
167*bf2c3715SXin Li       * Example: \include PartialPivLU_solve.cpp
168*bf2c3715SXin Li       * Output: \verbinclude PartialPivLU_solve.out
169*bf2c3715SXin Li       *
170*bf2c3715SXin Li       * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution
171*bf2c3715SXin Li       * theoretically exists and is unique regardless of b.
172*bf2c3715SXin Li       *
173*bf2c3715SXin Li       * \sa TriangularView::solve(), inverse(), computeInverse()
174*bf2c3715SXin Li       */
175*bf2c3715SXin Li     template<typename Rhs>
176*bf2c3715SXin Li     inline const Solve<PartialPivLU, Rhs>
177*bf2c3715SXin Li     solve(const MatrixBase<Rhs>& b) const;
178*bf2c3715SXin Li     #endif
179*bf2c3715SXin Li 
180*bf2c3715SXin Li     /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
181*bf2c3715SXin Li         the LU decomposition.
182*bf2c3715SXin Li       */
183*bf2c3715SXin Li     inline RealScalar rcond() const
184*bf2c3715SXin Li     {
185*bf2c3715SXin Li       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
186*bf2c3715SXin Li       return internal::rcond_estimate_helper(m_l1_norm, *this);
187*bf2c3715SXin Li     }
188*bf2c3715SXin Li 
189*bf2c3715SXin Li     /** \returns the inverse of the matrix of which *this is the LU decomposition.
190*bf2c3715SXin Li       *
191*bf2c3715SXin Li       * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
192*bf2c3715SXin Li       *          invertibility, use class FullPivLU instead.
193*bf2c3715SXin Li       *
194*bf2c3715SXin Li       * \sa MatrixBase::inverse(), LU::inverse()
195*bf2c3715SXin Li       */
196*bf2c3715SXin Li     inline const Inverse<PartialPivLU> inverse() const
197*bf2c3715SXin Li     {
198*bf2c3715SXin Li       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
199*bf2c3715SXin Li       return Inverse<PartialPivLU>(*this);
200*bf2c3715SXin Li     }
201*bf2c3715SXin Li 
202*bf2c3715SXin Li     /** \returns the determinant of the matrix of which
203*bf2c3715SXin Li       * *this is the LU decomposition. It has only linear complexity
204*bf2c3715SXin Li       * (that is, O(n) where n is the dimension of the square matrix)
205*bf2c3715SXin Li       * as the LU decomposition has already been computed.
206*bf2c3715SXin Li       *
207*bf2c3715SXin Li       * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
208*bf2c3715SXin Li       *       optimized paths.
209*bf2c3715SXin Li       *
210*bf2c3715SXin Li       * \warning a determinant can be very big or small, so for matrices
211*bf2c3715SXin Li       * of large enough dimension, there is a risk of overflow/underflow.
212*bf2c3715SXin Li       *
213*bf2c3715SXin Li       * \sa MatrixBase::determinant()
214*bf2c3715SXin Li       */
215*bf2c3715SXin Li     Scalar determinant() const;
216*bf2c3715SXin Li 
217*bf2c3715SXin Li     MatrixType reconstructedMatrix() const;
218*bf2c3715SXin Li 
219*bf2c3715SXin Li     EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); }
220*bf2c3715SXin Li     EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); }
221*bf2c3715SXin Li 
222*bf2c3715SXin Li     #ifndef EIGEN_PARSED_BY_DOXYGEN
223*bf2c3715SXin Li     template<typename RhsType, typename DstType>
224*bf2c3715SXin Li     EIGEN_DEVICE_FUNC
225*bf2c3715SXin Li     void _solve_impl(const RhsType &rhs, DstType &dst) const {
226*bf2c3715SXin Li      /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
227*bf2c3715SXin Li       * So we proceed as follows:
228*bf2c3715SXin Li       * Step 1: compute c = Pb.
229*bf2c3715SXin Li       * Step 2: replace c by the solution x to Lx = c.
230*bf2c3715SXin Li       * Step 3: replace c by the solution x to Ux = c.
231*bf2c3715SXin Li       */
232*bf2c3715SXin Li 
233*bf2c3715SXin Li       // Step 1
234*bf2c3715SXin Li       dst = permutationP() * rhs;
235*bf2c3715SXin Li 
236*bf2c3715SXin Li       // Step 2
237*bf2c3715SXin Li       m_lu.template triangularView<UnitLower>().solveInPlace(dst);
238*bf2c3715SXin Li 
239*bf2c3715SXin Li       // Step 3
240*bf2c3715SXin Li       m_lu.template triangularView<Upper>().solveInPlace(dst);
241*bf2c3715SXin Li     }
242*bf2c3715SXin Li 
243*bf2c3715SXin Li     template<bool Conjugate, typename RhsType, typename DstType>
244*bf2c3715SXin Li     EIGEN_DEVICE_FUNC
245*bf2c3715SXin Li     void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {
246*bf2c3715SXin Li      /* The decomposition PA = LU can be rewritten as A^T = U^T L^T P.
247*bf2c3715SXin Li       * So we proceed as follows:
248*bf2c3715SXin Li       * Step 1: compute c as the solution to L^T c = b
249*bf2c3715SXin Li       * Step 2: replace c by the solution x to U^T x = c.
250*bf2c3715SXin Li       * Step 3: update  c = P^-1 c.
251*bf2c3715SXin Li       */
252*bf2c3715SXin Li 
253*bf2c3715SXin Li       eigen_assert(rhs.rows() == m_lu.cols());
254*bf2c3715SXin Li 
255*bf2c3715SXin Li       // Step 1
256*bf2c3715SXin Li       dst = m_lu.template triangularView<Upper>().transpose()
257*bf2c3715SXin Li                 .template conjugateIf<Conjugate>().solve(rhs);
258*bf2c3715SXin Li       // Step 2
259*bf2c3715SXin Li       m_lu.template triangularView<UnitLower>().transpose()
260*bf2c3715SXin Li           .template conjugateIf<Conjugate>().solveInPlace(dst);
261*bf2c3715SXin Li       // Step 3
262*bf2c3715SXin Li       dst = permutationP().transpose() * dst;
263*bf2c3715SXin Li     }
264*bf2c3715SXin Li     #endif
265*bf2c3715SXin Li 
266*bf2c3715SXin Li   protected:
267*bf2c3715SXin Li 
268*bf2c3715SXin Li     static void check_template_parameters()
269*bf2c3715SXin Li     {
270*bf2c3715SXin Li       EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
271*bf2c3715SXin Li     }
272*bf2c3715SXin Li 
273*bf2c3715SXin Li     void compute();
274*bf2c3715SXin Li 
275*bf2c3715SXin Li     MatrixType m_lu;
276*bf2c3715SXin Li     PermutationType m_p;
277*bf2c3715SXin Li     TranspositionType m_rowsTranspositions;
278*bf2c3715SXin Li     RealScalar m_l1_norm;
279*bf2c3715SXin Li     signed char m_det_p;
280*bf2c3715SXin Li     bool m_isInitialized;
281*bf2c3715SXin Li };
282*bf2c3715SXin Li 
283*bf2c3715SXin Li template<typename MatrixType>
284*bf2c3715SXin Li PartialPivLU<MatrixType>::PartialPivLU()
285*bf2c3715SXin Li   : m_lu(),
286*bf2c3715SXin Li     m_p(),
287*bf2c3715SXin Li     m_rowsTranspositions(),
288*bf2c3715SXin Li     m_l1_norm(0),
289*bf2c3715SXin Li     m_det_p(0),
290*bf2c3715SXin Li     m_isInitialized(false)
291*bf2c3715SXin Li {
292*bf2c3715SXin Li }
293*bf2c3715SXin Li 
294*bf2c3715SXin Li template<typename MatrixType>
295*bf2c3715SXin Li PartialPivLU<MatrixType>::PartialPivLU(Index size)
296*bf2c3715SXin Li   : m_lu(size, size),
297*bf2c3715SXin Li     m_p(size),
298*bf2c3715SXin Li     m_rowsTranspositions(size),
299*bf2c3715SXin Li     m_l1_norm(0),
300*bf2c3715SXin Li     m_det_p(0),
301*bf2c3715SXin Li     m_isInitialized(false)
302*bf2c3715SXin Li {
303*bf2c3715SXin Li }
304*bf2c3715SXin Li 
305*bf2c3715SXin Li template<typename MatrixType>
306*bf2c3715SXin Li template<typename InputType>
307*bf2c3715SXin Li PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
308*bf2c3715SXin Li   : m_lu(matrix.rows(),matrix.cols()),
309*bf2c3715SXin Li     m_p(matrix.rows()),
310*bf2c3715SXin Li     m_rowsTranspositions(matrix.rows()),
311*bf2c3715SXin Li     m_l1_norm(0),
312*bf2c3715SXin Li     m_det_p(0),
313*bf2c3715SXin Li     m_isInitialized(false)
314*bf2c3715SXin Li {
315*bf2c3715SXin Li   compute(matrix.derived());
316*bf2c3715SXin Li }
317*bf2c3715SXin Li 
318*bf2c3715SXin Li template<typename MatrixType>
319*bf2c3715SXin Li template<typename InputType>
320*bf2c3715SXin Li PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
321*bf2c3715SXin Li   : m_lu(matrix.derived()),
322*bf2c3715SXin Li     m_p(matrix.rows()),
323*bf2c3715SXin Li     m_rowsTranspositions(matrix.rows()),
324*bf2c3715SXin Li     m_l1_norm(0),
325*bf2c3715SXin Li     m_det_p(0),
326*bf2c3715SXin Li     m_isInitialized(false)
327*bf2c3715SXin Li {
328*bf2c3715SXin Li   compute();
329*bf2c3715SXin Li }
330*bf2c3715SXin Li 
331*bf2c3715SXin Li namespace internal {
332*bf2c3715SXin Li 
333*bf2c3715SXin Li /** \internal This is the blocked version of fullpivlu_unblocked() */
334*bf2c3715SXin Li template<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic>
335*bf2c3715SXin Li struct partial_lu_impl
336*bf2c3715SXin Li {
337*bf2c3715SXin Li   static const int UnBlockedBound = 16;
338*bf2c3715SXin Li   static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;
339*bf2c3715SXin Li   static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;
340*bf2c3715SXin Li   // Remaining rows and columns at compile-time:
341*bf2c3715SXin Li   static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic;
342*bf2c3715SXin Li   static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic;
343*bf2c3715SXin Li   typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;
344*bf2c3715SXin Li   typedef Ref<MatrixType> MatrixTypeRef;
345*bf2c3715SXin Li   typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;
346*bf2c3715SXin Li   typedef typename MatrixType::RealScalar RealScalar;
347*bf2c3715SXin Li 
348*bf2c3715SXin Li   /** \internal performs the LU decomposition in-place of the matrix \a lu
349*bf2c3715SXin Li     * using an unblocked algorithm.
350*bf2c3715SXin Li     *
351*bf2c3715SXin Li     * In addition, this function returns the row transpositions in the
352*bf2c3715SXin Li     * vector \a row_transpositions which must have a size equal to the number
353*bf2c3715SXin Li     * of columns of the matrix \a lu, and an integer \a nb_transpositions
354*bf2c3715SXin Li     * which returns the actual number of transpositions.
355*bf2c3715SXin Li     *
356*bf2c3715SXin Li     * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
357*bf2c3715SXin Li     */
358*bf2c3715SXin Li   static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
359*bf2c3715SXin Li   {
360*bf2c3715SXin Li     typedef scalar_score_coeff_op<Scalar> Scoring;
361*bf2c3715SXin Li     typedef typename Scoring::result_type Score;
362*bf2c3715SXin Li     const Index rows = lu.rows();
363*bf2c3715SXin Li     const Index cols = lu.cols();
364*bf2c3715SXin Li     const Index size = (std::min)(rows,cols);
365*bf2c3715SXin Li     // For small compile-time matrices it is worth processing the last row separately:
366*bf2c3715SXin Li     //  speedup: +100% for 2x2, +10% for others.
367*bf2c3715SXin Li     const Index endk = UnBlockedAtCompileTime ? size-1 : size;
368*bf2c3715SXin Li     nb_transpositions = 0;
369*bf2c3715SXin Li     Index first_zero_pivot = -1;
370*bf2c3715SXin Li     for(Index k = 0; k < endk; ++k)
371*bf2c3715SXin Li     {
372*bf2c3715SXin Li       int rrows = internal::convert_index<int>(rows-k-1);
373*bf2c3715SXin Li       int rcols = internal::convert_index<int>(cols-k-1);
374*bf2c3715SXin Li 
375*bf2c3715SXin Li       Index row_of_biggest_in_col;
376*bf2c3715SXin Li       Score biggest_in_corner
377*bf2c3715SXin Li         = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
378*bf2c3715SXin Li       row_of_biggest_in_col += k;
379*bf2c3715SXin Li 
380*bf2c3715SXin Li       row_transpositions[k] = PivIndex(row_of_biggest_in_col);
381*bf2c3715SXin Li 
382*bf2c3715SXin Li       if(biggest_in_corner != Score(0))
383*bf2c3715SXin Li       {
384*bf2c3715SXin Li         if(k != row_of_biggest_in_col)
385*bf2c3715SXin Li         {
386*bf2c3715SXin Li           lu.row(k).swap(lu.row(row_of_biggest_in_col));
387*bf2c3715SXin Li           ++nb_transpositions;
388*bf2c3715SXin Li         }
389*bf2c3715SXin Li 
390*bf2c3715SXin Li         lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k);
391*bf2c3715SXin Li       }
392*bf2c3715SXin Li       else if(first_zero_pivot==-1)
393*bf2c3715SXin Li       {
394*bf2c3715SXin Li         // the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
395*bf2c3715SXin Li         // and continue the factorization such we still have A = PLU
396*bf2c3715SXin Li         first_zero_pivot = k;
397*bf2c3715SXin Li       }
398*bf2c3715SXin Li 
399*bf2c3715SXin Li       if(k<rows-1)
400*bf2c3715SXin Li         lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols));
401*bf2c3715SXin Li     }
402*bf2c3715SXin Li 
403*bf2c3715SXin Li     // special handling of the last entry
404*bf2c3715SXin Li     if(UnBlockedAtCompileTime)
405*bf2c3715SXin Li     {
406*bf2c3715SXin Li       Index k = endk;
407*bf2c3715SXin Li       row_transpositions[k] = PivIndex(k);
408*bf2c3715SXin Li       if (Scoring()(lu(k, k)) == Score(0) && first_zero_pivot == -1)
409*bf2c3715SXin Li         first_zero_pivot = k;
410*bf2c3715SXin Li     }
411*bf2c3715SXin Li 
412*bf2c3715SXin Li     return first_zero_pivot;
413*bf2c3715SXin Li   }
414*bf2c3715SXin Li 
415*bf2c3715SXin Li   /** \internal performs the LU decomposition in-place of the matrix represented
416*bf2c3715SXin Li     * by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a
417*bf2c3715SXin Li     * recursive, blocked algorithm.
418*bf2c3715SXin Li     *
419*bf2c3715SXin Li     * In addition, this function returns the row transpositions in the
420*bf2c3715SXin Li     * vector \a row_transpositions which must have a size equal to the number
421*bf2c3715SXin Li     * of columns of the matrix \a lu, and an integer \a nb_transpositions
422*bf2c3715SXin Li     * which returns the actual number of transpositions.
423*bf2c3715SXin Li     *
424*bf2c3715SXin Li     * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
425*bf2c3715SXin Li     *
426*bf2c3715SXin Li     * \note This very low level interface using pointers, etc. is to:
427*bf2c3715SXin Li     *   1 - reduce the number of instantiations to the strict minimum
428*bf2c3715SXin Li     *   2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > >
429*bf2c3715SXin Li     */
430*bf2c3715SXin Li   static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
431*bf2c3715SXin Li   {
432*bf2c3715SXin Li     MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride));
433*bf2c3715SXin Li 
434*bf2c3715SXin Li     const Index size = (std::min)(rows,cols);
435*bf2c3715SXin Li 
436*bf2c3715SXin Li     // if the matrix is too small, no blocking:
437*bf2c3715SXin Li     if(UnBlockedAtCompileTime || size<=UnBlockedBound)
438*bf2c3715SXin Li     {
439*bf2c3715SXin Li       return unblocked_lu(lu, row_transpositions, nb_transpositions);
440*bf2c3715SXin Li     }
441*bf2c3715SXin Li 
442*bf2c3715SXin Li     // automatically adjust the number of subdivisions to the size
443*bf2c3715SXin Li     // of the matrix so that there is enough sub blocks:
444*bf2c3715SXin Li     Index blockSize;
445*bf2c3715SXin Li     {
446*bf2c3715SXin Li       blockSize = size/8;
447*bf2c3715SXin Li       blockSize = (blockSize/16)*16;
448*bf2c3715SXin Li       blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);
449*bf2c3715SXin Li     }
450*bf2c3715SXin Li 
451*bf2c3715SXin Li     nb_transpositions = 0;
452*bf2c3715SXin Li     Index first_zero_pivot = -1;
453*bf2c3715SXin Li     for(Index k = 0; k < size; k+=blockSize)
454*bf2c3715SXin Li     {
455*bf2c3715SXin Li       Index bs = (std::min)(size-k,blockSize); // actual size of the block
456*bf2c3715SXin Li       Index trows = rows - k - bs; // trailing rows
457*bf2c3715SXin Li       Index tsize = size - k - bs; // trailing size
458*bf2c3715SXin Li 
459*bf2c3715SXin Li       // partition the matrix:
460*bf2c3715SXin Li       //                          A00 | A01 | A02
461*bf2c3715SXin Li       // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12
462*bf2c3715SXin Li       //                          A20 | A21 | A22
463*bf2c3715SXin Li       BlockType A_0 = lu.block(0,0,rows,k);
464*bf2c3715SXin Li       BlockType A_2 = lu.block(0,k+bs,rows,tsize);
465*bf2c3715SXin Li       BlockType A11 = lu.block(k,k,bs,bs);
466*bf2c3715SXin Li       BlockType A12 = lu.block(k,k+bs,bs,tsize);
467*bf2c3715SXin Li       BlockType A21 = lu.block(k+bs,k,trows,bs);
468*bf2c3715SXin Li       BlockType A22 = lu.block(k+bs,k+bs,trows,tsize);
469*bf2c3715SXin Li 
470*bf2c3715SXin Li       PivIndex nb_transpositions_in_panel;
471*bf2c3715SXin Li       // recursively call the blocked LU algorithm on [A11^T A21^T]^T
472*bf2c3715SXin Li       // with a very small blocking size:
473*bf2c3715SXin Li       Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
474*bf2c3715SXin Li                    row_transpositions+k, nb_transpositions_in_panel, 16);
475*bf2c3715SXin Li       if(ret>=0 && first_zero_pivot==-1)
476*bf2c3715SXin Li         first_zero_pivot = k+ret;
477*bf2c3715SXin Li 
478*bf2c3715SXin Li       nb_transpositions += nb_transpositions_in_panel;
479*bf2c3715SXin Li       // update permutations and apply them to A_0
480*bf2c3715SXin Li       for(Index i=k; i<k+bs; ++i)
481*bf2c3715SXin Li       {
482*bf2c3715SXin Li         Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
483*bf2c3715SXin Li         A_0.row(i).swap(A_0.row(piv));
484*bf2c3715SXin Li       }
485*bf2c3715SXin Li 
486*bf2c3715SXin Li       if(trows)
487*bf2c3715SXin Li       {
488*bf2c3715SXin Li         // apply permutations to A_2
489*bf2c3715SXin Li         for(Index i=k;i<k+bs; ++i)
490*bf2c3715SXin Li           A_2.row(i).swap(A_2.row(row_transpositions[i]));
491*bf2c3715SXin Li 
492*bf2c3715SXin Li         // A12 = A11^-1 A12
493*bf2c3715SXin Li         A11.template triangularView<UnitLower>().solveInPlace(A12);
494*bf2c3715SXin Li 
495*bf2c3715SXin Li         A22.noalias() -= A21 * A12;
496*bf2c3715SXin Li       }
497*bf2c3715SXin Li     }
498*bf2c3715SXin Li     return first_zero_pivot;
499*bf2c3715SXin Li   }
500*bf2c3715SXin Li };
501*bf2c3715SXin Li 
502*bf2c3715SXin Li /** \internal performs the LU decomposition with partial pivoting in-place.
503*bf2c3715SXin Li   */
504*bf2c3715SXin Li template<typename MatrixType, typename TranspositionType>
505*bf2c3715SXin Li void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)
506*bf2c3715SXin Li {
507*bf2c3715SXin Li   // Special-case of zero matrix.
508*bf2c3715SXin Li   if (lu.rows() == 0 || lu.cols() == 0) {
509*bf2c3715SXin Li     nb_transpositions = 0;
510*bf2c3715SXin Li     return;
511*bf2c3715SXin Li   }
512*bf2c3715SXin Li   eigen_assert(lu.cols() == row_transpositions.size());
513*bf2c3715SXin Li   eigen_assert(row_transpositions.size() < 2 || (&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
514*bf2c3715SXin Li 
515*bf2c3715SXin Li   partial_lu_impl
516*bf2c3715SXin Li     < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor,
517*bf2c3715SXin Li       typename TranspositionType::StorageIndex,
518*bf2c3715SXin Li       EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)>
519*bf2c3715SXin Li     ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
520*bf2c3715SXin Li }
521*bf2c3715SXin Li 
522*bf2c3715SXin Li } // end namespace internal
523*bf2c3715SXin Li 
524*bf2c3715SXin Li template<typename MatrixType>
525*bf2c3715SXin Li void PartialPivLU<MatrixType>::compute()
526*bf2c3715SXin Li {
527*bf2c3715SXin Li   check_template_parameters();
528*bf2c3715SXin Li 
529*bf2c3715SXin Li   // the row permutation is stored as int indices, so just to be sure:
530*bf2c3715SXin Li   eigen_assert(m_lu.rows()<NumTraits<int>::highest());
531*bf2c3715SXin Li 
532*bf2c3715SXin Li   if(m_lu.cols()>0)
533*bf2c3715SXin Li     m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
534*bf2c3715SXin Li   else
535*bf2c3715SXin Li     m_l1_norm = RealScalar(0);
536*bf2c3715SXin Li 
537*bf2c3715SXin Li   eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
538*bf2c3715SXin Li   const Index size = m_lu.rows();
539*bf2c3715SXin Li 
540*bf2c3715SXin Li   m_rowsTranspositions.resize(size);
541*bf2c3715SXin Li 
542*bf2c3715SXin Li   typename TranspositionType::StorageIndex nb_transpositions;
543*bf2c3715SXin Li   internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
544*bf2c3715SXin Li   m_det_p = (nb_transpositions%2) ? -1 : 1;
545*bf2c3715SXin Li 
546*bf2c3715SXin Li   m_p = m_rowsTranspositions;
547*bf2c3715SXin Li 
548*bf2c3715SXin Li   m_isInitialized = true;
549*bf2c3715SXin Li }
550*bf2c3715SXin Li 
551*bf2c3715SXin Li template<typename MatrixType>
552*bf2c3715SXin Li typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
553*bf2c3715SXin Li {
554*bf2c3715SXin Li   eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
555*bf2c3715SXin Li   return Scalar(m_det_p) * m_lu.diagonal().prod();
556*bf2c3715SXin Li }
557*bf2c3715SXin Li 
558*bf2c3715SXin Li /** \returns the matrix represented by the decomposition,
559*bf2c3715SXin Li  * i.e., it returns the product: P^{-1} L U.
560*bf2c3715SXin Li  * This function is provided for debug purpose. */
561*bf2c3715SXin Li template<typename MatrixType>
562*bf2c3715SXin Li MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
563*bf2c3715SXin Li {
564*bf2c3715SXin Li   eigen_assert(m_isInitialized && "LU is not initialized.");
565*bf2c3715SXin Li   // LU
566*bf2c3715SXin Li   MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
567*bf2c3715SXin Li                  * m_lu.template triangularView<Upper>();
568*bf2c3715SXin Li 
569*bf2c3715SXin Li   // P^{-1}(LU)
570*bf2c3715SXin Li   res = m_p.inverse() * res;
571*bf2c3715SXin Li 
572*bf2c3715SXin Li   return res;
573*bf2c3715SXin Li }
574*bf2c3715SXin Li 
575*bf2c3715SXin Li /***** Implementation details *****************************************************/
576*bf2c3715SXin Li 
577*bf2c3715SXin Li namespace internal {
578*bf2c3715SXin Li 
579*bf2c3715SXin Li /***** Implementation of inverse() *****************************************************/
580*bf2c3715SXin Li template<typename DstXprType, typename MatrixType>
581*bf2c3715SXin Li struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
582*bf2c3715SXin Li {
583*bf2c3715SXin Li   typedef PartialPivLU<MatrixType> LuType;
584*bf2c3715SXin Li   typedef Inverse<LuType> SrcXprType;
585*bf2c3715SXin Li   static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
586*bf2c3715SXin Li   {
587*bf2c3715SXin Li     dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
588*bf2c3715SXin Li   }
589*bf2c3715SXin Li };
590*bf2c3715SXin Li } // end namespace internal
591*bf2c3715SXin Li 
592*bf2c3715SXin Li /******** MatrixBase methods *******/
593*bf2c3715SXin Li 
594*bf2c3715SXin Li /** \lu_module
595*bf2c3715SXin Li   *
596*bf2c3715SXin Li   * \return the partial-pivoting LU decomposition of \c *this.
597*bf2c3715SXin Li   *
598*bf2c3715SXin Li   * \sa class PartialPivLU
599*bf2c3715SXin Li   */
600*bf2c3715SXin Li template<typename Derived>
601*bf2c3715SXin Li inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
602*bf2c3715SXin Li MatrixBase<Derived>::partialPivLu() const
603*bf2c3715SXin Li {
604*bf2c3715SXin Li   return PartialPivLU<PlainObject>(eval());
605*bf2c3715SXin Li }
606*bf2c3715SXin Li 
607*bf2c3715SXin Li /** \lu_module
608*bf2c3715SXin Li   *
609*bf2c3715SXin Li   * Synonym of partialPivLu().
610*bf2c3715SXin Li   *
611*bf2c3715SXin Li   * \return the partial-pivoting LU decomposition of \c *this.
612*bf2c3715SXin Li   *
613*bf2c3715SXin Li   * \sa class PartialPivLU
614*bf2c3715SXin Li   */
615*bf2c3715SXin Li template<typename Derived>
616*bf2c3715SXin Li inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
617*bf2c3715SXin Li MatrixBase<Derived>::lu() const
618*bf2c3715SXin Li {
619*bf2c3715SXin Li   return PartialPivLU<PlainObject>(eval());
620*bf2c3715SXin Li }
621*bf2c3715SXin Li 
622*bf2c3715SXin Li } // end namespace Eigen
623*bf2c3715SXin Li 
624*bf2c3715SXin Li #endif // EIGEN_PARTIALLU_H
625