src/LU/PartialPivLU.h

*bf2c3715SXin Li// This file is part of Eigen, a lightweight C++ template library
*bf2c3715SXin Li// for linear algebra.
*bf2c3715SXin Li//
*bf2c3715SXin Li// Copyright (C) 2006-2009 Benoit Jacob <[email protected]>
*bf2c3715SXin Li// Copyright (C) 2009 Gael Guennebaud <[email protected]>
*bf2c3715SXin Li//
*bf2c3715SXin Li// This Source Code Form is subject to the terms of the Mozilla
*bf2c3715SXin Li// Public License v. 2.0. If a copy of the MPL was not distributed
*bf2c3715SXin Li// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
*bf2c3715SXin Li
*bf2c3715SXin Li#ifndef EIGEN_PARTIALLU_H
*bf2c3715SXin Li#define EIGEN_PARTIALLU_H
*bf2c3715SXin Li
*bf2c3715SXin Linamespace Eigen {
*bf2c3715SXin Li
*bf2c3715SXin Linamespace internal {
*bf2c3715SXin Litemplate<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
*bf2c3715SXin Li : traits<_MatrixType>
*bf2c3715SXin Li{
*bf2c3715SXin Li  typedef MatrixXpr XprKind;
*bf2c3715SXin Li  typedef SolverStorage StorageKind;
*bf2c3715SXin Li  typedef int StorageIndex;
*bf2c3715SXin Li  typedef traits<_MatrixType> BaseTraits;
*bf2c3715SXin Li  enum {
*bf2c3715SXin Li    Flags = BaseTraits::Flags & RowMajorBit,
*bf2c3715SXin Li    CoeffReadCost = Dynamic
*bf2c3715SXin Li  };
*bf2c3715SXin Li};
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename T,typename Derived>
*bf2c3715SXin Listruct enable_if_ref;
*bf2c3715SXin Li// {
*bf2c3715SXin Li//   typedef Derived type;
*bf2c3715SXin Li// };
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename T,typename Derived>
*bf2c3715SXin Listruct enable_if_ref<Ref<T>,Derived> {
*bf2c3715SXin Li  typedef Derived type;
*bf2c3715SXin Li};
*bf2c3715SXin Li
*bf2c3715SXin Li} // end namespace internal
*bf2c3715SXin Li
*bf2c3715SXin Li/** \ingroup LU_Module
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \class PartialPivLU
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \brief LU decomposition of a matrix with partial pivoting, and related features
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
*bf2c3715SXin Li  *
  * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
  * is decomposed as PA = LU (equivalently A = P^{-1} L U) where L is unit-lower-triangular, U is upper-triangular,
  * and P is the permutation matrix returned by permutationP().
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible
*bf2c3715SXin Li  * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class
*bf2c3715SXin Li  * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the
*bf2c3715SXin Li  * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided
*bf2c3715SXin Li  * by class FullPivLU.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
*bf2c3715SXin Li  * such as rank computation. If you need these features, use class FullPivLU.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses
*bf2c3715SXin Li  * in the general case.
*bf2c3715SXin Li  * On the other hand, it is \b not suitable to determine whether a given matrix is invertible.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
*bf2c3715SXin Li  */
*bf2c3715SXin Litemplate<typename _MatrixType> class PartialPivLU
*bf2c3715SXin Li  : public SolverBase<PartialPivLU<_MatrixType> >
*bf2c3715SXin Li{
*bf2c3715SXin Li  public:
*bf2c3715SXin Li
*bf2c3715SXin Li    typedef _MatrixType MatrixType;
*bf2c3715SXin Li    typedef SolverBase<PartialPivLU> Base;
*bf2c3715SXin Li    friend class SolverBase<PartialPivLU>;
*bf2c3715SXin Li
*bf2c3715SXin Li    EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
*bf2c3715SXin Li    enum {
*bf2c3715SXin Li      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
*bf2c3715SXin Li      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
*bf2c3715SXin Li    };
*bf2c3715SXin Li    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
*bf2c3715SXin Li    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
*bf2c3715SXin Li    typedef typename MatrixType::PlainObject PlainObject;
*bf2c3715SXin Li
*bf2c3715SXin Li    /**
*bf2c3715SXin Li      * \brief Default Constructor.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * The default constructor is useful in cases in which the user intends to
*bf2c3715SXin Li      * perform decompositions via PartialPivLU::compute(const MatrixType&).
*bf2c3715SXin Li      */
*bf2c3715SXin Li    PartialPivLU();
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \brief Default Constructor with memory preallocation
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * Like the default constructor but with preallocation of the internal data
*bf2c3715SXin Li      * according to the specified problem \a size.
*bf2c3715SXin Li      * \sa PartialPivLU()
*bf2c3715SXin Li      */
*bf2c3715SXin Li    explicit PartialPivLU(Index size);
*bf2c3715SXin Li
*bf2c3715SXin Li    /** Constructor.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \param matrix the matrix of which to compute the LU decomposition.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
*bf2c3715SXin Li      * If you need to deal with non-full rank, use class FullPivLU instead.
*bf2c3715SXin Li      */
*bf2c3715SXin Li    template<typename InputType>
*bf2c3715SXin Li    explicit PartialPivLU(const EigenBase<InputType>& matrix);
*bf2c3715SXin Li
*bf2c3715SXin Li    /** Constructor for \link InplaceDecomposition inplace decomposition \endlink
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \param matrix the matrix of which to compute the LU decomposition.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
*bf2c3715SXin Li      * If you need to deal with non-full rank, use class FullPivLU instead.
*bf2c3715SXin Li      */
*bf2c3715SXin Li    template<typename InputType>
*bf2c3715SXin Li    explicit PartialPivLU(EigenBase<InputType>& matrix);
*bf2c3715SXin Li
*bf2c3715SXin Li    template<typename InputType>
*bf2c3715SXin Li    PartialPivLU& compute(const EigenBase<InputType>& matrix) {
*bf2c3715SXin Li      m_lu = matrix.derived();
*bf2c3715SXin Li      compute();
*bf2c3715SXin Li      return *this;
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \returns the LU decomposition matrix: the upper-triangular part is U, the
*bf2c3715SXin Li      * unit-lower-triangular part is L (at least for square matrices; in the non-square
*bf2c3715SXin Li      * case, special care is needed, see the documentation of class FullPivLU).
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \sa matrixL(), matrixU()
*bf2c3715SXin Li      */
*bf2c3715SXin Li    inline const MatrixType& matrixLU() const
*bf2c3715SXin Li    {
*bf2c3715SXin Li      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
*bf2c3715SXin Li      return m_lu;
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \returns the permutation matrix P.
*bf2c3715SXin Li      */
*bf2c3715SXin Li    inline const PermutationType& permutationP() const
*bf2c3715SXin Li    {
*bf2c3715SXin Li      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
*bf2c3715SXin Li      return m_p;
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    #ifdef EIGEN_PARSED_BY_DOXYGEN
*bf2c3715SXin Li    /** This method returns the solution x to the equation Ax=b, where A is the matrix of which
*bf2c3715SXin Li      * *this is the LU decomposition.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
*bf2c3715SXin Li      *          the only requirement in order for the equation to make sense is that
*bf2c3715SXin Li      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \returns the solution.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * Example: \include PartialPivLU_solve.cpp
*bf2c3715SXin Li      * Output: \verbinclude PartialPivLU_solve.out
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution
*bf2c3715SXin Li      * theoretically exists and is unique regardless of b.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \sa TriangularView::solve(), inverse(), computeInverse()
*bf2c3715SXin Li      */
*bf2c3715SXin Li    template<typename Rhs>
*bf2c3715SXin Li    inline const Solve<PartialPivLU, Rhs>
*bf2c3715SXin Li    solve(const MatrixBase<Rhs>& b) const;
*bf2c3715SXin Li    #endif
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
*bf2c3715SXin Li        the LU decomposition.
*bf2c3715SXin Li      */
*bf2c3715SXin Li    inline RealScalar rcond() const
*bf2c3715SXin Li    {
*bf2c3715SXin Li      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
*bf2c3715SXin Li      return internal::rcond_estimate_helper(m_l1_norm, *this);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \returns the inverse of the matrix of which *this is the LU decomposition.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
*bf2c3715SXin Li      *          invertibility, use class FullPivLU instead.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \sa MatrixBase::inverse(), LU::inverse()
*bf2c3715SXin Li      */
*bf2c3715SXin Li    inline const Inverse<PartialPivLU> inverse() const
*bf2c3715SXin Li    {
*bf2c3715SXin Li      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
*bf2c3715SXin Li      return Inverse<PartialPivLU>(*this);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    /** \returns the determinant of the matrix of which
*bf2c3715SXin Li      * *this is the LU decomposition. It has only linear complexity
*bf2c3715SXin Li      * (that is, O(n) where n is the dimension of the square matrix)
*bf2c3715SXin Li      * as the LU decomposition has already been computed.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
*bf2c3715SXin Li      *       optimized paths.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \warning a determinant can be very big or small, so for matrices
*bf2c3715SXin Li      * of large enough dimension, there is a risk of overflow/underflow.
*bf2c3715SXin Li      *
*bf2c3715SXin Li      * \sa MatrixBase::determinant()
*bf2c3715SXin Li      */
*bf2c3715SXin Li    Scalar determinant() const;
*bf2c3715SXin Li
*bf2c3715SXin Li    MatrixType reconstructedMatrix() const;
*bf2c3715SXin Li
*bf2c3715SXin Li    EIGEN_CONSTEXPR inline Index rows() const EIGEN_NOEXCEPT { return m_lu.rows(); }
*bf2c3715SXin Li    EIGEN_CONSTEXPR inline Index cols() const EIGEN_NOEXCEPT { return m_lu.cols(); }
*bf2c3715SXin Li
*bf2c3715SXin Li    #ifndef EIGEN_PARSED_BY_DOXYGEN
*bf2c3715SXin Li    template<typename RhsType, typename DstType>
*bf2c3715SXin Li    EIGEN_DEVICE_FUNC
*bf2c3715SXin Li    void _solve_impl(const RhsType &rhs, DstType &dst) const {
*bf2c3715SXin Li     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
*bf2c3715SXin Li      * So we proceed as follows:
*bf2c3715SXin Li      * Step 1: compute c = Pb.
*bf2c3715SXin Li      * Step 2: replace c by the solution x to Lx = c.
*bf2c3715SXin Li      * Step 3: replace c by the solution x to Ux = c.
*bf2c3715SXin Li      */
*bf2c3715SXin Li
*bf2c3715SXin Li      // Step 1
*bf2c3715SXin Li      dst = permutationP() * rhs;
*bf2c3715SXin Li
*bf2c3715SXin Li      // Step 2
*bf2c3715SXin Li      m_lu.template triangularView<UnitLower>().solveInPlace(dst);
*bf2c3715SXin Li
*bf2c3715SXin Li      // Step 3
*bf2c3715SXin Li      m_lu.template triangularView<Upper>().solveInPlace(dst);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    template<bool Conjugate, typename RhsType, typename DstType>
*bf2c3715SXin Li    EIGEN_DEVICE_FUNC
*bf2c3715SXin Li    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {
*bf2c3715SXin Li     /* The decomposition PA = LU can be rewritten as A^T = U^T L^T P.
*bf2c3715SXin Li      * So we proceed as follows:
*bf2c3715SXin Li      * Step 1: compute c as the solution to L^T c = b
*bf2c3715SXin Li      * Step 2: replace c by the solution x to U^T x = c.
*bf2c3715SXin Li      * Step 3: update  c = P^-1 c.
*bf2c3715SXin Li      */
*bf2c3715SXin Li
*bf2c3715SXin Li      eigen_assert(rhs.rows() == m_lu.cols());
*bf2c3715SXin Li
*bf2c3715SXin Li      // Step 1
*bf2c3715SXin Li      dst = m_lu.template triangularView<Upper>().transpose()
*bf2c3715SXin Li                .template conjugateIf<Conjugate>().solve(rhs);
*bf2c3715SXin Li      // Step 2
*bf2c3715SXin Li      m_lu.template triangularView<UnitLower>().transpose()
*bf2c3715SXin Li          .template conjugateIf<Conjugate>().solveInPlace(dst);
*bf2c3715SXin Li      // Step 3
*bf2c3715SXin Li      dst = permutationP().transpose() * dst;
*bf2c3715SXin Li    }
*bf2c3715SXin Li    #endif
*bf2c3715SXin Li
*bf2c3715SXin Li  protected:
*bf2c3715SXin Li
*bf2c3715SXin Li    static void check_template_parameters()
*bf2c3715SXin Li    {
*bf2c3715SXin Li      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    void compute();
*bf2c3715SXin Li
*bf2c3715SXin Li    MatrixType m_lu;
*bf2c3715SXin Li    PermutationType m_p;
*bf2c3715SXin Li    TranspositionType m_rowsTranspositions;
*bf2c3715SXin Li    RealScalar m_l1_norm;
*bf2c3715SXin Li    signed char m_det_p;
*bf2c3715SXin Li    bool m_isInitialized;
*bf2c3715SXin Li};
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin LiPartialPivLU<MatrixType>::PartialPivLU()
*bf2c3715SXin Li  : m_lu(),
*bf2c3715SXin Li    m_p(),
*bf2c3715SXin Li    m_rowsTranspositions(),
*bf2c3715SXin Li    m_l1_norm(0),
*bf2c3715SXin Li    m_det_p(0),
*bf2c3715SXin Li    m_isInitialized(false)
*bf2c3715SXin Li{
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin LiPartialPivLU<MatrixType>::PartialPivLU(Index size)
*bf2c3715SXin Li  : m_lu(size, size),
*bf2c3715SXin Li    m_p(size),
*bf2c3715SXin Li    m_rowsTranspositions(size),
*bf2c3715SXin Li    m_l1_norm(0),
*bf2c3715SXin Li    m_det_p(0),
*bf2c3715SXin Li    m_isInitialized(false)
*bf2c3715SXin Li{
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin Litemplate<typename InputType>
*bf2c3715SXin LiPartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
*bf2c3715SXin Li  : m_lu(matrix.rows(),matrix.cols()),
*bf2c3715SXin Li    m_p(matrix.rows()),
*bf2c3715SXin Li    m_rowsTranspositions(matrix.rows()),
*bf2c3715SXin Li    m_l1_norm(0),
*bf2c3715SXin Li    m_det_p(0),
*bf2c3715SXin Li    m_isInitialized(false)
*bf2c3715SXin Li{
*bf2c3715SXin Li  compute(matrix.derived());
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin Litemplate<typename InputType>
*bf2c3715SXin LiPartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
*bf2c3715SXin Li  : m_lu(matrix.derived()),
*bf2c3715SXin Li    m_p(matrix.rows()),
*bf2c3715SXin Li    m_rowsTranspositions(matrix.rows()),
*bf2c3715SXin Li    m_l1_norm(0),
*bf2c3715SXin Li    m_det_p(0),
*bf2c3715SXin Li    m_isInitialized(false)
*bf2c3715SXin Li{
*bf2c3715SXin Li  compute();
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Linamespace internal {
*bf2c3715SXin Li
*bf2c3715SXin Li/** \internal This is the blocked version of fullpivlu_unblocked() */
*bf2c3715SXin Litemplate<typename Scalar, int StorageOrder, typename PivIndex, int SizeAtCompileTime=Dynamic>
*bf2c3715SXin Listruct partial_lu_impl
*bf2c3715SXin Li{
*bf2c3715SXin Li  static const int UnBlockedBound = 16;
*bf2c3715SXin Li  static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound;
*bf2c3715SXin Li  static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic;
*bf2c3715SXin Li  // Remaining rows and columns at compile-time:
*bf2c3715SXin Li  static const int RRows = SizeAtCompileTime==2 ? 1 : Dynamic;
*bf2c3715SXin Li  static const int RCols = SizeAtCompileTime==2 ? 1 : Dynamic;
*bf2c3715SXin Li  typedef Matrix<Scalar, ActualSizeAtCompileTime, ActualSizeAtCompileTime, StorageOrder> MatrixType;
*bf2c3715SXin Li  typedef Ref<MatrixType> MatrixTypeRef;
*bf2c3715SXin Li  typedef Ref<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > BlockType;
*bf2c3715SXin Li  typedef typename MatrixType::RealScalar RealScalar;
*bf2c3715SXin Li
*bf2c3715SXin Li  /** \internal performs the LU decomposition in-place of the matrix \a lu
*bf2c3715SXin Li    * using an unblocked algorithm.
*bf2c3715SXin Li    *
*bf2c3715SXin Li    * In addition, this function returns the row transpositions in the
*bf2c3715SXin Li    * vector \a row_transpositions which must have a size equal to the number
*bf2c3715SXin Li    * of columns of the matrix \a lu, and an integer \a nb_transpositions
*bf2c3715SXin Li    * which returns the actual number of transpositions.
*bf2c3715SXin Li    *
*bf2c3715SXin Li    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
*bf2c3715SXin Li    */
*bf2c3715SXin Li  static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
*bf2c3715SXin Li  {
*bf2c3715SXin Li    typedef scalar_score_coeff_op<Scalar> Scoring;
*bf2c3715SXin Li    typedef typename Scoring::result_type Score;
*bf2c3715SXin Li    const Index rows = lu.rows();
*bf2c3715SXin Li    const Index cols = lu.cols();
*bf2c3715SXin Li    const Index size = (std::min)(rows,cols);
*bf2c3715SXin Li    // For small compile-time matrices it is worth processing the last row separately:
*bf2c3715SXin Li    //  speedup: +100% for 2x2, +10% for others.
*bf2c3715SXin Li    const Index endk = UnBlockedAtCompileTime ? size-1 : size;
*bf2c3715SXin Li    nb_transpositions = 0;
*bf2c3715SXin Li    Index first_zero_pivot = -1;
*bf2c3715SXin Li    for(Index k = 0; k < endk; ++k)
*bf2c3715SXin Li    {
*bf2c3715SXin Li      int rrows = internal::convert_index<int>(rows-k-1);
*bf2c3715SXin Li      int rcols = internal::convert_index<int>(cols-k-1);
*bf2c3715SXin Li
*bf2c3715SXin Li      Index row_of_biggest_in_col;
*bf2c3715SXin Li      Score biggest_in_corner
*bf2c3715SXin Li        = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
*bf2c3715SXin Li      row_of_biggest_in_col += k;
*bf2c3715SXin Li
*bf2c3715SXin Li      row_transpositions[k] = PivIndex(row_of_biggest_in_col);
*bf2c3715SXin Li
*bf2c3715SXin Li      if(biggest_in_corner != Score(0))
*bf2c3715SXin Li      {
*bf2c3715SXin Li        if(k != row_of_biggest_in_col)
*bf2c3715SXin Li        {
*bf2c3715SXin Li          lu.row(k).swap(lu.row(row_of_biggest_in_col));
*bf2c3715SXin Li          ++nb_transpositions;
*bf2c3715SXin Li        }
*bf2c3715SXin Li
*bf2c3715SXin Li        lu.col(k).tail(fix<RRows>(rrows)) /= lu.coeff(k,k);
*bf2c3715SXin Li      }
*bf2c3715SXin Li      else if(first_zero_pivot==-1)
*bf2c3715SXin Li      {
*bf2c3715SXin Li        // the pivot is exactly zero, we record the index of the first pivot which is exactly 0,
*bf2c3715SXin Li        // and continue the factorization such we still have A = PLU
*bf2c3715SXin Li        first_zero_pivot = k;
*bf2c3715SXin Li      }
*bf2c3715SXin Li
*bf2c3715SXin Li      if(k<rows-1)
*bf2c3715SXin Li        lu.bottomRightCorner(fix<RRows>(rrows),fix<RCols>(rcols)).noalias() -= lu.col(k).tail(fix<RRows>(rrows)) * lu.row(k).tail(fix<RCols>(rcols));
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    // special handling of the last entry
*bf2c3715SXin Li    if(UnBlockedAtCompileTime)
*bf2c3715SXin Li    {
*bf2c3715SXin Li      Index k = endk;
*bf2c3715SXin Li      row_transpositions[k] = PivIndex(k);
*bf2c3715SXin Li      if (Scoring()(lu(k, k)) == Score(0) && first_zero_pivot == -1)
*bf2c3715SXin Li        first_zero_pivot = k;
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    return first_zero_pivot;
*bf2c3715SXin Li  }
*bf2c3715SXin Li
*bf2c3715SXin Li  /** \internal performs the LU decomposition in-place of the matrix represented
*bf2c3715SXin Li    * by the variables \a rows, \a cols, \a lu_data, and \a lu_stride using a
*bf2c3715SXin Li    * recursive, blocked algorithm.
*bf2c3715SXin Li    *
*bf2c3715SXin Li    * In addition, this function returns the row transpositions in the
*bf2c3715SXin Li    * vector \a row_transpositions which must have a size equal to the number
*bf2c3715SXin Li    * of columns of the matrix \a lu, and an integer \a nb_transpositions
*bf2c3715SXin Li    * which returns the actual number of transpositions.
*bf2c3715SXin Li    *
*bf2c3715SXin Li    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
*bf2c3715SXin Li    *
*bf2c3715SXin Li    * \note This very low level interface using pointers, etc. is to:
*bf2c3715SXin Li    *   1 - reduce the number of instantiations to the strict minimum
*bf2c3715SXin Li    *   2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > >
*bf2c3715SXin Li    */
*bf2c3715SXin Li  static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
*bf2c3715SXin Li  {
*bf2c3715SXin Li    MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride));
*bf2c3715SXin Li
*bf2c3715SXin Li    const Index size = (std::min)(rows,cols);
*bf2c3715SXin Li
*bf2c3715SXin Li    // if the matrix is too small, no blocking:
*bf2c3715SXin Li    if(UnBlockedAtCompileTime || size<=UnBlockedBound)
*bf2c3715SXin Li    {
*bf2c3715SXin Li      return unblocked_lu(lu, row_transpositions, nb_transpositions);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    // automatically adjust the number of subdivisions to the size
*bf2c3715SXin Li    // of the matrix so that there is enough sub blocks:
*bf2c3715SXin Li    Index blockSize;
*bf2c3715SXin Li    {
*bf2c3715SXin Li      blockSize = size/8;
*bf2c3715SXin Li      blockSize = (blockSize/16)*16;
*bf2c3715SXin Li      blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);
*bf2c3715SXin Li    }
*bf2c3715SXin Li
*bf2c3715SXin Li    nb_transpositions = 0;
*bf2c3715SXin Li    Index first_zero_pivot = -1;
*bf2c3715SXin Li    for(Index k = 0; k < size; k+=blockSize)
*bf2c3715SXin Li    {
*bf2c3715SXin Li      Index bs = (std::min)(size-k,blockSize); // actual size of the block
*bf2c3715SXin Li      Index trows = rows - k - bs; // trailing rows
*bf2c3715SXin Li      Index tsize = size - k - bs; // trailing size
*bf2c3715SXin Li
*bf2c3715SXin Li      // partition the matrix:
*bf2c3715SXin Li      //                          A00 | A01 | A02
*bf2c3715SXin Li      // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12
*bf2c3715SXin Li      //                          A20 | A21 | A22
*bf2c3715SXin Li      BlockType A_0 = lu.block(0,0,rows,k);
*bf2c3715SXin Li      BlockType A_2 = lu.block(0,k+bs,rows,tsize);
*bf2c3715SXin Li      BlockType A11 = lu.block(k,k,bs,bs);
*bf2c3715SXin Li      BlockType A12 = lu.block(k,k+bs,bs,tsize);
*bf2c3715SXin Li      BlockType A21 = lu.block(k+bs,k,trows,bs);
*bf2c3715SXin Li      BlockType A22 = lu.block(k+bs,k+bs,trows,tsize);
*bf2c3715SXin Li
*bf2c3715SXin Li      PivIndex nb_transpositions_in_panel;
*bf2c3715SXin Li      // recursively call the blocked LU algorithm on [A11^T A21^T]^T
*bf2c3715SXin Li      // with a very small blocking size:
*bf2c3715SXin Li      Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
*bf2c3715SXin Li                   row_transpositions+k, nb_transpositions_in_panel, 16);
*bf2c3715SXin Li      if(ret>=0 && first_zero_pivot==-1)
*bf2c3715SXin Li        first_zero_pivot = k+ret;
*bf2c3715SXin Li
*bf2c3715SXin Li      nb_transpositions += nb_transpositions_in_panel;
*bf2c3715SXin Li      // update permutations and apply them to A_0
*bf2c3715SXin Li      for(Index i=k; i<k+bs; ++i)
*bf2c3715SXin Li      {
*bf2c3715SXin Li        Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
*bf2c3715SXin Li        A_0.row(i).swap(A_0.row(piv));
*bf2c3715SXin Li      }
*bf2c3715SXin Li
*bf2c3715SXin Li      if(trows)
*bf2c3715SXin Li      {
*bf2c3715SXin Li        // apply permutations to A_2
*bf2c3715SXin Li        for(Index i=k;i<k+bs; ++i)
*bf2c3715SXin Li          A_2.row(i).swap(A_2.row(row_transpositions[i]));
*bf2c3715SXin Li
*bf2c3715SXin Li        // A12 = A11^-1 A12
*bf2c3715SXin Li        A11.template triangularView<UnitLower>().solveInPlace(A12);
*bf2c3715SXin Li
*bf2c3715SXin Li        A22.noalias() -= A21 * A12;
*bf2c3715SXin Li      }
*bf2c3715SXin Li    }
*bf2c3715SXin Li    return first_zero_pivot;
*bf2c3715SXin Li  }
*bf2c3715SXin Li};
*bf2c3715SXin Li
*bf2c3715SXin Li/** \internal performs the LU decomposition with partial pivoting in-place.
*bf2c3715SXin Li  */
*bf2c3715SXin Litemplate<typename MatrixType, typename TranspositionType>
*bf2c3715SXin Livoid partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)
*bf2c3715SXin Li{
*bf2c3715SXin Li  // Special-case of zero matrix.
*bf2c3715SXin Li  if (lu.rows() == 0 || lu.cols() == 0) {
*bf2c3715SXin Li    nb_transpositions = 0;
*bf2c3715SXin Li    return;
*bf2c3715SXin Li  }
*bf2c3715SXin Li  eigen_assert(lu.cols() == row_transpositions.size());
*bf2c3715SXin Li  eigen_assert(row_transpositions.size() < 2 || (&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
*bf2c3715SXin Li
*bf2c3715SXin Li  partial_lu_impl
*bf2c3715SXin Li    < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor,
*bf2c3715SXin Li      typename TranspositionType::StorageIndex,
*bf2c3715SXin Li      EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)>
*bf2c3715SXin Li    ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Li} // end namespace internal
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin Livoid PartialPivLU<MatrixType>::compute()
*bf2c3715SXin Li{
*bf2c3715SXin Li  check_template_parameters();
*bf2c3715SXin Li
*bf2c3715SXin Li  // the row permutation is stored as int indices, so just to be sure:
*bf2c3715SXin Li  eigen_assert(m_lu.rows()<NumTraits<int>::highest());
*bf2c3715SXin Li
*bf2c3715SXin Li  if(m_lu.cols()>0)
*bf2c3715SXin Li    m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
*bf2c3715SXin Li  else
*bf2c3715SXin Li    m_l1_norm = RealScalar(0);
*bf2c3715SXin Li
*bf2c3715SXin Li  eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
*bf2c3715SXin Li  const Index size = m_lu.rows();
*bf2c3715SXin Li
*bf2c3715SXin Li  m_rowsTranspositions.resize(size);
*bf2c3715SXin Li
*bf2c3715SXin Li  typename TranspositionType::StorageIndex nb_transpositions;
*bf2c3715SXin Li  internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
*bf2c3715SXin Li  m_det_p = (nb_transpositions%2) ? -1 : 1;
*bf2c3715SXin Li
*bf2c3715SXin Li  m_p = m_rowsTranspositions;
*bf2c3715SXin Li
*bf2c3715SXin Li  m_isInitialized = true;
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin Litypename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
*bf2c3715SXin Li{
*bf2c3715SXin Li  eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
*bf2c3715SXin Li  return Scalar(m_det_p) * m_lu.diagonal().prod();
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Li/** \returns the matrix represented by the decomposition,
*bf2c3715SXin Li * i.e., it returns the product: P^{-1} L U.
*bf2c3715SXin Li * This function is provided for debug purpose. */
*bf2c3715SXin Litemplate<typename MatrixType>
*bf2c3715SXin LiMatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
*bf2c3715SXin Li{
*bf2c3715SXin Li  eigen_assert(m_isInitialized && "LU is not initialized.");
*bf2c3715SXin Li  // LU
*bf2c3715SXin Li  MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
*bf2c3715SXin Li                 * m_lu.template triangularView<Upper>();
*bf2c3715SXin Li
*bf2c3715SXin Li  // P^{-1}(LU)
*bf2c3715SXin Li  res = m_p.inverse() * res;
*bf2c3715SXin Li
*bf2c3715SXin Li  return res;
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Li/***** Implementation details *****************************************************/
*bf2c3715SXin Li
*bf2c3715SXin Linamespace internal {
*bf2c3715SXin Li
*bf2c3715SXin Li/***** Implementation of inverse() *****************************************************/
*bf2c3715SXin Litemplate<typename DstXprType, typename MatrixType>
*bf2c3715SXin Listruct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
*bf2c3715SXin Li{
*bf2c3715SXin Li  typedef PartialPivLU<MatrixType> LuType;
*bf2c3715SXin Li  typedef Inverse<LuType> SrcXprType;
*bf2c3715SXin Li  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
*bf2c3715SXin Li  {
*bf2c3715SXin Li    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
*bf2c3715SXin Li  }
*bf2c3715SXin Li};
*bf2c3715SXin Li} // end namespace internal
*bf2c3715SXin Li
*bf2c3715SXin Li/******** MatrixBase methods *******/
*bf2c3715SXin Li
*bf2c3715SXin Li/** \lu_module
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \return the partial-pivoting LU decomposition of \c *this.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \sa class PartialPivLU
*bf2c3715SXin Li  */
*bf2c3715SXin Litemplate<typename Derived>
*bf2c3715SXin Liinline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
*bf2c3715SXin LiMatrixBase<Derived>::partialPivLu() const
*bf2c3715SXin Li{
*bf2c3715SXin Li  return PartialPivLU<PlainObject>(eval());
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Li/** \lu_module
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * Synonym of partialPivLu().
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \return the partial-pivoting LU decomposition of \c *this.
*bf2c3715SXin Li  *
*bf2c3715SXin Li  * \sa class PartialPivLU
*bf2c3715SXin Li  */
*bf2c3715SXin Litemplate<typename Derived>
*bf2c3715SXin Liinline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
*bf2c3715SXin LiMatrixBase<Derived>::lu() const
*bf2c3715SXin Li{
*bf2c3715SXin Li  return PartialPivLU<PlainObject>(eval());
*bf2c3715SXin Li}
*bf2c3715SXin Li
*bf2c3715SXin Li} // end namespace Eigen
*bf2c3715SXin Li
*bf2c3715SXin Li#endif // EIGEN_PARTIALLU_H