// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // Copyright (C) 2008-2011 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H // IWYU pragma: private #include "./InternalHeaderCheck.h" namespace Eigen { enum { Large = 2, Small = 3 }; // Define the threshold value to fallback from the generic matrix-matrix product // implementation (heavy) to the lightweight coeff-based product one. // See generic_product_impl // in products/GeneralMatrixMatrix.h for more details. // TODO This threshold should also be used in the compile-time selector below. #ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD // This default value has been obtained on a Haswell architecture. #define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20 #endif namespace internal { template struct product_type_selector; template struct product_size_category { enum { #ifndef EIGEN_GPU_COMPILE_PHASE is_large = MaxSize == Dynamic || Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || (Size == Dynamic && MaxSize >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), #else is_large = 0, #endif value = is_large ? Large : Size == 1 ? 1 : Small }; }; template struct product_type { typedef remove_all_t Lhs_; typedef remove_all_t Rhs_; enum { MaxRows = traits::MaxRowsAtCompileTime, Rows = traits::RowsAtCompileTime, MaxCols = traits::MaxColsAtCompileTime, Cols = traits::ColsAtCompileTime, MaxDepth = min_size_prefer_fixed(traits::MaxColsAtCompileTime, traits::MaxRowsAtCompileTime), Depth = min_size_prefer_fixed(traits::ColsAtCompileTime, traits::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, // is to work around an internal compiler error with gcc 4.1 and 4.2. private: enum { rows_select = product_size_category::value, cols_select = product_size_category::value, depth_select = product_size_category::value }; typedef product_type_selector selector; public: enum { value = selector::ret, ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() { EIGEN_DEBUG_VAR(Rows); EIGEN_DEBUG_VAR(Cols); EIGEN_DEBUG_VAR(Depth); EIGEN_DEBUG_VAR(rows_select); EIGEN_DEBUG_VAR(cols_select); EIGEN_DEBUG_VAR(depth_select); EIGEN_DEBUG_VAR(value); } #endif }; /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ // FIXME I'm not sure the current mapping is the ideal one. template struct product_type_selector { enum { ret = OuterProduct }; }; template struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template <> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector<1, Small, Small> { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; template <> struct product_type_selector<1, Large, Small> { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector<1, Large, Large> { enum { ret = GemvProduct }; }; template <> struct product_type_selector<1, Small, Large> { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = GemvProduct }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = GemmProduct }; }; template <> struct product_type_selector { enum { ret = GemmProduct }; }; template <> struct product_type_selector { enum { ret = GemmProduct }; }; template <> struct product_type_selector { enum { ret = GemmProduct }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template <> struct product_type_selector { enum { ret = GemmProduct }; }; } // end namespace internal /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ // FIXME : maybe the "inner product" could return a Scalar // instead of a 1x1 matrix ?? // Pro: more natural for the user // Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix // product ends up to a row-vector times col-vector product... To tackle this use // case, we could have a specialization for Block with: operator=(Scalar x); /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ /* According to the shape/flags of the matrix we have to distinghish 3 different cases: * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine * 3 - all other cases are handled using a simple loop along the outer-storage direction. * Therefore we need a lower level meta selector. * Furthermore, if the matrix is the rhs, then the product has to be transposed. */ namespace internal { template struct gemv_dense_selector; } // end namespace internal namespace internal { template struct gemv_static_vector_if; template struct gemv_static_vector_if { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } }; template struct gemv_static_vector_if { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } }; template struct gemv_static_vector_if { #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 internal::plain_array m_data; EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. internal::plain_array m_data; EIGEN_STRONG_INLINE constexpr Scalar* data() { return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES); } #endif }; // The vector is on the left => transposition template struct gemv_dense_selector { template static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { Transpose destT(dest); enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; gemv_dense_selector::run(rhs.transpose(), lhs.transpose(), destT, alpha); } }; template <> struct gemv_dense_selector { template static inline void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef Map, plain_enum_min(AlignedMax, internal::packet_traits::size)> MappedDest; ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); // make sure Dest is a compile-time vector type (bug 1166) typedef std::conditional_t ActualDest; enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime != 0) }; typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; RhsScalar compatibleAlpha = get_factor::run(actualAlpha); if (!MightCannotUseDest) { // shortcut if we are sure to be able to use dest directly, // this ease the compiler to generate cleaner and more optimzized code for most common cases general_matrix_vector_product::run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), RhsMapper(actualRhs.data(), actualRhs.innerStride()), dest.data(), 1, compatibleAlpha); } else { gemv_static_vector_if static_dest; const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha))); const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; ei_declare_aligned_stack_constructed_variable(ResScalar, actualDestPtr, dest.size(), evalToDest ? dest.data() : static_dest.data()); if (!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN constexpr int Size = Dest::SizeAtCompileTime; Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif if (!alphaIsCompatible) { MappedDest(actualDestPtr, dest.size()).setZero(); compatibleAlpha = RhsScalar(1); } else MappedDest(actualDestPtr, dest.size()) = dest; } general_matrix_vector_product::run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), RhsMapper(actualRhs.data(), actualRhs.innerStride()), actualDestPtr, 1, compatibleAlpha); if (!evalToDest) { if (!alphaIsCompatible) dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); else dest = MappedDest(actualDestPtr, dest.size()); } } } }; template <> struct gemv_dense_selector { template static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; typedef typename Dest::Scalar ResScalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef internal::remove_all_t ActualRhsTypeCleaned; std::add_const_t actualLhs = LhsBlasTraits::extract(lhs); std::add_const_t actualRhs = RhsBlasTraits::extract(rhs); ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0 }; gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable( RhsScalar, actualRhsPtr, actualRhs.size(), DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); if (!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif Map(actualRhsPtr, actualRhs.size()) = actualRhs; } typedef const_blas_data_mapper LhsMapper; typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product:: run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), RhsMapper(actualRhsPtr, 1), dest.data(), dest.col(0).innerStride(), // NOTE if dest is not a vector at compile-time, then dest.innerStride() might // be wrong. (bug 1166) actualAlpha); } }; template <> struct gemv_dense_selector { template static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { EIGEN_STATIC_ASSERT((!nested_eval::Evaluate), EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, // otherwise use a temp typename nested_eval::type actual_rhs(rhs); const Index size = rhs.rows(); for (Index k = 0; k < size; ++k) dest += (alpha * actual_rhs.coeff(k)) * lhs.col(k); } }; template <> struct gemv_dense_selector { template static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { EIGEN_STATIC_ASSERT((!nested_eval::Evaluate), EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); typename nested_eval::type actual_rhs(rhs); const Index rows = dest.rows(); for (Index i = 0; i < rows; ++i) dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum(); } }; } // end namespace internal /*************************************************************************** * Implementation of matrix base methods ***************************************************************************/ /** \returns the matrix product of \c *this and \a other. * * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*(). * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ template template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product MatrixBase::operator*( const MatrixBase& other) const { // A note regarding the function declaration: In MSVC, this function will sometimes // not be inlined since DenseStorage is an unwindable object for dynamic // matrices and product types are holding a member to store the result. // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. enum { ProductIsValid = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic || int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime), AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived) }; // note to the lost user: // * for a dot product use: v1.dot(v2) // * for a coeff-wise product use: v1.cwiseProduct(v2) EIGEN_STATIC_ASSERT( ProductIsValid || !(AreVectors && SameSizes), INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) #ifdef EIGEN_DEBUG_PRODUCT internal::product_type::debug(); #endif return Product(derived(), other.derived()); } /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be * computed once at a time as requested. This might be useful in some extremely rare cases when only * a small and no coherent fraction of the result's coefficients have to be computed. * * \warning This version of the matrix product can be much much slower. So use it only if you know * what you are doing and that you measured a true speed improvement. * * \sa operator*(const MatrixBase&) */ template template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product MatrixBase::lazyProduct(const MatrixBase& other) const { enum { ProductIsValid = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic || int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime), AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived) }; // note to the lost user: // * for a dot product use: v1.dot(v2) // * for a coeff-wise product use: v1.cwiseProduct(v2) EIGEN_STATIC_ASSERT( ProductIsValid || !(AreVectors && SameSizes), INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) return Product(derived(), other.derived()); } } // end namespace Eigen #endif // EIGEN_PRODUCT_H