// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H

namespace Eigen {

/** \class TensorForcedEval
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor forced evaluation class.
  *
  * Forces its input expression to be evaluated into a temporary buffer, so
  * that subsequent accesses read materialized values instead of re-evaluating
  * the expression.
  */
namespace internal {
template<typename XprType>
struct traits<TensorForcedEvalOp<XprType> >
{
  // Forward the traits of the wrapped expression: forced evaluation does not
  // change the scalar type, rank or layout.
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename traits<XprType>::StorageKind StorageKind;
  typedef typename traits<XprType>::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;

  enum {
    Flags = 0
  };
};

template<typename XprType>
struct eval<TensorForcedEvalOp<XprType>, Eigen::Dense>
{
  typedef const TensorForcedEvalOp<XprType>& type;
};

template<typename XprType>
struct nested<TensorForcedEvalOp<XprType>, 1, typename eval<TensorForcedEvalOp<XprType> >::type>
{
  typedef TensorForcedEvalOp<XprType> type;
};

}  // end namespace internal


template<typename XprType>
class TensorForcedEvalOp : public TensorBase<TensorForcedEvalOp<XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorForcedEvalOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorForcedEvalOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr)
        : m_xpr(expr) {}

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
};
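
// Usage sketch: TensorForcedEvalOp is normally created via TensorBase::eval()
// rather than constructed directly. A typical use is to materialize an
// expensive-to-recompute subexpression before broadcasting (illustrative
// example only; the shapes and factors are arbitrary):
//
//   Eigen::Tensor<float, 2> small(8, 8);
//   small.setRandom();
//   Eigen::array<Eigen::Index, 2> bcast = {{2, 2}};
//   // Without eval(), sqrt() would be recomputed for every broadcast read;
//   // eval() wraps the expression in a TensorForcedEvalOp so it is evaluated
//   // once into a temporary buffer.
//   Eigen::Tensor<float, 2> big = small.sqrt().eval().broadcast(bcast);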
namespace internal {
template <typename Device, typename CoeffReturnType>
struct non_integral_type_placement_new {
  template <typename StorageType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) {
    // Initialize non-trivially constructible types.
    if (!internal::is_arithmetic<CoeffReturnType>::value) {
      for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType();
    }
  }
};

// SYCL does not support non-integral types:
// having new (m_buffer + i) CoeffReturnType() causes the following compiler
// error for SYCL devices:
//   no matching function for call to 'operator new'
template <typename CoeffReturnType>
struct non_integral_type_placement_new<Eigen::SyclDevice, CoeffReturnType> {
  template <typename StorageType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) {
  }
};
}  // end namespace internal

template<typename ArgType_, typename Device>
struct TensorEvaluator<const TensorForcedEvalOp<ArgType_>, Device>
{
  typedef const typename internal::remove_all<ArgType_>::type ArgType;
  typedef TensorForcedEvalOp<ArgType> XprType;
  typedef typename ArgType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef typename XprType::Index Index;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename Eigen::internal::traits<XprType>::PointerType TensorPointerType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = internal::is_arithmetic<CoeffReturnType>::value,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = true
  };

  static const int NumDims = internal::traits<ArgType>::NumDimensions;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_op(op.expression()),
        m_device(device), m_buffer(NULL)
  { }

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    const Index numValues = internal::array_prod(m_impl.dimensions());
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)));

    internal::non_integral_type_placement_new<Device, CoeffReturnType>()(numValues, m_buffer);

    typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);

    internal::TensorExecutor<
        const EvalTo, typename internal::remove_const<Device>::type,
        /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
        /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
        run(evalToTmp, m_device);

    return true;
  }
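
  // The asynchronous variant below mirrors evalSubExprsIfNeeded(): it
  // allocates the temporary buffer, wraps the expression in a TensorEvalToOp
  // targeting that buffer, and dispatches to TensorAsyncExecutor, signalling
  // completion through the `done` callback instead of returning synchronously.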
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    const Index numValues = internal::array_prod(m_impl.dimensions());
    m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(
        numValues * sizeof(CoeffReturnType)));
    typedef TensorEvalToOp<const typename internal::remove_const<ArgType>::type>
        EvalTo;
    EvalTo evalToTmp(m_device.get(m_buffer), m_op);

    auto on_done = std::bind([](EvalSubExprsCallback done_) { done_(true); },
                             std::move(done));
    internal::TensorAsyncExecutor<
        const EvalTo, typename internal::remove_const<Device>::type,
        decltype(on_done),
        /*Vectorizable=*/internal::IsVectorizable<Device, const ArgType>::value,
        /*Tiling=*/internal::IsTileable<Device, const ArgType>::value>::
        runAsync(evalToTmp, m_device, std::move(on_done));
  }
#endif

  EIGEN_STRONG_INLINE void cleanup() {
    m_device.deallocate_temp(m_buffer);
    m_buffer = NULL;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_buffer[index];
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_buffer + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_buffer != NULL);
    return TensorBlock::materialize(m_buffer, m_impl.dimensions(), desc, scratch);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  EvaluatorPointerType data() const { return m_buffer; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_buffer.bind(cgh);
    m_impl.bind(cgh);
  }
#endif
 private:
  TensorEvaluator<ArgType, Device> m_impl;
  const ArgType m_op;
  const Device EIGEN_DEVICE_REF m_device;
  EvaluatorPointerType m_buffer;
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H
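
// Usage sketch for the threaded evaluation path (illustrative only; the pool
// size of 4 is an arbitrary assumption of this example):
//
//   #define EIGEN_USE_THREADS
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::ThreadPool pool(4);
//   Eigen::ThreadPoolDevice device(&pool, 4);
//   Eigen::Tensor<float, 1> in(1 << 20), out(1 << 20);
//   in.setRandom();
//   // Assigning through device(...) drives this evaluator: eval() inserts the
//   // forced-eval node, whose evaluator owns the temporary buffer filled by
//   // TensorExecutor (or TensorAsyncExecutor for asynchronous assignment).
//   out.device(device) = in.abs().eval() * 0.5f;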