// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Navdeep Jaitly <[email protected]>
//                    Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
namespace Eigen {

/** \class TensorReverse
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reverse elements class.
  *
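  * Reversal is specified per dimension with an array of booleans passed to
  * Tensor::reverse(). A minimal usage sketch (names and sizes below are
  * illustrative):
  *
  * \code
  * Eigen::Tensor<float, 2> input(3, 4);
  * input.setRandom();
  *
  * // Reverse along dimension 0 only.
  * Eigen::array<bool, 2> rev = {{true, false}};
  * Eigen::Tensor<float, 2> output = input.reverse(rev);
  * // output(i, j) == input(2 - i, j)
  *
  * // A reversed expression is also writable (see the lvalue evaluator below):
  * output.reverse(rev) = input;
  * \endcode
  *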
  */
namespace internal {
template<typename ReverseDimensions, typename XprType>
struct traits<TensorReverseOp<ReverseDimensions,
                              XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename ReverseDimensions, typename XprType>
struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
};

template<typename ReverseDimensions, typename XprType>
struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
            typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
{
  typedef TensorReverseOp<ReverseDimensions, XprType> type;
};

}  // end namespace internal

template<typename ReverseDimensions, typename XprType>
class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
                                          XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
                                                                      StorageKind;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
      const XprType& expr, const ReverseDimensions& reverse_dims)
      : m_xpr(expr), m_reverse_dims(reverse_dims) { }

    EIGEN_DEVICE_FUNC
    const ReverseDimensions& reverse() const { return m_reverse_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)


  protected:
    typename XprType::Nested m_xpr;
    const ReverseDimensions m_reverse_dims;
};

// Eval as rvalue
template<typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
{
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = NumDims > 0,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_reverse(op.reverse()),
        m_device(device)
  {
    // Reversing a scalar isn't supported yet. It would be a no-op anyway.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute strides
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

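  // Maps a coefficient index in the reversed (output) tensor to the matching
  // index in the input tensor: the index is decomposed one dimension at a time
  // using the precomputed strides, and each reversed dimension is mirrored.
  // For example, for a column-major 2x3 tensor with reverse = {true, false},
  // the strides are {1, 2}; output index 3 decomposes into coordinates (1, 1),
  // dimension 0 is mirrored to 2 - 1 - 1 = 0, and the resulting input index is
  // 0 * 1 + 1 * 2 = 2.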
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
      Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[0]) {
        inputIndex += (m_dimensions[0] - index - 1);
      } else {
        inputIndex += index;
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[NumDims-1]) {
        inputIndex += (m_dimensions[NumDims-1] - index - 1);
      } else {
        inputIndex += index;
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
      Index index) const {
    return m_impl.coeff(reverseIndex(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // TODO(ndjaitly): write a better packing routine that uses
    // local structure.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
                                                            values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    // Block evaluation reads the underlying memory in reverse order, which the
    // default cost model does not properly account for in bytes stored/loaded.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
               target_size)
        .addCostPerCoeff({0, 0, 24});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    // TODO(ezhulenev): If underlying tensor expression supports and prefers
    // block evaluation we must use it. Currently we use coeff and packet
    // access into the underlying tensor expression.
    // static const bool useBlockAccessForArgType =
    //     TensorEvaluator<ArgType, Device>::BlockAccess &&
    //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;

    static const bool isColMajor =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    const bool inner_dim_reversed = m_reverse[inner_dim_idx];

    // Offset in the output block.
    Index block_offset = 0;

    // Offset in the input Tensor.
    Index input_offset = reverseIndex(desc.offset());

    // Initialize the output block iterator state. Dimensions in this array are
    // always in inner-most -> outer-most order (col-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = isColMajor ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].count = 0;
      it[i].reverse = m_reverse[dim];

      it[i].block_stride =
          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
      it[i].block_span = it[i].block_stride * (it[i].size - 1);

      it[i].input_stride = m_strides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      if (it[i].reverse) {
        it[i].input_stride = -1 * it[i].input_stride;
        it[i].input_span = -1 * it[i].input_span;
      }
    }

    // If multiple inner dimensions have the same reverse flag, check if we can
    // merge them into a single virtual inner dimension.
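    // For example, a column-major 2x2x10 block covering the whole tensor with
    // reverse = {false, false, true} folds dimensions 0 and 1 into a single
    // contiguous inner dimension of size 4, while the last dimension keeps its
    // own (reversed) iteration state.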
    int effective_inner_dim = 0;
    for (int i = 1; i < NumDims; ++i) {
      if (it[i].reverse != it[effective_inner_dim].reverse) break;
      if (it[i].block_stride != it[effective_inner_dim].size) break;
      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;

      it[i].size = it[effective_inner_dim].size * it[i].size;

      it[i].block_stride = 1;
      it[i].input_stride = (inner_dim_reversed ? -1 : 1);

      it[i].block_span = it[i].block_stride * (it[i].size - 1);
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      effective_inner_dim = i;
    }

    eigen_assert(it[effective_inner_dim].block_stride == 1);
    eigen_assert(it[effective_inner_dim].input_stride ==
                 (inner_dim_reversed ? -1 : 1));

    const Index inner_dim_size = it[effective_inner_dim].size;

    // Prepare storage for the materialized reverse result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy inner-most dimension data from reversed location in input.
      Index dst = block_offset;
      Index src = input_offset;

      // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
      // worse results in benchmarks than a simple coefficient loop.
      if (inner_dim_reversed) {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          --src;
        }
      } else {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          ++src;
        }
      }

      // For a 1d tensor (all dimensions merged into the inner-most one) a
      // single pass over the inner dimension covers the whole block.
      if ((NumDims - effective_inner_dim) == 1) break;

      // Update offset.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        block_offset -= it[i].block_span;
        input_offset -= it[i].input_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                     2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      if (m_reverse[i]) {
        compute_cost += 2 * TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fastStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  ReverseDimensions m_reverse;
  const Device EIGEN_DEVICE_REF m_device;

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : size(0),
          count(0),
          reverse(false),
          block_stride(0),
          block_span(0),
          input_stride(0),
          input_span(0) {}

    Index size;
    Index count;
    bool reverse;
    Index block_stride;
    Index block_span;
    Index input_stride;
    Index input_span;
  };
};

// Eval as lvalue
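// The lvalue evaluator makes a reversed expression writable: assignments of
// the form `out.reverse(rev) = in;` store coefficients through the mirrored
// indices computed by reverseIndex().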

template <typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                             Device> {
  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                          Device> Base;
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    return this->m_impl.coeffRef(this->reverseIndex(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // This code is pilfered from TensorMorphing.h
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index+i) = values[i];
    }
  }
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H