// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

namespace Eigen {

/** \class TensorStriding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor striding class.
  *
  * Applies a stride factor to each dimension of the input expression: the
  * output keeps only every stride-th coefficient along each dimension, so
  * output dimension \c i has size ceil(input_dim[i] / stride[i]).
  */
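//
// A minimal usage sketch (not part of the original header): TensorBase
// exposes this op through its stride() method, so with the Tensor module
// included the behavior can be exercised like this:
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> input(6, 9);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> strides{{2, 3}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);
//   // output has dimensions ceil(6/2) x ceil(9/3) = 3 x 3, and
//   // output(i, j) == input(2 * i, 3 * j).
//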
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

}  // end namespace internal


template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
      : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};


// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] = Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

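  // packet() translates only the first and last coefficient indices of the
  // packet. If those turn out to be contiguous in the input (which happens
  // when the stride along the inner dimension is 1), a single packet load
  // suffices; otherwise the remaining lanes are gathered one at a time
  // through coeff().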
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
        TensorOpCost::MulCost<Index>();
    if (vectorized) {
      compute_cost *= 2;  // packet() computes two indices
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
        // Computation is not vectorized per se, but it is done once per packet.
        TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

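  // Strided output is never backed by a contiguous buffer, so there is no
  // raw data pointer to expose (consistent with RawAccess = false above).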
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
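  // Maps a flat output index to the corresponding flat input index by
  // peeling off one output coordinate per dimension (integer division by
  // the output stride) and accumulating it against the precomputed,
  // stride-adjusted input strides.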
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};

// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const XprType, Device> Base;
  //  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  //  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

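  // writePacket mirrors the rvalue evaluator's packet(): it translates the
  // packet's first and last indices, stores the whole packet when the
  // destination is contiguous in the input, and otherwise scatters the
  // lanes one at a time through coeffRef().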
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H