// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  *
  * Reinterprets the coefficients of the input expression with a new set of
  * dimensions; the total number of coefficients must stay unchanged.
  *
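  * A minimal usage sketch, assuming <unsupported/Eigen/CXX11/Tensor> is
  * included (dimension values are illustrative, not from this file):
  * \code
  * Eigen::Tensor<float, 3> input(2, 3, 4);
  * input.setRandom();
  * // View the 2x3x4 tensor as a 6x4 tensor; the total size (24) is unchanged.
  * Eigen::array<Eigen::Index, 2> new_dims{{6, 4}};
  * Eigen::Tensor<float, 2> output = input.reshape(new_dims);
  * \endcode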
  */
namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal



template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
  typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
      : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};


// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;

  static const int NumOutputDims = internal::array_size<Dimensions>::value;
  static const int NumInputDims  = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;

  enum ReshapingKind {
    // We do not use layout information to determine reshaping kind.
    // Depending on the layout `N` can be inner or outer dimension.
    OneByN = 0,  // expr.reshape(1, N)
    NByOne = 1,  // expr.reshape(N, 1)
    Runtime = 2  // Reshape dimensions are dynamic (specified at runtime).
  };

  // clang-format off
  static const ReshapingKind kind =
#if defined(EIGEN_HAS_INDEX_LIST)
        (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
      : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
      : Runtime;
#else
        Runtime;
#endif
  // clang-format on

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    // For trivial reshapes with raw access to underlying data we will provide
    // zero overhead block access.
    // TODO(ezhulenev): Consider adding block access without raw access?
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess &&
                        NumInputDims > 0 && NumOutputDims > 0,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef
      typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
                                                 Layout, Index>
          TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType data, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
#endif

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  // required in block(OutputTensorBlock* output_block) const
  // For C++03 compatibility this must be defined outside the method
  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_impl.data() != NULL);
    eigen_assert((kind == Runtime) ||
                 (kind == OneByN && desc.dimensions()[0] == 1) ||
                 (kind == NByOne && desc.dimensions()[1] == 1));

    if (kind == OneByN || kind == NByOne) {
      // We can guarantee at compile time that block is just a contiguous slice
      // of the underlying expression memory buffer.
      return TensorBlock(internal::TensorBlockKind::kView,
                           m_impl.data() + desc.offset(), desc.dimensions());
    } else {
      // This will do additional runtime checks, and in the end it might be also
      // a view, or it might be a block materialized in the temporary buffer.
      return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
                                        scratch);
    }
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const {
    return constCast(m_impl.data());
  }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  #ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
  #endif
 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>

{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = false,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
      TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<
        Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(this->dimensions()),
                                  this->m_impl.data(), desc.offset()),
        block.expr());
  }
};


/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  *
  * Extracts a sub-tensor defined by a start index and a size along each
  * dimension of the input expression.
  *
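  * A minimal usage sketch (offsets and extents are illustrative, not from
  * this file):
  * \code
  * Eigen::Tensor<float, 2> input(5, 7);
  * input.setRandom();
  * // Take the 2x3 block whose top-left corner is at (1, 2).
  * Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
  * Eigen::array<Eigen::Index, 2> extents{{2, 3}};
  * Eigen::Tensor<float, 2> output = input.slice(offsets, extents);
  * \endcode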
  */
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal



template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
  typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};


// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
    const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
    return !prefer_block_evaluation && contiguous > threshold_;
  }

 private:
  Index threshold_;
};
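
// Example of the host trigger above (thread count is illustrative): with 8
// device threads the threshold is 16, so a slice whose contiguous inner run
// exceeds 16 coefficients is copied with memcpy, unless the expression
// supports block evaluation and the total slice size exceeds 32K
// coefficients, in which case block evaluation is preferred instead.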

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess>  {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif

// It is very expensive to start the memcpy kernel on SYCL devices: we
// therefore only use it for large copies.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess>  {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif

}  // anonymous namespace

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess &&
                        // FIXME: Temporary workaround for bug in slicing of bool tensors.
                        !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  // Tensor slicing does not change the block type.
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    m_is_identity = true;
    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >=
                   op.sizes()[i] + op.startIndices()[i]);
      if (m_impl.dimensions()[i] != op.sizes()[i] ||
          op.startIndices()[i] != 0) {
        m_is_identity = false;
      }
    }

    // No strides for scalars.
    if (NumDims == 0) return;

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
        && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
      if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
        EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    if (m_is_identity) {
      return m_impl.template packet<LoadMode>(index);
    }

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    TensorBlock block = m_impl.block(arg_desc, scratch);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    return block;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  bool m_is_identity;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,
    RawAccess         = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->m_is_identity) {
      this->m_impl.template writePacket<StoreMode>(index, x);
      return;
    }

    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }

  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    this->m_impl.writeBlock(arg_desc, block);
  }
};

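/** \brief Tensor strided slicing class.
  *
  * Extracts a sub-tensor described by per-dimension start indices, stop
  * indices and strides; negative strides walk a dimension in reverse.
  *
  * A minimal usage sketch (values are illustrative, not from this file):
  * \code
  * Eigen::Tensor<float, 2> input(5, 7);
  * input.setRandom();
  * // Every second row (rows 0, 2, 4), all columns: a 3x7 result.
  * Eigen::array<Eigen::Index, 2> starts{{0, 0}};
  * Eigen::array<Eigen::Index, 2> stops{{5, 7}};
  * Eigen::array<Eigen::Index, 2> strides{{2, 1}};
  * Eigen::Tensor<float, 2> output = input.stridedSlice(starts, stops, strides);
  * \endcode
  */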
namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
  typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
  typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
  typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
  typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
    const XprType& expr, const StartIndices& startIndices,
    const StopIndices& stopIndices, const Strides& strides)
      : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
        m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef Strides Dimensions;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        /* implies m_strides[i] < 0 by assert */
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    const InputDimensions& input_dims = m_impl.dimensions();

    // compute output tensor shape
    m_is_identity = true;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
      } else {
        m_dimensions[i] =
            (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
      if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
        m_is_identity = false;
      }
    }

    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    } else {
      return m_impl.coeff(srcCoeff(index));
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    return NULL;
  }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef SYCL_DEVICE_ONLY
    return numext::maxi(min, numext::mini(max,value));
#else
    return cl::sycl::clamp(value, min, max);
#endif
  }
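
  // Worked example of the clamping above (values are illustrative): for a
  // dimension of size 10 with stride -1, start=100 clamps to 9 and stop=-5
  // clamps to -1, so the interval is -10 and the resulting output dimension
  // is (-10 / -1) = 10, i.e. the whole dimension traversed in reverse.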

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  bool m_is_identity;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
    { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    } else {
      return this->m_impl.coeffRef(this->srcCoeff(index));
    }
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H