1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2014 Benoit Steiner <[email protected]> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H 12 13 namespace Eigen { 14 15 /** \class TensorReshaping 16 * \ingroup CXX11_Tensor_Module 17 * 18 * \brief Tensor reshaping class. 19 * 20 * 21 */ 22 namespace internal { 23 template<typename NewDimensions, typename XprType> 24 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType> 25 { 26 typedef typename XprType::Scalar Scalar; 27 typedef traits<XprType> XprTraits; 28 typedef typename XprTraits::StorageKind StorageKind; 29 typedef typename XprTraits::Index Index; 30 typedef typename XprType::Nested Nested; 31 typedef typename remove_reference<Nested>::type _Nested; 32 static const int NumDimensions = array_size<NewDimensions>::value; 33 static const int Layout = XprTraits::Layout; 34 typedef typename XprTraits::PointerType PointerType; 35 }; 36 37 template<typename NewDimensions, typename XprType> 38 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense> 39 { 40 typedef const TensorReshapingOp<NewDimensions, XprType>EIGEN_DEVICE_REF type; 41 }; 42 43 template<typename NewDimensions, typename XprType> 44 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type> 45 { 46 typedef TensorReshapingOp<NewDimensions, XprType> type; 47 }; 48 49 } // end namespace internal 50 51 52 53 template<typename NewDimensions, typename XprType> 54 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> 55 { 56 public: 57 typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base; 58 typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar; 59 typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; 60 typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested; 61 typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind; 62 typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index; 63 64 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) 65 : m_xpr(expr), m_dims(dims) {} 66 67 EIGEN_DEVICE_FUNC 68 const NewDimensions& dimensions() const { return m_dims; } 69 70 EIGEN_DEVICE_FUNC 71 const typename internal::remove_all<typename XprType::Nested>::type& 72 expression() const { return m_xpr; } 73 74 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp) 75 76 protected: 77 typename XprType::Nested m_xpr; 78 const NewDimensions m_dims; 79 }; 80 81 82 // Eval as rvalue 83 template<typename NewDimensions, typename ArgType, typename Device> 84 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> 85 { 86 typedef TensorReshapingOp<NewDimensions, ArgType> XprType; 87 typedef NewDimensions Dimensions; 88 89 typedef typename XprType::Index Index; 90 typedef typename XprType::Scalar Scalar; 91 typedef typename XprType::CoeffReturnType CoeffReturnType; 92 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 93 typedef StorageMemory<CoeffReturnType, Device> Storage; 94 typedef typename Storage::Type EvaluatorPointerType; 95 typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage; 96 97 static const int NumOutputDims = internal::array_size<Dimensions>::value; 98 static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; 99 100 enum ReshapingKind { 101 // We do not use layout information to determine reshaping kind. 102 // Depending on the layout `N` can be inner or outer dimension. 103 OneByN = 0, // expr.reshape(1, N) 104 NByOne = 1, // expr.reshape(N, 1) 105 Runtime = 2 // Reshape dimensions are dynamic (specified at runtime). 106 }; 107 108 // clang-format off 109 static const ReshapingKind kind = 110 #if defined(EIGEN_HAS_INDEX_LIST) 111 (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN 112 : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne 113 : Runtime; 114 #else 115 Runtime; 116 #endif 117 // clang-format on 118 119 enum { 120 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, 121 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, 122 // For trivial reshapes with raw access to underlying data we will provide 123 // zero overhead block access. 124 // TODO(ezhulenev): Consider adding block access without raw access? 125 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess && 126 NumInputDims > 0 && NumOutputDims > 0, 127 PreferBlockAccess = false, 128 Layout = TensorEvaluator<ArgType, Device>::Layout, 129 CoordAccess = false, // to be implemented 130 RawAccess = TensorEvaluator<ArgType, Device>::RawAccess 131 }; 132 133 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 134 135 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 136 typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc; 137 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 138 139 typedef 140 typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims, 141 Layout, Index> 142 TensorBlock; 143 //===--------------------------------------------------------------------===// 144 145 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 146 : m_impl(op.expression(), device), m_dimensions(op.dimensions()) 147 { 148 // The total size of the reshaped tensor must be equal to the total size 149 // of the input tensor. 150 eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); 151 } 152 153 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } 154 155 #ifdef EIGEN_USE_THREADS 156 template <typename EvalSubExprsCallback> 157 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 158 EvaluatorPointerType data, EvalSubExprsCallback done) { 159 m_impl.evalSubExprsIfNeededAsync(data, std::move(done)); 160 } 161 #endif 162 163 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { 164 return m_impl.evalSubExprsIfNeeded(data); 165 } 166 EIGEN_STRONG_INLINE void cleanup() { 167 m_impl.cleanup(); 168 } 169 170 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const 171 { 172 return m_impl.coeff(index); 173 } 174 175 template<int LoadMode> 176 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 177 { 178 return m_impl.template packet<LoadMode>(index); 179 } 180 181 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 182 return m_impl.costPerCoeff(vectorized); 183 } 184 185 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 186 internal::TensorBlockResourceRequirements getResourceRequirements() const { 187 return internal::TensorBlockResourceRequirements::any(); 188 } 189 190 // required in block(OutputTensorBlock* output_block) const 191 // For C++03 compatibility this must be defined outside the method 192 struct BlockIteratorState { 193 Index stride; 194 Index span; 195 Index size; 196 Index count; 197 }; 198 199 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 200 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 201 bool /*root_of_expr_ast*/ = false) const { 202 eigen_assert(m_impl.data() != NULL); 203 eigen_assert((kind == Runtime) || 204 (kind == OneByN && desc.dimensions()[0] == 1) || 205 (kind == NByOne && desc.dimensions()[1] == 1)); 206 207 if (kind == OneByN || kind == NByOne) { 208 // We can guarantee at compile time that block is just a contiguous slice 209 // of the underlying expression memory buffer. 210 return TensorBlock(internal::TensorBlockKind::kView, 211 m_impl.data() + desc.offset(), desc.dimensions()); 212 } else { 213 // This will do additional runtime checks, and in the end it might be also 214 // a view, or it might be a block materialized in the temporary buffer. 215 return TensorBlock::materialize(m_impl.data(), m_dimensions, desc, 216 scratch); 217 } 218 } 219 220 EIGEN_DEVICE_FUNC typename Storage::Type data() const { 221 return constCast(m_impl.data()); 222 } 223 224 EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } 225 226 #ifdef EIGEN_USE_SYCL 227 // binding placeholder accessors to a command group handler for SYCL 228 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 229 m_impl.bind(cgh); 230 } 231 #endif 232 protected: 233 TensorEvaluator<ArgType, Device> m_impl; 234 NewDimensions m_dimensions; 235 }; 236 237 238 // Eval as lvalue 239 template<typename NewDimensions, typename ArgType, typename Device> 240 struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device> 241 : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> 242 243 { 244 typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base; 245 typedef TensorReshapingOp<NewDimensions, ArgType> XprType; 246 typedef NewDimensions Dimensions; 247 248 enum { 249 IsAligned = TensorEvaluator<ArgType, Device>::IsAligned, 250 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, 251 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess, 252 PreferBlockAccess = false, 253 Layout = TensorEvaluator<ArgType, Device>::Layout, 254 CoordAccess = false, // to be implemented 255 RawAccess = TensorEvaluator<ArgType, Device>::RawAccess 256 }; 257 258 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 259 : Base(op, device) 260 { } 261 262 typedef typename XprType::Index Index; 263 typedef typename XprType::Scalar Scalar; 264 typedef typename XprType::CoeffReturnType CoeffReturnType; 265 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 266 267 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 268 typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index> 269 TensorBlockDesc; 270 //===--------------------------------------------------------------------===// 271 272 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) 273 { 274 return this->m_impl.coeffRef(index); 275 } 276 277 template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 278 void writePacket(Index index, const PacketReturnType& x) 279 { 280 this->m_impl.template writePacket<StoreMode>(index, x); 281 } 282 283 template <typename TensorBlock> 284 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( 285 const TensorBlockDesc& desc, const TensorBlock& block) { 286 assert(this->m_impl.data() != NULL); 287 288 typedef typename TensorBlock::XprType TensorBlockExpr; 289 typedef internal::TensorBlockAssignment< 290 Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index> 291 TensorBlockAssign; 292 293 TensorBlockAssign::Run( 294 TensorBlockAssign::target(desc.dimensions(), 295 internal::strides<Layout>(this->dimensions()), 296 this->m_impl.data(), desc.offset()), 297 block.expr()); 298 } 299 }; 300 301 302 /** \class TensorSlicing 303 * \ingroup CXX11_Tensor_Module 304 * 305 * \brief Tensor slicing class. 306 * 307 * 308 */ 309 namespace internal { 310 template<typename StartIndices, typename Sizes, typename XprType> 311 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType> 312 { 313 typedef typename XprType::Scalar Scalar; 314 typedef traits<XprType> XprTraits; 315 typedef typename XprTraits::StorageKind StorageKind; 316 typedef typename XprTraits::Index Index; 317 typedef typename XprType::Nested Nested; 318 typedef typename remove_reference<Nested>::type _Nested; 319 static const int NumDimensions = array_size<StartIndices>::value; 320 static const int Layout = XprTraits::Layout; 321 typedef typename XprTraits::PointerType PointerType; 322 }; 323 324 template<typename StartIndices, typename Sizes, typename XprType> 325 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense> 326 { 327 typedef const TensorSlicingOp<StartIndices, Sizes, XprType>EIGEN_DEVICE_REF type; 328 }; 329 330 template<typename StartIndices, typename Sizes, typename XprType> 331 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type> 332 { 333 typedef TensorSlicingOp<StartIndices, Sizes, XprType> type; 334 }; 335 336 } // end namespace internal 337 338 339 340 template<typename StartIndices, typename Sizes, typename XprType> 341 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > 342 { 343 public: 344 typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base; 345 typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar; 346 typedef typename XprType::CoeffReturnType CoeffReturnType; 347 typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested; 348 typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind; 349 typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index; 350 351 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) 352 : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} 353 354 EIGEN_DEVICE_FUNC 355 const StartIndices& startIndices() const { return m_indices; } 356 EIGEN_DEVICE_FUNC 357 const Sizes& sizes() const { return m_sizes; } 358 359 EIGEN_DEVICE_FUNC 360 const typename internal::remove_all<typename XprType::Nested>::type& 361 expression() const { return m_xpr; } 362 363 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp) 364 365 protected: 366 typename XprType::Nested m_xpr; 367 const StartIndices m_indices; 368 const Sizes m_sizes; 369 }; 370 371 372 // Fixme: figure out the exact threshold 373 namespace { 374 template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing { 375 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } 376 EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const { 377 const bool prefer_block_evaluation = BlockAccess && total > 32*1024; 378 return !prefer_block_evaluation && contiguous > threshold_; 379 } 380 381 private: 382 Index threshold_; 383 }; 384 385 // It is very expensive to start the memcpy kernel on GPU: we therefore only 386 // use it for large copies. 387 #ifdef EIGEN_USE_GPU 388 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> { 389 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } 390 EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; } 391 }; 392 #endif 393 394 // It is very expensive to start the memcpy kernel on GPU: we therefore only 395 // use it for large copies. 396 #ifdef EIGEN_USE_SYCL 397 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> { 398 EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { } 399 EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; } 400 }; 401 #endif 402 403 } 404 405 // Eval as rvalue 406 template<typename StartIndices, typename Sizes, typename ArgType, typename Device> 407 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> 408 { 409 typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; 410 static const int NumDims = internal::array_size<Sizes>::value; 411 412 typedef typename XprType::Index Index; 413 typedef typename XprType::Scalar Scalar; 414 typedef typename XprType::CoeffReturnType CoeffReturnType; 415 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 416 typedef Sizes Dimensions; 417 typedef StorageMemory<CoeffReturnType, Device> Storage; 418 typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage; 419 typedef typename Storage::Type EvaluatorPointerType; 420 421 enum { 422 // Alignment can't be guaranteed at compile time since it depends on the 423 // slice offsets and sizes. 424 IsAligned = false, 425 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, 426 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess && 427 // FIXME: Temporary workaround for bug in slicing of bool tensors. 428 !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value, 429 PreferBlockAccess = true, 430 Layout = TensorEvaluator<ArgType, Device>::Layout, 431 CoordAccess = false, 432 RawAccess = false 433 }; 434 435 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 436 437 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 438 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; 439 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 440 441 // Tensor slicing does not change the block type. 442 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock 443 TensorBlock; 444 //===--------------------------------------------------------------------===// 445 446 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 447 : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) 448 { 449 m_is_identity = true; 450 for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) { 451 eigen_assert(m_impl.dimensions()[i] >= 452 op.sizes()[i] + op.startIndices()[i]); 453 if (m_impl.dimensions()[i] != op.sizes()[i] || 454 op.startIndices()[i] != 0) { 455 m_is_identity = false; 456 } 457 } 458 459 // No strides for scalars. 460 if (NumDims == 0) return; 461 462 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions(); 463 const Sizes& output_dims = op.sizes(); 464 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 465 m_inputStrides[0] = 1; 466 for (int i = 1; i < NumDims; ++i) { 467 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; 468 } 469 470 // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. 471 m_outputStrides[0] = 1; 472 for (int i = 1; i < NumDims; ++i) { 473 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; 474 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); 475 } 476 } else { 477 m_inputStrides[NumDims-1] = 1; 478 for (int i = NumDims - 2; i >= 0; --i) { 479 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; 480 } 481 482 // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. 483 m_outputStrides[NumDims-1] = 1; 484 for (int i = NumDims - 2; i >= 0; --i) { 485 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; 486 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); 487 } 488 } 489 } 490 491 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } 492 493 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) { 494 m_impl.evalSubExprsIfNeeded(NULL); 495 if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization 496 && data && m_impl.data()) { 497 Index contiguous_values = 1; 498 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 499 for (int i = 0; i < NumDims; ++i) { 500 contiguous_values *= dimensions()[i]; 501 if (dimensions()[i] != m_impl.dimensions()[i]) { 502 break; 503 } 504 } 505 } else { 506 for (int i = NumDims-1; i >= 0; --i) { 507 contiguous_values *= dimensions()[i]; 508 if (dimensions()[i] != m_impl.dimensions()[i]) { 509 break; 510 } 511 } 512 } 513 // Use memcpy if it's going to be faster than using the regular evaluation. 514 const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device); 515 if (trigger(internal::array_prod(dimensions()), contiguous_values)) { 516 EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data(); 517 for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { 518 Index offset = srcCoeff(i); 519 m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar)); 520 } 521 return false; 522 } 523 } 524 return true; 525 } 526 527 #ifdef EIGEN_USE_THREADS 528 template <typename EvalSubExprsCallback> 529 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync( 530 EvaluatorPointerType /*data*/, EvalSubExprsCallback done) { 531 m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); }); 532 } 533 #endif // EIGEN_USE_THREADS 534 535 EIGEN_STRONG_INLINE void cleanup() { 536 m_impl.cleanup(); 537 } 538 539 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const 540 { 541 if (m_is_identity) { 542 return m_impl.coeff(index); 543 } else { 544 return m_impl.coeff(srcCoeff(index)); 545 } 546 } 547 548 template<int LoadMode> 549 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const 550 { 551 const int packetSize = PacketType<CoeffReturnType, Device>::size; 552 EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) 553 eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); 554 555 if (m_is_identity) { 556 return m_impl.template packet<LoadMode>(index); 557 } 558 559 Index inputIndices[] = {0, 0}; 560 Index indices[] = {index, index + packetSize - 1}; 561 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 562 EIGEN_UNROLL_LOOP 563 for (int i = NumDims - 1; i > 0; --i) { 564 const Index idx0 = indices[0] / m_fastOutputStrides[i]; 565 const Index idx1 = indices[1] / m_fastOutputStrides[i]; 566 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; 567 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; 568 indices[0] -= idx0 * m_outputStrides[i]; 569 indices[1] -= idx1 * m_outputStrides[i]; 570 } 571 inputIndices[0] += (indices[0] + m_offsets[0]); 572 inputIndices[1] += (indices[1] + m_offsets[0]); 573 } else { 574 EIGEN_UNROLL_LOOP 575 for (int i = 0; i < NumDims - 1; ++i) { 576 const Index idx0 = indices[0] / m_fastOutputStrides[i]; 577 const Index idx1 = indices[1] / m_fastOutputStrides[i]; 578 inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; 579 inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; 580 indices[0] -= idx0 * m_outputStrides[i]; 581 indices[1] -= idx1 * m_outputStrides[i]; 582 } 583 inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); 584 inputIndices[1] += (indices[1] + m_offsets[NumDims-1]); 585 } 586 if (inputIndices[1] - inputIndices[0] == packetSize - 1) { 587 PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); 588 return rslt; 589 } 590 else { 591 EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize]; 592 values[0] = m_impl.coeff(inputIndices[0]); 593 values[packetSize-1] = m_impl.coeff(inputIndices[1]); 594 EIGEN_UNROLL_LOOP 595 for (int i = 1; i < packetSize-1; ++i) { 596 values[i] = coeff(index+i); 597 } 598 PacketReturnType rslt = internal::pload<PacketReturnType>(values); 599 return rslt; 600 } 601 } 602 603 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 604 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); 605 } 606 607 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 608 internal::TensorBlockResourceRequirements getResourceRequirements() const { 609 const size_t target_size = m_device.lastLevelCacheSize(); 610 return internal::TensorBlockResourceRequirements::merge( 611 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size), 612 m_impl.getResourceRequirements()); 613 } 614 615 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock 616 block(TensorBlockDesc& desc, TensorBlockScratch& scratch, 617 bool /*root_of_expr_ast*/ = false) const { 618 TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset())); 619 TensorBlock block = m_impl.block(arg_desc, scratch); 620 if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer(); 621 return block; 622 } 623 624 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { 625 typename Storage::Type result = constCast(m_impl.data()); 626 if (result) { 627 Index offset = 0; 628 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 629 for (int i = 0; i < NumDims; ++i) { 630 if (m_dimensions[i] != m_impl.dimensions()[i]) { 631 offset += m_offsets[i] * m_inputStrides[i]; 632 for (int j = i+1; j < NumDims; ++j) { 633 if (m_dimensions[j] > 1) { 634 return NULL; 635 } 636 offset += m_offsets[j] * m_inputStrides[j]; 637 } 638 break; 639 } 640 } 641 } else { 642 for (int i = NumDims - 1; i >= 0; --i) { 643 if (m_dimensions[i] != m_impl.dimensions()[i]) { 644 offset += m_offsets[i] * m_inputStrides[i]; 645 for (int j = i-1; j >= 0; --j) { 646 if (m_dimensions[j] > 1) { 647 return NULL; 648 } 649 offset += m_offsets[j] * m_inputStrides[j]; 650 } 651 break; 652 } 653 } 654 } 655 return result + offset; 656 } 657 return NULL; 658 } 659 #ifdef EIGEN_USE_SYCL 660 // binding placeholder accessors to a command group handler for SYCL 661 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 662 m_impl.bind(cgh); 663 } 664 #endif 665 666 protected: 667 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const 668 { 669 Index inputIndex = 0; 670 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 671 EIGEN_UNROLL_LOOP 672 for (int i = NumDims - 1; i > 0; --i) { 673 const Index idx = index / m_fastOutputStrides[i]; 674 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; 675 index -= idx * m_outputStrides[i]; 676 } 677 inputIndex += (index + m_offsets[0]); 678 } else { 679 EIGEN_UNROLL_LOOP 680 for (int i = 0; i < NumDims - 1; ++i) { 681 const Index idx = index / m_fastOutputStrides[i]; 682 inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; 683 index -= idx * m_outputStrides[i]; 684 } 685 inputIndex += (index + m_offsets[NumDims-1]); 686 } 687 return inputIndex; 688 } 689 690 array<Index, NumDims> m_outputStrides; 691 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; 692 array<Index, NumDims> m_inputStrides; 693 TensorEvaluator<ArgType, Device> m_impl; 694 const Device EIGEN_DEVICE_REF m_device; 695 Dimensions m_dimensions; 696 bool m_is_identity; 697 const StartIndices m_offsets; 698 }; 699 700 701 // Eval as lvalue 702 template<typename StartIndices, typename Sizes, typename ArgType, typename Device> 703 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> 704 : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> 705 { 706 typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base; 707 typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType; 708 static const int NumDims = internal::array_size<Sizes>::value; 709 710 typedef typename XprType::Index Index; 711 typedef typename XprType::Scalar Scalar; 712 typedef typename XprType::CoeffReturnType CoeffReturnType; 713 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 714 typedef Sizes Dimensions; 715 716 enum { 717 IsAligned = false, 718 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess, 719 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess, 720 PreferBlockAccess = true, 721 Layout = TensorEvaluator<ArgType, Device>::Layout, 722 CoordAccess = false, 723 RawAccess = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess 724 }; 725 726 typedef typename internal::remove_const<Scalar>::type ScalarNoConst; 727 728 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 729 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc; 730 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch; 731 //===--------------------------------------------------------------------===// 732 733 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 734 : Base(op, device) 735 { } 736 737 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) 738 { 739 if (this->m_is_identity) { 740 return this->m_impl.coeffRef(index); 741 } else { 742 return this->m_impl.coeffRef(this->srcCoeff(index)); 743 } 744 } 745 746 template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 747 void writePacket(Index index, const PacketReturnType& x) 748 { 749 if (this->m_is_identity) { 750 this->m_impl.template writePacket<StoreMode>(index, x); 751 return; 752 } 753 754 const int packetSize = PacketType<CoeffReturnType, Device>::size; 755 Index inputIndices[] = {0, 0}; 756 Index indices[] = {index, index + packetSize - 1}; 757 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 758 EIGEN_UNROLL_LOOP 759 for (int i = NumDims - 1; i > 0; --i) { 760 const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; 761 const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; 762 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; 763 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; 764 indices[0] -= idx0 * this->m_outputStrides[i]; 765 indices[1] -= idx1 * this->m_outputStrides[i]; 766 } 767 inputIndices[0] += (indices[0] + this->m_offsets[0]); 768 inputIndices[1] += (indices[1] + this->m_offsets[0]); 769 } else { 770 EIGEN_UNROLL_LOOP 771 for (int i = 0; i < NumDims - 1; ++i) { 772 const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; 773 const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; 774 inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; 775 inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; 776 indices[0] -= idx0 * this->m_outputStrides[i]; 777 indices[1] -= idx1 * this->m_outputStrides[i]; 778 } 779 inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); 780 inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); 781 } 782 if (inputIndices[1] - inputIndices[0] == packetSize - 1) { 783 this->m_impl.template writePacket<StoreMode>(inputIndices[0], x); 784 } 785 else { 786 EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; 787 internal::pstore<CoeffReturnType, PacketReturnType>(values, x); 788 this->m_impl.coeffRef(inputIndices[0]) = values[0]; 789 this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; 790 EIGEN_UNROLL_LOOP 791 for (int i = 1; i < packetSize-1; ++i) { 792 this->coeffRef(index+i) = values[i]; 793 } 794 } 795 } 796 797 template<typename TensorBlock> 798 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock( 799 const TensorBlockDesc& desc, const TensorBlock& block) { 800 TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset())); 801 this->m_impl.writeBlock(arg_desc, block); 802 } 803 }; 804 805 namespace internal { 806 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> 807 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType> 808 { 809 typedef typename XprType::Scalar Scalar; 810 typedef traits<XprType> XprTraits; 811 typedef typename XprTraits::StorageKind StorageKind; 812 typedef typename XprTraits::Index Index; 813 typedef typename XprType::Nested Nested; 814 typedef typename remove_reference<Nested>::type _Nested; 815 static const int NumDimensions = array_size<StartIndices>::value; 816 static const int Layout = XprTraits::Layout; 817 typedef typename XprTraits::PointerType PointerType; 818 }; 819 820 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> 821 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense> 822 { 823 typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>EIGEN_DEVICE_REF type; 824 }; 825 826 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> 827 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type> 828 { 829 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type; 830 }; 831 832 } // end namespace internal 833 834 835 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType> 836 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > 837 { 838 public: 839 typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base; 840 typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar; 841 typedef typename XprType::CoeffReturnType CoeffReturnType; 842 typedef typename internal::nested<TensorStridingSlicingOp>::type Nested; 843 typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind; 844 typedef typename internal::traits<TensorStridingSlicingOp>::Index Index; 845 846 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( 847 const XprType& expr, const StartIndices& startIndices, 848 const StopIndices& stopIndices, const Strides& strides) 849 : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), 850 m_strides(strides) {} 851 852 EIGEN_DEVICE_FUNC 853 const StartIndices& startIndices() const { return m_startIndices; } 854 EIGEN_DEVICE_FUNC 855 const StartIndices& stopIndices() const { return m_stopIndices; } 856 EIGEN_DEVICE_FUNC 857 const StartIndices& strides() const { return m_strides; } 858 859 EIGEN_DEVICE_FUNC 860 const typename internal::remove_all<typename XprType::Nested>::type& 861 expression() const { return m_xpr; } 862 863 EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp) 864 865 protected: 866 typename XprType::Nested m_xpr; 867 const StartIndices m_startIndices; 868 const StopIndices m_stopIndices; 869 const Strides m_strides; 870 }; 871 872 // Eval as rvalue 873 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> 874 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> 875 { 876 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; 877 static const int NumDims = internal::array_size<Strides>::value; 878 typedef typename XprType::Index Index; 879 typedef typename XprType::Scalar Scalar; 880 typedef typename XprType::CoeffReturnType CoeffReturnType; 881 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 882 typedef StorageMemory<CoeffReturnType, Device> Storage; 883 typedef typename Storage::Type EvaluatorPointerType; 884 typedef Strides Dimensions; 885 886 enum { 887 // Alignment can't be guaranteed at compile time since it depends on the 888 // slice offsets and sizes. 889 IsAligned = false, 890 PacketAccess = false, 891 BlockAccess = false, 892 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, 893 Layout = TensorEvaluator<ArgType, Device>::Layout, 894 RawAccess = false 895 }; 896 897 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 898 typedef internal::TensorBlockNotImplemented TensorBlock; 899 //===--------------------------------------------------------------------===// 900 901 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 902 : m_impl(op.expression(), device), 903 m_device(device), 904 m_strides(op.strides()) 905 { 906 // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero 907 DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped; 908 for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { 909 eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); 910 if (m_strides[i] > 0) { 911 startIndicesClamped[i] = 912 clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); 913 stopIndicesClamped[i] = 914 clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); 915 } else { 916 /* implies m_strides[i] < 0 by assert */ 917 startIndicesClamped[i] = 918 clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); 919 stopIndicesClamped[i] = 920 clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); 921 } 922 m_startIndices[i] = startIndicesClamped[i]; 923 } 924 925 typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions; 926 const InputDimensions& input_dims = m_impl.dimensions(); 927 928 // compute output tensor shape 929 m_is_identity = true; 930 for (int i = 0; i < NumDims; i++) { 931 Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; 932 if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) { 933 m_dimensions[i] = 0; 934 } else { 935 m_dimensions[i] = 936 (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0); 937 eigen_assert(m_dimensions[i] >= 0); 938 } 939 if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) { 940 m_is_identity = false; 941 } 942 } 943 944 Strides output_dims = m_dimensions; 945 946 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 947 m_inputStrides[0] = m_strides[0]; 948 m_offsets[0] = startIndicesClamped[0]; 949 Index previousDimProduct = 1; 950 for (int i = 1; i < NumDims; ++i) { 951 previousDimProduct *= input_dims[i-1]; 952 m_inputStrides[i] = previousDimProduct * m_strides[i]; 953 m_offsets[i] = startIndicesClamped[i] * previousDimProduct; 954 } 955 956 // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. 957 m_outputStrides[0] = 1; 958 for (int i = 1; i < NumDims; ++i) { 959 m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; 960 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); 961 } 962 } else { 963 m_inputStrides[NumDims-1] = m_strides[NumDims-1]; 964 m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; 965 Index previousDimProduct = 1; 966 for (int i = NumDims - 2; i >= 0; --i) { 967 previousDimProduct *= input_dims[i+1]; 968 m_inputStrides[i] = previousDimProduct * m_strides[i]; 969 m_offsets[i] = startIndicesClamped[i] * previousDimProduct; 970 } 971 972 m_outputStrides[NumDims-1] = 1; 973 for (int i = NumDims - 2; i >= 0; --i) { 974 m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; 975 m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1); 976 } 977 } 978 } 979 980 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } 981 982 983 EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { 984 m_impl.evalSubExprsIfNeeded(NULL); 985 return true; 986 } 987 988 EIGEN_STRONG_INLINE void cleanup() { 989 m_impl.cleanup(); 990 } 991 992 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const 993 { 994 if (m_is_identity) { 995 return m_impl.coeff(index); 996 } else { 997 return m_impl.coeff(srcCoeff(index)); 998 } 999 } 1000 1001 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { 1002 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims); 1003 } 1004 1005 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const { 1006 return NULL; 1007 } 1008 #ifdef EIGEN_USE_SYCL 1009 // binding placeholder accessors to a command group handler for SYCL 1010 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const { 1011 m_impl.bind(cgh); 1012 } 1013 #endif 1014 protected: 1015 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const 1016 { 1017 Index inputIndex = 0; 1018 if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { 1019 EIGEN_UNROLL_LOOP 1020 for (int i = NumDims - 1; i >= 0; --i) { 1021 const Index idx = index / m_fastOutputStrides[i]; 1022 inputIndex += idx * m_inputStrides[i] + m_offsets[i]; 1023 index -= idx * m_outputStrides[i]; 1024 } 1025 } else { 1026 EIGEN_UNROLL_LOOP 1027 for (int i = 0; i < NumDims; ++i) { 1028 const Index idx = index / m_fastOutputStrides[i]; 1029 inputIndex += idx * m_inputStrides[i] + m_offsets[i]; 1030 index -= idx * m_outputStrides[i]; 1031 } 1032 } 1033 return inputIndex; 1034 } 1035 1036 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { 1037 #ifndef SYCL_DEVICE_ONLY 1038 return numext::maxi(min, numext::mini(max,value)); 1039 #else 1040 return cl::sycl::clamp(value, min, max); 1041 #endif 1042 } 1043 1044 array<Index, NumDims> m_outputStrides; 1045 array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides; 1046 array<Index, NumDims> m_inputStrides; 1047 bool m_is_identity; 1048 TensorEvaluator<ArgType, Device> m_impl; 1049 const Device EIGEN_DEVICE_REF m_device; 1050 DSizes<Index, NumDims> m_startIndices; // clamped startIndices 1051 DSizes<Index, NumDims> m_dimensions; 1052 DSizes<Index, NumDims> m_offsets; // offset in a flattened shape 1053 const Strides m_strides; 1054 }; 1055 1056 // Eval as lvalue 1057 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device> 1058 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> 1059 : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> 1060 { 1061 typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base; 1062 typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType; 1063 static const int NumDims = internal::array_size<Strides>::value; 1064 1065 enum { 1066 IsAligned = false, 1067 PacketAccess = false, 1068 BlockAccess = false, 1069 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess, 1070 Layout = TensorEvaluator<ArgType, Device>::Layout, 1071 CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess, 1072 RawAccess = false 1073 }; 1074 1075 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===// 1076 typedef internal::TensorBlockNotImplemented TensorBlock; 1077 //===--------------------------------------------------------------------===// 1078 1079 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) 1080 : Base(op, device) 1081 { } 1082 1083 typedef typename XprType::Index Index; 1084 typedef typename XprType::Scalar Scalar; 1085 typedef typename XprType::CoeffReturnType CoeffReturnType; 1086 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; 1087 typedef Strides Dimensions; 1088 1089 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) 1090 { 1091 if (this->m_is_identity) { 1092 return this->m_impl.coeffRef(index); 1093 } else { 1094 return this->m_impl.coeffRef(this->srcCoeff(index)); 1095 } 1096 } 1097 }; 1098 1099 1100 } // end namespace Eigen 1101 1102 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H 1103