1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2014 Benoit Steiner <[email protected]> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H 12 13 14 namespace Eigen { 15 namespace internal { 16 17 enum { 18 ShardByRow = 0, 19 ShardByCol = 1 20 }; 21 22 23 // Default Blocking Strategy 24 template<typename ResScalar, typename LhsScalar, typename RhsScalar, typename StorageIndex, int ShardingType = ShardByCol> 25 class TensorContractionBlocking { 26 public: 27 28 /* 29 adding EIGEN_DEVICE_FUNC unconditionally to 'TensorContractionBlocking' constructor in `TensorContractionBlocking.h` 30 requires adding EIGEN_DEVICE_FUNC to `computeProductBlockingSizes` in `GeneralBlockPanelKernel.h` 31 which in turn, requires adding EIGEN_DEVICE_FUNC to `evaluateProductBlockingSizesHeuristic` in `GeneralBlockPanelKernel.h` 32 which in turn, requires adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` 33 (else HIPCC will error out) 34 35 However adding EIGEN_DEVICE_FUNC to `manage_caching_sizes` in `GeneralBlockPanelKernel.h` 36 results in NVCC erroring out with the following error 37 38 ../Eigen/src/Core/products/GeneralBlockPanelKernel.h(57): error #2901: 39 dynamic initialization is not supported for function-scope static variables within a __device__/__global__ function 40 */ 41 42 #if !defined(EIGEN_HIPCC) 43 EIGEN_DEVICE_FUNC 44 #endif 45 TensorContractionBlocking(StorageIndex k, StorageIndex m, StorageIndex n, StorageIndex num_threads = 1) : kc_(k)46 kc_(k), mc_(m), nc_(n) 47 { 48 if (ShardingType == ShardByCol) { 49 computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, mc_, nc_, num_threads); 50 } 51 else { 52 computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads); 53 } 54 55 const int rhs_packet_size = internal::packet_traits<RhsScalar>::size; 56 kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ? 57 kc_ : (kc_ / rhs_packet_size) * rhs_packet_size; 58 } 59 kc()60 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; } mc()61 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex mc() const { return mc_; } nc()62 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex nc() const { return nc_; } 63 64 private: 65 StorageIndex kc_; 66 StorageIndex mc_; 67 StorageIndex nc_; 68 }; 69 70 } // end namespace internal 71 } // end namespace Eigen 72 73 #endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H 74