// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H

namespace Eigen {

/** \class TensorGeneratorOp
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor generator class.
  *
  * Generates each coefficient on the fly by calling a user-supplied generator
  * functor with the coefficient's coordinates; the wrapped expression is used
  * only for its dimensions.
  */
namespace internal {
template<typename Generator, typename XprType>
struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Generator, typename XprType>
struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense>
{
  typedef const TensorGeneratorOp<Generator, XprType>& type;
};

template<typename Generator, typename XprType>
struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type>
{
  typedef TensorGeneratorOp<Generator, XprType> type;
};

} // end namespace internal



template<typename Generator, typename XprType>
class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator)
      : m_xpr(expr), m_generator(generator) {}

  EIGEN_DEVICE_FUNC
  const Generator& generator() const { return m_generator; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  protected:
  typename XprType::Nested m_xpr;
  const Generator m_generator;
};


// Eval as rvalue
template<typename Generator, typename ArgType, typename Device>
struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
{
  typedef TensorGeneratorOp<Generator, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  static const int NumDims = internal::array_size<Dimensions>::value;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  enum {
    IsAligned = false,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = true,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device), m_generator(op.generator())
  {
    TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
    m_dimensions = argImpl.dimensions();

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
        if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims - 1] = 1;
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
        if (m_strides[i] != 0) m_fast_strides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    array<Index, NumDims> coords;
    extract_coordinates(index, coords);
    return m_generator(coords);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.firstLevelCacheSize();
    // TODO(ezhulenev): Generator should have a cost.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
        target_size);
  }

  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    static const bool is_col_major =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Compute spatial coordinates for the first block element.
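    // A worked example of this mapping (the sizes are hypothetical): for a
    // col-major 2x3 tensor, m_strides is {1, 2}, so desc.offset() == 5 yields
    // initial coords {1, 2}, since 5 == 1 * 1 + 2 * 2.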
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;

    // Offset in the output block buffer.
    Index offset = 0;

    // Initialize the output block iterator state. Dimensions in this array
    // are always ordered inner-most to outer-most (col-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
      it[i].count = 0;
    }
    eigen_assert(it[0].stride == 1);

    // Prepare storage for the materialized generator result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    CoeffReturnType* block_buffer = block_storage.data();

    static const int packet_size = PacketType<CoeffReturnType, Device>::size;

    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;
    const Index inner_dim_vectorized = inner_dim_size - packet_size;

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      Index i = 0;
      // Generate data for the vectorized part of the inner-most dimension.
      for (; i <= inner_dim_vectorized; i += packet_size) {
        for (Index j = 0; j < packet_size; ++j) {
          array<Index, NumDims> j_coords = coords;  // Break loop dependence.
          j_coords[inner_dim] += j;
          *(block_buffer + offset + i + j) = m_generator(j_coords);
        }
        coords[inner_dim] += packet_size;
      }
      // Finalize the non-vectorized part of the inner-most dimension.
      for (; i < inner_dim_size; ++i) {
        *(block_buffer + offset + i) = m_generator(coords);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];

      // For a 1d tensor we only need to generate the single inner-most dimension.
      if (NumDims == 1) break;

      // Update the offset and coordinates for the next inner-most run.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] =
            initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool) const {
    // TODO(rmlarsen): This is just a placeholder. Define an interface so that
    // generators can return their cost.
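    // Until then, assume one add and one mul per generated coefficient, and
    // no bytes loaded or stored.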
    return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
                              TensorOpCost::MulCost<Scalar>());
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler&) const {}
#endif

  protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims-1] = index;
    }
  }

  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fast_strides;
  Generator m_generator;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
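// A minimal usage sketch, assuming a hypothetical LinearIndexGenerator
// functor: TensorGeneratorOp expressions are normally built through
// TensorBase::generate(), which hands each coefficient's coordinates to the
// functor and takes only the dimensions from the wrapped expression:
//
//   struct LinearIndexGenerator {
//     float operator()(const Eigen::array<Eigen::Index, 2>& coords) const {
//       return static_cast<float>(coords[0] * 3 + coords[1]);
//     }
//   };
//
//   Eigen::Tensor<float, 2> input(4, 3);
//   Eigen::Tensor<float, 2> result = input.generate(LinearIndexGenerator());
//   // result(i, j) == i * 3 + j; the values of input are never read.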