// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <[email protected]>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H

namespace Eigen {

/** \class TensorStriding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor striding class.
  *
  * Represents a strided view of an expression: coefficient (i0, ..., in) of
  * the result is coefficient (i0 * s0, ..., in * sn) of the input, where
  * (s0, ..., sn) are the strides, so output dimension k has size
  * ceil(input_dim[k] / s[k]).
  */
namespace internal {
template<typename Strides, typename XprType>
struct traits<TensorStridingOp<Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Strides, typename XprType>
struct eval<TensorStridingOp<Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingOp<Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename Strides, typename XprType>
struct nested<TensorStridingOp<Strides, XprType>, 1, typename eval<TensorStridingOp<Strides, XprType> >::type>
{
  typedef TensorStridingOp<Strides, XprType> type;
};

}  // end namespace internal


template<typename Strides, typename XprType>
class TensorStridingOp : public TensorBase<TensorStridingOp<Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingOp<Strides, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorStridingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorStridingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorStridingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingOp(const XprType& expr, const Strides& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Strides m_dims;
};
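
// A minimal usage sketch: a TensorStridingOp is normally created through the
// TensorBase::stride() entry point rather than constructed directly. With a
// 6x8 input and strides {2, 4}, the view keeps every 2nd element along
// dimension 0 and every 4th along dimension 1, giving dimensions
// ceil(6/2) x ceil(8/4) = 3 x 2:
//
//   Eigen::Tensor<float, 2> input(6, 8);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> strides{{2, 4}};
//   Eigen::Tensor<float, 2> output = input.stride(strides);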

// Eval as rvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      // Output dimension i is ceil(input_dim[i] / stride[i]).
      m_dimensions[i] = Eigen::numext::ceil(static_cast<float>(m_dimensions[i]) / op.strides()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_outputStrides[0] = 1;
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_inputStrides[i-1] *= op.strides()[i-1];
      }
      m_inputStrides[NumDims-1] *= op.strides()[NumDims-1];
    } else {  // RowMajor
      m_outputStrides[NumDims-1] = 1;
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_inputStrides[i+1] *= op.strides()[i+1];
      }
      m_inputStrides[0] *= op.strides()[0];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
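
  // packet() below remaps only the two endpoint lanes of the packet through
  // the stride arithmetic. If the resulting source indices are exactly
  // PacketSize - 1 apart, the packet is contiguous in the input and a single
  // unaligned load suffices; otherwise the remaining lanes are gathered one
  // coefficient at a time.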
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[0];
      inputIndices[1] += indices[1] * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_outputStrides[i];
        const Index idx1 = indices[1] / m_outputStrides[i];
        inputIndices[0] += idx0 * m_inputStrides[i];
        inputIndices[1] += idx1 * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = (NumDims - 1) * (TensorOpCost::AddCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           TensorOpCost::DivCost<Index>()) +
                          TensorOpCost::MulCost<Index>();
    if (vectorized) {
      compute_cost *= 2;  // packet() computes two indices
    }
    const int innerDim = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? 0 : (NumDims - 1);
    return m_impl.costPerCoeff(vectorized && m_inputStrides[innerDim] == 1) +
        // Computation is not vectorized per se, but it is done once per packet.
        TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i];
        inputIndex += idx * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += index * m_inputStrides[NumDims-1];
    }
    return inputIndex;
  }

  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};
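
// Worked example of srcCoeff() above (illustrative values, ColMajor, 2-D):
// for a 6x8 input with strides {2, 4}, the output is 3x2 with
// m_outputStrides = {1, 3} and m_inputStrides = {2, 24}. Output linear
// index 4 is output coordinate (1, 1), so srcCoeff(4) = 1*24 + 1*2 = 26,
// i.e. input coordinate (2, 4) = (1*2, 1*4), as expected.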
// Eval as lvalue
template<typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingOp<Strides, ArgType>, Device>
    : public TensorEvaluator<const TensorStridingOp<Strides, ArgType>, Device>
{
  typedef TensorStridingOp<Strides, ArgType> XprType;
  typedef TensorEvaluator<const XprType, Device> Base;
  //  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  //  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/ false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < this->dimensions().TotalSize());

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + PacketSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[0];
      inputIndices[1] += indices[1] * this->m_inputStrides[0];
    } else {  // RowMajor
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_outputStrides[i];
        const Index idx1 = indices[1] / this->m_outputStrides[i];
        inputIndices[0] += idx0 * this->m_inputStrides[i];
        inputIndices[1] += idx1 * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += indices[0] * this->m_inputStrides[NumDims-1];
      inputIndices[1] += indices[1] * this->m_inputStrides[NumDims-1];
    }
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      // Contiguous destination: store the whole packet at once.
      this->m_impl.template writePacket<Unaligned>(inputIndices[0], x);
    }
    else {
      // Scattered destination: spill the packet and store lane by lane.
      EIGEN_ALIGN_MAX Scalar values[PacketSize];
      internal::pstore<Scalar, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[PacketSize-1];
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_STRIDING_H