1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2014 Benoit Steiner <[email protected]> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H 12 13 namespace Eigen { 14 15 /** \class TensorDevice 16 * \ingroup CXX11_Tensor_Module 17 * 18 * \brief Pseudo expression providing an operator = that will evaluate its argument 19 * on the specified computing 'device' (GPU, thread pool, ...) 20 * 21 * Example: 22 * C.device(EIGEN_GPU) = A + B; 23 * 24 * Todo: operator *= and /=. 25 */ 26 27 template <typename ExpressionType, typename DeviceType> class TensorDevice { 28 public: TensorDevice(const DeviceType & device,ExpressionType & expression)29 TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} 30 EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorDevice)31 EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorDevice) 32 33 template<typename OtherDerived> 34 EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { 35 typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; 36 Assign assign(m_expression, other); 37 internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); 38 return *this; 39 } 40 41 template<typename OtherDerived> 42 EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { 43 typedef typename OtherDerived::Scalar Scalar; 44 typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum; 45 Sum sum(m_expression, other); 46 typedef TensorAssignOp<ExpressionType, const Sum> Assign; 47 Assign assign(m_expression, sum); 48 internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); 49 return *this; 50 } 51 52 template<typename OtherDerived> 53 EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { 54 typedef typename OtherDerived::Scalar Scalar; 55 typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference; 56 Difference difference(m_expression, other); 57 typedef TensorAssignOp<ExpressionType, const Difference> Assign; 58 Assign assign(m_expression, difference); 59 internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device); 60 return *this; 61 } 62 63 protected: 64 const DeviceType& m_device; 65 ExpressionType& m_expression; 66 }; 67 68 /** \class TensorAsyncDevice 69 * \ingroup CXX11_Tensor_Module 70 * 71 * \brief Pseudo expression providing an operator = that will evaluate its 72 * argument asynchronously on the specified device. Currently only 73 * ThreadPoolDevice implements proper asynchronous execution, while the default 74 * and GPU devices just run the expression synchronously and call m_done() on 75 * completion.. 76 * 77 * Example: 78 * auto done = []() { ... expression evaluation done ... }; 79 * C.device(thread_pool_device, std::move(done)) = A + B; 80 */ 81 82 template <typename ExpressionType, typename DeviceType, typename DoneCallback> 83 class TensorAsyncDevice { 84 public: TensorAsyncDevice(const DeviceType & device,ExpressionType & expression,DoneCallback done)85 TensorAsyncDevice(const DeviceType& device, ExpressionType& expression, 86 DoneCallback done) 87 : m_device(device), m_expression(expression), m_done(std::move(done)) {} 88 89 template <typename OtherDerived> 90 EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { 91 typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; 92 typedef internal::TensorExecutor<const Assign, DeviceType> Executor; 93 94 Assign assign(m_expression, other); 95 Executor::run(assign, m_device); 96 m_done(); 97 98 return *this; 99 } 100 101 protected: 102 const DeviceType& m_device; 103 ExpressionType& m_expression; 104 DoneCallback m_done; 105 }; 106 107 108 #ifdef EIGEN_USE_THREADS 109 template <typename ExpressionType, typename DoneCallback> 110 class TensorAsyncDevice<ExpressionType, ThreadPoolDevice, DoneCallback> { 111 public: TensorAsyncDevice(const ThreadPoolDevice & device,ExpressionType & expression,DoneCallback done)112 TensorAsyncDevice(const ThreadPoolDevice& device, ExpressionType& expression, 113 DoneCallback done) 114 : m_device(device), m_expression(expression), m_done(std::move(done)) {} 115 116 template <typename OtherDerived> 117 EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) { 118 typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign; 119 typedef internal::TensorAsyncExecutor<const Assign, ThreadPoolDevice, DoneCallback> Executor; 120 121 // WARNING: After assignment 'm_done' callback will be in undefined state. 122 Assign assign(m_expression, other); 123 Executor::runAsync(assign, m_device, std::move(m_done)); 124 125 return *this; 126 } 127 128 protected: 129 const ThreadPoolDevice& m_device; 130 ExpressionType& m_expression; 131 DoneCallback m_done; 132 }; 133 #endif 134 135 } // end namespace Eigen 136 137 #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H 138