xref: /aosp_15_r20/external/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h (revision bf2c37156dfe67e5dfebd6d394bad8b2ab5804d4)
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <[email protected]>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
12 
13 namespace Eigen {
14 
15 /** \class TensorDevice
16   * \ingroup CXX11_Tensor_Module
17   *
18   * \brief Pseudo expression providing an operator = that will evaluate its argument
19   * on the specified computing 'device' (GPU, thread pool, ...)
20   *
21   * Example:
22   *    C.device(EIGEN_GPU) = A + B;
23   *
24   * Todo: operator *= and /=.
25   */
26 
27 template <typename ExpressionType, typename DeviceType> class TensorDevice {
28   public:
TensorDevice(const DeviceType & device,ExpressionType & expression)29     TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {}
30 
EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorDevice)31     EIGEN_DEFAULT_COPY_CONSTRUCTOR(TensorDevice)
32 
33     template<typename OtherDerived>
34     EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) {
35       typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
36       Assign assign(m_expression, other);
37       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
38       return *this;
39     }
40 
41     template<typename OtherDerived>
42     EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) {
43       typedef typename OtherDerived::Scalar Scalar;
44       typedef TensorCwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ExpressionType, const OtherDerived> Sum;
45       Sum sum(m_expression, other);
46       typedef TensorAssignOp<ExpressionType, const Sum> Assign;
47       Assign assign(m_expression, sum);
48       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
49       return *this;
50     }
51 
52     template<typename OtherDerived>
53     EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) {
54       typedef typename OtherDerived::Scalar Scalar;
55       typedef TensorCwiseBinaryOp<internal::scalar_difference_op<Scalar>, const ExpressionType, const OtherDerived> Difference;
56       Difference difference(m_expression, other);
57       typedef TensorAssignOp<ExpressionType, const Difference> Assign;
58       Assign assign(m_expression, difference);
59       internal::TensorExecutor<const Assign, DeviceType>::run(assign, m_device);
60       return *this;
61     }
62 
63   protected:
64     const DeviceType& m_device;
65     ExpressionType& m_expression;
66 };
67 
68 /** \class TensorAsyncDevice
69  * \ingroup CXX11_Tensor_Module
70  *
71  * \brief Pseudo expression providing an operator = that will evaluate its
72  * argument asynchronously on the specified device. Currently only
73  * ThreadPoolDevice implements proper asynchronous execution, while the default
74  * and GPU devices just run the expression synchronously and call m_done() on
75  * completion.
76  *
77  * Example:
78  *    auto done = []() { ... expression evaluation done ... };
79  *    C.device(thread_pool_device, std::move(done)) = A + B;
80  */
81 
82 template <typename ExpressionType, typename DeviceType, typename DoneCallback>
83 class TensorAsyncDevice {
84  public:
TensorAsyncDevice(const DeviceType & device,ExpressionType & expression,DoneCallback done)85   TensorAsyncDevice(const DeviceType& device, ExpressionType& expression,
86                     DoneCallback done)
87       : m_device(device), m_expression(expression), m_done(std::move(done)) {}
88 
89   template <typename OtherDerived>
90   EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) {
91     typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
92     typedef internal::TensorExecutor<const Assign, DeviceType> Executor;
93 
94     Assign assign(m_expression, other);
95     Executor::run(assign, m_device);
96     m_done();
97 
98     return *this;
99   }
100 
101  protected:
102   const DeviceType& m_device;
103   ExpressionType& m_expression;
104   DoneCallback m_done;
105 };
106 
107 
108 #ifdef EIGEN_USE_THREADS
109 template <typename ExpressionType, typename DoneCallback>
110 class TensorAsyncDevice<ExpressionType, ThreadPoolDevice, DoneCallback> {
111  public:
TensorAsyncDevice(const ThreadPoolDevice & device,ExpressionType & expression,DoneCallback done)112   TensorAsyncDevice(const ThreadPoolDevice& device, ExpressionType& expression,
113                     DoneCallback done)
114       : m_device(device), m_expression(expression), m_done(std::move(done)) {}
115 
116   template <typename OtherDerived>
117   EIGEN_STRONG_INLINE TensorAsyncDevice& operator=(const OtherDerived& other) {
118     typedef TensorAssignOp<ExpressionType, const OtherDerived> Assign;
119     typedef internal::TensorAsyncExecutor<const Assign, ThreadPoolDevice, DoneCallback> Executor;
120 
121     // WARNING: After assignment 'm_done' callback will be in undefined state.
122     Assign assign(m_expression, other);
123     Executor::runAsync(assign, m_device, std::move(m_done));
124 
125     return *this;
126   }
127 
128  protected:
129   const ThreadPoolDevice& m_device;
130   ExpressionType& m_expression;
131   DoneCallback m_done;
132 };
133 #endif
134 
135 } // end namespace Eigen
136 
137 #endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H
138