xref: /aosp_15_r20/external/armnn/src/backends/neon/workloads/NeonBatchMatMulWorkload.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include "NeonBaseWorkload.hpp"
9 
10 #include <arm_compute/runtime/IFunction.h>
11 #include <arm_compute/runtime/Tensor.h>
12 
13 #include <memory>
14 
15 namespace armnn
16 {
17     arm_compute::Status NeonBatchMatMulValidate(const TensorInfo& inputX,      // tensor info for the first input (X)
18                                                 const TensorInfo& inputY,      // tensor info for the second input (Y)
19                                                 const TensorInfo& output,      // tensor info for the output
20                                                 const BatchMatMulDescriptor& descriptor); // batch mat mul layer parameters
       // Declaration only: the definition is not part of this header. The result is
       // reported as an arm_compute::Status rather than a bool.
       // NOTE(review): presumably checks whether the Neon/ACL backend supports a batch
       // mat mul with these tensor infos and descriptor - confirm against the definition.
21 
22     class NeonBatchMatMulWorkload : public NeonBaseWorkload<BatchMatMulQueueDescriptor>
23     {
       // Neon backend workload for a batch matrix multiplication, built on
       // NeonBaseWorkload specialised for BatchMatMulQueueDescriptor.
       // Only declarations live here; the constructor and Execute() are defined elsewhere.
24     public:
           // Constructs the workload from its queue descriptor and workload info.
           // NOTE(review): presumably this is where the ACL functions below are
           // created and configured - confirm against the definition.
25         NeonBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor,
26                                 const WorkloadInfo& info);
           // Runs the workload. Declared const, and overrides a virtual from a base class.
27         virtual void Execute() const override;
28 
29     private:
30         // ACL layers required to fully form a Batch Mat Mul layer.
           // Each one is owned through a unique_ptr to the generic arm_compute::IFunction
           // interface, so the concrete ACL function types are not visible in this header.
           // NOTE(review): the permute layers may be left null when an input needs no
           // permutation - verify in the implementation before dereferencing.
31         std::unique_ptr<arm_compute::IFunction> m_GEMMLayer;
32         std::unique_ptr<arm_compute::IFunction> m_PermuteLayerX;
33         std::unique_ptr<arm_compute::IFunction> m_PermuteLayerY;
34 
35         // Additional ACL arm_compute::Tensors.
36         // Required to perform permutations.
           // Held by value, so their lifetime is tied to the workload object.
           // NOTE(review): members are constructed in declaration order and destroyed
           // in reverse; whether the layers above keep references to these tensors is
           // not visible here, so do not reorder the members without checking.
37         arm_compute::Tensor m_PermutedTensorX;
38         arm_compute::Tensor m_PermutedTensorY;
39 
40     };
41 } //namespace armnn
42