xref: /aosp_15_r20/external/armnn/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include <armnn/backends/Workload.hpp>
9 
10 #include <arm_compute/runtime/CL/functions/CLDepthConvertLayer.h>
11 
12 #include <cl/ICLTensorProxy.hpp>
13 
14 namespace armnn
15 {
16 
17 class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
18 {
19 public:
20 
21     ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
22                                 const WorkloadInfo& info,
23                                 const arm_compute::CLCompileContext& clCompileContext);
24     virtual void Execute() const override;
25 
SupportsTensorHandleReplacement() const26     bool SupportsTensorHandleReplacement() const override { return true;};
27 
28     // Replace input tensor handle with the given TensorHandle
29     void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override;
30 
31     // Replace output tensor handle with the given TensorHandle
32     void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override;
33 private:
34     mutable arm_compute::CLDepthConvertLayer m_Layer;
35     virtual void Reconfigure();
36 
37     std::unique_ptr<ICLTensorProxy> m_InputProxy;
38     std::unique_ptr<ICLTensorProxy> m_OutputProxy;
39 };
40 
// Free-function declaration; the definition is not part of this header.
// Takes the input and output TensorInfo by const reference and returns an
// arm_compute::Status.
// NOTE(review): presumably reports whether the CL backend can perform the
// FP32 -> FP16 conversion for this input/output pair -- confirm against the
// definition.
41 arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output);
42 
43 } //namespace armnn
44