xref: /aosp_15_r20/external/armnn/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ClBatchNormalizationFloatWorkload.hpp"
7 #include "ClWorkloadUtils.hpp"
8 
9 #include <aclCommon/ArmComputeTensorUtils.hpp>
10 #include <aclCommon/ArmComputeUtils.hpp>
11 #include <armnn/backends/TensorHandle.hpp>
12 #include <cl/ClLayerSupport.hpp>
13 #include <cl/ClTensorHandle.hpp>
14 
15 namespace armnn
16 {
17 using namespace armcomputetensorutils;
18 
ClBatchNormalizationValidate(const TensorInfo & input,const TensorInfo & output,const TensorInfo & mean,const TensorInfo & var,const TensorInfo & beta,const TensorInfo & gamma,const BatchNormalizationDescriptor & descriptor,const ActivationDescriptor * activationDescriptor)19 arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
20                                                  const TensorInfo& output,
21                                                  const TensorInfo& mean,
22                                                  const TensorInfo& var,
23                                                  const TensorInfo& beta,
24                                                  const TensorInfo& gamma,
25                                                  const BatchNormalizationDescriptor& descriptor,
26                                                  const ActivationDescriptor* activationDescriptor)
27 {
28     const arm_compute::TensorInfo aclInputInfo =
29         armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
30     const arm_compute::TensorInfo aclOutputInfo =
31         armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
32     const arm_compute::TensorInfo aclMeanInfo =
33         armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
34     const arm_compute::TensorInfo aclVarInfo =
35         armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
36     const arm_compute::TensorInfo aclBetaInfo =
37         armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
38     const arm_compute::TensorInfo aclGammaInfo =
39         armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
40 
41     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42             activationDescriptor);
43 
44     return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
45                                                             &aclOutputInfo,
46                                                             &aclMeanInfo,
47                                                             &aclVarInfo,
48                                                             &aclBetaInfo,
49                                                             &aclGammaInfo,
50                                                             descriptor.m_Eps,
51                                                             activationInfo);
52 }
53 
ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor & descriptor,const WorkloadInfo & info,const arm_compute::CLCompileContext & clCompileContext)54 ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
55     const BatchNormalizationQueueDescriptor& descriptor,
56     const WorkloadInfo& info,
57     const arm_compute::CLCompileContext& clCompileContext)
58     : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
59 {
60     // Report Profiling Details
61     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchNormalizationWorkload_Construct",
62                                          descriptor.m_Parameters,
63                                          info,
64                                          this->GetGuid());
65 
66     m_Mean = std::make_unique<arm_compute::CLTensor>();
67     BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
68 
69     m_Variance = std::make_unique<arm_compute::CLTensor>();
70     BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo());
71 
72     m_Gamma = std::make_unique<arm_compute::CLTensor>();
73     BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo());
74 
75     m_Beta = std::make_unique<arm_compute::CLTensor>();
76     BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
77 
78     m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1);
79 
80     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
81     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
82 
83     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
84     input.info()->set_data_layout(aclDataLayout);
85     output.info()->set_data_layout(aclDataLayout);
86 
87     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
88 
89     {
90         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClBatchNormalizationFloatWorkload_configure");
91         m_Layer.configure(clCompileContext,
92                           &input,
93                           &output,
94                           m_Mean.get(),
95                           m_Variance.get(),
96                           m_Beta.get(),
97                           m_Gamma.get(),
98                           m_Data.m_Parameters.m_Eps,
99                           activationInfo);
100     }
101 
102     InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
103     InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
104     InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta);
105     InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma);
106 
107     // Force Compute Library to perform the necessary copying and reshaping, after which
108     // delete all the input tensors that will no longer be needed
109     m_Layer.prepare();
110     FreeUnusedTensors();
111 }
112 
Execute() const113 void ClBatchNormalizationFloatWorkload::Execute() const
114 {
115     ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchNormalizationFloatWorkload_Execute", this->GetGuid());
116     RunClFunction(m_Layer, CHECK_LOCATION());
117 }
118 
FreeUnusedTensors()119 void ClBatchNormalizationFloatWorkload::FreeUnusedTensors()
120 {
121     FreeTensorIfUnused(m_Mean);
122     FreeTensorIfUnused(m_Variance);
123     FreeTensorIfUnused(m_Gamma);
124     FreeTensorIfUnused(m_Beta);
125 }
126 
ReplaceInputTensorHandle(ITensorHandle * tensorHandle,unsigned int slot)127 void ClBatchNormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
128 {
129     ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
130     this->m_Data.m_Inputs[slot] = tensorHandle;
131     try
132     {
133         Reconfigure();
134     }
135     catch(armnn::UnimplementedException& e)
136     {
137         // Cannot reconfigure, revert the slot back and throw the exception.
138         this->m_Data.m_Inputs[slot] = backupHandle;
139         throw e;
140     }
141 }
142 
143 // Replace output tensor handle with the given TensorHandle
ReplaceOutputTensorHandle(ITensorHandle * tensorHandle,unsigned int slot)144 void ClBatchNormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
145 {
146     ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
147     this->m_Data.m_Inputs[slot] = tensorHandle;
148     try
149     {
150         Reconfigure();
151     }
152     catch(armnn::UnimplementedException& e)
153     {
154         // Cannot reconfigure, revert the slot back and throw the exception.
155         this->m_Data.m_Inputs[slot] = backupHandle;
156         throw e;
157     }
158 }
159 
Reconfigure()160 void ClBatchNormalizationFloatWorkload::Reconfigure()
161 {
162     throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
163 }
164 
165 } //namespace armnn
166