//
// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClDepthwiseConvolutionWorkload.hpp"

#include <ResolveType.hpp>
#include "ClWorkloadUtils.hpp"

#include <armnn/Exceptions.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/ClTensorHandle.hpp>
#include <armnn/backends/TensorHandle.hpp>
#include <backendsCommon/WorkloadUtils.hpp>
#include <armnn/backends/WorkloadData.hpp>

#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>

namespace armnn
{

using namespace armcomputetensorutils;

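// A usage sketch (illustrative only, not part of the original file): a caller such as a
// layer-support check can invoke the validate helper below before constructing the workload.
// inputInfo, outputInfo, descriptor, weightsInfo and biasInfo are assumed to be supplied by
// that caller.
//
//     arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
//         inputInfo, outputInfo, descriptor, weightsInfo, Optional<TensorInfo>(biasInfo), nullptr);
//     bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
//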
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
                                                           const TensorInfo& output,
                                                           const DepthwiseConvolution2dDescriptor& descriptor,
                                                           const TensorInfo& weights,
                                                           const Optional<TensorInfo>& biases,
                                                           const ActivationDescriptor* activationDescriptor)
{
    const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);

    // The ArmNN weights format for depthwise is [1, H, W, C], regardless of the input/output layout.
    //
    // The ACL weights format for depthwise is:
    // - [1, H, W, C] for a [N, H, W, C] input/output layout (matches ArmNN)
    // - [1, C, H, W] for a [N, C, H, W] input/output layout
    //
    // Therefore the ArmNN weights have to be permuted before being passed to ACL when the input/output
    // layout is [N, C, H, W]. The PermuteDepthwiseConv2dWeights backend optimization normally takes care
    // of this, but it has not been performed yet at this point, so the permute is done here on the
    // weights TensorInfo.
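    // Illustrative example (hypothetical shapes, not taken from this file): ArmNN weights of shape
    // [1, 3, 3, 16] over an input with 8 channels imply a depth multiplier of 2; for a [N, C, H, W]
    // input/output layout they would be permuted to [1, 16, 3, 3] before being handed to ACL.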
    unsigned int aclDepthMultiplier;
    TensorInfo weightsPermuted;
    std::tie(weightsPermuted, aclDepthMultiplier) =
        Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);

    // Convert the weights into the compute library format
    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
    aclWeightsInfo.set_are_values_constant(weights.IsConstant());

    arm_compute::TensorInfo aclBiasesInfo;
    arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
    if (descriptor.m_BiasEnabled)
    {
        ARMNN_ASSERT(biases.has_value());
        // As with the weights, non-constant bias values are not currently supported.
        if (!biases.value().IsConstant())
        {
            return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
                                       "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
        }
        aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
        optionalAclBiasesInfo = &aclBiasesInfo;
    }

    const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
            descriptor.m_DilationX,
            descriptor.m_DilationY);

    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
            activationDescriptor);

    return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
                                                              &aclWeightsInfo,
                                                              optionalAclBiasesInfo,
                                                              &aclOutputInfo,
                                                              aclPadStrideInfo,
                                                              aclDepthMultiplier,
                                                              activationInfo,
                                                              aclDilationInfo);
}

ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info,
    const arm_compute::CLCompileContext& clCompileContext)
    : ClBaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
    // Add details for profiling output
    WorkloadInfo detailsInfo;

    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[1]);
    if (descriptor.m_Parameters.m_BiasEnabled)
    {
        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[2]);
    }

    // Report Profiling Details
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthwiseConvolutionWorkload_Construct",
                                         descriptor.m_Parameters,
                                         detailsInfo,
                                         GetGuid());

    m_Data.ValidateInputsOutputs("ClDepthwiseConv2dWorkload", descriptor.m_Parameters.GetNumInputs(), 1);
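    // Descriptive note: GetNumInputs() is expected to report 2 inputs (input, weights), or 3 when a
    // bias is enabled, matching the tensor handles unpacked below.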

    arm_compute::ICLTensor& input = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ICLTensor& output = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
    arm_compute::ICLTensor& weights = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ITensorInfo* weightsInfo = weights.info();
    arm_compute::ITensorInfo* inputInfo = input.info();
    auto weightsShape = weightsInfo->tensor_shape();
    auto inputShape = inputInfo->tensor_shape();

    // By this point the PermuteDepthwiseConv2dWeights backend optimization has been applied,
    // so the weights already have the same data layout as the input.
    unsigned int depthMultiplier =
        ComputeDepthwiseConv2dDepthMultiplier(m_Data.m_Parameters.m_DataLayout, weightsShape, inputShape);
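    // Descriptive note (assuming the helper's usual behaviour): the depth multiplier is derived from the
    // ratio of the weight tensor's channel dimension to the input's channel dimension, which is well
    // defined now that both tensors share the same data layout.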

    arm_compute::ICLTensor* bias  = nullptr;
    if (m_Data.m_Parameters.m_BiasEnabled)
    {
        bias = &PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
    }

    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
                m_Data.m_Parameters.m_DilationX,
                m_Data.m_Parameters.m_DilationY);

    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
    input.info()->set_data_layout(aclDataLayout);
    weights.info()->set_data_layout(aclDataLayout);
    output.info()->set_data_layout(aclDataLayout);
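    // Descriptive note: ACL selects its NCHW/NHWC kernel paths from the layout recorded on the tensor
    // infos, so the ArmNN data layout is propagated to the input, weight and output tensors here.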

    arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);

    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
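    // Descriptive note: the additional workload info typically carries a fused ActivationDescriptor
    // (if an activation was folded into this layer); it is converted to ACL's ActivationLayerInfo above.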

    m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();

    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthwiseConvolutionWorkload_configure");
        static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
                clCompileContext,
                &input,
                &weights,
                bias,
                &output,
                padStrideInfo,
                depthMultiplier,
                activationInfo,
                aclDilationInfo);
    }
    ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
}

void ClDepthwiseConvolutionWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthwiseConvolutionWorkload_Execute", GetGuid());
    ARMNN_ASSERT(m_DepthwiseConvolutionLayer);

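    // Descriptive note: RunClFunction (from ClWorkloadUtils) runs the configured ACL function and is
    // expected to translate any cl::Error into an ArmNN exception tagged with CHECK_LOCATION().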
    RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
}

} // namespace armnn