xref: /aosp_15_r20/external/armnn/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
//
// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonDepthwiseConvolutionWorkload.hpp"

#include "NeonWorkloadUtils.hpp"

#include <armnnUtils/DataLayoutIndexed.hpp>

#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>

#include <neon/NeonLayerSupport.hpp>

#include <armnn/backends/TensorHandle.hpp>
#include <backendsCommon/WorkloadUtils.hpp>

#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>

using namespace armnnUtils;

namespace armnn
{

using namespace armcomputetensorutils;
NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const DepthwiseConvolution2dDescriptor & descriptor,const TensorInfo & weights,const Optional<TensorInfo> & biases,const ActivationDescriptor * activationDescriptor)29 arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
30                                                              const TensorInfo& output,
31                                                              const DepthwiseConvolution2dDescriptor& descriptor,
32                                                              const TensorInfo& weights,
33                                                              const Optional<TensorInfo>& biases,
34                                                              const ActivationDescriptor* activationDescriptor)
35 {
36     const arm_compute::TensorInfo aclInputInfo   = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
37     const arm_compute::TensorInfo aclOutputInfo  = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38 
39     // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40     //
41     // ACL format for weights for depthwise is:
42     // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43     // - [1, C, H, W] for [N, C, H, W] input/output layout
44     //
45     // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46     // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47     // so we do the permute here for the TensorInfo weights.
48     unsigned int aclDepthMultiplier;
49     TensorInfo weightsPermuted;
50     std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
51 
52     // Convert the weights into the compute library format
53     arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
54     aclWeightsInfo.set_are_values_constant(weights.IsConstant());
55 
56     arm_compute::TensorInfo aclBiasesInfo;
57     arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
58     if (descriptor.m_BiasEnabled)
59     {
60         ARMNN_ASSERT(biases.has_value());
61         // Same for bias as weights. We don't currently support non const.
62         if (!biases.value().IsConstant())
63         {
64             return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
65                                        "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
66         }
67         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
68         aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
69         optionalAclBiasesInfo = &aclBiasesInfo;
70     }
71 
72     arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
73     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
74         descriptor.m_DilationX, descriptor.m_DilationY);
75 
76     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
77         activationDescriptor);
78 
79     return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
80                                                               &aclWeightsInfo,
81                                                               optionalAclBiasesInfo,
82                                                               &aclOutputInfo,
83                                                               aclPadStrideInfo,
84                                                               aclDepthMultiplier,
85                                                               activationInfo,
86                                                               aclDilationInfo);
87 }
88 
NeonDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info)89 NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
90     const DepthwiseConvolution2dQueueDescriptor& descriptor,
91     const WorkloadInfo& info)
92     : NeonBaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
93 {
94     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
95     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
96     arm_compute::ITensor& weights = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
97     arm_compute::ITensor* biasesPtr = nullptr;
98     if (m_Data.m_Parameters.m_BiasEnabled)
99     {
100         biasesPtr = &PolymorphicDowncast<IAclTensorHandle *>(m_Data.m_Inputs[2])->GetTensor();
101     }
102 
103     arm_compute::ITensorInfo* weightsInfo = weights.info();
104     arm_compute::ITensorInfo* inputInfo = input.info();
105     auto weightsShape = weightsInfo->tensor_shape();
106     auto inputShape = inputInfo->tensor_shape();
107 
108     // The PermuteDepthwiseConv2dWeights backend optimization has been performed,
109     // converting weights to have the same data layout as input.
110     unsigned int depthMultiplier =
111         ComputeDepthwiseConv2dDepthMultiplier(m_Data.m_Parameters.m_DataLayout, weightsShape, inputShape);
112 
113     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
114         m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
115 
116     uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3: 2;
117     m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", numInputs, 1);
118 
119     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
120     input.info()->set_data_layout(aclDataLayout);
121     weights.info()->set_data_layout(aclDataLayout);
122     output.info()->set_data_layout(aclDataLayout);
123 
124     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
125 
126     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
127 
128     m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
129     static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
130         m_pDepthwiseConvolutionLayer.get())->configure(&input,
131                                                        &weights,
132                                                        biasesPtr,
133                                                        &output,
134                                                        padStrideInfo,
135                                                        depthMultiplier,
136                                                        activationInfo,
137                                                        aclDilationInfo);
138 
139     // Add details for profiling output
140     WorkloadInfo detailsInfo;
141 
142     detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
143     detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
144     detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[1]);
145     if (descriptor.m_Parameters.m_BiasEnabled)
146     {
147         detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[2]);
148     }
149 
150     // Report Profiling Details
151     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthwiseConvolution2dWorkload_Construct",
152                                          descriptor.m_Parameters,
153                                          detailsInfo,
154                                          GetGuid());
155 
156     ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
157 
158     m_pDepthwiseConvolutionLayer->prepare();
159 }
160 
Execute() const161 void NeonDepthwiseConvolutionWorkload::Execute() const
162 {
163     ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthwiseConvolutionWorkload_Execute", GetGuid());
164     ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
165 
166     m_pDepthwiseConvolutionLayer->run();
167 }
168 
} //namespace armnn