1 //
2 // Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NeonDepthwiseConvolutionWorkload.hpp"
7
8 #include "NeonWorkloadUtils.hpp"
9
10 #include <armnnUtils/DataLayoutIndexed.hpp>
11
12 #include <aclCommon/ArmComputeTensorUtils.hpp>
13 #include <aclCommon/ArmComputeUtils.hpp>
14
15 #include <neon/NeonLayerSupport.hpp>
16
17 #include <armnn/backends/TensorHandle.hpp>
18 #include <backendsCommon/WorkloadUtils.hpp>
19
20 #include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
21
22 using namespace armnnUtils;
23
24 namespace armnn
25 {
26
27 using namespace armcomputetensorutils;
28
NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const DepthwiseConvolution2dDescriptor & descriptor,const TensorInfo & weights,const Optional<TensorInfo> & biases,const ActivationDescriptor * activationDescriptor)29 arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
30 const TensorInfo& output,
31 const DepthwiseConvolution2dDescriptor& descriptor,
32 const TensorInfo& weights,
33 const Optional<TensorInfo>& biases,
34 const ActivationDescriptor* activationDescriptor)
35 {
36 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
37 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
38
39 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40 //
41 // ACL format for weights for depthwise is:
42 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43 // - [1, C, H, W] for [N, C, H, W] input/output layout
44 //
45 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47 // so we do the permute here for the TensorInfo weights.
48 unsigned int aclDepthMultiplier;
49 TensorInfo weightsPermuted;
50 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
51
52 // Convert the weights into the compute library format
53 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
54 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
55
56 arm_compute::TensorInfo aclBiasesInfo;
57 arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
58 if (descriptor.m_BiasEnabled)
59 {
60 ARMNN_ASSERT(biases.has_value());
61 // Same for bias as weights. We don't currently support non const.
62 if (!biases.value().IsConstant())
63 {
64 return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
65 "ArmNN NeonDepthwiseConv2dWorkload does not support non constant bias."};
66 }
67 aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
68 aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
69 optionalAclBiasesInfo = &aclBiasesInfo;
70 }
71
72 arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
73 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
74 descriptor.m_DilationX, descriptor.m_DilationY);
75
76 const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
77 activationDescriptor);
78
79 return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
80 &aclWeightsInfo,
81 optionalAclBiasesInfo,
82 &aclOutputInfo,
83 aclPadStrideInfo,
84 aclDepthMultiplier,
85 activationInfo,
86 aclDilationInfo);
87 }
88
NeonDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info)89 NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
90 const DepthwiseConvolution2dQueueDescriptor& descriptor,
91 const WorkloadInfo& info)
92 : NeonBaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
93 {
94 arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
95 arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
96 arm_compute::ITensor& weights = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
97 arm_compute::ITensor* biasesPtr = nullptr;
98 if (m_Data.m_Parameters.m_BiasEnabled)
99 {
100 biasesPtr = &PolymorphicDowncast<IAclTensorHandle *>(m_Data.m_Inputs[2])->GetTensor();
101 }
102
103 arm_compute::ITensorInfo* weightsInfo = weights.info();
104 arm_compute::ITensorInfo* inputInfo = input.info();
105 auto weightsShape = weightsInfo->tensor_shape();
106 auto inputShape = inputInfo->tensor_shape();
107
108 // The PermuteDepthwiseConv2dWeights backend optimization has been performed,
109 // converting weights to have the same data layout as input.
110 unsigned int depthMultiplier =
111 ComputeDepthwiseConv2dDepthMultiplier(m_Data.m_Parameters.m_DataLayout, weightsShape, inputShape);
112
113 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
114 m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
115
116 uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3: 2;
117 m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", numInputs, 1);
118
119 arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
120 input.info()->set_data_layout(aclDataLayout);
121 weights.info()->set_data_layout(aclDataLayout);
122 output.info()->set_data_layout(aclDataLayout);
123
124 arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
125
126 const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
127
128 m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
129 static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
130 m_pDepthwiseConvolutionLayer.get())->configure(&input,
131 &weights,
132 biasesPtr,
133 &output,
134 padStrideInfo,
135 depthMultiplier,
136 activationInfo,
137 aclDilationInfo);
138
139 // Add details for profiling output
140 WorkloadInfo detailsInfo;
141
142 detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
143 detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
144 detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[1]);
145 if (descriptor.m_Parameters.m_BiasEnabled)
146 {
147 detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[2]);
148 }
149
150 // Report Profiling Details
151 ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthwiseConvolution2dWorkload_Construct",
152 descriptor.m_Parameters,
153 detailsInfo,
154 GetGuid());
155
156 ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
157
158 m_pDepthwiseConvolutionLayer->prepare();
159 }
160
// Runs the depthwise convolution configured (and prepared) in the constructor.
void NeonDepthwiseConvolutionWorkload::Execute() const
{
    // Scoped profiling event so the run is attributed to this workload's GUID.
    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthwiseConvolutionWorkload_Execute", GetGuid());
    ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);

    m_pDepthwiseConvolutionLayer->run();
}
168
169 } //namespace armnn
170