1 //
2 // Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "ClDepthwiseConvolutionWorkload.hpp"
7
8 #include <ResolveType.hpp>
9 #include "ClWorkloadUtils.hpp"
10
11 #include <armnn/Exceptions.hpp>
12 #include <aclCommon/ArmComputeUtils.hpp>
13 #include <aclCommon/ArmComputeTensorUtils.hpp>
14 #include <cl/ClTensorHandle.hpp>
15 #include <armnn/backends/TensorHandle.hpp>
16 #include <backendsCommon/WorkloadUtils.hpp>
17 #include <armnn/backends/WorkloadData.hpp>
18
19 #include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>
20
21 namespace armnn
22 {
23
24 using namespace armcomputetensorutils;
25
ClDepthwiseConvolutionWorkloadValidate(const TensorInfo & input,const TensorInfo & output,const DepthwiseConvolution2dDescriptor & descriptor,const TensorInfo & weights,const Optional<TensorInfo> & biases,const ActivationDescriptor * activationDescriptor)26 arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
27 const TensorInfo& output,
28 const DepthwiseConvolution2dDescriptor& descriptor,
29 const TensorInfo& weights,
30 const Optional<TensorInfo>& biases,
31 const ActivationDescriptor* activationDescriptor)
32 {
33 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
34 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
35
36 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
37 //
38 // ACL format for weights for depthwise is:
39 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
40 // - [1, C, H, W] for [N, C, H, W] input/output layout
41 //
42 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
43 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
44 // so we do the permute here for the TensorInfo weights.
45 unsigned int aclDepthMultiplier;
46 TensorInfo weightsPermuted;
47 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
48
49 // Convert the weights into the compute library format
50 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
51 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
52
53 arm_compute::TensorInfo aclBiasesInfo;
54 arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
55 if (descriptor.m_BiasEnabled)
56 {
57 ARMNN_ASSERT(biases.has_value());
58 // Same for bias as weights. We don't currently support non const.
59 if (!biases.value().IsConstant())
60 {
61 return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
62 "ArmNN ClDepthwiseConv2dWorkload does not support non constant bias."};
63 }
64 aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
65 aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
66 optionalAclBiasesInfo = &aclBiasesInfo;
67 }
68
69 const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
70 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
71 descriptor.m_DilationX,
72 descriptor.m_DilationY);
73
74 const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
75 activationDescriptor);
76
77 return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
78 &aclWeightsInfo,
79 optionalAclBiasesInfo,
80 &aclOutputInfo,
81 aclPadStrideInfo,
82 aclDepthMultiplier,
83 activationInfo,
84 aclDilationInfo);
85
86 }
87
ClDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor & descriptor,const WorkloadInfo & info,const arm_compute::CLCompileContext & clCompileContext)88 ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
89 const DepthwiseConvolution2dQueueDescriptor& descriptor,
90 const WorkloadInfo& info,
91 const arm_compute::CLCompileContext& clCompileContext)
92 : ClBaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
93 {
94 // Add details for profiling output
95 WorkloadInfo detailsInfo;
96
97 detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
98 detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
99 detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[1]);
100 if (descriptor.m_Parameters.m_BiasEnabled)
101 {
102 detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(info.m_InputTensorInfos[2]);
103 }
104
105 // Report Profiling Details
106 ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthwiseConvolutionWorkload_Construct",
107 descriptor.m_Parameters,
108 detailsInfo,
109 GetGuid());
110
111 m_Data.ValidateInputsOutputs("ClDepthwiseConv2dWorkload", descriptor.m_Parameters.GetNumInputs(), 1);
112
113 arm_compute::ICLTensor& input = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
114 arm_compute::ICLTensor& output = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
115 arm_compute::ICLTensor& weights = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
116 arm_compute::ITensorInfo* weightsInfo = weights.info();
117 arm_compute::ITensorInfo* inputInfo = input.info();
118 auto weightsShape = weightsInfo->tensor_shape();
119 auto inputShape = inputInfo->tensor_shape();
120
121 // The PermuteDepthwiseConv2dWeights backend optimization has been performed,
122 // converting weights to have the same data layout as input.
123 unsigned int depthMultiplier =
124 ComputeDepthwiseConv2dDepthMultiplier(m_Data.m_Parameters.m_DataLayout, weightsShape, inputShape);
125
126 arm_compute::ICLTensor* bias = nullptr;
127 if (m_Data.m_Parameters.m_BiasEnabled)
128 {
129 bias = &PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
130 }
131
132 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
133 m_Data.m_Parameters.m_DilationX,
134 m_Data.m_Parameters.m_DilationY);
135
136 arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
137 input.info()->set_data_layout(aclDataLayout);
138 weights.info()->set_data_layout(aclDataLayout);
139 output.info()->set_data_layout(aclDataLayout);
140
141 arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
142
143 const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
144
145 m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
146
147 {
148 ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthwiseConvolutionWorkload_configure");
149 static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
150 clCompileContext,
151 &input,
152 &weights,
153 bias,
154 &output,
155 padStrideInfo,
156 depthMultiplier,
157 activationInfo,
158 aclDilationInfo);
159 }
160 ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
161 }
162
Execute() const163 void ClDepthwiseConvolutionWorkload::Execute() const
164 {
165 ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthwiseConvolutionWorkload_Execute", GetGuid());
166 ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
167
168 RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
169 }
170
171 } // namespace armnn
172