xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/operators/CpuDepthwiseConv2d.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2021-2022 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "src/cpu/operators/CpuDepthwiseConv2d.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/TensorInfo.h"
27*c217d954SCole Faust #include "arm_compute/core/Validate.h"
28*c217d954SCole Faust #include "arm_compute/core/utils/misc/InfoHelpers.h"
29*c217d954SCole Faust #include "arm_compute/core/utils/misc/ShapeCalculator.h"
30*c217d954SCole Faust #include "arm_compute/runtime/NEON/NEScheduler.h"
31*c217d954SCole Faust #include "src/common/utils/Log.h"
32*c217d954SCole Faust #include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
33*c217d954SCole Faust 
34*c217d954SCole Faust namespace arm_compute
35*c217d954SCole Faust {
36*c217d954SCole Faust namespace cpu
37*c217d954SCole Faust {
38*c217d954SCole Faust namespace
39*c217d954SCole Faust {
validate_arguments_optimized(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)40*c217d954SCole Faust Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
41*c217d954SCole Faust {
42*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
43*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
44*c217d954SCole Faust     if(!is_data_type_quantized_per_channel(weights->data_type()))
45*c217d954SCole Faust     {
46*c217d954SCole Faust         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
47*c217d954SCole Faust     }
48*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
49*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
50*c217d954SCole Faust     const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
51*c217d954SCole Faust     const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
52*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
53*c217d954SCole Faust                                 info.pad_stride_info.pad_right());
54*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
55*c217d954SCole Faust                                 info.pad_stride_info.pad_bottom());
56*c217d954SCole Faust 
57*c217d954SCole Faust     if(biases != nullptr)
58*c217d954SCole Faust     {
59*c217d954SCole Faust         const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
60*c217d954SCole Faust         ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
61*c217d954SCole Faust         ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
62*c217d954SCole Faust     }
63*c217d954SCole Faust 
64*c217d954SCole Faust     ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));
65*c217d954SCole Faust 
66*c217d954SCole Faust     // Validate Activation Layer
67*c217d954SCole Faust     if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
68*c217d954SCole Faust     {
69*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
70*c217d954SCole Faust     }
71*c217d954SCole Faust     return Status{};
72*c217d954SCole Faust }
73*c217d954SCole Faust } // namespace
74*c217d954SCole Faust 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)75*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo           *src,
76*c217d954SCole Faust                                                                         const ITensorInfo     *weights,
77*c217d954SCole Faust                                                                         const ITensorInfo     *biases,
78*c217d954SCole Faust                                                                         ITensorInfo           *dst,
79*c217d954SCole Faust                                                                         const ConvolutionInfo &info)
80*c217d954SCole Faust {
81*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
82*c217d954SCole Faust     // Perform validation step
83*c217d954SCole Faust     ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
84*c217d954SCole Faust                                                                              dst, info));
85*c217d954SCole Faust 
86*c217d954SCole Faust     _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
87*c217d954SCole Faust     _has_bias     = biases != nullptr;
88*c217d954SCole Faust     _is_nchw      = src->data_layout() == DataLayout::NCHW;
89*c217d954SCole Faust     _permute      = _is_nchw;
90*c217d954SCole Faust     _is_prepared  = false;
91*c217d954SCole Faust     _are_weights_const = weights->are_values_constant();
92*c217d954SCole Faust 
93*c217d954SCole Faust     // Configure pipeline
94*c217d954SCole Faust     _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
95*c217d954SCole Faust 
96*c217d954SCole Faust     _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
97*c217d954SCole Faust     if(_is_nchw)
98*c217d954SCole Faust     {
99*c217d954SCole Faust         _permute_input   = std::make_unique<cpu::CpuPermute>();
100*c217d954SCole Faust         _permute_weights = std::make_unique<cpu::CpuPermute>();
101*c217d954SCole Faust         _permute_output  = std::make_unique<cpu::CpuPermute>();
102*c217d954SCole Faust 
103*c217d954SCole Faust         auto input_perm   = std::make_unique<TensorInfo>();
104*c217d954SCole Faust         auto weights_perm = std::make_unique<TensorInfo>();
105*c217d954SCole Faust         auto output_perm  = std::make_unique<TensorInfo>();
106*c217d954SCole Faust 
107*c217d954SCole Faust         // Configure the function to transform the input tensor from NCHW -> NHWC
108*c217d954SCole Faust         _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
109*c217d954SCole Faust         input_perm->set_data_layout(DataLayout::NHWC);
110*c217d954SCole Faust 
111*c217d954SCole Faust         // Configure the function to transform the weights tensor from IHW -> HWI
112*c217d954SCole Faust         _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
113*c217d954SCole Faust         weights_perm->set_data_layout(DataLayout::NHWC);
114*c217d954SCole Faust 
115*c217d954SCole Faust         output_perm->set_data_layout(DataLayout::NHWC);
116*c217d954SCole Faust         output_perm->set_quantization_info(dst->quantization_info());
117*c217d954SCole Faust 
118*c217d954SCole Faust         // Configure optimized depthwise
119*c217d954SCole Faust         _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
120*c217d954SCole Faust 
121*c217d954SCole Faust         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
122*c217d954SCole Faust         output_perm->set_data_layout(DataLayout::NHWC);
123*c217d954SCole Faust         _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
124*c217d954SCole Faust     }
125*c217d954SCole Faust     else
126*c217d954SCole Faust     {
127*c217d954SCole Faust         _dwc_optimized_func->configure(src, weights, biases, dst, info);
128*c217d954SCole Faust     }
129*c217d954SCole Faust 
130*c217d954SCole Faust     // Configure activation
131*c217d954SCole Faust     if(_is_activationlayer_enabled)
132*c217d954SCole Faust     {
133*c217d954SCole Faust         _activationlayer_function = std::make_unique<cpu::CpuActivation>();
134*c217d954SCole Faust         _activationlayer_function->configure(dst, nullptr, info.act_info);
135*c217d954SCole Faust     }
136*c217d954SCole Faust }
137*c217d954SCole Faust 
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)138*c217d954SCole Faust Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo     *src,
139*c217d954SCole Faust                                                                          const ITensorInfo     *weights,
140*c217d954SCole Faust                                                                          const ITensorInfo     *biases,
141*c217d954SCole Faust                                                                          const ITensorInfo     *dst,
142*c217d954SCole Faust                                                                          const ConvolutionInfo &info)
143*c217d954SCole Faust {
144*c217d954SCole Faust     return validate_arguments_optimized(src, weights, biases, dst, info);
145*c217d954SCole Faust }
146*c217d954SCole Faust 
run(ITensorPack & tensors)147*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
148*c217d954SCole Faust {
149*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
150*c217d954SCole Faust     prepare(tensors);
151*c217d954SCole Faust 
152*c217d954SCole Faust     auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
153*c217d954SCole Faust     auto dst            = tensors.get_tensor(TensorType::ACL_DST_0);
154*c217d954SCole Faust     auto workspace      = tensors.get_tensor(TensorType::ACL_INT_3);
155*c217d954SCole Faust     auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
156*c217d954SCole Faust 
157*c217d954SCole Faust     // Permute input
158*c217d954SCole Faust     if(_permute)
159*c217d954SCole Faust     {
160*c217d954SCole Faust         ITensorPack pack;
161*c217d954SCole Faust         auto        src      = tensors.get_const_tensor(TensorType::ACL_SRC_0);
162*c217d954SCole Faust         auto        src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
163*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, src);
164*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, src_perm);
165*c217d954SCole Faust         _permute_input->run(pack);
166*c217d954SCole Faust     }
167*c217d954SCole Faust 
168*c217d954SCole Faust     // Run assembly function
169*c217d954SCole Faust     if(_is_nchw)
170*c217d954SCole Faust     {
171*c217d954SCole Faust         auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
172*c217d954SCole Faust         auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
173*c217d954SCole Faust         auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);
174*c217d954SCole Faust 
175*c217d954SCole Faust         ITensorPack pack;
176*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
177*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
178*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_2, bias);
179*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_INT_0, workspace);
180*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
181*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst_perm);
182*c217d954SCole Faust         _dwc_optimized_func->run(pack);
183*c217d954SCole Faust     }
184*c217d954SCole Faust     else
185*c217d954SCole Faust     {
186*c217d954SCole Faust         auto src     = tensors.get_tensor(TensorType::ACL_SRC_0);
187*c217d954SCole Faust         auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
188*c217d954SCole Faust         auto dst     = tensors.get_tensor(TensorType::ACL_DST);
189*c217d954SCole Faust 
190*c217d954SCole Faust         ITensorPack pack;
191*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_0, src);
192*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_1, weights);
193*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC_2, bias);
194*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_INT_0, workspace);
195*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
196*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst);
197*c217d954SCole Faust         _dwc_optimized_func->run(pack);
198*c217d954SCole Faust     }
199*c217d954SCole Faust 
200*c217d954SCole Faust     // Permute output
201*c217d954SCole Faust     if(_is_nchw)
202*c217d954SCole Faust     {
203*c217d954SCole Faust         ITensorPack pack;
204*c217d954SCole Faust         auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
205*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, dst_perm);
206*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst);
207*c217d954SCole Faust         _permute_output->run(pack);
208*c217d954SCole Faust     }
209*c217d954SCole Faust 
210*c217d954SCole Faust     // Run activation
211*c217d954SCole Faust     if(_is_activationlayer_enabled)
212*c217d954SCole Faust     {
213*c217d954SCole Faust         ITensorPack pack;
214*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, dst);
215*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst);
216*c217d954SCole Faust         _activationlayer_function->run(pack);
217*c217d954SCole Faust     }
218*c217d954SCole Faust }
219*c217d954SCole Faust 
prepare(ITensorPack & tensors)220*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
221*c217d954SCole Faust {
222*c217d954SCole Faust     // if weights are not constant then we need to repack so that weights
223*c217d954SCole Faust     // can be updated in-place
224*c217d954SCole Faust     if(!_are_weights_const)
225*c217d954SCole Faust     {
226*c217d954SCole Faust         auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
227*c217d954SCole Faust         auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
228*c217d954SCole Faust         auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
229*c217d954SCole Faust 
230*c217d954SCole Faust         ITensorPack pack_opt;
231*c217d954SCole Faust         pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
232*c217d954SCole Faust         pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
233*c217d954SCole Faust         pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
234*c217d954SCole Faust 
235*c217d954SCole Faust         // Prepare optimized function
236*c217d954SCole Faust         _dwc_optimized_func->prepare(pack_opt);
237*c217d954SCole Faust 
238*c217d954SCole Faust         return;
239*c217d954SCole Faust     }
240*c217d954SCole Faust 
241*c217d954SCole Faust     if(!_is_prepared)
242*c217d954SCole Faust     {
243*c217d954SCole Faust         auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
244*c217d954SCole Faust         auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
245*c217d954SCole Faust         auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
246*c217d954SCole Faust 
247*c217d954SCole Faust         // Permute weights
248*c217d954SCole Faust         if(_permute)
249*c217d954SCole Faust         {
250*c217d954SCole Faust             auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);
251*c217d954SCole Faust 
252*c217d954SCole Faust             ITensorPack pack;
253*c217d954SCole Faust             pack.add_tensor(TensorType::ACL_SRC, weights);
254*c217d954SCole Faust             pack.add_tensor(TensorType::ACL_DST, permuted_weights);
255*c217d954SCole Faust             _permute_weights->run(pack);
256*c217d954SCole Faust 
257*c217d954SCole Faust             weights->mark_as_unused();
258*c217d954SCole Faust 
259*c217d954SCole Faust             ITensorPack pack_opt;
260*c217d954SCole Faust             pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
261*c217d954SCole Faust             pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
262*c217d954SCole Faust             pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
263*c217d954SCole Faust 
264*c217d954SCole Faust             // Prepare optimized function
265*c217d954SCole Faust             _dwc_optimized_func->prepare(pack_opt);
266*c217d954SCole Faust         }
267*c217d954SCole Faust         else
268*c217d954SCole Faust         {
269*c217d954SCole Faust             ITensorPack pack_opt;
270*c217d954SCole Faust             pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
271*c217d954SCole Faust             pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
272*c217d954SCole Faust             pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
273*c217d954SCole Faust 
274*c217d954SCole Faust             // Prepare optimized function
275*c217d954SCole Faust             _dwc_optimized_func->prepare(pack_opt);
276*c217d954SCole Faust         }
277*c217d954SCole Faust 
278*c217d954SCole Faust         _is_prepared = true;
279*c217d954SCole Faust     }
280*c217d954SCole Faust }
281*c217d954SCole Faust 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)282*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
283*c217d954SCole Faust {
284*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
285*c217d954SCole Faust     ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
286*c217d954SCole Faust                                                             dst, info));
287*c217d954SCole Faust 
288*c217d954SCole Faust     _is_nchw     = src->data_layout() == DataLayout::NCHW;
289*c217d954SCole Faust     _is_prepared = !_is_nchw;
290*c217d954SCole Faust 
291*c217d954SCole Faust     ITensorInfo       *input_to_use   = src;
292*c217d954SCole Faust     const ITensorInfo *weights_to_use = weights;
293*c217d954SCole Faust     ITensorInfo       *output_to_use  = dst;
294*c217d954SCole Faust 
295*c217d954SCole Faust     auto input_perm   = std::make_unique<TensorInfo>();
296*c217d954SCole Faust     auto weights_perm = std::make_unique<TensorInfo>();
297*c217d954SCole Faust     auto output_perm  = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
298*c217d954SCole Faust 
299*c217d954SCole Faust     if(_is_nchw)
300*c217d954SCole Faust     {
301*c217d954SCole Faust         _permute_input   = std::make_unique<cpu::CpuPermute>();
302*c217d954SCole Faust         _permute_weights = std::make_unique<cpu::CpuPermute>();
303*c217d954SCole Faust 
304*c217d954SCole Faust         _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
305*c217d954SCole Faust         input_perm->set_data_layout(DataLayout::NHWC);
306*c217d954SCole Faust         input_to_use = input_perm.get();
307*c217d954SCole Faust 
308*c217d954SCole Faust         _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
309*c217d954SCole Faust         weights_perm->set_data_layout(DataLayout::NHWC);
310*c217d954SCole Faust         weights_to_use = weights_perm.get();
311*c217d954SCole Faust 
312*c217d954SCole Faust         output_to_use = output_perm.get();
313*c217d954SCole Faust     }
314*c217d954SCole Faust 
315*c217d954SCole Faust     _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
316*c217d954SCole Faust     _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
317*c217d954SCole Faust 
318*c217d954SCole Faust     if(_is_nchw)
319*c217d954SCole Faust     {
320*c217d954SCole Faust         _permute_output = std::make_unique<cpu::CpuPermute>();
321*c217d954SCole Faust         _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
322*c217d954SCole Faust         output_perm->set_data_layout(DataLayout::NHWC);
323*c217d954SCole Faust     }
324*c217d954SCole Faust 
325*c217d954SCole Faust     //Configure Activation Layer
326*c217d954SCole Faust     _is_activationlayer_enabled = info.act_info.enabled();
327*c217d954SCole Faust     if(_is_activationlayer_enabled)
328*c217d954SCole Faust     {
329*c217d954SCole Faust         _activationlayer_function = std::make_unique<cpu::CpuActivation>();
330*c217d954SCole Faust         _activationlayer_function->configure(dst, nullptr, info.act_info);
331*c217d954SCole Faust     }
332*c217d954SCole Faust }
333*c217d954SCole Faust 
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)334*c217d954SCole Faust Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
335*c217d954SCole Faust                                                                const ConvolutionInfo &info)
336*c217d954SCole Faust {
337*c217d954SCole Faust     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
338*c217d954SCole Faust     if(src->data_layout() == DataLayout::NCHW)
339*c217d954SCole Faust     {
340*c217d954SCole Faust         TensorShape permuted_input_shape   = src->tensor_shape();
341*c217d954SCole Faust         TensorShape permuted_weights_shape = weights->tensor_shape();
342*c217d954SCole Faust         TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
343*c217d954SCole Faust         permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
344*c217d954SCole Faust         permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
345*c217d954SCole Faust         permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
346*c217d954SCole Faust 
347*c217d954SCole Faust         const TensorInfo permuted_input   = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
348*c217d954SCole Faust         const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
349*c217d954SCole Faust         const TensorInfo permuted_output  = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
350*c217d954SCole Faust 
351*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
352*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
353*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));
354*c217d954SCole Faust 
355*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
356*c217d954SCole Faust     }
357*c217d954SCole Faust     else
358*c217d954SCole Faust     {
359*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
360*c217d954SCole Faust     }
361*c217d954SCole Faust 
362*c217d954SCole Faust     // Validate Activation Layer
363*c217d954SCole Faust     if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
364*c217d954SCole Faust     {
365*c217d954SCole Faust         ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
366*c217d954SCole Faust     }
367*c217d954SCole Faust 
368*c217d954SCole Faust     return Status{};
369*c217d954SCole Faust }
370*c217d954SCole Faust 
run(ITensorPack & tensors)371*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
372*c217d954SCole Faust {
373*c217d954SCole Faust     auto src     = tensors.get_const_tensor(TensorType::ACL_SRC_0);
374*c217d954SCole Faust     auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
375*c217d954SCole Faust     auto biases  = tensors.get_const_tensor(TensorType::ACL_SRC_2);
376*c217d954SCole Faust     auto dst     = tensors.get_tensor(TensorType::ACL_DST_0);
377*c217d954SCole Faust 
378*c217d954SCole Faust     if(_is_nchw)
379*c217d954SCole Faust     {
380*c217d954SCole Faust         prepare(tensors);
381*c217d954SCole Faust         auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
382*c217d954SCole Faust         auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
383*c217d954SCole Faust         auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);
384*c217d954SCole Faust 
385*c217d954SCole Faust         ITensorPack pack;
386*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, src);
387*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, src_perm);
388*c217d954SCole Faust         _permute_input->run(pack);
389*c217d954SCole Faust 
390*c217d954SCole Faust         ITensorPack pack_depth;
391*c217d954SCole Faust         pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
392*c217d954SCole Faust         pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
393*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
394*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
395*c217d954SCole Faust         NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
396*c217d954SCole Faust     }
397*c217d954SCole Faust     else
398*c217d954SCole Faust     {
399*c217d954SCole Faust         ITensorPack pack_depth;
400*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
401*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
402*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
403*c217d954SCole Faust         pack_depth.add_tensor(TensorType::ACL_DST, dst);
404*c217d954SCole Faust         NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
405*c217d954SCole Faust     }
406*c217d954SCole Faust 
407*c217d954SCole Faust     if(_is_nchw)
408*c217d954SCole Faust     {
409*c217d954SCole Faust         ITensorPack pack;
410*c217d954SCole Faust         auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
411*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, dst_perm);
412*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst);
413*c217d954SCole Faust         _permute_output->run(pack);
414*c217d954SCole Faust     }
415*c217d954SCole Faust 
416*c217d954SCole Faust     if(_is_activationlayer_enabled)
417*c217d954SCole Faust     {
418*c217d954SCole Faust         ITensorPack pack;
419*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, dst);
420*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, dst);
421*c217d954SCole Faust         _activationlayer_function->run(pack);
422*c217d954SCole Faust     }
423*c217d954SCole Faust }
424*c217d954SCole Faust 
prepare(ITensorPack & tensors)425*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
426*c217d954SCole Faust {
427*c217d954SCole Faust     if(!_is_prepared)
428*c217d954SCole Faust     {
429*c217d954SCole Faust         auto weights      = tensors.get_const_tensor(TensorType::ACL_SRC_1);
430*c217d954SCole Faust         auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
431*c217d954SCole Faust 
432*c217d954SCole Faust         ARM_COMPUTE_ERROR_ON(!weights->is_used());
433*c217d954SCole Faust 
434*c217d954SCole Faust         ITensorPack pack;
435*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_SRC, weights);
436*c217d954SCole Faust         pack.add_tensor(TensorType::ACL_DST, weights_perm);
437*c217d954SCole Faust 
438*c217d954SCole Faust         _permute_weights->run(pack);
439*c217d954SCole Faust         weights->mark_as_unused();
440*c217d954SCole Faust         _is_prepared = true;
441*c217d954SCole Faust     }
442*c217d954SCole Faust }
443*c217d954SCole Faust 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)444*c217d954SCole Faust void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
445*c217d954SCole Faust {
446*c217d954SCole Faust     ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
447*c217d954SCole Faust 
448*c217d954SCole Faust     _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
449*c217d954SCole Faust     switch(_depth_conv_func)
450*c217d954SCole Faust     {
451*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
452*c217d954SCole Faust             _func_optimized.configure(src, weights, biases, dst, info);
453*c217d954SCole Faust             break;
454*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
455*c217d954SCole Faust             _func_generic.configure(src, weights, biases, dst, info);
456*c217d954SCole Faust             break;
457*c217d954SCole Faust         default:
458*c217d954SCole Faust             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
459*c217d954SCole Faust     }
460*c217d954SCole Faust }
461*c217d954SCole Faust 
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)462*c217d954SCole Faust Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
463*c217d954SCole Faust {
464*c217d954SCole Faust     DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
465*c217d954SCole Faust     switch(depth_conv_func)
466*c217d954SCole Faust     {
467*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
468*c217d954SCole Faust             return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
469*c217d954SCole Faust             break;
470*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
471*c217d954SCole Faust             return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
472*c217d954SCole Faust             break;
473*c217d954SCole Faust         default:
474*c217d954SCole Faust             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
475*c217d954SCole Faust     }
476*c217d954SCole Faust }
477*c217d954SCole Faust 
get_depthwiseconvolution_function(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)478*c217d954SCole Faust DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
479*c217d954SCole Faust                                                                                    const ConvolutionInfo &info)
480*c217d954SCole Faust {
481*c217d954SCole Faust     if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
482*c217d954SCole Faust     {
483*c217d954SCole Faust         return DepthwiseConvolutionFunction::OPTIMIZED;
484*c217d954SCole Faust     }
485*c217d954SCole Faust     else
486*c217d954SCole Faust     {
487*c217d954SCole Faust         return DepthwiseConvolutionFunction::GENERIC;
488*c217d954SCole Faust     }
489*c217d954SCole Faust }
490*c217d954SCole Faust 
run(ITensorPack & tensors)491*c217d954SCole Faust void CpuDepthwiseConv2d::run(ITensorPack &tensors)
492*c217d954SCole Faust {
493*c217d954SCole Faust     switch(_depth_conv_func)
494*c217d954SCole Faust     {
495*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
496*c217d954SCole Faust             _func_optimized.run(tensors);
497*c217d954SCole Faust             break;
498*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
499*c217d954SCole Faust             _func_generic.run(tensors);
500*c217d954SCole Faust             break;
501*c217d954SCole Faust         default:
502*c217d954SCole Faust             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
503*c217d954SCole Faust     }
504*c217d954SCole Faust }
505*c217d954SCole Faust 
prepare(ITensorPack & tensors)506*c217d954SCole Faust void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
507*c217d954SCole Faust {
508*c217d954SCole Faust     switch(_depth_conv_func)
509*c217d954SCole Faust     {
510*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
511*c217d954SCole Faust             _func_optimized.prepare(tensors);
512*c217d954SCole Faust             break;
513*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
514*c217d954SCole Faust             _func_generic.prepare(tensors);
515*c217d954SCole Faust             break;
516*c217d954SCole Faust         default:
517*c217d954SCole Faust             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
518*c217d954SCole Faust     }
519*c217d954SCole Faust }
520*c217d954SCole Faust } // namespace cpu
521*c217d954SCole Faust } // namespace arm_compute
522