1*c217d954SCole Faust /*
2*c217d954SCole Faust * Copyright (c) 2021-2022 Arm Limited.
3*c217d954SCole Faust *
4*c217d954SCole Faust * SPDX-License-Identifier: MIT
5*c217d954SCole Faust *
6*c217d954SCole Faust * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust *
13*c217d954SCole Faust * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust * copies or substantial portions of the Software.
15*c217d954SCole Faust *
16*c217d954SCole Faust * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust * SOFTWARE.
23*c217d954SCole Faust */
24*c217d954SCole Faust #include "src/cpu/operators/CpuDepthwiseConv2d.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "arm_compute/core/TensorInfo.h"
27*c217d954SCole Faust #include "arm_compute/core/Validate.h"
28*c217d954SCole Faust #include "arm_compute/core/utils/misc/InfoHelpers.h"
29*c217d954SCole Faust #include "arm_compute/core/utils/misc/ShapeCalculator.h"
30*c217d954SCole Faust #include "arm_compute/runtime/NEON/NEScheduler.h"
31*c217d954SCole Faust #include "src/common/utils/Log.h"
32*c217d954SCole Faust #include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
33*c217d954SCole Faust
34*c217d954SCole Faust namespace arm_compute
35*c217d954SCole Faust {
36*c217d954SCole Faust namespace cpu
37*c217d954SCole Faust {
38*c217d954SCole Faust namespace
39*c217d954SCole Faust {
validate_arguments_optimized(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)40*c217d954SCole Faust Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
41*c217d954SCole Faust {
42*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
43*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
44*c217d954SCole Faust if(!is_data_type_quantized_per_channel(weights->data_type()))
45*c217d954SCole Faust {
46*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
47*c217d954SCole Faust }
48*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
49*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
50*c217d954SCole Faust const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
51*c217d954SCole Faust const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
52*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
53*c217d954SCole Faust info.pad_stride_info.pad_right());
54*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
55*c217d954SCole Faust info.pad_stride_info.pad_bottom());
56*c217d954SCole Faust
57*c217d954SCole Faust if(biases != nullptr)
58*c217d954SCole Faust {
59*c217d954SCole Faust const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
60*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
61*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
62*c217d954SCole Faust }
63*c217d954SCole Faust
64*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));
65*c217d954SCole Faust
66*c217d954SCole Faust // Validate Activation Layer
67*c217d954SCole Faust if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
68*c217d954SCole Faust {
69*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
70*c217d954SCole Faust }
71*c217d954SCole Faust return Status{};
72*c217d954SCole Faust }
73*c217d954SCole Faust } // namespace
74*c217d954SCole Faust
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)75*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo *src,
76*c217d954SCole Faust const ITensorInfo *weights,
77*c217d954SCole Faust const ITensorInfo *biases,
78*c217d954SCole Faust ITensorInfo *dst,
79*c217d954SCole Faust const ConvolutionInfo &info)
80*c217d954SCole Faust {
81*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
82*c217d954SCole Faust // Perform validation step
83*c217d954SCole Faust ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
84*c217d954SCole Faust dst, info));
85*c217d954SCole Faust
86*c217d954SCole Faust _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
87*c217d954SCole Faust _has_bias = biases != nullptr;
88*c217d954SCole Faust _is_nchw = src->data_layout() == DataLayout::NCHW;
89*c217d954SCole Faust _permute = _is_nchw;
90*c217d954SCole Faust _is_prepared = false;
91*c217d954SCole Faust _are_weights_const = weights->are_values_constant();
92*c217d954SCole Faust
93*c217d954SCole Faust // Configure pipeline
94*c217d954SCole Faust _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
95*c217d954SCole Faust
96*c217d954SCole Faust _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
97*c217d954SCole Faust if(_is_nchw)
98*c217d954SCole Faust {
99*c217d954SCole Faust _permute_input = std::make_unique<cpu::CpuPermute>();
100*c217d954SCole Faust _permute_weights = std::make_unique<cpu::CpuPermute>();
101*c217d954SCole Faust _permute_output = std::make_unique<cpu::CpuPermute>();
102*c217d954SCole Faust
103*c217d954SCole Faust auto input_perm = std::make_unique<TensorInfo>();
104*c217d954SCole Faust auto weights_perm = std::make_unique<TensorInfo>();
105*c217d954SCole Faust auto output_perm = std::make_unique<TensorInfo>();
106*c217d954SCole Faust
107*c217d954SCole Faust // Configure the function to transform the input tensor from NCHW -> NHWC
108*c217d954SCole Faust _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
109*c217d954SCole Faust input_perm->set_data_layout(DataLayout::NHWC);
110*c217d954SCole Faust
111*c217d954SCole Faust // Configure the function to transform the weights tensor from IHW -> HWI
112*c217d954SCole Faust _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
113*c217d954SCole Faust weights_perm->set_data_layout(DataLayout::NHWC);
114*c217d954SCole Faust
115*c217d954SCole Faust output_perm->set_data_layout(DataLayout::NHWC);
116*c217d954SCole Faust output_perm->set_quantization_info(dst->quantization_info());
117*c217d954SCole Faust
118*c217d954SCole Faust // Configure optimized depthwise
119*c217d954SCole Faust _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
120*c217d954SCole Faust
121*c217d954SCole Faust // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
122*c217d954SCole Faust output_perm->set_data_layout(DataLayout::NHWC);
123*c217d954SCole Faust _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
124*c217d954SCole Faust }
125*c217d954SCole Faust else
126*c217d954SCole Faust {
127*c217d954SCole Faust _dwc_optimized_func->configure(src, weights, biases, dst, info);
128*c217d954SCole Faust }
129*c217d954SCole Faust
130*c217d954SCole Faust // Configure activation
131*c217d954SCole Faust if(_is_activationlayer_enabled)
132*c217d954SCole Faust {
133*c217d954SCole Faust _activationlayer_function = std::make_unique<cpu::CpuActivation>();
134*c217d954SCole Faust _activationlayer_function->configure(dst, nullptr, info.act_info);
135*c217d954SCole Faust }
136*c217d954SCole Faust }
137*c217d954SCole Faust
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)138*c217d954SCole Faust Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo *src,
139*c217d954SCole Faust const ITensorInfo *weights,
140*c217d954SCole Faust const ITensorInfo *biases,
141*c217d954SCole Faust const ITensorInfo *dst,
142*c217d954SCole Faust const ConvolutionInfo &info)
143*c217d954SCole Faust {
144*c217d954SCole Faust return validate_arguments_optimized(src, weights, biases, dst, info);
145*c217d954SCole Faust }
146*c217d954SCole Faust
run(ITensorPack & tensors)147*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
148*c217d954SCole Faust {
149*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
150*c217d954SCole Faust prepare(tensors);
151*c217d954SCole Faust
152*c217d954SCole Faust auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
153*c217d954SCole Faust auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
154*c217d954SCole Faust auto workspace = tensors.get_tensor(TensorType::ACL_INT_3);
155*c217d954SCole Faust auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
156*c217d954SCole Faust
157*c217d954SCole Faust // Permute input
158*c217d954SCole Faust if(_permute)
159*c217d954SCole Faust {
160*c217d954SCole Faust ITensorPack pack;
161*c217d954SCole Faust auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
162*c217d954SCole Faust auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
163*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, src);
164*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, src_perm);
165*c217d954SCole Faust _permute_input->run(pack);
166*c217d954SCole Faust }
167*c217d954SCole Faust
168*c217d954SCole Faust // Run assembly function
169*c217d954SCole Faust if(_is_nchw)
170*c217d954SCole Faust {
171*c217d954SCole Faust auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
172*c217d954SCole Faust auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
173*c217d954SCole Faust auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
174*c217d954SCole Faust
175*c217d954SCole Faust ITensorPack pack;
176*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
177*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
178*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_2, bias);
179*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_0, workspace);
180*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
181*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst_perm);
182*c217d954SCole Faust _dwc_optimized_func->run(pack);
183*c217d954SCole Faust }
184*c217d954SCole Faust else
185*c217d954SCole Faust {
186*c217d954SCole Faust auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
187*c217d954SCole Faust auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
188*c217d954SCole Faust auto dst = tensors.get_tensor(TensorType::ACL_DST);
189*c217d954SCole Faust
190*c217d954SCole Faust ITensorPack pack;
191*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_0, src);
192*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_1, weights);
193*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_2, bias);
194*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_0, workspace);
195*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
196*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst);
197*c217d954SCole Faust _dwc_optimized_func->run(pack);
198*c217d954SCole Faust }
199*c217d954SCole Faust
200*c217d954SCole Faust // Permute output
201*c217d954SCole Faust if(_is_nchw)
202*c217d954SCole Faust {
203*c217d954SCole Faust ITensorPack pack;
204*c217d954SCole Faust auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
205*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, dst_perm);
206*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst);
207*c217d954SCole Faust _permute_output->run(pack);
208*c217d954SCole Faust }
209*c217d954SCole Faust
210*c217d954SCole Faust // Run activation
211*c217d954SCole Faust if(_is_activationlayer_enabled)
212*c217d954SCole Faust {
213*c217d954SCole Faust ITensorPack pack;
214*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, dst);
215*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst);
216*c217d954SCole Faust _activationlayer_function->run(pack);
217*c217d954SCole Faust }
218*c217d954SCole Faust }
219*c217d954SCole Faust
prepare(ITensorPack & tensors)220*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
221*c217d954SCole Faust {
222*c217d954SCole Faust // if weights are not constant then we need to repack so that weights
223*c217d954SCole Faust // can be updated in-place
224*c217d954SCole Faust if(!_are_weights_const)
225*c217d954SCole Faust {
226*c217d954SCole Faust auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
227*c217d954SCole Faust auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
228*c217d954SCole Faust auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
229*c217d954SCole Faust
230*c217d954SCole Faust ITensorPack pack_opt;
231*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
232*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
233*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
234*c217d954SCole Faust
235*c217d954SCole Faust // Prepare optimized function
236*c217d954SCole Faust _dwc_optimized_func->prepare(pack_opt);
237*c217d954SCole Faust
238*c217d954SCole Faust return;
239*c217d954SCole Faust }
240*c217d954SCole Faust
241*c217d954SCole Faust if(!_is_prepared)
242*c217d954SCole Faust {
243*c217d954SCole Faust auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
244*c217d954SCole Faust auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
245*c217d954SCole Faust auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
246*c217d954SCole Faust
247*c217d954SCole Faust // Permute weights
248*c217d954SCole Faust if(_permute)
249*c217d954SCole Faust {
250*c217d954SCole Faust auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);
251*c217d954SCole Faust
252*c217d954SCole Faust ITensorPack pack;
253*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, weights);
254*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, permuted_weights);
255*c217d954SCole Faust _permute_weights->run(pack);
256*c217d954SCole Faust
257*c217d954SCole Faust weights->mark_as_unused();
258*c217d954SCole Faust
259*c217d954SCole Faust ITensorPack pack_opt;
260*c217d954SCole Faust pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
261*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
262*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
263*c217d954SCole Faust
264*c217d954SCole Faust // Prepare optimized function
265*c217d954SCole Faust _dwc_optimized_func->prepare(pack_opt);
266*c217d954SCole Faust }
267*c217d954SCole Faust else
268*c217d954SCole Faust {
269*c217d954SCole Faust ITensorPack pack_opt;
270*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
271*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
272*c217d954SCole Faust pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
273*c217d954SCole Faust
274*c217d954SCole Faust // Prepare optimized function
275*c217d954SCole Faust _dwc_optimized_func->prepare(pack_opt);
276*c217d954SCole Faust }
277*c217d954SCole Faust
278*c217d954SCole Faust _is_prepared = true;
279*c217d954SCole Faust }
280*c217d954SCole Faust }
281*c217d954SCole Faust
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)282*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
283*c217d954SCole Faust {
284*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
285*c217d954SCole Faust ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
286*c217d954SCole Faust dst, info));
287*c217d954SCole Faust
288*c217d954SCole Faust _is_nchw = src->data_layout() == DataLayout::NCHW;
289*c217d954SCole Faust _is_prepared = !_is_nchw;
290*c217d954SCole Faust
291*c217d954SCole Faust ITensorInfo *input_to_use = src;
292*c217d954SCole Faust const ITensorInfo *weights_to_use = weights;
293*c217d954SCole Faust ITensorInfo *output_to_use = dst;
294*c217d954SCole Faust
295*c217d954SCole Faust auto input_perm = std::make_unique<TensorInfo>();
296*c217d954SCole Faust auto weights_perm = std::make_unique<TensorInfo>();
297*c217d954SCole Faust auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
298*c217d954SCole Faust
299*c217d954SCole Faust if(_is_nchw)
300*c217d954SCole Faust {
301*c217d954SCole Faust _permute_input = std::make_unique<cpu::CpuPermute>();
302*c217d954SCole Faust _permute_weights = std::make_unique<cpu::CpuPermute>();
303*c217d954SCole Faust
304*c217d954SCole Faust _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
305*c217d954SCole Faust input_perm->set_data_layout(DataLayout::NHWC);
306*c217d954SCole Faust input_to_use = input_perm.get();
307*c217d954SCole Faust
308*c217d954SCole Faust _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
309*c217d954SCole Faust weights_perm->set_data_layout(DataLayout::NHWC);
310*c217d954SCole Faust weights_to_use = weights_perm.get();
311*c217d954SCole Faust
312*c217d954SCole Faust output_to_use = output_perm.get();
313*c217d954SCole Faust }
314*c217d954SCole Faust
315*c217d954SCole Faust _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
316*c217d954SCole Faust _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
317*c217d954SCole Faust
318*c217d954SCole Faust if(_is_nchw)
319*c217d954SCole Faust {
320*c217d954SCole Faust _permute_output = std::make_unique<cpu::CpuPermute>();
321*c217d954SCole Faust _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
322*c217d954SCole Faust output_perm->set_data_layout(DataLayout::NHWC);
323*c217d954SCole Faust }
324*c217d954SCole Faust
325*c217d954SCole Faust //Configure Activation Layer
326*c217d954SCole Faust _is_activationlayer_enabled = info.act_info.enabled();
327*c217d954SCole Faust if(_is_activationlayer_enabled)
328*c217d954SCole Faust {
329*c217d954SCole Faust _activationlayer_function = std::make_unique<cpu::CpuActivation>();
330*c217d954SCole Faust _activationlayer_function->configure(dst, nullptr, info.act_info);
331*c217d954SCole Faust }
332*c217d954SCole Faust }
333*c217d954SCole Faust
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)334*c217d954SCole Faust Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
335*c217d954SCole Faust const ConvolutionInfo &info)
336*c217d954SCole Faust {
337*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
338*c217d954SCole Faust if(src->data_layout() == DataLayout::NCHW)
339*c217d954SCole Faust {
340*c217d954SCole Faust TensorShape permuted_input_shape = src->tensor_shape();
341*c217d954SCole Faust TensorShape permuted_weights_shape = weights->tensor_shape();
342*c217d954SCole Faust TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
343*c217d954SCole Faust permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
344*c217d954SCole Faust permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
345*c217d954SCole Faust permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
346*c217d954SCole Faust
347*c217d954SCole Faust const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
348*c217d954SCole Faust const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
349*c217d954SCole Faust const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
350*c217d954SCole Faust
351*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
352*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
353*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));
354*c217d954SCole Faust
355*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
356*c217d954SCole Faust }
357*c217d954SCole Faust else
358*c217d954SCole Faust {
359*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
360*c217d954SCole Faust }
361*c217d954SCole Faust
362*c217d954SCole Faust // Validate Activation Layer
363*c217d954SCole Faust if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
364*c217d954SCole Faust {
365*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
366*c217d954SCole Faust }
367*c217d954SCole Faust
368*c217d954SCole Faust return Status{};
369*c217d954SCole Faust }
370*c217d954SCole Faust
run(ITensorPack & tensors)371*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
372*c217d954SCole Faust {
373*c217d954SCole Faust auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
374*c217d954SCole Faust auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
375*c217d954SCole Faust auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
376*c217d954SCole Faust auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
377*c217d954SCole Faust
378*c217d954SCole Faust if(_is_nchw)
379*c217d954SCole Faust {
380*c217d954SCole Faust prepare(tensors);
381*c217d954SCole Faust auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
382*c217d954SCole Faust auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
383*c217d954SCole Faust auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
384*c217d954SCole Faust
385*c217d954SCole Faust ITensorPack pack;
386*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, src);
387*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, src_perm);
388*c217d954SCole Faust _permute_input->run(pack);
389*c217d954SCole Faust
390*c217d954SCole Faust ITensorPack pack_depth;
391*c217d954SCole Faust pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
392*c217d954SCole Faust pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
393*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
394*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
395*c217d954SCole Faust NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
396*c217d954SCole Faust }
397*c217d954SCole Faust else
398*c217d954SCole Faust {
399*c217d954SCole Faust ITensorPack pack_depth;
400*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
401*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
402*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
403*c217d954SCole Faust pack_depth.add_tensor(TensorType::ACL_DST, dst);
404*c217d954SCole Faust NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
405*c217d954SCole Faust }
406*c217d954SCole Faust
407*c217d954SCole Faust if(_is_nchw)
408*c217d954SCole Faust {
409*c217d954SCole Faust ITensorPack pack;
410*c217d954SCole Faust auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
411*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, dst_perm);
412*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst);
413*c217d954SCole Faust _permute_output->run(pack);
414*c217d954SCole Faust }
415*c217d954SCole Faust
416*c217d954SCole Faust if(_is_activationlayer_enabled)
417*c217d954SCole Faust {
418*c217d954SCole Faust ITensorPack pack;
419*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, dst);
420*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, dst);
421*c217d954SCole Faust _activationlayer_function->run(pack);
422*c217d954SCole Faust }
423*c217d954SCole Faust }
424*c217d954SCole Faust
prepare(ITensorPack & tensors)425*c217d954SCole Faust void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
426*c217d954SCole Faust {
427*c217d954SCole Faust if(!_is_prepared)
428*c217d954SCole Faust {
429*c217d954SCole Faust auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
430*c217d954SCole Faust auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
431*c217d954SCole Faust
432*c217d954SCole Faust ARM_COMPUTE_ERROR_ON(!weights->is_used());
433*c217d954SCole Faust
434*c217d954SCole Faust ITensorPack pack;
435*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC, weights);
436*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, weights_perm);
437*c217d954SCole Faust
438*c217d954SCole Faust _permute_weights->run(pack);
439*c217d954SCole Faust weights->mark_as_unused();
440*c217d954SCole Faust _is_prepared = true;
441*c217d954SCole Faust }
442*c217d954SCole Faust }
443*c217d954SCole Faust
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)444*c217d954SCole Faust void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
445*c217d954SCole Faust {
446*c217d954SCole Faust ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
447*c217d954SCole Faust
448*c217d954SCole Faust _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
449*c217d954SCole Faust switch(_depth_conv_func)
450*c217d954SCole Faust {
451*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
452*c217d954SCole Faust _func_optimized.configure(src, weights, biases, dst, info);
453*c217d954SCole Faust break;
454*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
455*c217d954SCole Faust _func_generic.configure(src, weights, biases, dst, info);
456*c217d954SCole Faust break;
457*c217d954SCole Faust default:
458*c217d954SCole Faust ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
459*c217d954SCole Faust }
460*c217d954SCole Faust }
461*c217d954SCole Faust
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)462*c217d954SCole Faust Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
463*c217d954SCole Faust {
464*c217d954SCole Faust DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
465*c217d954SCole Faust switch(depth_conv_func)
466*c217d954SCole Faust {
467*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
468*c217d954SCole Faust return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
469*c217d954SCole Faust break;
470*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
471*c217d954SCole Faust return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
472*c217d954SCole Faust break;
473*c217d954SCole Faust default:
474*c217d954SCole Faust ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
475*c217d954SCole Faust }
476*c217d954SCole Faust }
477*c217d954SCole Faust
get_depthwiseconvolution_function(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)478*c217d954SCole Faust DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
479*c217d954SCole Faust const ConvolutionInfo &info)
480*c217d954SCole Faust {
481*c217d954SCole Faust if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
482*c217d954SCole Faust {
483*c217d954SCole Faust return DepthwiseConvolutionFunction::OPTIMIZED;
484*c217d954SCole Faust }
485*c217d954SCole Faust else
486*c217d954SCole Faust {
487*c217d954SCole Faust return DepthwiseConvolutionFunction::GENERIC;
488*c217d954SCole Faust }
489*c217d954SCole Faust }
490*c217d954SCole Faust
run(ITensorPack & tensors)491*c217d954SCole Faust void CpuDepthwiseConv2d::run(ITensorPack &tensors)
492*c217d954SCole Faust {
493*c217d954SCole Faust switch(_depth_conv_func)
494*c217d954SCole Faust {
495*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
496*c217d954SCole Faust _func_optimized.run(tensors);
497*c217d954SCole Faust break;
498*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
499*c217d954SCole Faust _func_generic.run(tensors);
500*c217d954SCole Faust break;
501*c217d954SCole Faust default:
502*c217d954SCole Faust ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
503*c217d954SCole Faust }
504*c217d954SCole Faust }
505*c217d954SCole Faust
prepare(ITensorPack & tensors)506*c217d954SCole Faust void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
507*c217d954SCole Faust {
508*c217d954SCole Faust switch(_depth_conv_func)
509*c217d954SCole Faust {
510*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
511*c217d954SCole Faust _func_optimized.prepare(tensors);
512*c217d954SCole Faust break;
513*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
514*c217d954SCole Faust _func_generic.prepare(tensors);
515*c217d954SCole Faust break;
516*c217d954SCole Faust default:
517*c217d954SCole Faust ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
518*c217d954SCole Faust }
519*c217d954SCole Faust }
520*c217d954SCole Faust } // namespace cpu
521*c217d954SCole Faust } // namespace arm_compute
522