1 /*
2 * Copyright (c) 2017-2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
25
26 #include "arm_compute/core/Utils.h"
27 #include "arm_compute/core/Validate.h"
28 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
29 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
30 #include "arm_compute/runtime/CL/CLScheduler.h"
31 #include "src/core/CL/ICLKernel.h"
32 #include "src/gpu/cl/IClOperator.h"
33 #include "src/gpu/cl/operators/ClTransposedConvolution.h"
34
35 #include "src/common/utils/Log.h"
36
37 #include <cmath>
38 #include <memory>
39 #include <tuple>
40
41 using namespace arm_compute;
42 using namespace arm_compute::misc::shape_calculator;
43
/** Private implementation (PIMPL) state for CLDeconvolutionLayer.
 *
 * Holds the tensors and the operator used by the operator-based (DIRECT)
 * execution path. When a legacy function path (UPSCALE_CONV2D / GEMM) is
 * selected in configure(), @p op stays nullptr and these tensors are unused.
 */
struct CLDeconvolutionLayer::Impl
{
    const ICLTensor *src{ nullptr };     // Borrowed input tensor; not owned
    const ICLTensor *weights{ nullptr }; // Borrowed weights tensor; not owned
    const ICLTensor *biases{ nullptr };  // Borrowed bias tensor; may legitimately be nullptr
    ICLTensor *dst{ nullptr };           // Borrowed output tensor; not owned
    std::unique_ptr<opencl::IClOperator> op{ nullptr }; // Set only when the DIRECT method is chosen
};
52
// Out-of-line defaulted destructor: required here (not in the header) so that
// the unique_ptr<Impl> member can be destroyed where Impl is a complete type.
CLDeconvolutionLayer::~CLDeconvolutionLayer() = default;
54
/** Constructor.
 *
 * @param[in] memory_manager (Optional) Memory manager forwarded to the
 *                           GEMM-based backend (see configure()); may be nullptr.
 */
CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_manager(std::move(memory_manager)), _function(), _impl(std::make_unique<Impl>())
{
}
59
/** Configure the deconvolution using the default compile context.
 *
 * Thin forwarder to the compile-context overload below; see that overload
 * for the full parameter contract.
 */
void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
                                     const WeightsInfo &weights_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, weights_info);
}
65
configure(const CLCompileContext & compile_context,ICLTensor * input,ICLTensor * weights,const ICLTensor * bias,ICLTensor * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)66 void CLDeconvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
67 const WeightsInfo &weights_info)
68 {
69 ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
70 ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, deconv_info, weights_info);
71
72 switch(CLDeconvolutionLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, output->info(), deconv_info, weights_info))
73 {
74 case DeconvolutionMethod::DIRECT:
75 {
76 auto op = std::make_unique<opencl::ClTransposedConvolution>();
77 op->configure(compile_context, input->info(), weights->info(), bias != nullptr ? bias->info() : nullptr, output->info(), deconv_info);
78
79 _impl->src = input;
80 _impl->weights = weights;
81 _impl->biases = bias;
82 _impl->dst = output;
83
84 _impl->op = std::move(op);
85 break;
86 }
87 case DeconvolutionMethod::UPSCALE_CONV2D:
88 {
89 auto f = std::make_unique<CLDirectDeconvolutionLayer>();
90 f->configure(compile_context, input, weights, bias, output, deconv_info, weights_info);
91 _function = std::move(f);
92 break;
93 }
94 case DeconvolutionMethod::GEMM:
95 {
96 auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
97 f->configure(compile_context, input, weights, bias, output, deconv_info);
98 _function = std::move(f);
99 break;
100 }
101 default:
102 ARM_COMPUTE_ERROR("Not supported.");
103 break;
104 }
105 }
106
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * bias,ITensorInfo * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)107 Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
108 const WeightsInfo &weights_info)
109 {
110 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
111 switch(CLDeconvolutionLayer::get_deconvolution_method(input, weights, bias, output, deconv_info, weights_info))
112 {
113 case DeconvolutionMethod::DIRECT:
114 {
115 // Validate transposed convolution operator
116 ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClTransposedConvolution::validate(input, weights, bias, output, deconv_info));
117 break;
118 }
119 case DeconvolutionMethod::UPSCALE_CONV2D:
120 {
121 // Validate direct convolution layer
122 ARM_COMPUTE_RETURN_ON_ERROR(CLDirectDeconvolutionLayer::validate(input, weights, bias, output, deconv_info, weights_info));
123 break;
124 }
125 case DeconvolutionMethod::GEMM:
126 {
127 // Validate gemm-based convolution layer
128 ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
129 break;
130 }
131 default:
132 ARM_COMPUTE_ERROR("Not supported.");
133 break;
134 }
135
136 return Status{};
137 }
138
get_deconvolution_method(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * bias,ITensorInfo * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)139 DeconvolutionMethod CLDeconvolutionLayer::get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
140 const WeightsInfo &weights_info)
141 {
142 ARM_COMPUTE_UNUSED(output, bias, weights_info);
143
144 if(is_data_type_quantized_per_channel(weights->data_type()))
145 {
146 return DeconvolutionMethod::UPSCALE_CONV2D;
147 }
148
149 const DataLayout data_layout = input->data_layout();
150
151 const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
152 const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
153 const size_t idx_n = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
154 const size_t ofm = weights->tensor_shape()[idx_n];
155
156 if(weights->dimension(idx_w) != deconv_info.stride().first || weights->dimension(idx_h) != deconv_info.stride().second)
157 {
158 if(input->data_layout() == DataLayout::NHWC && ofm <= 16)
159 {
160 return DeconvolutionMethod::DIRECT;
161 }
162 else
163 {
164 return DeconvolutionMethod::UPSCALE_CONV2D;
165 }
166 }
167
168 return DeconvolutionMethod::GEMM;
169 }
170
run()171 void CLDeconvolutionLayer::run()
172 {
173 prepare();
174
175 if(_impl->op != nullptr)
176 {
177 // Optimized Operator will be used
178 ITensorPack pack;
179
180 pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
181 pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
182 pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
183 pack.add_tensor(TensorType::ACL_DST, _impl->dst);
184
185 _impl->op->run(pack);
186 }
187 else
188 {
189 _function->run();
190 }
191 }
192
prepare()193 void CLDeconvolutionLayer::prepare()
194 {
195 if(_impl->op == nullptr)
196 {
197 _function->prepare();
198 }
199 }
200