xref: /aosp_15_r20/external/ComputeLibrary/src/graph/backends/CL/CLFunctionsFactory.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2018-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
25 
26 #include "arm_compute/graph/Graph.h"
27 #include "arm_compute/graph/GraphContext.h"
28 #include "arm_compute/graph/backends/FunctionHelpers.h"
29 #include "arm_compute/runtime/CL/CLFunctions.h"
30 #include "arm_compute/runtime/CPP/CPPFunctions.h"
31 #include "src/core/CL/CLKernels.h"
32 #include "support/Cast.h"
33 
34 using namespace arm_compute::utils::cast;
35 
36 namespace arm_compute
37 {
38 namespace graph
39 {
40 namespace backends
41 {
42 /** Target specific information structure used to pass information to the layer templates */
43 struct CLTargetInfo
44 {
45     using TensorType         = arm_compute::ICLTensor;
46     using SrcTensorType      = const arm_compute::ICLTensor;
47     using TensorConcreteType = CLTensor;
48     static Target TargetType;
49 };
50 
51 Target CLTargetInfo::TargetType = Target::CL;
52 
53 /** Collection of CL convolution functions */
54 struct CLConvolutionLayerFunctions
55 {
56     using GenericConvolutionLayer  = CLConvolutionLayer;
57     using GEMMConvolutionLayer     = CLGEMMConvolutionLayer;
58     using DirectConvolutionLayer   = CLDirectConvolutionLayer;
59     using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
60 };
61 
62 /** Collection of CL element-wise functions */
63 struct CLEltwiseFunctions
64 {
65     using Addition       = CLArithmeticAddition;
66     using Subtraction    = CLArithmeticSubtraction;
67     using Multiplication = CLPixelWiseMultiplication;
68     using Maximum        = CLElementwiseMax;
69     using Division       = CLArithmeticDivision;
70 };
71 
72 /** Collection of CL unary element-wise functions */
73 struct CLUnaryEltwiseFunctions
74 {
75     using Exp = CLExpLayer;
76 };
77 
78 /** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */
79 struct CLFusedLayerTypes
80 {
81     using ConvolutionLayer          = CLConvolutionLayer;
82     using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
83     using FuseBatchNormalization    = CLFuseBatchNormalization;
84     using GEMMConvolutionLayer      = CLGEMMConvolutionLayer;
85 };
86 
87 /** Wrapper for the CPP Function in the OpenCL backend **/
88 class CPPWrapperFunction : public IFunction
89 {
90 public:
91     /* Default constructor */
CPPWrapperFunction()92     CPPWrapperFunction()
93         : _tensors(), _func(nullptr)
94     {
95     }
96 
run()97     void run() override
98     {
99         for(auto &tensor : _tensors)
100         {
101             tensor->map(CLScheduler::get().queue());
102         }
103         _func->run();
104 
105         for(auto &tensor : _tensors)
106         {
107             tensor->unmap(CLScheduler::get().queue());
108         }
109     }
110 
register_tensor(ICLTensor * tensor)111     void register_tensor(ICLTensor *tensor)
112     {
113         _tensors.push_back(tensor);
114     }
115 
register_function(std::unique_ptr<IFunction> function)116     void register_function(std::unique_ptr<IFunction> function)
117     {
118         _func = std::move(function);
119     }
120 
121 private:
122     std::vector<arm_compute::ICLTensor *> _tensors;
123     std::unique_ptr<IFunction>            _func;
124 };
125 
126 namespace detail
127 {
128 // Specialized functions
129 template <>
create_detection_output_layer(DetectionOutputLayerNode & node)130 std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
131 {
132     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
133 
134     // Extract IO and info
135     CLTargetInfo::TensorType      *input0      = get_backing_tensor<CLTargetInfo>(node.input(0));
136     CLTargetInfo::TensorType      *input1      = get_backing_tensor<CLTargetInfo>(node.input(1));
137     CLTargetInfo::TensorType      *input2      = get_backing_tensor<CLTargetInfo>(node.input(2));
138     CLTargetInfo::TensorType      *output      = get_backing_tensor<CLTargetInfo>(node.output(0));
139     const DetectionOutputLayerInfo detect_info = node.detection_output_info();
140 
141     ARM_COMPUTE_ERROR_ON(input0 == nullptr);
142     ARM_COMPUTE_ERROR_ON(input1 == nullptr);
143     ARM_COMPUTE_ERROR_ON(input2 == nullptr);
144     ARM_COMPUTE_ERROR_ON(output == nullptr);
145 
146     // Create and configure function
147     auto func = std::make_unique<CPPDetectionOutputLayer>();
148     func->configure(input0, input1, input2, output, detect_info);
149 
150     // Log info
151     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
152                                << node.name()
153                                << " Type: " << node.type()
154                                << " Target: " << CLTargetInfo::TargetType
155                                << " Data Type: " << input0->info()->data_type()
156                                << " Input0 shape: " << input0->info()->tensor_shape()
157                                << " Input1 shape: " << input1->info()->tensor_shape()
158                                << " Input2 shape: " << input2->info()->tensor_shape()
159                                << " Output shape: " << output->info()->tensor_shape()
160                                << " DetectionOutputLayer info: " << detect_info
161                                << std::endl);
162 
163     auto wrap_function = std::make_unique<CPPWrapperFunction>();
164 
165     wrap_function->register_function(std::move(func));
166     wrap_function->register_tensor(input0);
167     wrap_function->register_tensor(input1);
168     wrap_function->register_tensor(input2);
169     wrap_function->register_tensor(output);
170 
171     return std::move(wrap_function);
172 }
173 template <>
create_detection_post_process_layer(DetectionPostProcessLayerNode & node)174 std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
175 {
176     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
177 
178     // Extract IO and info
179     CLTargetInfo::TensorType           *input0      = get_backing_tensor<CLTargetInfo>(node.input(0));
180     CLTargetInfo::TensorType           *input1      = get_backing_tensor<CLTargetInfo>(node.input(1));
181     CLTargetInfo::TensorType           *input2      = get_backing_tensor<CLTargetInfo>(node.input(2));
182     CLTargetInfo::TensorType           *output0     = get_backing_tensor<CLTargetInfo>(node.output(0));
183     CLTargetInfo::TensorType           *output1     = get_backing_tensor<CLTargetInfo>(node.output(1));
184     CLTargetInfo::TensorType           *output2     = get_backing_tensor<CLTargetInfo>(node.output(2));
185     CLTargetInfo::TensorType           *output3     = get_backing_tensor<CLTargetInfo>(node.output(3));
186     const DetectionPostProcessLayerInfo detect_info = node.detection_post_process_info();
187 
188     ARM_COMPUTE_ERROR_ON(input0 == nullptr);
189     ARM_COMPUTE_ERROR_ON(input1 == nullptr);
190     ARM_COMPUTE_ERROR_ON(input2 == nullptr);
191     ARM_COMPUTE_ERROR_ON(output0 == nullptr);
192     ARM_COMPUTE_ERROR_ON(output1 == nullptr);
193     ARM_COMPUTE_ERROR_ON(output2 == nullptr);
194     ARM_COMPUTE_ERROR_ON(output3 == nullptr);
195 
196     // Create and configure function
197     auto func = std::make_unique<CPPDetectionPostProcessLayer>();
198     func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
199 
200     // Log info
201     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
202                                << node.name()
203                                << " Type: " << node.type()
204                                << " Target: " << CLTargetInfo::TargetType
205                                << " Data Type: " << input0->info()->data_type()
206                                << " Input0 shape: " << input0->info()->tensor_shape()
207                                << " Input1 shape: " << input1->info()->tensor_shape()
208                                << " Input2 shape: " << input2->info()->tensor_shape()
209                                << " Output0 shape: " << output0->info()->tensor_shape()
210                                << " Output1 shape: " << output1->info()->tensor_shape()
211                                << " Output2 shape: " << output2->info()->tensor_shape()
212                                << " Output3 shape: " << output3->info()->tensor_shape()
213                                << " DetectionPostProcessLayer info: " << detect_info
214                                << std::endl);
215 
216     auto wrap_function = std::make_unique<CPPWrapperFunction>();
217 
218     wrap_function->register_function(std::move(func));
219     wrap_function->register_tensor(input0);
220     wrap_function->register_tensor(input1);
221     wrap_function->register_tensor(input2);
222     wrap_function->register_tensor(output0);
223     wrap_function->register_tensor(output1);
224     wrap_function->register_tensor(output2);
225     wrap_function->register_tensor(output3);
226 
227     return std::move(wrap_function);
228 }
229 } // namespace detail
230 
create(INode * node,GraphContext & ctx)231 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
232 {
233     if(node == nullptr)
234     {
235         return nullptr;
236     }
237 
238     NodeType type = node->type();
239     switch(type)
240     {
241         case NodeType::ActivationLayer:
242             return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
243         case NodeType::ArgMinMaxLayer:
244             return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
245         case NodeType::BatchNormalizationLayer:
246             return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
247         case NodeType::BoundingBoxTransformLayer:
248             return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
249         case NodeType::ChannelShuffleLayer:
250             return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
251         case NodeType::ConvolutionLayer:
252             return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
253         case NodeType::DeconvolutionLayer:
254             return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
255         case NodeType::ConcatenateLayer:
256             return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
257         case NodeType::DepthToSpaceLayer:
258             return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
259         case NodeType::DepthwiseConvolutionLayer:
260             return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
261         case NodeType::DequantizationLayer:
262             return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
263         case NodeType::DetectionOutputLayer:
264             return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
265         case NodeType::DetectionPostProcessLayer:
266             return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
267         case NodeType::EltwiseLayer:
268             return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
269         case NodeType::UnaryEltwiseLayer:
270             return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
271         case NodeType::FlattenLayer:
272             return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
273         case NodeType::FullyConnectedLayer:
274             return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
275         case NodeType::FusedConvolutionBatchNormalizationLayer:
276             return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
277         case NodeType::FusedConvolutionWithPostOp:
278             return detail::create_fused_convolution_with_post_op<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionWithPostOpNode *>(node), ctx);
279         case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
280             return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
281         case NodeType::GenerateProposalsLayer:
282             return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
283         case NodeType::L2NormalizeLayer:
284             return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
285         case NodeType::NormalizationLayer:
286             return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
287         case NodeType::NormalizePlanarYUVLayer:
288             return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
289         case NodeType::PadLayer:
290             return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
291         case NodeType::PermuteLayer:
292             return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
293         case NodeType::PoolingLayer:
294             return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
295         case NodeType::PReluLayer:
296             return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
297         case NodeType::PrintLayer:
298             return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
299         case NodeType::PriorBoxLayer:
300             return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
301         case NodeType::QuantizationLayer:
302             return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
303         case NodeType::ReductionOperationLayer:
304             return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
305         case NodeType::ReorgLayer:
306             return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
307         case NodeType::ReshapeLayer:
308             return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
309         case NodeType::ResizeLayer:
310             return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
311         case NodeType::ROIAlignLayer:
312             return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
313         case NodeType::SliceLayer:
314             return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
315         case NodeType::SoftmaxLayer:
316             return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
317         case NodeType::StackLayer:
318             return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
319         case NodeType::StridedSliceLayer:
320             return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
321         case NodeType::FusedConvolutionBatchNormalizationLayerWithPostOpsLayer:
322             return detail::create_fused_convolution_batch_normalization_with_post_op<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationWithPostOpsNode *>(node), ctx);
323         default:
324             return nullptr;
325     }
326 }
327 } // namespace backends
328 } // namespace graph
329 } // namespace arm_compute
330