//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "ArmnnNetworkExecutor.hpp"
#include "YoloResultDecoder.hpp"
#include "SSDResultDecoder.hpp"
#include "ImageUtils.hpp"

#include <opencv2/opencv.hpp>

namespace od
{
/**
 * Generic object detection pipeline with three steps: data pre-processing, inference execution and
 * inference result post-processing.
 */
class ObjDetectionPipeline {
public:

    /**
     * Creates an object detection pipeline with the given network executor and decoder.
     * @param executor - unique pointer to inference runner
     * @param decoder - unique pointer to inference results decoder
     */
    ObjDetectionPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
                         std::unique_ptr<IDetectionResultDecoder> decoder);

    /**
     * @brief Standard image pre-processing implementation.
     *
     * Resizes an image keeping the aspect ratio, and pads it if necessary to fit the network input layer dimensions.
     *
     * @param[in] frame - input image, expected data type is uint8.
     * @param[out] processed - output image, data type is preserved.
     */
    virtual void PreProcessing(const cv::Mat& frame, cv::Mat& processed);

    /**
     * @brief Executes inference.
     *
     * Calls the inference runner provided during instance construction.
     *
     * @param[in] processed - input inference data. Data type should be aligned with the input tensor.
     * @param[out] result - raw floating point inference results.
     */
    virtual void Inference(const cv::Mat& processed, common::InferenceResults<float>& result);

    /**
     * @brief Standard inference results post-processing implementation.
     *
     * Decodes inference results using the decoder provided during construction.
     *
     * @param[in] inferenceResult - inference results to be decoded.
     * @param[in] callback - a function to be called after successful decoding of the inference results.
     */
    virtual void PostProcessing(common::InferenceResults<float>& inferenceResult,
                                const std::function<void (DetectedObjects)>& callback);

protected:
    std::unique_ptr<common::ArmnnNetworkExecutor<float>> m_executor;
    std::unique_ptr<IDetectionResultDecoder> m_decoder;
    common::Size m_inputImageSize{};
    cv::Mat m_processedFrame;
};
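
// A minimal usage sketch of the three pipeline stages, assuming a frame has
// already been captured and an executor/decoder pair has been constructed
// (the variable names below are illustrative):
//
// @code
// od::ObjDetectionPipeline pipeline(std::move(executor), std::move(decoder));
//
// cv::Mat processed;
// pipeline.PreProcessing(frame, processed);
//
// common::InferenceResults<float> results;
// pipeline.Inference(processed, results);
//
// pipeline.PostProcessing(results, [](od::DetectedObjects detections)
// {
//     // Consume the decoded detections here, e.g. draw bounding boxes.
// });
// @endcode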

/**
 * Specific to Yolo v3 tiny object detection pipeline implementation.
 */
class YoloV3Tiny: public ObjDetectionPipeline {
public:

    /**
     * Constructs an object detection pipeline for the Yolo v3 tiny network.
     *
     * Network input is expected to be uint8 or fp32, with data range [0, 255].
     * Network output is FP32.
     *
     * @param[in] executor - unique pointer to inference runner
     * @param[in] NMSThreshold - non max suppression threshold for decoding step
     * @param[in] ClsThreshold - class probability threshold for decoding step
     * @param[in] ObjectThreshold - detected object score threshold for decoding step
     */
    YoloV3Tiny(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
               float NMSThreshold, float ClsThreshold, float ObjectThreshold);

    /**
     * @brief Yolo v3 tiny image pre-processing implementation.
     *
     * On top of the standard pre-processing, converts the input data type according to the network input tensor
     * data type. Supported data types: uint8 and float32.
     *
     * @param[in] original - input image data
     * @param[out] processed - image data ready to be used for inference.
     */
    void PreProcessing(const cv::Mat& original, cv::Mat& processed) override;

};
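
// Example construction; the threshold values below are illustrative
// placeholders, not tuned recommendations:
//
// @code
// auto yolo = std::make_unique<od::YoloV3Tiny>(std::move(executor),
//                                              0.5f,   // NMS threshold
//                                              0.5f,   // class probability threshold
//                                              0.5f);  // object score threshold
// @endcode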

/**
 * Specific to MobileNet SSD v1 object detection pipeline implementation.
 */
class MobileNetSSDv1: public ObjDetectionPipeline {

public:
    /**
     * Constructs an object detection pipeline for the MobileNet SSD network.
     *
     * Network input is expected to be uint8 or fp32, with data range [-1, 1].
     * Network output is FP32.
     *
     * @param[in] executor - unique pointer to inference runner
     * @param[in] objectThreshold - detected object score threshold for decoding step
     */
    MobileNetSSDv1(std::unique_ptr<common::ArmnnNetworkExecutor<float>> executor,
                   float objectThreshold);

    /**
     * @brief MobileNet SSD image pre-processing implementation.
     *
     * On top of the standard pre-processing, converts the input data type according to the network input tensor
     * data type and scales input data from [0, 255] to [-1, 1] for FP32 input.
     *
     * Supported input data types: uint8 and float32.
     *
     * @param[in] original - input image data
     * @param[out] processed - image data ready to be used for inference.
     */
    void PreProcessing(const cv::Mat& original, cv::Mat& processed) override;

};
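
// For FP32 network input the pre-processing scales pixel values from [0, 255]
// into [-1, 1]. A sketch of an equivalent OpenCV conversion (illustrative,
// not the sample's exact implementation):
//
// @code
// cv::Mat floatFrame;
// // dst = src * (1 / 127.5) - 1, mapping 0 -> -1 and 255 -> 1.
// original.convertTo(floatFrame, CV_32FC3, 1.0 / 127.5, -1.0);
// @endcode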

using IPipelinePtr = std::unique_ptr<od::ObjDetectionPipeline>;

/**
 * Constructs an object detection pipeline based on the configuration provided.
 *
 * @param[in] config - object detection pipeline configuration.
 *
 * @return unique pointer to object detection pipeline.
 */
IPipelinePtr CreatePipeline(common::PipelineOptions& config);
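
// Typical use, assuming common::PipelineOptions carries the model identity and
// path (the field names below are assumptions based on the sample's
// configuration, not guaranteed by this header):
//
// @code
// common::PipelineOptions options;
// options.m_ModelName = "SSD_MOBILE";            // or "YOLO_V3_TINY"
// options.m_ModelFilePath = "/path/to/model.tflite";
// od::IPipelinePtr pipeline = od::CreatePipeline(options);
// @endcode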

} // namespace od