//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"
#include <armnn/Logging.hpp>
#include "Profiling.hpp"

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Used to load a network through ArmNN and run inference on it against a given backend.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors     m_InputTensors;
    armnn::OutputTensors    m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
    Profiling m_profiling;
    std::vector<std::string> m_outputLayerNamesList;

    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Initializes the executor with the given model. The model is parsed through TfLiteParser and optimized
    *        for a given backend.
    *
    * Note that the order of the output layer names in m_outputLayerNamesList affects the order of the feature
    * vectors in the output of the Run method.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    * @param[in] isProfilingEnabled - Whether to collect and print profiling information
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the input width and height of the associated model, in that order.
    */
    Size GetImageAspectRatio();

    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data and stores the results in the provided InferenceResults object.
    *
    * @param[in] inputData - input frame data
    * @param[in] dataBytes - input data size in bytes
    * @param[out] outResults - InferenceResults object used to store the inference output
    *
    * @return true if inference succeeded, false otherwise
    */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);

};

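/*
 * Illustrative usage sketch (added for clarity, not part of the original sample): construct an
 * executor for a TfLite model and run one inference. The model path, backend names, the uint8_t
 * output type and the LoadInputFrame() helper are placeholder assumptions.
 *
 *     std::string modelPath = "model.tflite";
 *     std::vector<armnn::BackendId> backends = { "CpuAcc", "CpuRef" };
 *     common::ArmnnNetworkExecutor<uint8_t> executor(modelPath, backends);
 *
 *     // Fill a buffer with input data matching the model's expected width/height (hypothetical helper).
 *     std::vector<uint8_t> inputFrame = LoadInputFrame(executor.GetImageAspectRatio());
 *
 *     common::InferenceResults<uint8_t> results;
 *     if (executor.Run(inputFrame.data(), inputFrame.size() * sizeof(uint8_t), results))
 *     {
 *         // Each element of results holds the flattened output of one output layer,
 *         // in the order given by the parser's output tensor names.
 *     }
 */
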
template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                           std::vector<armnn::BackendId>& preferredBackends,
                                           bool isProfilingEnabled):
        m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions())),
        m_profiling(isProfilingEnabled)
{
    // Import the TensorFlow Lite model.
    m_profiling.ProfilingStart();
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);

    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
    }
    std::vector<std::string> errorMessages;
    // Optimize the network.
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Pre-allocate memory for the outputs (their size never changes).
    for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make one ArmNN output tensor per pre-allocated output buffer.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
    m_profiling.ProfilingStopAndPrintUs("ArmnnNetworkExecutor time");
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}

template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults)
{
    m_profiling.ProfilingStart();
    /* Prepare tensors if they are not ready */
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);
    ARMNN_LOG(trace) << "Running inference...";

    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";

    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    outResults.reserve(m_outputLayerNamesList.size());
    outResults = m_OutputBuffer;
    m_profiling.ProfilingStopAndPrintUs("Total inference time");
    return (armnn::Status::Success == ret);
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationScale();
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(this->m_outputLayerNamesList.size() > tensorIndex);
    return this->m_outputBindingInfo[tensorIndex].second.GetQuantizationOffset();
}

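/*
 * Sketch of how the output quantization getters above might be used (assumption: the standard
 * ArmNN/TfLite affine quantization scheme, real = scale * (quantized - offset)):
 *
 *     float scale  = executor.GetOutputQuantizationScale(0);
 *     int   offset = executor.GetOutputQuantizationOffset(0);
 *     float real   = scale * (static_cast<int>(results[0][i]) - offset);   // results/i are placeholders
 */
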
template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);
    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}
}// namespace common