xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.h"
16 
17 #include <memory>
18 #include <string>
19 #include <vector>
20 
21 #include "tensorflow/lite/builtin_ops.h"
22 #include "tensorflow/lite/c/builtin_op_data.h"
23 #include "tensorflow/lite/c/common.h"
24 #include "tensorflow/lite/context_util.h"
25 #include "tensorflow/lite/delegates/hexagon/hexagon_implementation.h"
26 #include "tensorflow/lite/delegates/hexagon/utils.h"
27 #include "tensorflow/lite/kernels/kernel_util.h"
28 
29 namespace tflite {
30 
31 namespace {
32 // Returns uint64 representing total cycles in 'perf_info' by
33 // combining lo and hi counters.
GetCycles(const hexagon_nn_perfinfo & perf_info)34 inline uint64_t GetCycles(const hexagon_nn_perfinfo& perf_info) {
35   uint64_t res = perf_info.counter_hi;
36   res <<= 32;
37   res |= perf_info.counter_lo;
38   return res;
39 }
40 }  // namespace
41 
// Reports 'msg' as a kernel failure on 'context', after dumping the
// Hexagon NNLib device-side log so the details that led to the failure
// appear next to the TFLite error message.
void HexagonDelegateKernel::ReportError(TfLiteContext* context,
                                        const std::string& msg) {
  PrintLog();
  TF_LITE_KERNEL_LOG(context, "Failed: %s.", msg.c_str());
}
47 
Init(TfLiteContext * context,const TfLiteDelegateParams * params)48 TfLiteStatus HexagonDelegateKernel::Init(TfLiteContext* context,
49                                          const TfLiteDelegateParams* params) {
50   hexagon_nn_ = HexagonNNImplementation();
51   if (hexagon_nn_ == nullptr) {
52     TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
53     return kTfLiteError;
54   }
55 
56   // Ensure Hexagon NNLib is ready to start working.
57   int error = hexagon_nn_->hexagon_nn_config();
58   if (error != 0) {
59     TF_LITE_KERNEL_LOG(context, "hexagon_nn_config failed. Error: %d", error);
60     return kTfLiteError;
61   }
62 
63   // Initialize an empty graph.
64   error = hexagon_nn_->hexagon_nn_init(&graph_id_);
65   if (error != 0) {
66     ReportError(context, "failed to init");
67     return kTfLiteError;
68   }
69   error =
70       hexagon_nn_->hexagon_nn_set_debug_level(graph_id_, params_.debug_level);
71   if (error != 0) {
72     TF_LITE_KERNEL_LOG(context, "Failed to set debug level, error: %d", error);
73     return kTfLiteError;
74   }
75   error = hexagon_nn_->hexagon_nn_set_powersave_level(params_.powersave_level);
76   if (error != 0) {
77     TF_LITE_KERNEL_LOG(context, "Failed to set powersave level, error %d",
78                        error);
79     return kTfLiteError;
80   }
81 
82   for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
83     nodes_.push_back(node_index);
84   }
85 
86   TF_LITE_ENSURE_STATUS(
87       BuildGraph(context, params->input_tensors, params->output_tensors));
88   return kTfLiteOk;
89 }
90 
Eval(TfLiteContext * context,TfLiteNode * node)91 TfLiteStatus HexagonDelegateKernel::Eval(TfLiteContext* context,
92                                          TfLiteNode* node) {
93   if (hexagon_nn_ == nullptr) {
94     TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
95     return kTfLiteError;
96   }
97   // Allocate inputs.
98   std::vector<hexagon_nn_tensordef> input_tensors;
99   for (int input_idx = 0; input_idx < node->inputs->size; ++input_idx) {
100     const auto tensor_index = node->inputs->data[input_idx];
101     if (tensor_index == kTfLiteOptionalTensor) {
102       continue;
103     }
104     TfLiteTensor* tensor = &context->tensors[tensor_index];
105     // Const tensors should have been handled at delegation time..
106     if (tensor->allocation_type != kTfLiteMmapRo) {
107       char* data_ptr = tensor->data.raw;
108 
109       if (tensor->dims->size > 4) {
110         ReportError(context, "Only up to 4d tensor are supported.");
111         return kTfLiteError;
112       }
113       input_tensors.emplace_back();
114       auto& input_tensor = input_tensors.back();
115       input_tensor.data = reinterpret_cast<unsigned char*>(data_ptr);
116       input_tensor.dataLen = tensor->bytes;
117       input_tensor.data_valid_len = tensor->bytes;
118       TF_LITE_ENSURE_STATUS(
119           Get4DShape(&input_tensor.batches, &input_tensor.height,
120                      &input_tensor.width, &input_tensor.depth, tensor->dims));
121     }
122   }
123 
124   // Allocate outputs.
125   std::vector<hexagon_nn_tensordef> output_tensors;
126   for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
127     if (tensor_index == kTfLiteOptionalTensor) {
128       continue;
129     }
130     TfLiteTensor* tensor = &context->tensors[tensor_index];
131     if (tensor->allocation_type != kTfLiteMmapRo) {
132       if (tensor->dims->size > 4) {
133         ReportError(context, "Only up to 4d tensor are supported.");
134         return kTfLiteError;
135       }
136       output_tensors.emplace_back();
137       auto& output_tensor = output_tensors.back();
138       output_tensor.data = reinterpret_cast<unsigned char*>(tensor->data.raw);
139       output_tensor.dataLen = tensor->bytes;
140     }
141   }
142 
143   if (params_.print_graph_profile) {
144     hexagon_nn_->hexagon_nn_reset_perfinfo(graph_id_, 0);
145   }
146 
147   // Execute.
148   int error = hexagon_nn_->hexagon_nn_execute_new(
149       graph_id_, input_tensors.data(), input_tensors.size(),
150       output_tensors.data(), output_tensors.size());
151   if (error != 0) {
152     ReportError(context, "Failed to execute graph.");
153     return kTfLiteError;
154   }
155 
156   if (params_.print_graph_profile) {
157     PrintPerformanceData(reinterpret_cast<Profiler*>(context->profiler));
158   }
159   return kTfLiteOk;
160 }
161 
ResizeOutputTensors(TfLiteContext * context,TfLiteNode * node)162 TfLiteStatus HexagonDelegateKernel::ResizeOutputTensors(TfLiteContext* context,
163                                                         TfLiteNode* node) {
164   if (!params_.enable_dynamic_batch_size) return kTfLiteError;
165   int new_batch = -1;
166   for (int i = 0; i < params_.input_batch_dimensions->size; ++i) {
167     // If this input has no dynamic shape skip it.
168     if (params_.input_batch_dimensions->data[i] == -1) continue;
169     int input_tensor_index = node->inputs->data[i];
170     TfLiteTensor* input_tensor = &context->tensors[input_tensor_index];
171     new_batch =
172         input_tensor->dims->data[params_.input_batch_dimensions->data[i]];
173     break;
174   }
175   if (new_batch == -1) {
176     TF_LITE_KERNEL_LOG(context, "Invalid Batch size.");
177     return kTfLiteError;
178   }
179   for (int i = 0; i < node->outputs->size; ++i) {
180     // If this output has no dynamic shape skip it.
181     if (params_.output_batch_dimensions->data[i] == -1) continue;
182     int output_tensor_index = node->outputs->data[i];
183     TfLiteTensor* output_tensor = &context->tensors[output_tensor_index];
184     TfLiteIntArray* new_shape = TfLiteIntArrayCopy(output_tensor->dims);
185     new_shape->data[params_.output_batch_dimensions->data[i]] = new_batch;
186     TF_LITE_ENSURE_OK(context,
187                       context->ResizeTensor(context, output_tensor, new_shape));
188   }
189   return kTfLiteOk;
190 }
191 
Prepare(TfLiteContext * context,TfLiteNode * node)192 TfLiteStatus HexagonDelegateKernel::Prepare(TfLiteContext* context,
193                                             TfLiteNode* node) {
194   if (graph_prepared_) {
195     // If params_.enable_dynamic_batch_size = false, the delegate flags will
196     // cause the runtime to re-do delegation in case of input tensor resize.
197     // So here we can assume that input shapes remain the same, and return Ok.
198     if (!params_.enable_dynamic_batch_size) return kTfLiteOk;
199     // Graph already prepared, but we must resize TFLite output tensors
200     // based on the new input shape.
201     return ResizeOutputTensors(context, node);
202   }
203   if (hexagon_nn_ == nullptr) {
204     ReportError(context, "Hexagon interface not available. prepare");
205     return kTfLiteError;
206   }
207   int status = hexagon_nn_->hexagon_nn_prepare(graph_id_);
208   if (status != 0) {
209     ReportError(context, "Failed to prepare graph.\n");
210     return kTfLiteError;
211   }
212 
213   // Check input/output tensors.
214   std::vector<int> tensors;
215   for (auto tensor_index : TfLiteIntArrayView(node->inputs)) {
216     tensors.push_back(tensor_index);
217   }
218   for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
219     tensors.push_back(tensor_index);
220   }
221   for (auto tensor_index : tensors) {
222     if (tensor_index == kTfLiteOptionalTensor) {
223       continue;
224     }
225     TfLiteTensor* tensor = &context->tensors[tensor_index];
226     // Const tensors should be added as const nodes during graph construction.
227     if (tensor->allocation_type != kTfLiteMmapRo && tensor->dims->size > 4) {
228       ReportError(context, "Only up to 4d tensor are supported.");
229       return kTfLiteError;
230     }
231   }
232 
233   if (params_.print_graph_debug) {
234     PrintDebuggingGraph();
235   }
236 
237   // Mark graph as prepared, since we can't prepare it multiple times.
238   graph_prepared_ = true;
239 
240   return kTfLiteOk;
241 }
242 
// Constructs the Hexagon graph mirroring the delegated TFLite partition:
// creates a GraphBuilder, registers the partition's input tensors, converts
// every replaced TFLite node (and its const inputs) into Hexagon ops, then
// registers the outputs and finalizes the graph. Returns kTfLiteError if
// any builder step fails.
TfLiteStatus HexagonDelegateKernel::BuildGraph(
    TfLiteContext* context, const TfLiteIntArray* input_tensors,
    const TfLiteIntArray* output_tensors) {
  builder_ = std::make_unique<delegates::hexagon::GraphBuilder>(
      hexagon_nn_, context, graph_id_);
  if (params_.enable_dynamic_batch_size) {
    builder_->AddBatchSeqConfig(params_.max_batch_size,
                                params_.input_batch_dimensions,
                                params_.output_batch_dimensions);
  }
  // Add inputs to the graph.
  TF_LITE_ENSURE_STATUS(builder_->AddInputTensors(input_tensors, context));

  // Add all ops.
  TfLiteNode* node;
  TfLiteRegistration* reg;
  for (int node_index : nodes_) {
    TF_LITE_ENSURE_STATUS(
        context->GetNodeAndRegistration(context, node_index, &node, &reg));
    // Const inputs needs to be added to the hexagon graph as const nodes.
    // Adding them earlier here to the graph
    // - Simplifies separate builders
    // - Simplifies int8 vs uint8 cases, builders don't need to handle them.
    for (int i = 0; i < node->inputs->size; ++i) {
      const int tensor_id = node->inputs->data[i];
      // -1 marks an optional input that is absent.
      if (tensor_id == -1) continue;
      const auto& input_tensor = context->tensors[tensor_id];
      if (input_tensor.allocation_type == kTfLiteMmapRo) {
        builder_->AddConstNodeWithData(
            tensor_id, input_tensor,
            /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8));
      }
    }
    // Translate the TFLite op into one (or more) Hexagon nodes, then wire up
    // its inputs/outputs and record the produced tensors for later nodes.
    auto* op_builder =
        builder_->AddNodeFromTfLiteOp(reg->builtin_code, node, node_index);
    TF_LITE_ENSURE_STATUS(
        op_builder->PopulateSubGraph(node->inputs, node->outputs, context));
    TF_LITE_ENSURE_STATUS(op_builder->RegisterOutputs(node->outputs, context));
  }

  // Add Outputs.
  TF_LITE_ENSURE_STATUS(builder_->AddOutputTensors(output_tensors, context));

  // Finalize construction of the Hexagon graph.
  builder_->Build();

  return kTfLiteOk;
}
290 
~HexagonDelegateKernel()291 HexagonDelegateKernel::~HexagonDelegateKernel() {
292   if (graph_id_ != -1) {
293     hexagon_nn_->hexagon_nn_teardown(graph_id_);
294   }
295 }
296 
PrintLog()297 void HexagonDelegateKernel::PrintLog() {
298   std::vector<unsigned char> buf(3000000);
299   time_t my_time = time(nullptr);
300   hexagon_nn_->hexagon_nn_getlog(graph_id_, buf.data(), buf.size());
301   printf("----------------\n");
302   printf("Timestamp: %s\n\n", ctime(&my_time));
303   printf("Log\n%s\n", buf.data());
304   printf("----------------\n");
305   fflush(stdout);
306 }
307 
PrintPerformanceData(Profiler * profiler)308 void HexagonDelegateKernel::PrintPerformanceData(Profiler* profiler) {
309   if (profiler == nullptr) {
310     return;
311   }
312   const int kMaxNodes = 2048;
313   const int kMaxNameLen = 100;
314   std::vector<hexagon_nn_perfinfo> perf_data(kMaxNodes);
315   std::vector<char> op_name(kMaxNameLen);
316   uint64_t counter = 0;
317   unsigned int num_nodes;
318   if (hexagon_nn_->hexagon_nn_get_perfinfo(graph_id_, perf_data.data(),
319                                            kMaxNodes, &num_nodes) != 0) {
320     printf("Failed fetching perf data.\n");
321     return;
322   }
323   for (int i = 0; i < num_nodes; i++) {
324     counter = GetCycles(perf_data[i]);
325     int op_type_id = builder_->GetOpTypeId(perf_data[i].node_id);
326     if (op_type_id >= 0 && hexagon_nn_->hexagon_nn_op_id_to_name(
327                                op_type_id, op_name.data(), kMaxNameLen) != 0) {
328       printf("Failed to fetch name for %u with type %d\n", perf_data[i].node_id,
329              op_type_id);
330       continue;
331     }
332     int node_id = builder_->GetTFLiteNodeID(perf_data[i].node_id);
333     if (node_id != -1 && op_type_id >= 0) {
334       profiler->AddEvent((op_type_id < 0 ? "" : op_name.data()),
335                          Profiler::EventType::OPERATOR_INVOKE_EVENT, counter,
336                          node_id);
337     }
338   }
339 }
340 
PrintDebuggingGraph()341 void HexagonDelegateKernel::PrintDebuggingGraph() {
342   const int kMaxBufLen = 100000;
343   std::vector<unsigned char> buf(kMaxBufLen);
344   if (hexagon_nn_->hexagon_nn_snpprint(graph_id_, buf.data(), kMaxBufLen) !=
345       0) {
346     printf("Error fetching graph debug details.\n");
347     return;
348   }
349   printf("------- Graph Debugging Start -------\n");
350   printf("%s\n", buf.data());
351   printf("------- Graph Debugging End -------\n");
352 }
353 
// Releases global Hexagon NNLib state shared by all delegate kernels.
// No-op if the Hexagon interface is unavailable.
void HexagonDelegateKernel::Teardown() {
  auto* hexagon_nn = HexagonNNImplementation();
  if (hexagon_nn != nullptr) {
    hexagon_nn->hexagon_nn_global_teardown();
  }
}
360 
// Initializes global Hexagon NNLib state shared by all delegate kernels.
// No-op if the Hexagon interface is unavailable.
void HexagonDelegateKernel::InitState() {
  auto* hexagon_nn = HexagonNNImplementation();
  if (hexagon_nn != nullptr) {
    hexagon_nn->hexagon_nn_global_init();
  }
}
367 }  // namespace tflite
368