1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/hexagon/hexagon_delegate_kernel.h"
16
17 #include <memory>
18 #include <string>
19 #include <vector>
20
21 #include "tensorflow/lite/builtin_ops.h"
22 #include "tensorflow/lite/c/builtin_op_data.h"
23 #include "tensorflow/lite/c/common.h"
24 #include "tensorflow/lite/context_util.h"
25 #include "tensorflow/lite/delegates/hexagon/hexagon_implementation.h"
26 #include "tensorflow/lite/delegates/hexagon/utils.h"
27 #include "tensorflow/lite/kernels/kernel_util.h"
28
29 namespace tflite {
30
31 namespace {
32 // Returns uint64 representing total cycles in 'perf_info' by
33 // combining lo and hi counters.
GetCycles(const hexagon_nn_perfinfo & perf_info)34 inline uint64_t GetCycles(const hexagon_nn_perfinfo& perf_info) {
35 uint64_t res = perf_info.counter_hi;
36 res <<= 32;
37 res |= perf_info.counter_lo;
38 return res;
39 }
40 } // namespace
41
// Reports a delegate failure on 'context'. Dumps the Hexagon-side log first
// so the device's diagnostics appear before the error message itself.
void HexagonDelegateKernel::ReportError(TfLiteContext* context,
                                        const std::string& msg) {
  // Flush NNLib's log buffer to stdout before emitting the TFLite error.
  PrintLog();
  TF_LITE_KERNEL_LOG(context, "Failed: %s.", msg.c_str());
}
47
Init(TfLiteContext * context,const TfLiteDelegateParams * params)48 TfLiteStatus HexagonDelegateKernel::Init(TfLiteContext* context,
49 const TfLiteDelegateParams* params) {
50 hexagon_nn_ = HexagonNNImplementation();
51 if (hexagon_nn_ == nullptr) {
52 TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
53 return kTfLiteError;
54 }
55
56 // Ensure Hexagon NNLib is ready to start working.
57 int error = hexagon_nn_->hexagon_nn_config();
58 if (error != 0) {
59 TF_LITE_KERNEL_LOG(context, "hexagon_nn_config failed. Error: %d", error);
60 return kTfLiteError;
61 }
62
63 // Initialize an empty graph.
64 error = hexagon_nn_->hexagon_nn_init(&graph_id_);
65 if (error != 0) {
66 ReportError(context, "failed to init");
67 return kTfLiteError;
68 }
69 error =
70 hexagon_nn_->hexagon_nn_set_debug_level(graph_id_, params_.debug_level);
71 if (error != 0) {
72 TF_LITE_KERNEL_LOG(context, "Failed to set debug level, error: %d", error);
73 return kTfLiteError;
74 }
75 error = hexagon_nn_->hexagon_nn_set_powersave_level(params_.powersave_level);
76 if (error != 0) {
77 TF_LITE_KERNEL_LOG(context, "Failed to set powersave level, error %d",
78 error);
79 return kTfLiteError;
80 }
81
82 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
83 nodes_.push_back(node_index);
84 }
85
86 TF_LITE_ENSURE_STATUS(
87 BuildGraph(context, params->input_tensors, params->output_tensors));
88 return kTfLiteOk;
89 }
90
// Executes the Hexagon graph for one invocation. Non-const TFLite inputs are
// wrapped as hexagon_nn_tensordef structs (raw data pointer plus a 4D shape),
// output tensordefs point at the TFLite output buffers so NNLib writes the
// results in place, then hexagon_nn_execute_new runs the graph. Returns
// kTfLiteError on any marshaling or execution failure.
TfLiteStatus HexagonDelegateKernel::Eval(TfLiteContext* context,
                                         TfLiteNode* node) {
  if (hexagon_nn_ == nullptr) {
    TF_LITE_KERNEL_LOG(context, "Hexagon interface not available.");
    return kTfLiteError;
  }
  // Allocate inputs.
  std::vector<hexagon_nn_tensordef> input_tensors;
  for (int input_idx = 0; input_idx < node->inputs->size; ++input_idx) {
    const auto tensor_index = node->inputs->data[input_idx];
    if (tensor_index == kTfLiteOptionalTensor) {
      continue;
    }
    TfLiteTensor* tensor = &context->tensors[tensor_index];
    // Const tensors should have been handled at delegation time.
    if (tensor->allocation_type != kTfLiteMmapRo) {
      char* data_ptr = tensor->data.raw;

      if (tensor->dims->size > 4) {
        ReportError(context, "Only up to 4d tensor are supported.");
        return kTfLiteError;
      }
      input_tensors.emplace_back();
      auto& input_tensor = input_tensors.back();
      input_tensor.data = reinterpret_cast<unsigned char*>(data_ptr);
      input_tensor.dataLen = tensor->bytes;
      // The entire buffer is valid input data.
      input_tensor.data_valid_len = tensor->bytes;
      // Expand the (up to 4D) TFLite dims into the explicit
      // batches/height/width/depth form the tensordef expects.
      TF_LITE_ENSURE_STATUS(
          Get4DShape(&input_tensor.batches, &input_tensor.height,
                     &input_tensor.width, &input_tensor.depth, tensor->dims));
    }
  }

  // Allocate outputs.
  std::vector<hexagon_nn_tensordef> output_tensors;
  for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
    if (tensor_index == kTfLiteOptionalTensor) {
      continue;
    }
    TfLiteTensor* tensor = &context->tensors[tensor_index];
    if (tensor->allocation_type != kTfLiteMmapRo) {
      if (tensor->dims->size > 4) {
        ReportError(context, "Only up to 4d tensor are supported.");
        return kTfLiteError;
      }
      output_tensors.emplace_back();
      auto& output_tensor = output_tensors.back();
      // Point NNLib directly at the TFLite output buffer; results are
      // written in place during execution.
      output_tensor.data = reinterpret_cast<unsigned char*>(tensor->data.raw);
      output_tensor.dataLen = tensor->bytes;
    }
  }

  if (params_.print_graph_profile) {
    // Reset counters so the profile printed below covers only this run.
    hexagon_nn_->hexagon_nn_reset_perfinfo(graph_id_, 0);
  }

  // Execute.
  int error = hexagon_nn_->hexagon_nn_execute_new(
      graph_id_, input_tensors.data(), input_tensors.size(),
      output_tensors.data(), output_tensors.size());
  if (error != 0) {
    ReportError(context, "Failed to execute graph.");
    return kTfLiteError;
  }

  if (params_.print_graph_profile) {
    PrintPerformanceData(reinterpret_cast<Profiler*>(context->profiler));
  }
  return kTfLiteOk;
}
161
ResizeOutputTensors(TfLiteContext * context,TfLiteNode * node)162 TfLiteStatus HexagonDelegateKernel::ResizeOutputTensors(TfLiteContext* context,
163 TfLiteNode* node) {
164 if (!params_.enable_dynamic_batch_size) return kTfLiteError;
165 int new_batch = -1;
166 for (int i = 0; i < params_.input_batch_dimensions->size; ++i) {
167 // If this input has no dynamic shape skip it.
168 if (params_.input_batch_dimensions->data[i] == -1) continue;
169 int input_tensor_index = node->inputs->data[i];
170 TfLiteTensor* input_tensor = &context->tensors[input_tensor_index];
171 new_batch =
172 input_tensor->dims->data[params_.input_batch_dimensions->data[i]];
173 break;
174 }
175 if (new_batch == -1) {
176 TF_LITE_KERNEL_LOG(context, "Invalid Batch size.");
177 return kTfLiteError;
178 }
179 for (int i = 0; i < node->outputs->size; ++i) {
180 // If this output has no dynamic shape skip it.
181 if (params_.output_batch_dimensions->data[i] == -1) continue;
182 int output_tensor_index = node->outputs->data[i];
183 TfLiteTensor* output_tensor = &context->tensors[output_tensor_index];
184 TfLiteIntArray* new_shape = TfLiteIntArrayCopy(output_tensor->dims);
185 new_shape->data[params_.output_batch_dimensions->data[i]] = new_batch;
186 TF_LITE_ENSURE_OK(context,
187 context->ResizeTensor(context, output_tensor, new_shape));
188 }
189 return kTfLiteOk;
190 }
191
Prepare(TfLiteContext * context,TfLiteNode * node)192 TfLiteStatus HexagonDelegateKernel::Prepare(TfLiteContext* context,
193 TfLiteNode* node) {
194 if (graph_prepared_) {
195 // If params_.enable_dynamic_batch_size = false, the delegate flags will
196 // cause the runtime to re-do delegation in case of input tensor resize.
197 // So here we can assume that input shapes remain the same, and return Ok.
198 if (!params_.enable_dynamic_batch_size) return kTfLiteOk;
199 // Graph already prepared, but we must resize TFLite output tensors
200 // based on the new input shape.
201 return ResizeOutputTensors(context, node);
202 }
203 if (hexagon_nn_ == nullptr) {
204 ReportError(context, "Hexagon interface not available. prepare");
205 return kTfLiteError;
206 }
207 int status = hexagon_nn_->hexagon_nn_prepare(graph_id_);
208 if (status != 0) {
209 ReportError(context, "Failed to prepare graph.\n");
210 return kTfLiteError;
211 }
212
213 // Check input/output tensors.
214 std::vector<int> tensors;
215 for (auto tensor_index : TfLiteIntArrayView(node->inputs)) {
216 tensors.push_back(tensor_index);
217 }
218 for (auto tensor_index : TfLiteIntArrayView(node->outputs)) {
219 tensors.push_back(tensor_index);
220 }
221 for (auto tensor_index : tensors) {
222 if (tensor_index == kTfLiteOptionalTensor) {
223 continue;
224 }
225 TfLiteTensor* tensor = &context->tensors[tensor_index];
226 // Const tensors should be added as const nodes during graph construction.
227 if (tensor->allocation_type != kTfLiteMmapRo && tensor->dims->size > 4) {
228 ReportError(context, "Only up to 4d tensor are supported.");
229 return kTfLiteError;
230 }
231 }
232
233 if (params_.print_graph_debug) {
234 PrintDebuggingGraph();
235 }
236
237 // Mark graph as prepared, since we can't prepare it multiple times.
238 graph_prepared_ = true;
239
240 return kTfLiteOk;
241 }
242
// Mirrors the delegated TFLite subgraph into a Hexagon NNLib graph:
// registers input tensors, adds a const node for every kTfLiteMmapRo input,
// translates each replaced TFLite op via a per-op builder, registers
// outputs, and finalizes with Build().
TfLiteStatus HexagonDelegateKernel::BuildGraph(
    TfLiteContext* context, const TfLiteIntArray* input_tensors,
    const TfLiteIntArray* output_tensors) {
  builder_ = std::make_unique<delegates::hexagon::GraphBuilder>(
      hexagon_nn_, context, graph_id_);
  if (params_.enable_dynamic_batch_size) {
    // Configure batch sequencing so the graph can run with varying batch
    // sizes up to max_batch_size.
    builder_->AddBatchSeqConfig(params_.max_batch_size,
                                params_.input_batch_dimensions,
                                params_.output_batch_dimensions);
  }
  // Add inputs to the graph.
  TF_LITE_ENSURE_STATUS(builder_->AddInputTensors(input_tensors, context));

  // Add all ops.
  TfLiteNode* node;
  TfLiteRegistration* reg;
  for (int node_index : nodes_) {
    TF_LITE_ENSURE_STATUS(
        context->GetNodeAndRegistration(context, node_index, &node, &reg));
    // Const inputs needs to be added to the hexagon graph as const nodes.
    // Adding them earlier here to the graph
    // - Simplifies separate builders
    // - Simplifies int8 vs uint8 cases, builders don't need to handle them.
    for (int i = 0; i < node->inputs->size; ++i) {
      const int tensor_id = node->inputs->data[i];
      if (tensor_id == -1) continue;  // Optional input that is absent.
      const auto& input_tensor = context->tensors[tensor_id];
      if (input_tensor.allocation_type == kTfLiteMmapRo) {
        // int8 const data is widened to uint8 here so individual op
        // builders never have to special-case the two types.
        builder_->AddConstNodeWithData(
            tensor_id, input_tensor,
            /*int8_to_uint8*/ (input_tensor.type == kTfLiteInt8));
      }
    }
    // Translate the op itself, then wire up its inputs/outputs.
    auto* op_builder =
        builder_->AddNodeFromTfLiteOp(reg->builtin_code, node, node_index);
    TF_LITE_ENSURE_STATUS(
        op_builder->PopulateSubGraph(node->inputs, node->outputs, context));
    TF_LITE_ENSURE_STATUS(op_builder->RegisterOutputs(node->outputs, context));
  }

  // Add Outputs.
  TF_LITE_ENSURE_STATUS(builder_->AddOutputTensors(output_tensors, context));

  builder_->Build();

  return kTfLiteOk;
}
290
~HexagonDelegateKernel()291 HexagonDelegateKernel::~HexagonDelegateKernel() {
292 if (graph_id_ != -1) {
293 hexagon_nn_->hexagon_nn_teardown(graph_id_);
294 }
295 }
296
PrintLog()297 void HexagonDelegateKernel::PrintLog() {
298 std::vector<unsigned char> buf(3000000);
299 time_t my_time = time(nullptr);
300 hexagon_nn_->hexagon_nn_getlog(graph_id_, buf.data(), buf.size());
301 printf("----------------\n");
302 printf("Timestamp: %s\n\n", ctime(&my_time));
303 printf("Log\n%s\n", buf.data());
304 printf("----------------\n");
305 fflush(stdout);
306 }
307
PrintPerformanceData(Profiler * profiler)308 void HexagonDelegateKernel::PrintPerformanceData(Profiler* profiler) {
309 if (profiler == nullptr) {
310 return;
311 }
312 const int kMaxNodes = 2048;
313 const int kMaxNameLen = 100;
314 std::vector<hexagon_nn_perfinfo> perf_data(kMaxNodes);
315 std::vector<char> op_name(kMaxNameLen);
316 uint64_t counter = 0;
317 unsigned int num_nodes;
318 if (hexagon_nn_->hexagon_nn_get_perfinfo(graph_id_, perf_data.data(),
319 kMaxNodes, &num_nodes) != 0) {
320 printf("Failed fetching perf data.\n");
321 return;
322 }
323 for (int i = 0; i < num_nodes; i++) {
324 counter = GetCycles(perf_data[i]);
325 int op_type_id = builder_->GetOpTypeId(perf_data[i].node_id);
326 if (op_type_id >= 0 && hexagon_nn_->hexagon_nn_op_id_to_name(
327 op_type_id, op_name.data(), kMaxNameLen) != 0) {
328 printf("Failed to fetch name for %u with type %d\n", perf_data[i].node_id,
329 op_type_id);
330 continue;
331 }
332 int node_id = builder_->GetTFLiteNodeID(perf_data[i].node_id);
333 if (node_id != -1 && op_type_id >= 0) {
334 profiler->AddEvent((op_type_id < 0 ? "" : op_name.data()),
335 Profiler::EventType::OPERATOR_INVOKE_EVENT, counter,
336 node_id);
337 }
338 }
339 }
340
PrintDebuggingGraph()341 void HexagonDelegateKernel::PrintDebuggingGraph() {
342 const int kMaxBufLen = 100000;
343 std::vector<unsigned char> buf(kMaxBufLen);
344 if (hexagon_nn_->hexagon_nn_snpprint(graph_id_, buf.data(), kMaxBufLen) !=
345 0) {
346 printf("Error fetching graph debug details.\n");
347 return;
348 }
349 printf("------- Graph Debugging Start -------\n");
350 printf("%s\n", buf.data());
351 printf("------- Graph Debugging End -------\n");
352 }
353
Teardown()354 void HexagonDelegateKernel::Teardown() {
355 auto* hexagon_nn = HexagonNNImplementation();
356 if (hexagon_nn != nullptr) {
357 hexagon_nn->hexagon_nn_global_teardown();
358 }
359 }
360
InitState()361 void HexagonDelegateKernel::InitState() {
362 auto* hexagon_nn = HexagonNNImplementation();
363 if (hexagon_nn != nullptr) {
364 hexagon_nn->hexagon_nn_global_init();
365 }
366 }
367 } // namespace tflite
368