/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "ruy/profiler/profiler.h"  // from @ruy
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/profiling/profile_summary_formatter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/benchmark/profiling_listener.h"
#include "tensorflow/lite/tools/delegates/delegate_provider.h"
#include "tensorflow/lite/tools/logging.h"
#include "tensorflow/lite/tools/utils.h"

void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);

// Version with Weak linker attribute doing nothing: if someone links this
// library with another definition of this function (presumably to actually
// register custom ops), that version will be used instead.
void ABSL_ATTRIBUTE_WEAK
RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {}

namespace tflite {
namespace benchmark {
namespace {
using utils::InputTensorData;
using utils::VoidUniquePtr;

// Backward compat with previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif

// Dumps ruy profiling events if the ruy profiler is enabled.
class RuyProfileListener : public BenchmarkListener {
 public:
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_;
};

void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) {
  ruy_profile_ = std::make_unique<ruy::profiler::ScopeProfile>();
}

void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  ruy_profile_ = nullptr;
}

class InterpreterStatePrinter : public BenchmarkListener {
 public:
  explicit InterpreterStatePrinter(Interpreter* interpreter)
      : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
    if (params_->Get<bool>("print_preinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter pre-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter pre-invoke "
                          "state ends====\n";
    }
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    if (params_->Get<bool>("print_postinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter post-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter post-invoke "
                          "state ends====\n";
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;  // Doesn't own the memory.
  const BenchmarkParams* params_ = nullptr;   // Doesn't own the memory.
};

class OutputSaver : public BenchmarkListener {
 public:
  explicit OutputSaver(Interpreter* interpreter) : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    std::string path = params_->Get<std::string>("output_filepath");
    if (path.empty()) return;

    std::ofstream ofs(path, std::ofstream::out);
    if (ofs.good()) {
      for (int i = 0; i < interpreter_->outputs().size(); i++) {
        ofs.write(interpreter_->output_tensor(i)->data.raw,
                  interpreter_->output_tensor(i)->bytes);
      }
      ofs.close();
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;
  const BenchmarkParams* params_ = nullptr;
};

std::vector<std::string> Split(const std::string& str, const char delim) {
  if (str.empty()) {
    return {};
  }
  return absl::StrSplit(str, delim);
}

int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
  for (size_t i = 0; i < dim_array->size; i++) {
    num_elements *= dim_array->data[i];
  }
  return num_elements;
}

void FillRandomString(tflite::DynamicBuffer* buffer,
                      const TfLiteIntArray* dim_array,
                      const std::function<std::string()>& random_func) {
  int num_elements = GetNumElements(dim_array);
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info,
                       const std::string& input_name,
                       const string& names_string) {
  for (int i = 0; i < info->size(); ++i) {
    if (info->at(i).name == input_name) {
      return i;
    }
  }
  TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name("
                    << input_name << ") in --input_layer as " << names_string;
  return -1;
}

TfLiteStatus PopulateInputValueRanges(
    const std::string& names_string, const std::string& value_ranges_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_ranges = Split(value_ranges_string, ':');
  for (const auto& val : value_ranges) {
    std::vector<std::string> name_range = Split(val, ',');
    if (name_range.size() != 3) {
      TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string);

    // Parse the range value.
    int low, high;
    bool has_low = absl::SimpleAtoi(name_range[1], &low);
    bool has_high = absl::SimpleAtoi(name_range[2], &high);
    if (!has_low || !has_high || low > high) {
      TFLITE_LOG(ERROR)
          << "Wrong low and high value of the input value range specified: "
          << val;
      return kTfLiteError;
    }
    info->at(layer_info_idx).has_value_range = true;
    info->at(layer_info_idx).low = low;
    info->at(layer_info_idx).high = high;
  }
  return kTfLiteOk;
}

TfLiteStatus PopulateInputValueFiles(
    const std::string& names_string, const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_files = Split(value_files_string, ',');
  for (const auto& val : value_files) {
    std::pair<std::string, std::string> name_file_pair;
    TfLiteStatus status = SplitInputLayerNameAndValueFile(val, name_file_pair);
    if (status != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val;
      TFLITE_LOG(ERROR) << status;
      return status;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx =
        FindLayerInfoIndex(info, name_file_pair.first, names_string);
    if (info->at(layer_info_idx).has_value_range) {
      TFLITE_LOG(WARN)
          << "The input_name:" << info->at(layer_info_idx).name
          << " appears both in input_layer_value_files and "
             "input_layer_value_range. The input_layer_value_range of the "
             "input_name will be ignored.";
    }
    info->at(layer_info_idx).input_file_path = name_file_pair.second;
  }
  return kTfLiteOk;
}

TfLiteStatus PopulateInputLayerInfo(
    const std::string& names_string, const std::string& shapes_string,
    const std::string& value_ranges_string,
    const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  info->clear();
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return kTfLiteError;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return kTfLiteError;
      }
    }
  }

  // Populate input value range if it's specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueRanges(names_string, value_ranges_string, info));

  // Populate input value files if it's specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueFiles(names_string, value_files_string, info));

  return kTfLiteOk;
}

std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
  return format_as_csv
             ? std::make_shared<profiling::ProfileSummaryCSVFormatter>()
             : std::make_shared<profiling::ProfileSummaryDefaultFormatter>();
}

}  // namespace

TfLiteStatus SplitInputLayerNameAndValueFile(
    const std::string& name_and_value_file,
    std::pair<std::string, std::string>& name_file_pair) {
  // 1. Split the string by ':', ignoring escaped characters.
  int delim_index = -1;
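  // Start scanning at index 1 because the check below looks back one
  // character to see whether a ':' is escaped with a preceding '\\'.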
  for (int i = 1; i < name_and_value_file.length(); ++i) {
    if (name_and_value_file[i] == ':' && name_and_value_file[i - 1] != '\\') {
      if (delim_index == -1) {
        delim_index = i;
      } else {
        TFLITE_LOG(ERROR) << name_and_value_file
                          << " contains more than one delimiter.";
        return kTfLiteError;
      }
    }
  }
  if (delim_index == -1) {
    TFLITE_LOG(ERROR) << name_and_value_file
                      << " doesn't contain any delimiter.";
    return kTfLiteError;
  }
  // 2. Replace the escaped "\:" sequences with ":".
  name_file_pair.first = absl::StrReplaceAll(
      name_and_value_file.substr(0, delim_index), {{"\\:", ":"}});
  name_file_pair.second = absl::StrReplaceAll(
      name_and_value_file.substr(delim_index + 1), {{"\\:", ":"}});
  return kTfLiteOk;
}

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_range",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_files",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("require_full_delegation",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam(
      "enable_op_profiling",
      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
  default_params.AddParam("max_profiling_buffer_entries",
                          BenchmarkParam::Create<int32_t>(1024));
  default_params.AddParam("allow_dynamic_profiling_buffer_increase",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("profiling_output_csv_file",
                          BenchmarkParam::Create<std::string>(""));

  default_params.AddParam("print_preinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("print_postinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("release_dynamic_tensors",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("optimize_memory_for_large_tensors",
                          BenchmarkParam::Create<int32_t>(0));
  default_params.AddParam("output_filepath",
                          BenchmarkParam::Create<std::string>(""));

  tools::ProvidedDelegateList delegate_providers(&default_params);
  delegate_providers.AddAllDelegateParams();

  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)),
      random_engine_(std::random_device()()) {
  AddListener(&log_output_);
}

void BenchmarkTfLiteModel::CleanUp() {
  // Free up any tensor data pre-allocated during PrepareInputData.
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() {
  CleanUp();

  // Destroy the owned interpreter earlier than other objects (especially
  // 'owned_delegates_').
  interpreter_.reset();
}

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<std::string>(
          "input_layer_value_range", &params_,
          "A map-like string representing value range for *integer* input "
          "layers. Each item is separated by ':', and the item value consists "
          "of input layer name and integer-only range values (both low and "
          "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"),
      CreateFlag<std::string>(
          "input_layer_value_files", &params_,
          "A map-like string representing value file. Each item is separated "
          "by ',', and the item value consists "
          "of input layer name and value file path separated by ':', e.g. "
          "input1:file_path1,input2:file_path2. In case the input layer name "
          "contains ':' e.g. \"input:0\", escape it with \"\\:\". If the "
          "input_name appears both in input_layer_value_range and "
          "input_layer_value_files, input_layer_value_range of the input_name "
          "will be ignored. The file format is binary and it should be in "
          "array format or null-separated strings format."),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
      CreateFlag<bool>("require_full_delegation", &params_,
                       "require delegate to run the entire graph"),
      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling"),
      CreateFlag<int32_t>("max_profiling_buffer_entries", &params_,
                          "max initial profiling buffer entries"),
      CreateFlag<bool>("allow_dynamic_profiling_buffer_increase", &params_,
                       "allow dynamic increase on profiling buffer entries"),
      CreateFlag<std::string>(
          "profiling_output_csv_file", &params_,
          "File path to export profile data as CSV; if not set, "
          "prints to stdout."),
      CreateFlag<bool>(
          "print_preinvoke_state", &params_,
          "print out the interpreter internals just before calling Invoke. "
          "The internals will include allocated memory size of each tensor "
          "etc."),
      CreateFlag<bool>(
          "print_postinvoke_state", &params_,
          "print out the interpreter internals just before the benchmark "
          "completes (i.e. after all repeated Invoke calls complete). The "
          "internals will include allocated memory size of each tensor etc."),
      CreateFlag<bool>("release_dynamic_tensors", &params_,
                       "Ensure dynamic tensors' memory is released when they "
                       "are not used."),
      CreateFlag<int32_t>(
          "optimize_memory_for_large_tensors", &params_,
          "Optimize memory usage for large tensors by sacrificing latency."),
      CreateFlag<std::string>(
          "output_filepath", &params_,
          "File path to export output layer values as binary data.")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());

  tools::ProvidedDelegateList delegate_providers(&params_);
  delegate_providers.AppendCmdlineFlags(flags);

  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  const bool verbose = params_.Get<bool>("verbose");
  // Always log the value of --graph.
  LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true);
  LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range",
                      "Input value ranges", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files",
                      "Input value files", verbose);

  LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose);
  LOG_BENCHMARK_PARAM(bool, "require_full_delegation",
                      "Require full delegation", verbose);
  LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling",
                      verbose);
  LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries",
                      "Max initial profiling buffer entries", verbose);
  LOG_BENCHMARK_PARAM(bool, "allow_dynamic_profiling_buffer_increase",
                      "Allow dynamic increase on profiling buffer entries",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file",
                      "CSV File to export profiling data to", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_preinvoke_state",
                      "Print pre-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_postinvoke_state",
                      "Print post-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "release_dynamic_tensors",
                      "Release dynamic tensor memory", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "optimize_memory_for_large_tensors",
                      "Optimize memory usage for large tensors", verbose);
  LOG_BENCHMARK_PARAM(std::string, "output_filepath",
                      "File path to export output layer values to", verbose);

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    delegate_provider->LogParams(params_, verbose);
  }
}

TfLiteStatus BenchmarkTfLiteModel::ValidateParams() {
  TF_LITE_ENSURE_STATUS(BenchmarkModel::ValidateParams());

  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return kTfLiteError;
  }

  return PopulateInputLayerInfo(
      params_.Get<std::string>("input_layer"),
      params_.Get<std::string>("input_layer_shape"),
      params_.Get<std::string>("input_layer_value_range"),
      params_.Get<std::string>("input_layer_value_files"), &inputs_);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_TOOLS_CHECK(interpreter_);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter_->inputs()) {
    auto* t = interpreter_->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {
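  // Opening with std::ios::ate positions the stream at the end of the file,
  // so tellg() below reports the model file size in bytes (or -1 if the file
  // could not be opened).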
  std::ifstream in_file(params_.Get<std::string>("graph"),
                        std::ios::binary | std::ios::ate);
  return in_file.tellg();
}

InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
    const TfLiteTensor& t, const std::string& input_file_path) {
  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
    t_data.data = VoidUniquePtr(
        static_cast<void*>(new tflite::DynamicBuffer()),
        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
    if (input_file_path.size() > 3 &&
        input_file_path.substr(input_file_path.size() - 3) == ".pb") {
      // If the input file is a ".pb" file, read its data as a single binary
      // blob.
      std::stringstream buffer;
      buffer << value_file.rdbuf();
      static_cast<DynamicBuffer*>(t_data.data.get())
          ->AddString(buffer.str().data(), buffer.str().length());
      TFLITE_LOG(INFO) << "Read " << buffer.str().length()
                       << " bytes data from " << input_file_path << ".";
    } else {
      // Read the input as text.
      std::string line;
      size_t num_line = 0;
      // Read lines using '\0' as the delimiter.
      while (std::getline(value_file, line, '\0')) {
        num_line++;
        static_cast<DynamicBuffer*>(t_data.data.get())
            ->AddString(line.data(), line.length());
      }
      int num_elements = GetNumElements(t.dims);
      if (num_line != num_elements) {
        TFLITE_LOG(FATAL)
            << "The number of strings in the input_layer_value_file("
            << input_file_path << ") is " << num_line << ". It should be "
            << num_elements << ".";
      }
    }
  } else {
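    // For non-string tensors, the file must contain exactly t.bytes of raw
    // binary data, which is copied verbatim into the input buffer.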
    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
    t_data.data =
        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
  }
  return t_data;
}

InputTensorData BenchmarkTfLiteModel::CreateRandomTensorData(
    const TfLiteTensor& t, const InputLayerInfo* layer_info) {
  float low_range = 0;
  float high_range = 0;
  if (layer_info && layer_info->has_value_range) {
    low_range = layer_info->low;
    high_range = layer_info->high;
  } else {
    utils::GetDataRangesForType(t.type, &low_range, &high_range);
  }
  return utils::CreateRandomTensorData(t, low_range, high_range);
}

TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() {
  CleanUp();

  // Note that the correspondence between 'interpreter_inputs' and 'inputs_'
  // (i.e. the specified input layer info) has already been checked in
  // BenchmarkTfLiteModel::Init() before this function is called. So we simply
  // use the corresponding input layer info to initialize the input data
  // values properly.
  auto interpreter_inputs = interpreter_->inputs();
  for (int i = 0; i < interpreter_inputs.size(); ++i) {
    int tensor_index = interpreter_inputs[i];
    const TfLiteTensor& t = *(interpreter_->tensor(tensor_index));
    const InputLayerInfo* input_layer_info = nullptr;
    // Note that when input layer parameters (i.e. --input_layer,
    // --input_layer_shape) are not specified, inputs_ is empty.
    if (!inputs_.empty()) input_layer_info = &inputs_[i];

    InputTensorData t_data;
    if (input_layer_info && !input_layer_info->input_file_path.empty()) {
      t_data = LoadInputTensorData(t, input_layer_info->input_file_path);
    } else {
      t_data = CreateRandomTensorData(t, input_layer_info);
    }
    inputs_data_.push_back(std::move(t_data));
  }
  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter_->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type == kTfLiteString) {
      if (inputs_data_[j].data) {
        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
            ->WriteToTensor(t, /*new_shape=*/nullptr);
      } else {
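        // No pre-loaded data for this string tensor, so fill every element
        // with a fixed placeholder string.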
        tflite::DynamicBuffer buffer;
        FillRandomString(&buffer, t->dims, []() {
          return "we're have some friends over saturday to hang out in the "
                 "yard";
        });
        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
      }
    } else {
      std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                  inputs_data_[j].bytes);
    }
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() {
  auto resolver = GetOpResolver();
  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  const bool use_caching = params_.Get<bool>("use_caching");

  tflite::InterpreterBuilder builder(*model_, *resolver);
  if (builder.SetNumThreads(num_threads) != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to set thread number";
    return kTfLiteError;
  }

  builder(&interpreter_);
  if (!interpreter_) {
    TFLITE_LOG(ERROR) << "Failed to initialize the interpreter";
    return kTfLiteError;
  }
  // Manually enable caching behavior in the TF Lite interpreter.
  if (use_caching) {
    external_context_ = std::make_unique<tflite::ExternalCpuBackendContext>();
    std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context(
        new tflite::CpuBackendContext());
    cpu_backend_context->SetUseCaching(true);
    cpu_backend_context->SetMaxNumThreads(num_threads);
    external_context_->set_internal_backend_context(
        std::move(cpu_backend_context));
    interpreter_->SetExternalContext(kTfLiteCpuBackendContext,
                                     external_context_.get());
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::Init() {
  TF_LITE_ENSURE_STATUS(LoadModel());
  TF_LITE_ENSURE_STATUS(InitInterpreter());

  // Install profilers if necessary right after the interpreter is created so
  // that any memory allocations inside the TFLite runtime can be recorded if
  // the installed profiler profiles memory usage information.

  // Adjust "max_profiling_buffer_entries" according to the loaded model.
  int total_nodes = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); ++i) {
    // subgraph(...) is a non-const member method.
    total_nodes += static_cast<int>(interpreter_->subgraph(i)->nodes_size());
  }
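  // Grow the profiling buffer so that it can hold at least one entry per
  // node, plus some headroom for additional profiling events.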
  if (total_nodes > params_.Get<int32_t>("max_profiling_buffer_entries")) {
    constexpr int kProfilingBufferHeadrooms = 512;
    params_.Set<int32_t>("max_profiling_buffer_entries",
                         total_nodes + kProfilingBufferHeadrooms);
  }

  AddOwnedListener(MayCreateProfilingListener());
  AddOwnedListener(std::unique_ptr<BenchmarkListener>(
      new InterpreterStatePrinter(interpreter_.get())));

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  InterpreterOptions options;
  options.SetEnsureDynamicTensorsAreReleased(
      params_.Get<bool>("release_dynamic_tensors"));
  options.OptimizeMemoryForLargeTensors(
      params_.Get<int32_t>("optimize_memory_for_large_tensors"));
  interpreter_->ApplyOptions(&options);

  owned_delegates_.clear();

  // Contains the ids of all TfLiteNodes that have been checked to see whether
  // they are delegated or not.
  std::unordered_set<int> checked_node_ids;
  tools::ProvidedDelegateList delegate_providers(&params_);
  auto created_delegates = delegate_providers.CreateAllRankedDelegates();
  TFLITE_MAY_LOG(INFO, (created_delegates.size() >= 2))
      << "Going to apply " << created_delegates.size()
      << " delegates one after another.";
  for (auto& created_delegate : created_delegates) {
    const auto* delegate_provider = created_delegate.provider;
    TfLiteDelegate* delegate = created_delegate.delegate.get();
    TFLITE_TOOLS_CHECK(delegate != nullptr)
        << "The delegate created by the delegate provider should not be "
           "nullptr!";
    // The interpreter becomes dependent on the delegate once the delegate is
    // used, so the order of destruction must be interpreter first, delegate
    // later.
    // Move the delegate to the list of owned delegates to guarantee that.
    owned_delegates_.emplace_back(std::move(created_delegate.delegate));
    if (interpreter_->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName()
                        << " delegate.";
      return kTfLiteError;
    } else {
      // Ideally, such delegate info should already be computed when the
      // delegate is being applied to the model graph.
      int num_delegated_kernels = 0;
      for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
        int node_id = interpreter_->execution_plan()[i];
        if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
          continue;
        }
        const TfLiteNode& node =
            interpreter_->node_and_registration(node_id)->first;

        // Note that the 'delegate' here could be an ExternalDelegateWrapper
        // object that wraps an actual external delegate, in which case
        // 'node.delegate' will be different from 'delegate' because
        // 'node.delegate' refers to the actual external delegate.
        if (node.delegate != nullptr) {
          num_delegated_kernels++;
          checked_node_ids.insert(node_id);
        }
      }
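      // The graph counts as fully delegated only when the execution plan has
      // collapsed to a single node and that node is handled by a delegate.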
      bool fully_delegated = (num_delegated_kernels == 1 &&
                              interpreter_->execution_plan().size() == 1);

      if (params_.Get<bool>("require_full_delegation") && !fully_delegated) {
        TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected.";
        return kTfLiteError;
      }
      if (fully_delegated) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be completely"
                         << " executed by the delegate.";
      } else if (num_delegated_kernels > 0) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be partially"
                         << " executed by the delegate w/ "
                         << num_delegated_kernels << " delegate kernels.";
      } else {
        TFLITE_LOG(INFO)
            << "Though " << delegate_provider->GetName()
            << " delegate is explicitly applied, the model graph will not be"
            << " executed by the delegate.";
      }
    }
  }

  auto interpreter_inputs = interpreter_->inputs();

  if (!inputs_.empty()) {
    TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << inputs_.size()
        << " expected: " << interpreter_inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }

    if (t->type != kTfLiteString && input.shape.size() != t->dims->size) {
      TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have "
                        << t->dims->size << " dimensions!";
      return kTfLiteError;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to allocate tensors!";
    return kTfLiteError;
  }

  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new RuyProfileListener()));
  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new OutputSaver(interpreter_.get())));

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::LoadModel() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(ERROR) << "Failed to mmap model " << graph;
    return kTfLiteError;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  return kTfLiteOk;
}

std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver()
    const {
  tflite::ops::builtin::BuiltinOpResolver* resolver = nullptr;
  // When --use_xnnpack is explicitly set to false, skip applying the default
  // XNNPACK delegate in the TfLite runtime so that the original execution
  // path based on the unmodified model graph is still exercised.
  if (params_.HasParam("use_xnnpack") &&
      params_.HasValueSet<bool>("use_xnnpack") &&
      !params_.Get<bool>("use_xnnpack")) {
    resolver =
        new tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates();
  } else {
    resolver = new tflite::ops::builtin::BuiltinOpResolver();
  }
  RegisterSelectedOps(resolver);
  return std::unique_ptr<tflite::OpResolver>(resolver);
}

std::unique_ptr<BenchmarkListener>
BenchmarkTfLiteModel::MayCreateProfilingListener() const {
  if (!params_.Get<bool>("enable_op_profiling")) return nullptr;

  return std::unique_ptr<BenchmarkListener>(new ProfilingListener(
      interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"),
      params_.Get<bool>("allow_dynamic_profiling_buffer_increase"),
      params_.Get<std::string>("profiling_output_csv_file"),
      CreateProfileSummaryFormatter(
          !params_.Get<std::string>("profiling_output_csv_file").empty())));
}

TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); }

}  // namespace benchmark
}  // namespace tflite