/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "ruy/profiler/profiler.h"  // from @ruy
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/profiling/profile_summary_formatter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/benchmark/profiling_listener.h"
#include "tensorflow/lite/tools/delegates/delegate_provider.h"
#include "tensorflow/lite/tools/logging.h"
#include "tensorflow/lite/tools/utils.h"

void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);

// Version with Weak linker attribute doing nothing: if someone links this
// library with another definition of this function (presumably to actually
// register custom ops), that version will be used instead.
void ABSL_ATTRIBUTE_WEAK
RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {}

namespace tflite {
namespace benchmark {
namespace {
using utils::InputTensorData;
using utils::VoidUniquePtr;

// Backward compat with previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif

// Dumps ruy profiling events if the ruy profiler is enabled.
class RuyProfileListener : public BenchmarkListener {
 public:
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_;
};

void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) {
  ruy_profile_ = std::make_unique<ruy::profiler::ScopeProfile>();
}

void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  ruy_profile_ = nullptr;
}

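// Prints the interpreter's internal state (e.g. each tensor's allocation)
// right before and/or right after Invoke, as requested via the
// --print_preinvoke_state and --print_postinvoke_state flags.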
class InterpreterStatePrinter : public BenchmarkListener {
 public:
  explicit InterpreterStatePrinter(Interpreter* interpreter)
      : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
    if (params_->Get<bool>("print_preinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter pre-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter pre-invoke "
                          "state ends====\n";
    }
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    if (params_->Get<bool>("print_postinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter post-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter post-invoke "
                          "state ends====\n";
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;  // Does not own the memory.
  const BenchmarkParams* params_ = nullptr;   // Does not own the memory.
};

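// Writes the raw bytes of every output tensor, concatenated in output order,
// to the file given by --output_filepath once the benchmark run ends.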
class OutputSaver : public BenchmarkListener {
 public:
  explicit OutputSaver(Interpreter* interpreter) : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    std::string path = params_->Get<std::string>("output_filepath");
    if (path.empty()) return;

    std::ofstream ofs(path, std::ofstream::out);
    if (ofs.good()) {
      for (int i = 0; i < interpreter_->outputs().size(); i++) {
        ofs.write(interpreter_->output_tensor(i)->data.raw,
                  interpreter_->output_tensor(i)->bytes);
      }
      ofs.close();
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;
  const BenchmarkParams* params_ = nullptr;
};

std::vector<std::string> Split(const std::string& str, const char delim) {
  if (str.empty()) {
    return {};
  }
  return absl::StrSplit(str, delim);
}

int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
  for (size_t i = 0; i < dim_array->size; i++) {
    num_elements *= dim_array->data[i];
  }
  return num_elements;
}

void FillRandomString(tflite::DynamicBuffer* buffer,
                      const TfLiteIntArray* dim_array,
                      const std::function<std::string()>& random_func) {
  int num_elements = GetNumElements(dim_array);
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

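// Returns the index within 'info' of the entry named 'input_name', or logs a
// FATAL error if no such entry exists.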
int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info,
                       const std::string& input_name,
                       const string& names_string) {
  for (int i = 0; i < info->size(); ++i) {
    if (info->at(i).name == input_name) {
      return i;
    }
  }
  TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name("
                    << input_name << ") in --input_layer as " << names_string;
  return -1;
}

TfLiteStatus PopulateInputValueRanges(
    const std::string& names_string, const std::string& value_ranges_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_ranges = Split(value_ranges_string, ':');
  for (const auto& val : value_ranges) {
    std::vector<std::string> name_range = Split(val, ',');
    if (name_range.size() != 3) {
      TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string);

    // Parse the range value.
    int low, high;
    bool has_low = absl::SimpleAtoi(name_range[1], &low);
    bool has_high = absl::SimpleAtoi(name_range[2], &high);
    if (!has_low || !has_high || low > high) {
      TFLITE_LOG(ERROR)
          << "Wrong low and high value of the input value range specified: "
          << val;
      return kTfLiteError;
    }
    info->at(layer_info_idx).has_value_range = true;
    info->at(layer_info_idx).low = low;
    info->at(layer_info_idx).high = high;
  }
  return kTfLiteOk;
}

TfLiteStatus PopulateInputValueFiles(
    const std::string& names_string, const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_files = Split(value_files_string, ',');
  for (const auto& val : value_files) {
    std::pair<std::string, std::string> name_file_pair;
    TfLiteStatus status = SplitInputLayerNameAndValueFile(val, name_file_pair);
    if (status != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val;
      TFLITE_LOG(ERROR) << status;
      return status;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx =
        FindLayerInfoIndex(info, name_file_pair.first, names_string);
    if (info->at(layer_info_idx).has_value_range) {
      TFLITE_LOG(WARN)
          << "The input_name:" << info->at(layer_info_idx).name
          << " appears both in input_layer_value_files and "
             "input_layer_value_range. The input_layer_value_range of the "
             "input_name will be ignored.";
    }
    info->at(layer_info_idx).input_file_path = name_file_pair.second;
  }
  return kTfLiteOk;
}

TfLiteStatus PopulateInputLayerInfo(
    const std::string& names_string, const std::string& shapes_string,
    const std::string& value_ranges_string,
    const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  info->clear();
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return kTfLiteError;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return kTfLiteError;
      }
    }
  }

  // Populate input value range if it's specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueRanges(names_string, value_ranges_string, info));

  // Populate input value files if it's specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueFiles(names_string, value_files_string, info));

  return kTfLiteOk;
}

std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
  return format_as_csv
             ? std::make_shared<profiling::ProfileSummaryCSVFormatter>()
             : std::make_shared<profiling::ProfileSummaryDefaultFormatter>();
}

}  // namespace

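// For example, "input\:0:/tmp/input0.bin" (a hypothetical value) splits into
// {"input:0", "/tmp/input0.bin"}: an escaped "\:" is kept as part of the name
// and un-escaped, while the first unescaped ':' acts as the delimiter.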
TfLiteStatus SplitInputLayerNameAndValueFile(
    const std::string& name_and_value_file,
    std::pair<std::string, std::string>& name_file_pair) {
  // 1. Split the string by ':', ignoring escaped characters.
  int delim_index = -1;
  for (int i = 1; i < name_and_value_file.length(); ++i) {
    if (name_and_value_file[i] == ':' && name_and_value_file[i - 1] != '\\') {
      if (delim_index == -1) {
        delim_index = i;
      } else {
        TFLITE_LOG(ERROR) << name_and_value_file
                          << " contains more than one delimiter.";
        return kTfLiteError;
      }
    }
  }
  if (delim_index == -1) {
    TFLITE_LOG(ERROR) << name_and_value_file
                      << " doesn't contain any delimiter.";
    return kTfLiteError;
  }
  // 2. Replace the escaped "\:" sequences with ":".
  name_file_pair.first = absl::StrReplaceAll(
      name_and_value_file.substr(0, delim_index), {{"\\:", ":"}});
  name_file_pair.second = absl::StrReplaceAll(
      name_and_value_file.substr(delim_index + 1), {{"\\:", ":"}});
  return kTfLiteOk;
}

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_range",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_files",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("require_full_delegation",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam(
      "enable_op_profiling",
      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
  default_params.AddParam("max_profiling_buffer_entries",
                          BenchmarkParam::Create<int32_t>(1024));
  default_params.AddParam("allow_dynamic_profiling_buffer_increase",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("profiling_output_csv_file",
                          BenchmarkParam::Create<std::string>(""));

  default_params.AddParam("print_preinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("print_postinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("release_dynamic_tensors",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("optimize_memory_for_large_tensors",
                          BenchmarkParam::Create<int32_t>(0));
  default_params.AddParam("output_filepath",
                          BenchmarkParam::Create<std::string>(""));

  tools::ProvidedDelegateList delegate_providers(&default_params);
  delegate_providers.AddAllDelegateParams();

  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)),
      random_engine_(std::random_device()()) {
  AddListener(&log_output_);
}

void BenchmarkTfLiteModel::CleanUp() {
  // Free up any tensor data that was pre-allocated during PrepareInputData.
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() {
  CleanUp();

  // Destroy the owned interpreter earlier than other objects (especially
  // 'owned_delegates_').
  interpreter_.reset();
}

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<std::string>(
          "input_layer_value_range", &params_,
          "A map-like string representing value range for *integer* input "
          "layers. Each item is separated by ':', and the item value consists "
          "of input layer name and integer-only range values (both low and "
          "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"),
      CreateFlag<std::string>(
          "input_layer_value_files", &params_,
          "A map-like string representing value file. Each item is separated "
          "by ',', and the item value consists "
          "of input layer name and value file path separated by ':', e.g. "
          "input1:file_path1,input2:file_path2. In case the input layer name "
          "contains ':' e.g. \"input:0\", escape it with \"\\:\". If the "
          "input_name appears both in input_layer_value_range and "
          "input_layer_value_files, input_layer_value_range of the input_name "
          "will be ignored. The file format is binary and it should be array "
          "format or null separated strings format."),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
      CreateFlag<bool>("require_full_delegation", &params_,
                       "require delegate to run the entire graph"),
      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling"),
      CreateFlag<int32_t>("max_profiling_buffer_entries", &params_,
                          "max initial profiling buffer entries"),
      CreateFlag<bool>("allow_dynamic_profiling_buffer_increase", &params_,
                       "allow dynamic increase on profiling buffer entries"),
      CreateFlag<std::string>(
          "profiling_output_csv_file", &params_,
          "File path to export profile data as CSV; if not set, "
          "prints to stdout."),
      CreateFlag<bool>(
          "print_preinvoke_state", &params_,
          "print out the interpreter internals just before calling Invoke. The "
          "internals will include allocated memory size of each tensor etc."),
      CreateFlag<bool>(
          "print_postinvoke_state", &params_,
          "print out the interpreter internals just before benchmark completes "
          "(i.e. after all repeated Invoke calls complete). The internals will "
          "include allocated memory size of each tensor etc."),
      CreateFlag<bool>("release_dynamic_tensors", &params_,
                       "Ensure dynamic tensors' memory is released when they "
                       "are not used."),
      CreateFlag<int32_t>(
          "optimize_memory_for_large_tensors", &params_,
          "Optimize memory usage for large tensors, sacrificing some latency."),
      CreateFlag<std::string>(
          "output_filepath", &params_,
          "File path to export output layers as binary data.")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());

  tools::ProvidedDelegateList delegate_providers(&params_);
  delegate_providers.AppendCmdlineFlags(flags);

  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  const bool verbose = params_.Get<bool>("verbose");
  // Always log the value of --graph.
  LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true);
  LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range",
                      "Input value ranges", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files",
                      "Input value files", verbose);

  LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose);
  LOG_BENCHMARK_PARAM(bool, "require_full_delegation",
                      "Require full delegation", verbose);
  LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling",
                      verbose);
  LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries",
                      "Max initial profiling buffer entries", verbose);
  LOG_BENCHMARK_PARAM(bool, "allow_dynamic_profiling_buffer_increase",
                      "Allow dynamic increase on profiling buffer entries",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file",
                      "CSV file to export profiling data to", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_preinvoke_state",
                      "Print pre-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_postinvoke_state",
                      "Print post-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "release_dynamic_tensors",
                      "Release dynamic tensor memory", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "optimize_memory_for_large_tensors",
                      "Optimize memory usage for large tensors", verbose);
  LOG_BENCHMARK_PARAM(std::string, "output_filepath",
                      "File path to export output layers to", verbose);

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    delegate_provider->LogParams(params_, verbose);
  }
}

TfLiteStatus BenchmarkTfLiteModel::ValidateParams() {
  TF_LITE_ENSURE_STATUS(BenchmarkModel::ValidateParams());

  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return kTfLiteError;
  }

  return PopulateInputLayerInfo(
      params_.Get<std::string>("input_layer"),
      params_.Get<std::string>("input_layer_shape"),
      params_.Get<std::string>("input_layer_value_range"),
      params_.Get<std::string>("input_layer_value_files"), &inputs_);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_TOOLS_CHECK(interpreter_);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter_->inputs()) {
    auto* t = interpreter_->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {
  std::ifstream in_file(params_.Get<std::string>("graph"),
                        std::ios::binary | std::ios::ate);
  return in_file.tellg();
}

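// Loads the contents of 'input_file_path' into a buffer for tensor 't'. For
// string tensors, a ".pb" file is read as one binary string and any other
// file as '\0'-separated strings; for all other types the file's raw bytes
// are read directly and must match the tensor size.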
InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
    const TfLiteTensor& t, const std::string& input_file_path) {
  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
    t_data.data = VoidUniquePtr(
        static_cast<void*>(new tflite::DynamicBuffer()),
        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
    if (input_file_path.size() > 3 &&
        input_file_path.substr(input_file_path.size() - 3) == ".pb") {
      // If the input file is a ".pb" file, read its data as binary.
      std::stringstream buffer;
      buffer << value_file.rdbuf();
      static_cast<DynamicBuffer*>(t_data.data.get())
          ->AddString(buffer.str().data(), buffer.str().length());
      TFLITE_LOG(INFO) << "Read " << buffer.str().length()
                       << " bytes data from " << input_file_path << ".";
    } else {
      // Read the input as text.
      std::string line;
      size_t num_line = 0;
      // Read each line, using '\0' as the delimiter.
      while (std::getline(value_file, line, '\0')) {
        num_line++;
        static_cast<DynamicBuffer*>(t_data.data.get())
            ->AddString(line.data(), line.length());
      }
      int num_elements = GetNumElements(t.dims);
      if (num_line != num_elements) {
        TFLITE_LOG(FATAL)
            << "The number of strings in the input_layer_value_file("
            << input_file_path << ") is " << num_line << ". It should be "
            << num_elements << ".";
      }
    }
  } else {
    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
    t_data.data =
        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
  }
  return t_data;
}

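// Fills tensor 't' with random data, using the value range specified for the
// corresponding input layer when available and the default range for the
// tensor's type otherwise.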
InputTensorData BenchmarkTfLiteModel::CreateRandomTensorData(
    const TfLiteTensor& t, const InputLayerInfo* layer_info) {
  float low_range = 0;
  float high_range = 0;
  if (layer_info && layer_info->has_value_range) {
    low_range = layer_info->low;
    high_range = layer_info->high;
  } else {
    utils::GetDataRangesForType(t.type, &low_range, &high_range);
  }
  return utils::CreateRandomTensorData(t, low_range, high_range);
}

TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() {
  CleanUp();

  // Note the corresponding relation between 'interpreter_inputs' and 'inputs_'
  // (i.e. the specified input layer info) has been checked in
  // BenchmarkTfLiteModel::Init() before calling this function. So, we simply
  // use the corresponding input layer info to initialize the input data value
  // properly.
  auto interpreter_inputs = interpreter_->inputs();
  for (int i = 0; i < interpreter_inputs.size(); ++i) {
    int tensor_index = interpreter_inputs[i];
    const TfLiteTensor& t = *(interpreter_->tensor(tensor_index));
    const InputLayerInfo* input_layer_info = nullptr;
    // Note that when input layer parameters (i.e. --input_layer,
    // --input_layer_shape) are not specified, inputs_ is empty.
    if (!inputs_.empty()) input_layer_info = &inputs_[i];

    InputTensorData t_data;
    if (input_layer_info && !input_layer_info->input_file_path.empty()) {
      t_data = LoadInputTensorData(t, input_layer_info->input_file_path);
    } else {
      t_data = CreateRandomTensorData(t, input_layer_info);
    }
    inputs_data_.push_back(std::move(t_data));
  }
  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter_->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type == kTfLiteString) {
      if (inputs_data_[j].data) {
        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
            ->WriteToTensor(t, /*new_shape=*/nullptr);
      } else {
        tflite::DynamicBuffer buffer;
        FillRandomString(&buffer, t->dims, []() {
          return "we're have some friends over saturday to hang out in the "
                 "yard";
        });
        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
      }
    } else {
      std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                  inputs_data_[j].bytes);
    }
  }

  return kTfLiteOk;
}

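// Builds the interpreter from the loaded model with the requested number of
// threads and, if --use_caching is set, attaches an external CPU backend
// context with caching enabled.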
TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() {
  auto resolver = GetOpResolver();
  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  const bool use_caching = params_.Get<bool>("use_caching");

  tflite::InterpreterBuilder builder(*model_, *resolver);
  if (builder.SetNumThreads(num_threads) != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to set thread number";
    return kTfLiteError;
  }

  builder(&interpreter_);
  if (!interpreter_) {
    TFLITE_LOG(ERROR) << "Failed to initialize the interpreter";
    return kTfLiteError;
  }
  // Manually enable caching behavior in TF Lite interpreter.
  if (use_caching) {
    external_context_ = std::make_unique<tflite::ExternalCpuBackendContext>();
    std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context(
        new tflite::CpuBackendContext());
    cpu_backend_context->SetUseCaching(true);
    cpu_backend_context->SetMaxNumThreads(num_threads);
    external_context_->set_internal_backend_context(
        std::move(cpu_backend_context));
    interpreter_->SetExternalContext(kTfLiteCpuBackendContext,
                                     external_context_.get());
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::Init() {
  TF_LITE_ENSURE_STATUS(LoadModel());
  TF_LITE_ENSURE_STATUS(InitInterpreter());

  // Install profilers if necessary right after the interpreter is created so
  // that any memory allocations inside the TFLite runtime can be recorded if
  // the installed profiler profiles memory usage information.

  // Adjust "max_profiling_buffer_entries" according to the loaded model.
  int total_nodes = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); ++i) {
    // subgraph(...) is a non-const member method.
    total_nodes += static_cast<int>(interpreter_->subgraph(i)->nodes_size());
  }
  if (total_nodes > params_.Get<int32_t>("max_profiling_buffer_entries")) {
    constexpr int kProfilingBufferHeadrooms = 512;
    params_.Set<int32_t>("max_profiling_buffer_entries",
                         total_nodes + kProfilingBufferHeadrooms);
  }

  AddOwnedListener(MayCreateProfilingListener());
  AddOwnedListener(std::unique_ptr<BenchmarkListener>(
      new InterpreterStatePrinter(interpreter_.get())));

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  InterpreterOptions options;
  options.SetEnsureDynamicTensorsAreReleased(
      params_.Get<bool>("release_dynamic_tensors"));
  options.OptimizeMemoryForLargeTensors(
      params_.Get<int32_t>("optimize_memory_for_large_tensors"));
  interpreter_->ApplyOptions(&options);

  owned_delegates_.clear();

  // Contains the ids of all TfLiteNodes that have been checked to see whether
  // they are delegated or not.
  std::unordered_set<int> checked_node_ids;
  tools::ProvidedDelegateList delegate_providers(&params_);
  auto created_delegates = delegate_providers.CreateAllRankedDelegates();
  TFLITE_MAY_LOG(INFO, (created_delegates.size() >= 2))
      << "Going to apply " << created_delegates.size()
      << " delegates one after another.";
  for (auto& created_delegate : created_delegates) {
    const auto* delegate_provider = created_delegate.provider;
    TfLiteDelegate* delegate = created_delegate.delegate.get();
    TFLITE_TOOLS_CHECK(delegate != nullptr)
        << "The created delegate by the delegate provider should not be "
           "nullptr!";
    // The interpreter becomes dependent on the delegate once the delegate is
    // used, so the order of destruction must be interpreter first, delegate
    // later. Move the delegate to the list of owned delegates to guarantee
    // that.
    owned_delegates_.emplace_back(std::move(created_delegate.delegate));
    if (interpreter_->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName()
                        << " delegate.";
      return kTfLiteError;
    } else {
      // Ideally, such delegate info should already be computed when the
      // delegate is being applied to the model graph.
      int num_delegated_kernels = 0;
      for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
        int node_id = interpreter_->execution_plan()[i];
        if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
          continue;
        }
        const TfLiteNode& node =
            interpreter_->node_and_registration(node_id)->first;

        // Note that the 'delegate' here could be an ExternalDelegateWrapper
        // object that wraps an actual external delegate, in which case,
        // 'node.delegate' will be different from 'delegate' because
        // 'node.delegate' refers to the actual external delegate.
        if (node.delegate != nullptr) {
          num_delegated_kernels++;
          checked_node_ids.insert(node_id);
        }
      }
      bool fully_delegated = (num_delegated_kernels == 1 &&
                              interpreter_->execution_plan().size() == 1);

      if (params_.Get<bool>("require_full_delegation") && !fully_delegated) {
        TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected.";
        return kTfLiteError;
      }
      if (fully_delegated) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be completely"
                         << " executed by the delegate.";
      } else if (num_delegated_kernels > 0) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be partially"
                         << " executed by the delegate w/ "
                         << num_delegated_kernels << " delegate kernels.";
      } else {
        TFLITE_LOG(INFO)
            << "Though " << delegate_provider->GetName()
            << " delegate is explicitly applied, the model graph will not be"
            << " executed by the delegate.";
      }
    }
  }

  auto interpreter_inputs = interpreter_->inputs();

  if (!inputs_.empty()) {
    TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << inputs_.size()
        << " expected: " << interpreter_inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }

    if (t->type != kTfLiteString && input.shape.size() != t->dims->size) {
      TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have "
                        << t->dims->size << " dimensions!";
      return kTfLiteError;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to allocate tensors!";
    return kTfLiteError;
  }

  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new RuyProfileListener()));
  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new OutputSaver(interpreter_.get())));

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::LoadModel() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(ERROR) << "Failed to mmap model " << graph;
    return kTfLiteError;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  return kTfLiteOk;
}

std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver()
    const {
  tflite::ops::builtin::BuiltinOpResolver* resolver = nullptr;
  // When --use_xnnpack is explicitly set to false, skip applying the default
  // XNNPACK delegate in the TfLite runtime so that the original execution
  // path based on the unmodified model graph is still exercised.
  if (params_.HasParam("use_xnnpack") &&
      params_.HasValueSet<bool>("use_xnnpack") &&
      !params_.Get<bool>("use_xnnpack")) {
    resolver =
        new tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates();
  } else {
    resolver = new tflite::ops::builtin::BuiltinOpResolver();
  }
  RegisterSelectedOps(resolver);
  return std::unique_ptr<tflite::OpResolver>(resolver);
}

std::unique_ptr<BenchmarkListener>
BenchmarkTfLiteModel::MayCreateProfilingListener() const {
  if (!params_.Get<bool>("enable_op_profiling")) return nullptr;

  return std::unique_ptr<BenchmarkListener>(new ProfilingListener(
      interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"),
      params_.Get<bool>("allow_dynamic_profiling_buffer_increase"),
      params_.Get<std::string>("profiling_output_csv_file"),
      CreateProfileSummaryFormatter(
          !params_.Get<std::string>("profiling_output_csv_file").empty())));
}

TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); }

}  // namespace benchmark
}  // namespace tflite