/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ruy/denormal.h"  // from @ruy
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/interpreter_options.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {

// Gets the current TfLiteQuantization from the legacy TfLiteQuantizationParams.
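// The result carries a malloc'ed single-entry TfLiteAffineQuantization; the
// SetTensorParameters* overloads below hand it to the subgraph, which is
// expected to take ownership and free it together with the tensor.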
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// TODO(b/153131797): We have temporarily set 'delegate_status' to 0 in the
// following macro because delegate-specific error codes are not retrievable
// at the moment; we will add them later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace

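// Most users construct an Interpreter through InterpreterBuilder rather than
// calling this constructor directly. A minimal usage sketch with the public
// C++ API (illustrative only, not part of this translation unit):
//
//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);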
Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Start with all external context slots unset; the CPU backend context is
  // installed just below.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e., threads) lazily.
  own_external_cpu_backend_context_ =
      std::make_unique<ExternalCpuBackendContext>();
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we will release the resources of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // will be lost here; such info is now determined by the new context, which
  // affects how much parallelism a TFLite op gets.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

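// Call-order note (per the contract documented in interpreter.h): this is
// expected to run after the graph and its tensors have been set up, again
// after any ResizeInputTensor() call, and before Invoke().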
TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegate that TFLite will enable at this point to allow
  // other user-level delegates to be applied first. An error is returned only
  // when the status is kTfLiteError; for other statuses, execution falls back
  // to the default (undelegated) implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph = new Subgraph(
        error_reporter_, external_contexts_, &subgraphs_, &resources_,
        &resource_ids_, &initialization_status_map_, subgraphs_.size());
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}

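// Illustrative call sequence (a sketch assuming a float model with a single
// input and output, and some std::vector<float> input already filled;
// typed_input_tensor/typed_output_tensor are part of the public Interpreter
// API):
//
//   interpreter->AllocateTensors();
//   float* in = interpreter->typed_input_tensor<float>(0);
//   std::copy(input.begin(), input.end(), in);
//   if (interpreter->Invoke() == kTfLiteOk) {
//     const float* out = interpreter->typed_output_tensor<float>(0);
//     // consume `out` ...
//   }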
TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(root_profiler_.get(),
                                                           "invoke");

  // Denormal floating point numbers could cause significant slowdown on
  // platforms like x86; therefore, we suppress denormals here to prevent this
  // from happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >=0 or just -1 to let TFLite "
                          "runtime set the value.");
    return kTfLiteError;
  }

  // num_threads == 0 has the same effect as num_threads == 1.
  num_threads = num_threads == 0 ? 1 : num_threads;
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

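  // Give any registered external contexts a chance to pick up the updated
  // thread-count recommendation.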
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty() || IsFullyDelegated()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  TfLiteDelegateCreators delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, the XNNPACK delegate is the only one that might be applied
  // by default, in which case the execution will fall back to the default
  // implementation if the XNNPACK delegate fails to be applied.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto delegate_ptr =
        delegate_providers[i](context_->recommended_num_threads);
    // Note that when XNNPACK-by-default is disabled, the corresponding creator
    // (i.e. tflite::MaybeCreateXNNPACKDelegate(...)) will return a nullptr.
    // Therefore, we simply continue with the next one.
    if (delegate_ptr == nullptr) continue;
    auto status = ModifyGraphWithDelegateImpl(std::move(delegate_ptr));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Successfully applied the default TensorFlow Lite "
            "delegate indexed at %zu.\n *NOTE*: because a delegate has been "
            "applied, the precision of computations should be unchanged, but "
            "the exact output tensor values may have changed. If such output "
            "values are checked in your code, like in your tests etc., please "
            "consider increasing error tolerance for the check.",
            i);
        break;
      case kTfLiteError:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of incompatibility between runtime and delegate. "
            "Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteApplicationError;
      case kTfLiteUnresolvedOps:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of unresolved ops (which could be resolved by "
            "another delegate). Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteUnresolvedOps;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::ModifyGraphWithDelegateImpl(
    TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
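    // Validation subgraphs are internal helpers and are intentionally left
    // undelegated here.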
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring the
  // Interpreter to its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}

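// Undoes delegation on every subgraph, restoring the original execution plan;
// used above to recover from kTfLiteDelegateError.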
TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::SetMetadata(
    const std::map<std::string, std::string>& metadata) {
  metadata_ = metadata;
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_));
  }
  return kTfLiteOk;
}

bool Interpreter::IsFullyDelegated() const {
  return primary_subgraph().IsFullyDelegated();
}

void Interpreter::SetProfilerImpl(std::unique_ptr<Profiler> profiler) {
  if (profiler == nullptr) {
    root_profiler_ = nullptr;
    return;
  }
  if (root_profiler_ == nullptr) {
    root_profiler_ = std::make_unique<profiling::RootProfiler>();
  } else {
    // Removes all previously registered profilers.
    root_profiler_->RemoveChildProfilers();
  }
  root_profiler_->AddProfiler(std::move(profiler));
  SetSubgraphProfiler();
}

void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(root_profiler_.get(),
                                            subgraph_index);
  }
}

TfLiteStatus Interpreter::ApplyOptionsImpl(InterpreterOptions* options) {
  if (options == nullptr) {
    return kTfLiteOk;
  }
  options_ = std::make_unique<InterpreterOptions>(*options);

  // Set the InterpreterOptions object on each subgraph.
  for (auto& subgraph : subgraphs_) {
    subgraph->SetOptions(options_.get());
  }

  // Handle `experimental_dynamic_allocation_for_large_tensors_`.
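  // Tensors whose size reaches the configured threshold are expected to be
  // allocated dynamically to lower peak memory usage; see InterpreterOptions
  // for the exact semantics of this experimental option.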
  if (options->GetDynamicAllocationForLargeTensors() > 0) {
    for (auto& subgraph : subgraphs_) {
      subgraph->OptimizeMemoryForLargeTensors(
          options->GetDynamicAllocationForLargeTensors());
    }
  }
  return kTfLiteOk;
}

}  // namespace tflite