/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ruy/denormal.h"  // from @ruy
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/interpreter_options.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {

// Gets the current TfLiteQuantization from the legacy TfLiteQuantizationParams.
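// Note: the returned quantization carries a malloc'ed TfLiteAffineQuantization
// in `params`; ownership is expected to pass to the tensor this quantization
// is attached to and to be released via TfLiteQuantizationFree.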
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// TODO(b/153131797): We set 'delegate_status' to 0 in the following macro
// temporarily because delegate-specific error codes are not retrievable at
// the moment; we will add them later.
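//
// The macro evaluates expression `a` once, records the resulting status on the
// given runtime instrumentation event, and propagates any non-ok status to the
// caller via TF_LITE_ENSURE_STATUS. The do/while(0) wrapper keeps the macro
// usable as a single statement (e.g. inside an unbraced if/else).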
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace

Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Initialize the external contexts; none are registered by default.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e. threads) lazily.
  own_external_cpu_backend_context_ =
      std::make_unique<ExternalCpuBackendContext>();
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}
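
// Note: most users construct an Interpreter through tflite::InterpreterBuilder
// rather than calling this constructor directly. An illustrative sketch
// (assuming `model` is a std::unique_ptr<FlatBufferModel> loaded elsewhere and
// `resolver` is an op resolver such as ops::builtin::BuiltinOpResolver):
//
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//   interpreter->AllocateTensors();
//   interpreter->Invoke();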

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it's overwritten here, we will release the resource of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // will be lost here, and such info is now determined by the new context,
  // thus affecting how much parallelism a TFLite op would have.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}
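
// Illustrative sketch of SetExternalContext() (variable names are
// hypothetical): because ExternalCpuBackendContext is stored in the
// TfLiteExternalContext slot above, a single instance can be handed to several
// interpreters so they share CPU backend resources. The caller keeps ownership
// of `shared_ctx` and must keep it alive longer than the interpreters:
//
//   auto shared_ctx = std::make_unique<tflite::ExternalCpuBackendContext>();
//   interpreter_a->SetExternalContext(kTfLiteCpuBackendContext,
//                                     shared_ctx.get());
//   interpreter_b->SetExternalContext(kTfLiteCpuBackendContext,
//                                     shared_ctx.get());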

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegate that TFLite will enable at this point to allow
  // other user-level delegates to be applied first. An error is returned only
  // when the status is kTfLiteError; for other statuses, execution falls back
  // to the default (undelegated) implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}
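
// Call-order note for AllocateTensors(): it must succeed before Invoke(), and
// it must be called again after any ResizeInputTensor*() call so that tensor
// buffers match the new shapes.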

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph = new Subgraph(
        error_reporter_, external_contexts_, &subgraphs_, &resources_,
        &resource_ids_, &initialization_status_map_, subgraphs_.size());
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}
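
// Illustrative usage of ResizeInputTensor() (the 1x224x224x3 shape is
// hypothetical): after resizing an input, re-allocate tensors before the next
// Invoke():
//
//   interpreter->ResizeInputTensor(interpreter->inputs()[0],
//                                  {1, 224, 224, 3});
//   interpreter->AllocateTensors();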

TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(root_profiler_.get(),
                                                           "invoke");

  // Denormal floating point numbers could cause significant slowdown on
  // platforms like x86; therefore, we suppress denormals here to prevent this
  // from happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}
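
// Illustrative inference sketch around Invoke() (assumes a float model with
// one input and one output tensor; names are hypothetical):
//
//   float* input = interpreter->typed_input_tensor<float>(0);
//   // ... fill `input` ...
//   if (interpreter->Invoke() != kTfLiteOk) {
//     // handle the error
//   }
//   const float* output = interpreter->typed_output_tensor<float>(0);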

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >=0 or just -1 to let TFLite "
                          "runtime set the value.");
    return kTfLiteError;
  }

  // num_threads == 0 has the same effect as num_threads == 1.
  num_threads = num_threads == 0 ? 1 : num_threads;
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}
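
// Note on SetNumThreads(): the lazily applied default delegates (see
// ApplyLazyDelegateProviders() below) read `recommended_num_threads` when they
// are created during AllocateTensors(), so the thread count only reaches those
// delegates if SetNumThreads() is called before AllocateTensors(). For
// example, interpreter->SetNumThreads(-1) lets the runtime choose the value.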

TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty() || IsFullyDelegated()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  TfLiteDelegateCreators delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, the XNNPACK delegate is the only one that might be applied
  // by default; if it fails to be applied, execution falls back to the default
  // implementation.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto delegate_ptr =
        delegate_providers[i](context_->recommended_num_threads);
    // Note when XNNPACK-by-default is disabled, the corresponding creator
    // (i.e. tflite::MaybeCreateXNNPACKDelegate(...)) will return a nullptr.
    // Therefore, we simply continue with the next one.
    if (delegate_ptr == nullptr) continue;
    auto status = ModifyGraphWithDelegateImpl(std::move(delegate_ptr));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Successfully applied the default TensorFlow Lite "
            "delegate indexed at %zu.\n *NOTE*: because a delegate has been "
            "applied, the precision of computations should be unchanged, but "
            "the exact output tensor values may have changed. If such output "
            "values are checked in your code, like in your tests etc., please "
            "consider increasing error tolerance for the check.",
            i);
        break;
      case kTfLiteError:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of incompatibility between runtime and delegate. "
            "Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteApplicationError;
      case kTfLiteUnresolvedOps:
        TFLITE_LOG(
            TFLITE_LOG_INFO,
            "Failed to apply the default TensorFlow Lite delegate indexed at "
            "%zu because of unresolved ops (which could be resolved by "
            "another delegate). Ignoring the error, and continuing anyway.",
            i);
        return kTfLiteUnresolvedOps;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::ModifyGraphWithDelegateImpl(
    TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring Interpreter to
  // its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}

TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}

TfLiteStatus Interpreter::SetMetadata(
    const std::map<std::string, std::string>& metadata) {
  metadata_ = metadata;
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_));
  }
  return kTfLiteOk;
}

bool Interpreter::IsFullyDelegated() const {
  return primary_subgraph().IsFullyDelegated();
}

void Interpreter::SetProfilerImpl(std::unique_ptr<Profiler> profiler) {
  if (profiler == nullptr) {
    root_profiler_ = nullptr;
    return;
  }
  if (root_profiler_ == nullptr) {
    root_profiler_ = std::make_unique<profiling::RootProfiler>();
  } else {
    // Removes all previously registered profilers.
    root_profiler_->RemoveChildProfilers();
  }
  root_profiler_->AddProfiler(std::move(profiler));
  SetSubgraphProfiler();
}

void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(root_profiler_.get(),
                                            subgraph_index);
  }
}

TfLiteStatus Interpreter::ApplyOptionsImpl(InterpreterOptions* options) {
  if (options == nullptr) {
    return kTfLiteOk;
  }
  options_ = std::make_unique<InterpreterOptions>(*options);

  // Pass the InterpreterOptions object to each subgraph.
  for (auto& subgraph : subgraphs_) {
    subgraph->SetOptions(options_.get());
  }

  // Handle `experimental_dynamic_allocation_for_large_tensors_`.
  if (options->GetDynamicAllocationForLargeTensors() > 0) {
    for (auto& subgraph : subgraphs_) {
      subgraph->OptimizeMemoryForLargeTensors(
          options->GetDynamicAllocationForLargeTensors());
    }
  }
  return kTfLiteOk;
}

}  // namespace tflite