/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h"

#include <cstdint>

#include "absl/types/span.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/cl/api.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/transformations/model_transformations.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Forward declarations.
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);

InferencePriority ToPriority(int32_t priority) {
  switch (priority) {
    case TfLiteGpuInferencePriority::
        TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
      return InferencePriority::MAX_PRECISION;
    case TfLiteGpuInferencePriority::TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
      return InferencePriority::MIN_LATENCY;
  }
  return InferencePriority::MAX_PRECISION;
}

DataType ToDataType(TfLiteType data_type) {
  switch (data_type) {
    case kTfLiteFloat16:
      return DataType::FLOAT16;
    case kTfLiteFloat32:
      return DataType::FLOAT32;
    default:
      return DataType::UNKNOWN;
  }
}

DataLayout ToDataLayoutFromTFL(TfLiteGpuDataLayout data_layout) {
  switch (data_layout) {
    case TFLITE_GPU_DATA_LAYOUT_BHWC:
      return DataLayout::BHWC;
    case TFLITE_GPU_DATA_LAYOUT_DHWC4:
      return DataLayout::DHWC4;
    default:
      return DataLayout::UNKNOWN;
  }
}

class Delegate {
 public:
  explicit Delegate(const TfLiteGpuDelegateOptions_New* options) {
    if (options) {
      options_ = *options;
    } else {
      // Default options.
      options_.compile_options.precision_loss_allowed = 0;
      options_.compile_options.inference_priority = TfLiteGpuInferencePriority::
          TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
      options_.egl_display = EGL_NO_DISPLAY;
      options_.egl_context = EGL_NO_CONTEXT;
      options_.serialized_binary_cache_data = nullptr;
      options_.serialized_binary_cache_size = 0;
    }
  }

  absl::Status Prepare(TfLiteContext* context,
                       const TfLiteDelegateParams* delegate_params) {
    // Extract TFLite delegate execution plan from the context and convert it
    // into GraphFloat32.
    GraphFloat32 graph;
    RETURN_IF_ERROR(BuildModel(context, delegate_params, &graph));

    // Apply general transformations on the graph.
    ModelTransformer transformer(&graph);
    if (!ApplyModelTransformations(&transformer)) {
      return absl::InternalError("Graph transformations failed");
    }

    InferenceEnvironmentOptions env_options;
    env_options.egl_context = options_.egl_context;
    env_options.egl_display = options_.egl_display;
    env_options.serialized_binary_cache = {
        options_.serialized_binary_cache_data,
        options_.serialized_binary_cache_size};
    InferenceEnvironmentProperties properties;
    absl::Status status =
        NewInferenceEnvironment(env_options, &environment_, &properties);
    if (!properties.is_opencl_available) {
      TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate: OpenCL is not available");
    }
    if (!properties.is_gl_sharing_supported) {
      TF_LITE_KERNEL_LOG(context,
                         "TfLiteGpuDelegate: GL sharing is not supported");
    }
    if (!properties.is_cl_to_gl_fast_sync_supported) {
      TF_LITE_KERNEL_LOG(
          context, "TfLiteGpuDelegate: fast CL to GL sync is not supported");
    }
    if (!properties.is_gl_to_cl_fast_sync_supported) {
      TF_LITE_KERNEL_LOG(
          context, "TfLiteGpuDelegate: fast GL to CL sync is not supported");
    }
    RETURN_IF_ERROR(status);

    std::vector<uint32_t> input_refs;
    {
      const auto& inputs = graph.inputs();
      input_refs.reserve(inputs.size());
      for (auto input : inputs) {
        input_refs.push_back(input->tensor.ref);
      }
    }
    std::vector<uint32_t> output_refs;
    {
      const auto& outputs = graph.outputs();
      output_refs.reserve(outputs.size());
      for (auto output : outputs) {
        output_refs.push_back(output->tensor.ref);
      }
    }

    InferenceOptions options;
    options.usage = InferenceUsage::FAST_SINGLE_ANSWER;
    if (options_.compile_options.precision_loss_allowed == 0) {
      options.priority1 = InferencePriority::MAX_PRECISION;
      switch (options_.compile_options.inference_priority) {
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MIN_LATENCY;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
          options.priority2 = InferencePriority::MIN_LATENCY;
          options.priority3 = InferencePriority::MIN_MEMORY_USAGE;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MIN_LATENCY;
          break;
      }
    } else {
      switch (options_.compile_options.inference_priority) {
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
          options.priority1 = InferencePriority::MIN_LATENCY;
          options.priority2 = InferencePriority::MAX_PRECISION;
          options.priority3 = InferencePriority::MIN_MEMORY_USAGE;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
          options.priority1 = InferencePriority::MIN_LATENCY;
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MAX_PRECISION;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
          options.priority1 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority2 = InferencePriority::MIN_LATENCY;
          options.priority3 = InferencePriority::MAX_PRECISION;
          break;
      }
    }
    std::unique_ptr<InferenceBuilder> builder;
    RETURN_IF_ERROR(
        environment_->NewInferenceBuilder(options, std::move(graph), &builder));

    // At this point TFLite hasn't allocated tensors yet; therefore, collect
    // the indices now and set all input and output tensors from TFLite later.
    input_indices_.reserve(input_refs.size());
    for (auto tensor_index : input_refs) {
      int object_index = input_indices_.size();
      input_indices_.push_back(tensor_index);
      RETURN_IF_ERROR(
          builder->SetInputObjectDef(object_index, GetObjectDef(tensor_index)));
    }
    output_indices_.reserve(output_refs.size());
    for (auto tensor_index : output_refs) {
      int object_index = output_indices_.size();
      output_indices_.push_back(tensor_index);
      RETURN_IF_ERROR(builder->SetOutputObjectDef(object_index,
                                                  GetObjectDef(tensor_index)));
    }

    return builder->Build(&runner_);
  }

  absl::Status SetInputsAndOutputs(TfLiteContext* context) {
    int i = 0;
    for (auto index : input_indices_) {
      RETURN_IF_ERROR(
          runner_->SetInputObject(i++, GetTensorObject(index, context)));
    }
    i = 0;
    for (auto index : output_indices_) {
      RETURN_IF_ERROR(
          runner_->SetOutputObject(i++, GetTensorObject(index, context)));
    }
    return absl::OkStatus();
  }

  absl::Status Invoke(TfLiteContext* context) {
    RETURN_IF_ERROR(SetInputsAndOutputs(context));
    return runner_->Run();
  }

  void BindGlBufferToTensor(GLuint buffer_id, int tensor_index,
                            DataType data_type, DataLayout data_layout) {
    // At this point the delegate hasn't seen a model yet. Therefore, just
    // record which object gets assigned.
    if (tensor_index >= tensors_.size()) {
      tensors_.resize(tensor_index + 1);
    }
    TensorObjectDef def;
    def.object_def.data_type = data_type;
    def.object_def.data_layout = data_layout;
    def.object_def.object_type = ObjectType::OPENGL_SSBO;
    def.object_def.user_provided = true;
    def.dimensions = Dimensions(0, 0, 0, 0);
    OpenGlBuffer buffer;
    buffer.id = buffer_id;
    TensorObject obj = buffer;
    tensors_[tensor_index] = std::make_pair(obj, def);
  }

  ObjectDef GetObjectDef(int index) const {
    if (index < tensors_.size() && IsValid(tensors_[index].second)) {
      return tensors_[index].second.object_def;
    }
    ObjectDef default_object_def;
    default_object_def.data_type = DataType::FLOAT32;
    default_object_def.data_layout = DataLayout::BHWC;
    default_object_def.object_type = ObjectType::CPU_MEMORY;
    default_object_def.user_provided = true;
    return default_object_def;
  }

  TensorObject GetTensorObject(int index, TfLiteContext* context) const {
    if (index < tensors_.size() &&
        IsValid(tensors_[index].second, tensors_[index].first)) {
      return tensors_[index].first;
    }
    auto& tensor = context->tensors[index];
    return MakeCpuMemory(absl::MakeSpan(tensor.data.raw, tensor.bytes));
  }

  TfLiteDelegate* tflite_delegate() { return &delegate_; }

  bool SupportsGlObjects() const {
    return options_.egl_context != EGL_NO_CONTEXT &&
           options_.egl_display != EGL_NO_DISPLAY;
  }

  absl::Span<const uint8_t> GetSerializedBinaryCache() {
    binary_cache_ = environment_->GetSerializedBinaryCache();
    return binary_cache_;
  }

 private:
  TfLiteDelegate delegate_ = {
      reinterpret_cast<void*>(this),  // .data_
      DelegatePrepare,                // .Prepare
      nullptr,                        // .CopyFromBufferHandle
      nullptr,                        // .CopyToBufferHandle
      nullptr,                        // .FreeBufferHandle
      kTfLiteDelegateFlagsNone,       // .flags
  };

  TfLiteGpuDelegateOptions_New options_;
  std::unique_ptr<InferenceEnvironment> environment_;
  std::unique_ptr<InferenceRunner> runner_;
  std::vector<int64_t> input_indices_;
  std::vector<int64_t> output_indices_;
  std::vector<uint8_t> binary_cache_;
  std::vector<std::pair<TensorObject, TensorObjectDef>> tensors_;
};

inline Delegate* GetDelegate(TfLiteNode* node) {
  return reinterpret_cast<Delegate*>(node->user_data);
}

inline Delegate* GetDelegate(TfLiteDelegate* delegate) {
  return reinterpret_cast<Delegate*>(delegate->data_);
}

TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
  const TfLiteRegistration kRegistration = {
      // .init
      [](TfLiteContext* context, const char* buffer, size_t) -> void* {
        const auto* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);
        auto* gpu_delegate = GetDelegate(params->delegate);
        // Everything below should happen in the prepare function call, but
        // TFLite forbids that for whatever reason.
        const auto status = gpu_delegate->Prepare(context, params);
        if (!status.ok()) {
          TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Init: %s",
                             std::string(status.message()).c_str());
          return nullptr;
        }
        return gpu_delegate;
      },
      // .free
      [](TfLiteContext*, void* buffer) -> void {},
      // .prepare
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        if (!node->user_data) {
          TF_LITE_KERNEL_LOG(
              context,
              "TfLiteGpuDelegate Prepare: delegate is not initialized");
          return kTfLiteError;
        }
        // TODO(akulik): tflite tensors are not allocated here either. It would
        // be good to set inputs and outputs only once here instead of setting
        // them every time in .invoke.
        return kTfLiteOk;
      },
      // .invoke
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        const auto status = GetDelegate(node)->Invoke(context);
        if (!status.ok()) {
          TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Invoke: %s",
                             std::string(status.message()).c_str());
          return kTfLiteError;
        }
        return kTfLiteOk;
      },
      nullptr,                  // .profiling_string
      0,                        // .builtin_code
      "TfLiteGpuDelegate_New",  // .custom_name
      1,                        // .version
  };
  TfLiteIntArray* ops_to_replace = GetOpsToReplace(context);
  const auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context, kRegistration, ops_to_replace, delegate);
  TfLiteIntArrayFree(ops_to_replace);
  return status;
}

}  // namespace
}  // namespace cl
}  // namespace gpu
}  // namespace tflite

TfLiteDelegate* TfLiteGpuDelegateCreate_New(
    const TfLiteGpuDelegateOptions_New* options) {
  auto* gpu_delegate = new tflite::gpu::cl::Delegate(options);
  return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr;
}

void TfLiteGpuDelegateDelete_New(TfLiteDelegate* delegate) {
  delete tflite::gpu::cl::GetDelegate(delegate);
}

TFL_CAPI_EXPORT TfLiteStatus TfLiteGpuDelegateBindGlBufferToTensor(
    TfLiteDelegate* delegate, GLuint buffer_id, int tensor_index,
    TfLiteType data_type, TfLiteGpuDataLayout data_layout) {
  auto* gpu_delegate = tflite::gpu::cl::GetDelegate(delegate);
  if (!gpu_delegate) {
    return kTfLiteError;
  }
  if (!gpu_delegate->SupportsGlObjects()) {
    return kTfLiteError;
  }
  auto type = tflite::gpu::cl::ToDataType(data_type);
  if (type == tflite::gpu::DataType::UNKNOWN) {
    return kTfLiteError;
  }
  auto layout = tflite::gpu::cl::ToDataLayoutFromTFL(data_layout);
  if (layout == tflite::gpu::DataLayout::UNKNOWN) {
    return kTfLiteError;
  }
  gpu_delegate->BindGlBufferToTensor(buffer_id, tensor_index, type, layout);
  return kTfLiteOk;
}

bool TfLiteGpuDelegateGetSerializedBinaryCache(TfLiteDelegate* delegate,
                                               size_t* size,
                                               const uint8_t** data) {
  *size = 0;
  auto* gpu_delegate = tflite::gpu::cl::GetDelegate(delegate);
  if (!gpu_delegate) {
    return false;
  }
  auto cache = gpu_delegate->GetSerializedBinaryCache();
  if (cache.empty()) {
    return false;
  }
  *size = cache.size();
  *data = cache.data();
  return true;
}
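
// ---------------------------------------------------------------------------
// Example usage (illustrative sketch only; not part of this API's contract).
// It assumes the standard TFLite C++ interpreter API (InterpreterBuilder /
// Interpreter::ModifyGraphWithDelegate) and a current EGL context on the
// calling thread. `interpreter`, `ssbo_id`, and `SaveCacheToDisk` are
// hypothetical placeholders; error handling is omitted.
//
//   TfLiteGpuDelegateOptions_New options = {};
//   options.compile_options.precision_loss_allowed = 0;
//   options.compile_options.inference_priority =
//       TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
//   options.egl_display = eglGetCurrentDisplay();
//   options.egl_context = eglGetCurrentContext();
//
//   TfLiteDelegate* delegate = TfLiteGpuDelegateCreate_New(&options);
//
//   // GL buffers must be bound before the delegate sees the model, i.e.
//   // before ModifyGraphWithDelegate triggers Prepare(); otherwise the
//   // tensor falls back to the default CPU_MEMORY object.
//   TfLiteGpuDelegateBindGlBufferToTensor(
//       delegate, ssbo_id, /*tensor_index=*/0, kTfLiteFloat32,
//       TFLITE_GPU_DATA_LAYOUT_BHWC);
//
//   interpreter->ModifyGraphWithDelegate(delegate);
//   interpreter->Invoke();
//
//   // After compilation, the program binaries can be cached and passed back
//   // via serialized_binary_cache_data/_size on a later run to speed up
//   // initialization.
//   size_t cache_size = 0;
//   const uint8_t* cache_data = nullptr;
//   if (TfLiteGpuDelegateGetSerializedBinaryCache(delegate, &cache_size,
//                                                 &cache_data)) {
//     SaveCacheToDisk(cache_data, cache_size);
//   }
//
//   TfLiteGpuDelegateDelete_New(delegate);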