/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/gpu_api_delegate.h"

#include <cstdint>

#include "absl/types/span.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/cl/api.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/transformations/model_transformations.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Forward declaration.
TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate);

InferencePriority ToPriority(int32_t priority) {
  switch (priority) {
    case TfLiteGpuInferencePriority::
        TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
      return InferencePriority::MAX_PRECISION;
    case TfLiteGpuInferencePriority::TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
      return InferencePriority::MIN_LATENCY;
  }
  return InferencePriority::MAX_PRECISION;
}

DataType ToDataType(TfLiteType data_type) {
  switch (data_type) {
    case kTfLiteFloat16:
      return DataType::FLOAT16;
    case kTfLiteFloat32:
      return DataType::FLOAT32;
    default:
      return DataType::UNKNOWN;
  }
}

DataLayout ToDataLayoutFromTFL(TfLiteGpuDataLayout data_layout) {
  switch (data_layout) {
    case TFLITE_GPU_DATA_LAYOUT_BHWC:
      return DataLayout::BHWC;
    case TFLITE_GPU_DATA_LAYOUT_DHWC4:
      return DataLayout::DHWC4;
    default:
      return DataLayout::UNKNOWN;
  }
}

class Delegate {
 public:
  explicit Delegate(const TfLiteGpuDelegateOptions_New* options) {
    if (options) {
      options_ = *options;
    } else {
      // Default options.
      options_.compile_options.precision_loss_allowed = 0;
      options_.compile_options.inference_priority = TfLiteGpuInferencePriority::
          TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
      options_.egl_display = EGL_NO_DISPLAY;
      options_.egl_context = EGL_NO_CONTEXT;
      options_.serialized_binary_cache_data = nullptr;
      options_.serialized_binary_cache_size = 0;
    }
  }

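  // Converts the delegated TFLite subgraph into a GraphFloat32, applies the
  // general model transformations, creates an OpenCL inference environment and
  // inference builder, records input/output tensor indices, and finally builds
  // the inference runner.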
  absl::Status Prepare(TfLiteContext* context,
                       const TfLiteDelegateParams* delegate_params) {
    // Extract TFLite delegate execution plan from the context and convert it
    // into GraphFloat32.
    GraphFloat32 graph;
    RETURN_IF_ERROR(BuildModel(context, delegate_params, &graph));

    // Apply general transformations on the graph.
    ModelTransformer transformer(&graph);
    if (!ApplyModelTransformations(&transformer)) {
      return absl::InternalError("Graph transformations failed");
    }

    InferenceEnvironmentOptions env_options;
    env_options.egl_context = options_.egl_context;
    env_options.egl_display = options_.egl_display;
    env_options.serialized_binary_cache = {
        options_.serialized_binary_cache_data,
        options_.serialized_binary_cache_size};
    InferenceEnvironmentProperties properties;
    absl::Status status =
        NewInferenceEnvironment(env_options, &environment_, &properties);
    if (!properties.is_opencl_available) {
      TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate: OpenCL is not available");
    }
    if (!properties.is_gl_sharing_supported) {
      TF_LITE_KERNEL_LOG(context,
                         "TfLiteGpuDelegate: GL sharing is not supported");
    }
    if (!properties.is_cl_to_gl_fast_sync_supported) {
      TF_LITE_KERNEL_LOG(
          context, "TfLiteGpuDelegate: fast CL to GL sync is not supported");
    }
    if (!properties.is_gl_to_cl_fast_sync_supported) {
      TF_LITE_KERNEL_LOG(
          context, "TfLiteGpuDelegate: fast GL to CL sync is not supported");
    }
    RETURN_IF_ERROR(status);

    std::vector<uint32_t> input_refs;
    {
      const auto& inputs = graph.inputs();
      input_refs.reserve(inputs.size());
      for (auto input : inputs) {
        input_refs.push_back(input->tensor.ref);
      }
    }
    std::vector<uint32_t> output_refs;
    {
      const auto& outputs = graph.outputs();
      output_refs.reserve(outputs.size());
      for (auto output : outputs) {
        output_refs.push_back(output->tensor.ref);
      }
    }

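    // Map the two-field compile options (precision_loss_allowed plus
    // inference_priority) onto the ranked priority triple used by the CL API.
    // When precision loss is not allowed, MAX_PRECISION is always ranked
    // first; when it is allowed, MIN_LATENCY takes the top slot instead
    // (or MIN_MEMORY_USAGE, if that is the requested priority).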
    InferenceOptions options;
    options.usage = InferenceUsage::FAST_SINGLE_ANSWER;
    if (options_.compile_options.precision_loss_allowed == 0) {
      options.priority1 = InferencePriority::MAX_PRECISION;
      switch (options_.compile_options.inference_priority) {
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MIN_LATENCY;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
          options.priority2 = InferencePriority::MIN_LATENCY;
          options.priority3 = InferencePriority::MIN_MEMORY_USAGE;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MIN_LATENCY;
          break;
      }
    } else {
      switch (options_.compile_options.inference_priority) {
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION:
          options.priority1 = InferencePriority::MIN_LATENCY;
          options.priority2 = InferencePriority::MAX_PRECISION;
          options.priority3 = InferencePriority::MIN_MEMORY_USAGE;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY:
          options.priority1 = InferencePriority::MIN_LATENCY;
          options.priority2 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority3 = InferencePriority::MAX_PRECISION;
          break;
        case TfLiteGpuInferencePriority::
            TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE:
          options.priority1 = InferencePriority::MIN_MEMORY_USAGE;
          options.priority2 = InferencePriority::MIN_LATENCY;
          options.priority3 = InferencePriority::MAX_PRECISION;
          break;
      }
    }
    std::unique_ptr<InferenceBuilder> builder;
    RETURN_IF_ERROR(
        environment_->NewInferenceBuilder(options, std::move(graph), &builder));

    // At this point TFLite has not allocated tensors yet; therefore, collect
    // the indices here and set all input and output tensors from TFLite later.
    input_indices_.reserve(input_refs.size());
    for (auto tensor_index : input_refs) {
      int object_index = input_indices_.size();
      input_indices_.push_back(tensor_index);
      RETURN_IF_ERROR(
          builder->SetInputObjectDef(object_index, GetObjectDef(tensor_index)));
    }
    output_indices_.reserve(output_refs.size());
    for (auto tensor_index : output_refs) {
      int object_index = output_indices_.size();
      output_indices_.push_back(tensor_index);
      RETURN_IF_ERROR(builder->SetOutputObjectDef(object_index,
                                                  GetObjectDef(tensor_index)));
    }

    return builder->Build(&runner_);
  }

  absl::Status SetInputsAndOutputs(TfLiteContext* context) {
    int i = 0;
    for (auto index : input_indices_) {
      RETURN_IF_ERROR(
          runner_->SetInputObject(i++, GetTensorObject(index, context)));
    }
    i = 0;
    for (auto index : output_indices_) {
      RETURN_IF_ERROR(
          runner_->SetOutputObject(i++, GetTensorObject(index, context)));
    }
    return absl::OkStatus();
  }

  absl::Status Invoke(TfLiteContext* context) {
    RETURN_IF_ERROR(SetInputsAndOutputs(context));
    return runner_->Run();
  }

  void BindGlBufferToTensor(GLuint buffer_id, int tensor_index,
                            DataType data_type, DataLayout data_layout) {
    // At this point the delegate hasn't seen a model yet. Therefore, just
    // record what object gets assigned.
    if (tensor_index >= tensors_.size()) {
      tensors_.resize(tensor_index + 1);
    }
    TensorObjectDef def;
    def.object_def.data_type = data_type;
    def.object_def.data_layout = data_layout;
    def.object_def.object_type = ObjectType::OPENGL_SSBO;
    def.object_def.user_provided = true;
    def.dimensions = Dimensions(0, 0, 0, 0);
    OpenGlBuffer buffer;
    buffer.id = buffer_id;
    TensorObject obj = buffer;
    tensors_[tensor_index] = std::make_pair(obj, def);
  }

  ObjectDef GetObjectDef(int index) const {
    if (index < tensors_.size() && IsValid(tensors_[index].second)) {
      return tensors_[index].second.object_def;
    }
    ObjectDef default_object_def;
    default_object_def.data_type = DataType::FLOAT32;
    default_object_def.data_layout = DataLayout::BHWC;
    default_object_def.object_type = ObjectType::CPU_MEMORY;
    default_object_def.user_provided = true;
    return default_object_def;
  }

  TensorObject GetTensorObject(int index, TfLiteContext* context) const {
    if (index < tensors_.size() &&
        IsValid(tensors_[index].second, tensors_[index].first)) {
      return tensors_[index].first;
    }
    auto& tensor = context->tensors[index];
    return MakeCpuMemory(absl::MakeSpan(tensor.data.raw, tensor.bytes));
  }

  TfLiteDelegate* tflite_delegate() { return &delegate_; }

  bool SupportsGlObjects() const {
    return options_.egl_context != EGL_NO_CONTEXT &&
           options_.egl_display != EGL_NO_DISPLAY;
  }

  absl::Span<const uint8_t> GetSerializedBinaryCache() {
    binary_cache_ = environment_->GetSerializedBinaryCache();
    return binary_cache_;
  }

 private:
  TfLiteDelegate delegate_ = {
      reinterpret_cast<void*>(this),  // .data_
      DelegatePrepare,                // .Prepare
      nullptr,                        // .CopyFromBufferHandle
      nullptr,                        // .CopyToBufferHandle
      nullptr,                        // .FreeBufferHandle
      kTfLiteDelegateFlagsNone,       // .flags
  };

  TfLiteGpuDelegateOptions_New options_;
  std::unique_ptr<InferenceEnvironment> environment_;
  std::unique_ptr<InferenceRunner> runner_;
  std::vector<int64_t> input_indices_;
  std::vector<int64_t> output_indices_;
  std::vector<uint8_t> binary_cache_;
  std::vector<std::pair<TensorObject, TensorObjectDef>> tensors_;
};

inline Delegate* GetDelegate(TfLiteNode* node) {
  return reinterpret_cast<Delegate*>(node->user_data);
}

inline Delegate* GetDelegate(TfLiteDelegate* delegate) {
  return reinterpret_cast<Delegate*>(delegate->data_);
}

TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
  const TfLiteRegistration kRegistration = {
      // .init
      [](TfLiteContext* context, const char* buffer, size_t) -> void* {
        const auto* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);
        auto* gpu_delegate = GetDelegate(params->delegate);
        // Everything below should happen in the prepare function call, but
        // TFLite forbids that.
        const auto status = gpu_delegate->Prepare(context, params);
        if (!status.ok()) {
          TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Init: %s",
                             std::string(status.message()).c_str());
          return nullptr;
        }
        return gpu_delegate;
      },
      // .free
      [](TfLiteContext*, void* buffer) -> void {},
      // .prepare
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        if (!node->user_data) {
          TF_LITE_KERNEL_LOG(
              context,
              "TfLiteGpuDelegate Prepare: delegate is not initialized");
          return kTfLiteError;
        }
        // TODO(akulik): tflite tensors are not allocated here either. It would
        // be good to set inputs and outputs only once here instead of setting
        // them every time in .invoke.
        return kTfLiteOk;
      },
      // .invoke
      [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        const auto status = GetDelegate(node)->Invoke(context);
        if (!status.ok()) {
          TF_LITE_KERNEL_LOG(context, "TfLiteGpuDelegate Invoke: %s",
                             std::string(status.message()).c_str());
          return kTfLiteError;
        }
        return kTfLiteOk;
      },
      nullptr,                  // .profiling_string
      0,                        // .builtin_code
      "TfLiteGpuDelegate_New",  // .custom_name
      1,                        // .version
  };
  TfLiteIntArray* ops_to_replace = GetOpsToReplace(context);
  const auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context, kRegistration, ops_to_replace, delegate);
  TfLiteIntArrayFree(ops_to_replace);
  return status;
}

}  // namespace
}  // namespace cl
}  // namespace gpu
}  // namespace tflite

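// Illustrative usage sketch for the C API defined below (assumptions: a fully
// built tflite::Interpreter named `interpreter`; error handling elided). This
// is not part of the delegate itself, just one way the entry points are
// typically wired up:
//
//   TfLiteGpuDelegateOptions_New options;
//   options.compile_options.precision_loss_allowed = 0;
//   options.compile_options.inference_priority =
//       TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
//   options.egl_display = EGL_NO_DISPLAY;
//   options.egl_context = EGL_NO_CONTEXT;
//   options.serialized_binary_cache_data = nullptr;
//   options.serialized_binary_cache_size = 0;
//   TfLiteDelegate* delegate = TfLiteGpuDelegateCreate_New(&options);
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Fall back to CPU execution.
//   }
//   ...
//   TfLiteGpuDelegateDelete_New(delegate);  // After the interpreter is done.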
TfLiteDelegate* TfLiteGpuDelegateCreate_New(
    const TfLiteGpuDelegateOptions_New* options) {
  auto* gpu_delegate = new tflite::gpu::cl::Delegate(options);
  return gpu_delegate ? gpu_delegate->tflite_delegate() : nullptr;
}

void TfLiteGpuDelegateDelete_New(TfLiteDelegate* delegate) {
  delete tflite::gpu::cl::GetDelegate(delegate);
}

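// Illustrative sketch of binding a GL SSBO to a tensor (assumptions: a valid
// EGL display/context were passed in the delegate options, `ssbo_id` is an
// existing GL buffer, and `tensor_index` refers to a model input or output):
//
//   if (TfLiteGpuDelegateBindGlBufferToTensor(
//           delegate, ssbo_id, tensor_index, kTfLiteFloat32,
//           TFLITE_GPU_DATA_LAYOUT_BHWC) != kTfLiteOk) {
//     // The binding was rejected; fall back to CPU memory I/O.
//   }
//
// Bindings must be registered before the delegate is applied to the
// interpreter, since Prepare() reads the recorded object defs.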
TFL_CAPI_EXPORT TfLiteStatus TfLiteGpuDelegateBindGlBufferToTensor(
    TfLiteDelegate* delegate, GLuint buffer_id, int tensor_index,
    TfLiteType data_type, TfLiteGpuDataLayout data_layout) {
  auto* gpu_delegate = tflite::gpu::cl::GetDelegate(delegate);
  if (!gpu_delegate) {
    return kTfLiteError;
  }
  if (!gpu_delegate->SupportsGlObjects()) {
    return kTfLiteError;
  }
  auto type = tflite::gpu::cl::ToDataType(data_type);
  if (type == tflite::gpu::DataType::UNKNOWN) {
    return kTfLiteError;
  }
  auto layout = tflite::gpu::cl::ToDataLayoutFromTFL(data_layout);
  if (layout == tflite::gpu::DataLayout::UNKNOWN) {
    return kTfLiteError;
  }
  gpu_delegate->BindGlBufferToTensor(buffer_id, tensor_index, type, layout);
  return kTfLiteOk;
}

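// Illustrative sketch of persisting the serialized OpenCL binary cache after a
// successful run, so a later session can pass it back via
// serialized_binary_cache_data/size and skip program recompilation
// (assumption: `SaveCacheToDisk` is an application-provided helper, not part
// of TFLite):
//
//   size_t size = 0;
//   const uint8_t* data = nullptr;
//   if (TfLiteGpuDelegateGetSerializedBinaryCache(delegate, &size, &data)) {
//     SaveCacheToDisk(data, size);
//   }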
bool TfLiteGpuDelegateGetSerializedBinaryCache(TfLiteDelegate* delegate,
                                               size_t* size,
                                               const uint8_t** data) {
  *size = 0;
  auto* gpu_delegate = tflite::gpu::cl::GetDelegate(delegate);
  if (!gpu_delegate) {
    return false;
  }
  auto cache = gpu_delegate->GetSerializedBinaryCache();
  if (cache.empty()) {
    return false;
  }
  *size = cache.size();
  *data = cache.data();
  return true;
}