/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#include <utility>

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif

#include <algorithm>
#include <cstring>
#include <memory>
#include <variant>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = std::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = std::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = std::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        TensorStorageType storage_type = ToTensorStorageType(
            d.object_def.object_type, d.object_def.data_layout);
        TensorDescriptor desc = CreateBhwcTensorDescriptor(
            d.object_def.data_type, storage_type, shape);
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It handles the following cases where one-step conversion is
// not supported:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
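// For instance (an illustrative reading of MakeOuterInnerDefs() below): tying
// an external CPU/BHWC object to an internal DHWC4 texture splits into
//   outer tie: CPU BHWC <-> OpenCL buffer BHWC,
//   inner tie: OpenCL buffer BHWC <-> internal texture DHWC4,
// where the intermediate OpenCL buffer is allocated by the inner tie.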
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = std::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        std::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = std::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = std::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties.
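// Ties are attempted in order: NoopTensorTie, DefaultTensorTie,
// GlBufferHolder (GL-aware builds only), then TwoStepTensorTie.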
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = std::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    return inputs_[index]->CopyFromExternalObject();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index."));
    }
    return outputs_[index]->CopyToExternalObject();
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (const auto& input : inputs_) {
      RETURN_IF_ERROR(input->CopyFromExternalObject());
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    bool has_async_copies = false;
    for (const auto& output : outputs_) {
      RETURN_IF_ERROR(output->CopyToExternalObject());
      if (output->def().external_def.object_def.object_type ==
          ObjectType::CPU_MEMORY) {
        has_async_copies = true;
      }
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    // Copies into CPU memory are enqueued asynchronously, so block until the
    // queue has drained before returning control to the caller.
    if (has_async_copies) {
      RETURN_IF_ERROR(queue_->WaitForCompletion());
    }
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    // Submit the enqueued work to the device without waiting for completion.
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

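// A worked example (illustrative): with priority1 = MIN_LATENCY,
// priority2 = MAX_PRECISION, and priority3 = MIN_MEMORY_USAGE,
// MAX_PRECISION sits at position 2, so F32_F16 is requested; if the device
// does not support it, the fallback at the end of the function raises the
// precision, ultimately to full F32.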
CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}

TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}

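// Putting GetPrecision() and GetStorageTypeFromOptions() together (a sketch,
// not normative): for usage = FAST_SINGLE_ANSWER with priority1 = MIN_LATENCY,
// priority2 = MAX_PRECISION, priority3 = MIN_MEMORY_USAGE, the create info
// gets F32_F16 precision (if supported), the fastest supported storage type
// (latency outranks memory usage), plus the kReduceKernelsCount and
// kFastTuning hints.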
CreateGpuModelInfo GetCreateInfo(const Environment& environment,
                                 const InferenceOptions& options) {
  CreateGpuModelInfo create_info;
  create_info.precision = GetPrecision(environment, options);
  create_info.storage_type = GetStorageTypeFromOptions(environment, options);
  if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
    create_info.hints.Add(ModelHints::kReduceKernelsCount);
    create_info.hints.Add(ModelHints::kFastTuning);
  } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
    create_info.hints.Add(ModelHints::kAllowSpecialKernels);
  }
  if (GetRelativeImportance(options, InferencePriority::MIN_MEMORY_USAGE,
                            InferencePriority::MIN_LATENCY) ==
      PriorityImportance::HIGHER) {
    create_info.hints.Add(ModelHints::kNoWinogradOptimizations);
    create_info.hints.Add(ModelHints::kReuseConvWeights);
  }
  return create_info;
}

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = std::make_unique<InferenceContext>();
    CreateGpuModelInfo create_info = GetCreateInfo(*environment_, options);
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = std::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = std::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        std::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model) {
    context_ = std::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = std::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = std::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        std::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects, to avoid
      // extra synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = std::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = std::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransformsForGpuModel(&model));
    InferenceContext context;
    CreateGpuModelInfo create_info =
        GetCreateInfo(environment_, resolved_options);
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransformsForGpuModel(&model));
    auto builder_impl = std::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = std::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data will be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

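// A minimal usage sketch (illustrative only; assumes a populated GraphFloat32
// `graph` and a caller that returns absl::Status):
//
//   std::unique_ptr<InferenceEnvironment> env;
//   InferenceEnvironmentProperties properties;
//   RETURN_IF_ERROR(
//       NewInferenceEnvironment(InferenceEnvironmentOptions(), &env,
//                               &properties));
//   InferenceOptions options;
//   options.usage = InferenceUsage::FAST_SINGLE_ANSWER;
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, std::move(graph),
//                                            &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   RETURN_IF_ERROR(runner->Run());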
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = std::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite