/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#include <utility>

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif

#include <algorithm>
#include <cstring>
#include <memory>
#include <variant>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = std::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
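    // Prefer a direct GL <-> CL buffer copy when the external object is a
    // user-provided GL buffer and such a copy is supported; otherwise fall
    // back to converters produced by the converter builder.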
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = std::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = std::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
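    // No user-provided object: allocate backing storage here, either host
    // memory for CPU_MEMORY or a CL buffer/texture described by a BHWC
    // tensor descriptor.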
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        TensorStorageType storage_type = ToTensorStorageType(
            d.object_def.object_type, d.object_def.data_layout);
        TensorDescriptor desc = CreateBhwcTensorDescriptor(
            d.object_def.data_type, storage_type, shape);
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It handles cases where one-step conversion is not supported,
// for example:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = std::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

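  // Copies go through the intermediate buffer: the inner tie runs first when
  // writing out to the external object, the outer tie runs first when
  // reading from it.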
  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
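  // Builds two DefaultTensorTie definitions that share an intermediate OpenCL
  // buffer: the outer tie converts between the external object and that
  // buffer, the inner tie between the buffer and the internal tensor.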
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        std::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = std::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = std::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
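    // A different GL buffer was attached: drop the CL view of the previous
    // buffer and create a new CL memory object shared with the new one.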
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = std::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    return inputs_[index]->CopyFromExternalObject();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index."));
    }
    return outputs_[index]->CopyToExternalObject();
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (const auto& input : inputs_) {
      RETURN_IF_ERROR(input->CopyFromExternalObject());
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    bool has_async_copies = false;
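    // Copy all outputs to their external objects. Copies into CPU memory are
    // enqueued asynchronously, so the command queue is drained below before
    // returning when any output lives in CPU memory.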
    for (const auto& output : outputs_) {
      RETURN_IF_ERROR(output->CopyToExternalObject());
      if (output->def().external_def.object_def.object_type ==
          ObjectType::CPU_MEMORY) {
        has_async_copies = true;
      }
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    if (has_async_copies) {
      RETURN_IF_ERROR(queue_->WaitForCompletion());
    }
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
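  // The rank of MAX_PRECISION among the user-specified priorities selects the
  // precision: first -> F32, second -> F32_F16, otherwise F16.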
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}

TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

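  // Return the first preferred storage type that the device supports.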
  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}

CreateGpuModelInfo GetCreateInfo(const Environment& environment,
                                 const InferenceOptions& options) {
  CreateGpuModelInfo create_info;
  create_info.precision = GetPrecision(environment, options);
  create_info.storage_type = GetStorageTypeFromOptions(environment, options);
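  // Usage-based hints: FAST_SINGLE_ANSWER favors quick tuning and fewer
  // kernels, SUSTAINED_SPEED allows specialized kernels.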
  if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
    create_info.hints.Add(ModelHints::kReduceKernelsCount);
    create_info.hints.Add(ModelHints::kFastTuning);
  } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
    create_info.hints.Add(ModelHints::kAllowSpecialKernels);
  }
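  // When memory usage outranks latency, skip Winograd transformations and
  // reuse convolution weights to reduce the memory footprint.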
  if (GetRelativeImportance(options, InferencePriority::MIN_MEMORY_USAGE,
                            InferencePriority::MIN_LATENCY) ==
      PriorityImportance::HIGHER) {
    create_info.hints.Add(ModelHints::kNoWinogradOptimizations);
    create_info.hints.Add(ModelHints::kReuseConvWeights);
  }
  return create_info;
}

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = std::make_unique<InferenceContext>();
    CreateGpuModelInfo create_info = GetCreateInfo(*environment_, options);
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = std::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = std::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        std::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model) {
    context_ = std::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = std::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = std::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        std::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid
      // extra synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = std::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = std::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
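    // Wrap the caller-provided device if one was given; otherwise pick a
    // default GPU device.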
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
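    // Reuse a caller-provided OpenCL context when available; otherwise create
    // one, sharing it with the EGL context when GL interop is requested and
    // supported.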
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransformsForGpuModel(&model));
    InferenceContext context;
    CreateGpuModelInfo create_info =
        GetCreateInfo(environment_, resolved_options);
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransformsForGpuModel(&model));
    auto builder_impl = std::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = std::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data would be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = std::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite