/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_

#ifdef CL_DELEGATE_NO_GL
#define EGL_NO_PROTOTYPES
#endif

#include <EGL/egl.h>

#include <cstdint>
#include <memory>
#include <vector>

#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/api.h"
#include "tensorflow/lite/delegates/gpu/common/model.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

// Usage example:
//
//   InferenceEnvironmentOptions env_options;
//   std::unique_ptr<InferenceEnvironment> env;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env,
//                                           /*properties=*/nullptr));
//
//   InferenceOptions options;
//
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, model, &builder));
//   // Now the builder is ready to prepare an inference runner.
//
// -----------------
// Supported formats
// -----------------
//
// The OpenCL implementation uses 2D textures as the primary tensor storage
// format.
// A tensor in HWDC4 layout is {TEXTURE_2D, RGBA, width := W*D, height := H}.
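//
// A worked example of the mapping above (assuming D is the number of
// 4-channel slices, i.e. ceil(C/4)): a tensor with H=8, W=6, C=10 has
// D = 3, so it maps to an RGBA TEXTURE_2D of width 6*3 = 18 and height 8.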
//

namespace tflite {
namespace gpu {
namespace cl {

struct InferenceOptions : public tflite::gpu::InferenceOptions {};

// Describes properties of the OpenCL inference environment.
struct InferenceEnvironmentProperties {
  bool is_opencl_available = false;

  // GL objects (buffers and textures) can be shared with the CL context.
  bool is_gl_sharing_supported = false;

  // Indicates whether fast GL->CL synchronization is supported.
  bool is_gl_to_cl_fast_sync_supported = false;

  // Indicates whether fast CL->GL synchronization is supported.
  bool is_cl_to_gl_fast_sync_supported = false;
};

// The environment manages all resources that need to stay alive for as long
// as any inference is running on the OpenCL backend.
class InferenceEnvironment {
 public:
  virtual ~InferenceEnvironment() {}

  // Converts GraphFloat32 into an intermediate, device-specific
  // representation. The resulting serialized_model is specific to the device
  // and InferenceOptions, and cannot be used with another device or different
  // InferenceOptions. Loading a serialized_model is much faster than loading
  // a GraphFloat32. serialized_model must be used with the corresponding
  // NewInferenceBuilder overload (see below).
  // Normally, BuildSerializedModel needs to be called again whenever the
  // model or the OS GPU driver is updated.
  virtual absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) = 0;

  // A serialized model can become invalid when the environment changes. In
  // that case this call will fail, and the model must be regenerated (with
  // BuildSerializedModel).
  virtual absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) = 0;

  virtual absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) = 0;

  // Returns an opaque binary blob that contains a collection of OpenCL
  // kernels already compiled and present in the cache. The returned data can
  // be reused later to speed up compilation when a new environment is created
  // for the same set of models.
  // The returned data is valid only on the same device; otherwise it will be
  // incompatible and discarded.
  virtual std::vector<uint8_t> GetSerializedBinaryCache() const = 0;
};
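
// A minimal sketch of the serialized-model path (assuming `env` was created
// with NewInferenceEnvironment, `options` is an InferenceOptions, and `model`
// is a valid GraphFloat32; persisting the bytes between runs is up to the
// caller):
//
//   std::vector<uint8_t> serialized_model;
//   RETURN_IF_ERROR(env->BuildSerializedModel(options, std::move(model),
//                                             &serialized_model));
//   // ... optionally store serialized_model and reload it later ...
//
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(
//       absl::MakeConstSpan(serialized_model), &builder));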

struct InferenceEnvironmentOptions {
  // If any of these objects are set, the created environment will use them
  // instead of creating/choosing its own instances.
  cl_device_id device = nullptr;
  cl_context context = nullptr;
  cl_command_queue command_queue = nullptr;

  // Whenever an input and/or output is a GL object, the EGL display and
  // context must be set to create a GL-aware OpenCL context. Do not set these
  // variables when GL interoperability is not needed.
  // It is an error to set egl_display, egl_context AND context at the same
  // time. If egl_display and egl_context are set, they will be used to create
  // a GL-aware CL context.
  EGLDisplay egl_display = EGL_NO_DISPLAY;
  EGLContext egl_context = EGL_NO_CONTEXT;

  // Should contain data returned from the
  // InferenceEnvironment::GetSerializedBinaryCache method.
  // Invalid or incompatible data will be discarded. A compiled binary may
  // become incompatible when the GPU driver is updated.
  absl::Span<const uint8_t> serialized_binary_cache;

  bool IsGlAware() const {
    return egl_context != EGL_NO_CONTEXT && egl_display != EGL_NO_DISPLAY;
  }
};
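
// A minimal sketch of the kernel-cache round trip (assuming `env` is an
// existing InferenceEnvironment; persisting `cache` between runs is up to
// the caller, and invalid or incompatible bytes are simply discarded):
//
//   // First run: after building, save the compiled kernels.
//   std::vector<uint8_t> cache = env->GetSerializedBinaryCache();
//
//   // Later run on the same device/driver: seed the new environment. Note
//   // that the Span only references `cache`, which must stay alive until
//   // the new environment has been created.
//   InferenceEnvironmentOptions env_options;
//   env_options.serialized_binary_cache = absl::MakeConstSpan(cache);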

// Creates a new OpenCL environment that needs to stay alive until all
// inference runners are destroyed.
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties /* optional */);
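
// A minimal sketch of environment creation with a properties check (assuming
// `env_options` is an InferenceEnvironmentOptions populated by the caller;
// the properties argument may also be nullptr when not needed):
//
//   std::unique_ptr<InferenceEnvironment> env;
//   InferenceEnvironmentProperties properties;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env, &properties));
//   if (!properties.is_gl_sharing_supported) {
//     // Fall back to CPU<->GPU copies instead of GL interop.
//   }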

class CLInferenceRunner : public ::tflite::gpu::InferenceRunner {
 public:
  // RunWithoutExternalBufferCopy provides a contract where the user of this
  // interface does not need:
  //    a. inputs to be copied to the internal GPU buffer from the external
  //       CPU input buffer, or
  //    b. outputs to be copied from the internal GPU buffer to the external
  //       CPU buffer.
  //
  // The user of this interface is responsible for copying the inputs before
  // running the GPU kernels, and for copying the outputs afterwards, using
  // the other interfaces provided here.
  virtual absl::Status RunWithoutExternalBufferCopy() = 0;

  // Copies from the external input tensor (normally a CPU buffer) to the
  // internal OpenCL buffer. The call only guarantees that the command has
  // been queued. The caller is expected to hold a copy of the queue and wait
  // for completion if the external buffer is a CPU buffer.
  virtual absl::Status CopyFromExternalInput(int index) = 0;

  // Copies from the internal output OpenCL buffer to the external output
  // tensor. The call only guarantees that the command has been queued. The
  // caller is expected to hold a copy of the queue and wait for completion
  // if the external buffer is a CPU buffer.
  virtual absl::Status CopyToExternalOutput(int index) = 0;
};
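
// A minimal sketch of the manual-copy flow (assuming `runner` is a
// CLInferenceRunner with one input and one output, and that the caller waits
// on the command queue when CPU buffers are involved):
//
//   RETURN_IF_ERROR(runner->CopyFromExternalInput(0));
//   RETURN_IF_ERROR(runner->RunWithoutExternalBufferCopy());
//   RETURN_IF_ERROR(runner->CopyToExternalOutput(0));
//   // Wait for the queued copy to complete before reading the CPU output.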

}  // namespace cl
}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_API_H_