/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_API_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_API_H_

// Usage example:
//
//   // Builder is created from a model using GPU-specific parameters.
//   std::unique_ptr<InferenceBuilder> builder = ...;
//
//   // Input data comes from a texture; output data goes to CPU memory.
//   builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::DHWC4,
//                                  ObjectType::OPENGL_TEXTURE, true});
//   builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC,
//                                  ObjectType::CPU_MEMORY, false});
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));  // May take significant time.
//   RETURN_IF_ERROR(
//       runner->SetInputObject(0, OpenGlTexture{texture_id, texture_format}));
//   RETURN_IF_ERROR(runner->Run());

#include <cstdint>
#include <memory>
#include <variant>
#include <vector>

#include "absl/types/span.h"
#include "absl/types/variant.h"
#include <CL/cl.h>
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/util.h"
#include "vulkan/vulkan.h"  // from @vulkan_headers

#define GL_NO_PROTOTYPES
#define EGL_NO_PROTOTYPES
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
#undef GL_NO_PROTOTYPES
#undef EGL_NO_PROTOTYPES

namespace tflite {
namespace gpu {

// Common abbreviations:
//   B  - batch
//   H  - height
//   W  - width
//   C  - channels
//   D  - depth := DivideRoundUp(C, 4)
//   C4 - the constant 4
enum class DataLayout {
  UNKNOWN,
  BHWC,
  DHWC4,
  HWDC4,
  HDWC4,
};
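
// Illustration (not part of the API): with C = 10 channels,
// D = DivideRoundUp(10, 4) = 3, i.e. the channels are packed into three
// groups of four (the last group padded); the *C4 layouts above store
// channels in such groups of four.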

enum class ObjectType {
  UNKNOWN,
  OPENGL_SSBO,
  OPENGL_TEXTURE,
  CPU_MEMORY,
  OPENCL_TEXTURE,
  OPENCL_BUFFER,
  VULKAN_BUFFER,
  VULKAN_TEXTURE
};

struct OpenGlBuffer {
  OpenGlBuffer() = default;
  explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}

  GLuint id = GL_INVALID_INDEX;
};

struct OpenGlTexture {
  OpenGlTexture() = default;
  OpenGlTexture(GLuint new_id, GLenum new_format)
      : id(new_id), format(new_format) {}

  GLuint id = GL_INVALID_INDEX;
  GLenum format = GL_INVALID_ENUM;
};

struct OpenClBuffer {
  OpenClBuffer() = default;
  explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {}

  cl_mem memobj = nullptr;
};

struct OpenClTexture {
  OpenClTexture() = default;
  explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {}

  cl_mem memobj = nullptr;
  // TODO(akulik): should it specify texture format?
};

struct VulkanBuffer {
  VulkanBuffer() = default;
  explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
                        VkDeviceMemory memory_, VkDeviceSize offset_)
      : buffer(buffer_), size(size_), memory(memory_), offset(offset_) {}

  VkBuffer buffer;
  VkDeviceSize size;
  VkDeviceMemory memory;
  VkDeviceSize offset;
};

struct VulkanTexture {
  VulkanTexture() = default;
  explicit VulkanTexture(VkDeviceMemory new_memory) : memory(new_memory) {}

  VkImage image;
  VkImageView image_view;
  VkFormat format;
  VkExtent3D extent;
  VkDeviceMemory memory;
  VkDeviceSize offset;
};

struct VulkanMemory {
  VulkanMemory() = default;
  explicit VulkanMemory(VkDeviceMemory new_memory) : memory(new_memory) {}

  VkDeviceMemory memory;
  VkDeviceSize size;
  VkDeviceSize offset;
};

struct CpuMemory {
  CpuMemory() = default;
  CpuMemory(void* new_data, size_t new_size_bytes)
      : data(new_data), size_bytes(new_size_bytes) {}

  void* data = nullptr;
  size_t size_bytes = 0;
};

template <typename T>
inline CpuMemory MakeCpuMemory(absl::Span<T> t) {
  CpuMemory m;
  m.data = t.data();
  m.size_bytes = t.size() * sizeof(T);
  return m;
}

template <typename T>
inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t) {
  CpuMemory m;
  m.data = const_cast<T*>(t.data());
  m.size_bytes = t.size() * sizeof(T);
  return m;
}
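
// A minimal sketch of wrapping host buffers (buffer names are hypothetical;
// neither helper copies data, so the spans must outlive any use):
//   std::vector<float> input = ...;
//   const std::vector<float>& weights = ...;
//   CpuMemory in = MakeCpuMemory(absl::MakeSpan(input));
//   CpuMemory ro = MakeReadableCpuMemory(absl::MakeConstSpan(weights));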

// Defines object representation.
struct ObjectDef {
  DataType data_type = DataType::UNKNOWN;
  DataLayout data_layout = DataLayout::UNKNOWN;
  ObjectType object_type = ObjectType::UNKNOWN;

  // If true, the object is managed externally and must be provided to
  // InferenceRunner by the user before running inference.
  //
  // User-provided objects will not be re-used internally for any purpose
  // (e.g. to lower overall memory usage).
  bool user_provided = false;

  bool operator==(const ObjectDef& other) const {
    return data_type == other.data_type && data_layout == other.data_layout &&
           object_type == other.object_type &&
           user_provided == other.user_provided;
  }
};

bool IsValid(const ObjectDef& def);

struct Dimensions {
  Dimensions() : b(1), h(1), w(1), c(1) {}

  Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels)
      : b(batch), h(height), w(width), c(channels) {}

  int32_t d() const { return DivideRoundUp(c, 4); }

  int32_t product() const { return b * h * w * c; }

  bool operator==(const Dimensions& other) const {
    return b == other.b && h == other.h && w == other.w && c == other.c;
  }

  int32_t b;
  int32_t h;
  int32_t w;
  int32_t c;
};
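
// For example (illustrative values only): Dimensions(1, 224, 224, 3) has
// d() == DivideRoundUp(3, 4) == 1 and
// product() == 1 * 224 * 224 * 3 == 150528.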

// Connects a tensor shape with the corresponding object definition.
struct TensorObjectDef {
  // Dimensions semantics are defined by the corresponding DataLayout.
  Dimensions dimensions;
  ObjectDef object_def;

  bool operator==(const TensorObjectDef& other) const {
    return dimensions == other.dimensions && object_def == other.object_def;
  }
};

// @return true if tensor object def is defined.
bool IsValid(const TensorObjectDef& def);

// @return the number of elements in a tensor object.
uint32_t NumElements(const TensorObjectDef& def);

using TensorObject =
    std::variant<std::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
                 OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
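
// A minimal sketch of working with the variant (names are hypothetical, and
// the GetType mapping shown is the natural one implied by the names):
//   TensorObject obj =
//       CpuMemory{buffer.data(), buffer.size() * sizeof(float)};
//   GetType(obj);                                  // ObjectType::CPU_MEMORY
//   IsObjectPresent(ObjectType::CPU_MEMORY, obj);  // true
//   std::get<CpuMemory>(obj).size_bytes;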

// @return true if object is set and corresponding values are defined.
bool IsValid(const TensorObjectDef& def, const TensorObject& object);

ObjectType GetType(const TensorObject& object);

// @return true if the corresponding object is set for the given type.
bool IsObjectPresent(ObjectType type, const TensorObject& obj);

// @return true if the corresponding object has already been initialized and
// assigned with a specific ObjectType.
bool IsObjectInitialized(const TensorObject& obj);

class InferenceRunner;

// Allows inspecting and changing input and output definitions before a graph
// is prepared for inference.
class InferenceBuilder {
 public:
  virtual ~InferenceBuilder() {}

  // Returns inference graph inputs and outputs definitions.
  virtual std::vector<TensorObjectDef> inputs() const = 0;
  virtual std::vector<TensorObjectDef> outputs() const = 0;

  // Sets a new shape for the input if the underlying implementation and graph
  // structure allow dynamic tensors.
  virtual absl::Status SetInputShape(int index,
                                     const Dimensions& dimensions) = 0;

  // Updates object definitions for the given index. An implementation may
  // allow different layouts and/or data type conversions between objects
  // defined in the graph and the given objects. For example:
  //   Input '0' is DataType::FLOAT32, DataLayout::BHWC.
  //   The user, however, has an input in DataType::FLOAT16, DataLayout::DHWC4.
  //   An implementation may allow this transformation to happen automatically
  //   under the hood.
  virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0;
  virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0;
  virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def) {
    auto input_defs = inputs();
    for (int i = 0; i < input_defs.size(); ++i) {
      RETURN_IF_ERROR(SetInputObjectDef(i, def));
    }
    return absl::OkStatus();
  }
  virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def) {
    auto output_defs = outputs();
    for (int i = 0; i < output_defs.size(); ++i) {
      RETURN_IF_ERROR(SetOutputObjectDef(i, def));
    }
    return absl::OkStatus();
  }

  // Creates a new instance of the inference runner. The InferenceBuilder
  // remains valid and can be used to create another inference runner if
  // needed.
  //
  // This method may take significant time to prepare a new inference runner;
  // for example, it may need to compile OpenGL shaders.
  virtual absl::Status Build(std::unique_ptr<InferenceRunner>* runner) = 0;
};
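
// A minimal sketch of configuring all I/O at once (assumes a `builder`
// created elsewhere, as in the usage example at the top of this file):
//   ObjectDef cpu_def;
//   cpu_def.data_type = DataType::FLOAT32;
//   cpu_def.data_layout = DataLayout::BHWC;
//   cpu_def.object_type = ObjectType::CPU_MEMORY;
//   cpu_def.user_provided = true;
//   RETURN_IF_ERROR(builder->SetAllInputObjectDefsTo(cpu_def));
//   RETURN_IF_ERROR(builder->SetAllOutputObjectDefsTo(cpu_def));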

// Runs prepared inference. Every object marked as external needs to be set
// prior to calling the Run method.
class InferenceRunner {
 public:
  virtual ~InferenceRunner() {}

  // Returns inference graph inputs and outputs definitions.
  virtual std::vector<TensorObjectDef> inputs() const = 0;
  virtual std::vector<TensorObjectDef> outputs() const = 0;

  // Getters provide access to the underlying objects for the given index.
  // Setters allow setting or changing an external object for the given index.
  // Note that the object must match the object definition set earlier in
  // InferenceBuilder.

  virtual absl::Status GetInputObject(int index, TensorObject* object) = 0;
  virtual absl::Status GetOutputObject(int index, TensorObject* object) = 0;
  virtual absl::Status SetInputObject(int index, TensorObject object) = 0;
  virtual absl::Status SetOutputObject(int index, TensorObject object) = 0;

  virtual absl::Status Run() = 0;
};
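
// A minimal sketch of one inference pass over user-provided CPU objects
// (span names are hypothetical; both objects were declared user_provided):
//   RETURN_IF_ERROR(runner->SetInputObject(0, MakeCpuMemory(input_span)));
//   RETURN_IF_ERROR(runner->SetOutputObject(0, MakeCpuMemory(output_span)));
//   RETURN_IF_ERROR(runner->Run());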

// Encapsulates compilation/runtime tradeoffs.
enum class InferenceUsage {
  UNKNOWN,

  // InferenceRunner will be used only once. Therefore, it is important to
  // minimize bootstrap time as well.
  FAST_SINGLE_ANSWER,

  // Prefer maximizing throughput. The same inference runner will be used
  // repeatedly on different inputs.
  SUSTAINED_SPEED,
};

// Defines aspects to control while instantiating a runner.
enum class InferencePriority {
  UNKNOWN,

  AUTO,

  MIN_LATENCY,

  MAX_PRECISION,

  MIN_MEMORY_USAGE,
};

struct InferenceOptions {
  InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED;

  // Ordered priorities provide a better understanding of the desired
  // semantics, where priority(n) is more important than priority(n+1).
  // AUTO priority is needed when a single priority is the most important
  // factor. For example, priority1 = InferencePriority::MIN_LATENCY and
  // leaving everything else to AUTO would result in a configuration that
  // achieves maximum performance.
  //
  // AUTO priority can only be used when higher priorities are fully specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  InferencePriority priority1 = InferencePriority::MAX_PRECISION;

  InferencePriority priority2 = InferencePriority::AUTO;

  InferencePriority priority3 = InferencePriority::AUTO;
};
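
// A minimal sketch of a valid latency-first configuration (mirrors the first
// VALID example above):
//   InferenceOptions options;
//   options.usage = InferenceUsage::SUSTAINED_SPEED;
//   options.priority1 = InferencePriority::MIN_LATENCY;
//   options.priority2 = InferencePriority::AUTO;
//   options.priority3 = InferencePriority::AUTO;
//   // IsValid(options) should hold; ResolveAutoPriority(&options) makes the
//   // AUTO slots explicit.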

// Returns a position number for the priority. If the priority is missing,
// returns 'max num priorities + 1'.
int GetPosition(const InferenceOptions& options, InferencePriority p);
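
// For example (assuming positions are 1-based, matching priority1..priority3):
// with the latency-first options above, GetPosition(options, MIN_LATENCY)
// would be 1, and a priority not listed at all would map to 4.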

// Returns true if the options are valid.
bool IsValid(const InferenceOptions& options);

// Resolves AUTO priorities and specifies them explicitly.
// Note that no one should assume these mappings will not change. This
// function is declared here only for code re-use; by no means should it be
// treated as the canonical way to resolve AUTO.
void ResolveAutoPriority(InferenceOptions* options);

enum class PriorityImportance {
  UNKNOWN,
  HIGHER,
  LOWER,
};

// If neither p1 nor p2 is present in the options, returns UNKNOWN.
// If p1 is present but p2 is not, returns HIGHER.
// If p2 is present but p1 is not, returns LOWER.
// If both are present, returns HIGHER when p1 is more important and LOWER
// otherwise.
PriorityImportance GetRelativeImportance(const InferenceOptions& options,
                                         InferencePriority p1,
                                         InferencePriority p2);
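
// For example, with the latency-first options above, MIN_LATENCY is present
// while MIN_MEMORY_USAGE is not, so
//   GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
//                         InferencePriority::MIN_MEMORY_USAGE)
// would return PriorityImportance::HIGHER.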

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_API_H_