xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/cl/gl_interop.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
17 
18 #include <algorithm>
19 #include <variant>
20 
21 #include "absl/strings/str_cat.h"
22 #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
23 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
24 #include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
25 
26 namespace tflite {
27 namespace gpu {
28 namespace cl {
namespace {

// Fallback declarations for toolchains whose EGL headers predate EGL 1.5:
// provide the EGLSync handle type plus the enum values used below.
#ifndef EGL_VERSION_1_5
typedef void* EGLSync;
#define EGL_SYNC_CL_EVENT 0x30FE
#define EGL_CL_EVENT_HANDLE 0x309C
#define EGL_NO_SYNC 0
#endif /* EGL_VERSION_1_5 */

// TODO(b/131897059): replace with 64 version when EGL 1.5 is available.
// it should use KHR_cl_event2 extension. More details are in b/129974818.
// Signature of eglCreateSync (an EGL 1.5 entry point); resolved at runtime
// via eglGetProcAddress in IsEglSyncFromClEventSupported().
using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
    EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);

// Set exactly once by IsEglSyncFromClEventSupported(); stays nullptr when the
// implementation does not support eglCreateSync.
PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;

}  // namespace
46 
CreateEglSyncFromClEvent(cl_event event,EGLDisplay display,EglSync * sync)47 absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
48                                       EglSync* sync) {
49   if (!IsEglSyncFromClEventSupported()) {
50     return absl::UnimplementedError(
51         "CreateEglSyncFromClEvent is not supported");
52   }
53   EGLSync egl_sync;
54   const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
55                                   reinterpret_cast<EGLAttrib>(event), EGL_NONE};
56   RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
57                                       EGL_SYNC_CL_EVENT, attributes));
58   if (egl_sync == EGL_NO_SYNC) {
59     return absl::InternalError("Returned empty EGL sync");
60   }
61   *sync = EglSync(display, egl_sync);
62   return absl::OkStatus();
63 }
64 
IsEglSyncFromClEventSupported()65 bool IsEglSyncFromClEventSupported() {
66   // In C++11, static initializers are guaranteed to be evaluated only once.
67   static bool supported = []() -> bool {
68     // This function requires EGL 1.5 to work
69     g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
70         eglGetProcAddress("eglCreateSync"));
71     // eglQueryString accepts EGL_NO_DISPLAY only starting EGL 1.5
72     if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
73       g_eglCreateSync = nullptr;
74     }
75     return (g_eglCreateSync != nullptr);
76   }();
77   return supported;
78 }
79 
CreateClEventFromEglSync(cl_context context,const EglSync & egl_sync,CLEvent * event)80 absl::Status CreateClEventFromEglSync(cl_context context,
81                                       const EglSync& egl_sync, CLEvent* event) {
82   cl_int error_code;
83   cl_event new_event = clCreateEventFromEGLSyncKHR(
84       context, egl_sync.sync(), egl_sync.display(), &error_code);
85   if (error_code != CL_SUCCESS) {
86     return absl::InternalError(
87         absl::StrCat("Unable to create CL sync from EGL sync. ",
88                      CLErrorCodeToString(error_code)));
89   }
90   *event = CLEvent(new_event);
91   return absl::OkStatus();
92 }
93 
IsClEventFromEglSyncSupported(const CLDevice & device)94 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
95   return device.GetInfo().SupportsExtension("cl_khr_egl_event");
96 }
97 
CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,AccessType access_type,CLContext * context,CLMemory * memory)98 absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
99                                         AccessType access_type,
100                                         CLContext* context, CLMemory* memory) {
101   cl_int error_code;
102   auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
103                                   gl_ssbo_id, &error_code);
104   if (error_code != CL_SUCCESS) {
105     return absl::InternalError(
106         absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
107                      CLErrorCodeToString(error_code)));
108   }
109   *memory = CLMemory(mem, true);
110   return absl::OkStatus();
111 }
112 
CreateClMemoryFromGlTexture(GLenum texture_target,GLuint texture_id,AccessType access_type,CLContext * context,CLMemory * memory)113 absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
114                                          GLuint texture_id,
115                                          AccessType access_type,
116                                          CLContext* context, CLMemory* memory) {
117   cl_int error_code;
118   auto mem =
119       clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
120                             texture_target, 0, texture_id, &error_code);
121   if (error_code != CL_SUCCESS) {
122     return absl::InternalError(
123         absl::StrCat("Unable to create CL buffer from GL texture. ",
124                      CLErrorCodeToString(error_code)));
125   }
126   *memory = CLMemory(mem, true);
127   return absl::OkStatus();
128 }
129 
IsGlSharingSupported(const CLDevice & device)130 bool IsGlSharingSupported(const CLDevice& device) {
131   return clCreateFromGLBuffer && clCreateFromGLTexture &&
132          device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
133 }
134 
~AcquiredGlObjects()135 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
136 
Acquire(const std::vector<cl_mem> & memory,cl_command_queue queue,const std::vector<cl_event> & wait_events,CLEvent * acquire_event,AcquiredGlObjects * objects)137 absl::Status AcquiredGlObjects::Acquire(
138     const std::vector<cl_mem>& memory, cl_command_queue queue,
139     const std::vector<cl_event>& wait_events, CLEvent* acquire_event,
140     AcquiredGlObjects* objects) {
141   if (!memory.empty()) {
142     cl_event new_event;
143     cl_int error_code = clEnqueueAcquireGLObjects(
144         queue, memory.size(), memory.data(), wait_events.size(),
145         wait_events.data(), acquire_event ? &new_event : nullptr);
146     if (error_code != CL_SUCCESS) {
147       return absl::InternalError(absl::StrCat("Unable to acquire GL object. ",
148                                               CLErrorCodeToString(error_code)));
149     }
150     if (acquire_event) {
151       *acquire_event = CLEvent(new_event);
152     }
153     clFlush(queue);
154   }
155   *objects = AcquiredGlObjects(memory, queue);
156   return absl::OkStatus();
157 }
158 
Release(const std::vector<cl_event> & wait_events,CLEvent * release_event)159 absl::Status AcquiredGlObjects::Release(
160     const std::vector<cl_event>& wait_events, CLEvent* release_event) {
161   if (queue_ && !memory_.empty()) {
162     cl_event new_event;
163     cl_int error_code = clEnqueueReleaseGLObjects(
164         queue_, memory_.size(), memory_.data(), wait_events.size(),
165         wait_events.data(), release_event ? &new_event : nullptr);
166     if (error_code != CL_SUCCESS) {
167       return absl::InternalError(absl::StrCat("Unable to release GL object. ",
168                                               CLErrorCodeToString(error_code)));
169     }
170     if (release_event) {
171       *release_event = CLEvent(new_event);
172     }
173     clFlush(queue_);
174     queue_ = nullptr;
175   }
176   return absl::OkStatus();
177 }
178 
// Probes interop capabilities once at construction time and captures raw
// EGL/CL handles from |environment|; the fabric does not own them, so
// |environment| must outlive this object.
GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
                                 Environment* environment)
    : is_egl_sync_supported_(true),
      is_egl_to_cl_mapping_supported_(
          IsClEventFromEglSyncSupported(environment->device())),
      is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
      egl_display_(egl_display),
      context_(environment->context().context()),
      queue_(environment->queue()->queue()) {}
188 
RegisterMemory(cl_mem memory)189 void GlInteropFabric::RegisterMemory(cl_mem memory) {
190   memory_.push_back(memory);
191 }
192 
UnregisterMemory(cl_mem memory)193 void GlInteropFabric::UnregisterMemory(cl_mem memory) {
194   auto it = std::find(memory_.begin(), memory_.end(), memory);
195   if (it != memory_.end()) {
196     memory_.erase(it);
197   }
198 }
199 
// Prepares registered GL objects for use by CL: first synchronizes with the
// GL pipeline using the fastest supported mechanism, then enqueues
// clEnqueueAcquireGLObjects for all registered memory.
absl::Status GlInteropFabric::Start() {
  if (!is_enabled()) {
    // No GL-backed memory registered; nothing to synchronize or acquire.
    return absl::OkStatus();
  }

  // In GL-CL interoperability, we need to make sure GL finished processing of
  // all commands that might affect GL objects. There are a few ways:
  //   a) glFinish
  //      slow, but portable
  //   b) EglSync + ClientWait
  //      faster alternative for glFinish, but still slow as it stalls GPU
  //      pipeline.
  //   c) EglSync->CLEvent or GlSync->CLEvent mapping
  //      Fast, as it allows to map sync to CL event and use it as a dependency
  //      later without stalling GPU pipeline.
  CLEvent inbound_event;
  std::vector<cl_event> inbound_events;
  if (is_egl_sync_supported_) {
    EglSync sync;
    RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
    if (is_egl_to_cl_mapping_supported_) {
      // (c) EglSync->CLEvent or GlSync->CLEvent mapping
      // Flush so the fence is actually submitted to the GL server before CL
      // starts depending on it.
      glFlush();
      RETURN_IF_ERROR(CreateClEventFromEglSync(context_, sync, &inbound_event));
      inbound_events.push_back(inbound_event.event());
    } else {
      // (b) EglSync + ClientWait
      RETURN_IF_ERROR(sync.ClientWait());
    }
  } else {
    // (a) glFinish / GL fence sync
    RETURN_IF_ERROR(gl::GlActiveSyncWait());
  }

  // Acquire all GL objects needed while processing. The inbound events (if
  // any) make the acquire wait for GL completion on the device timeline.
  return AcquiredGlObjects::Acquire(memory_, queue_, inbound_events, nullptr,
                                    &gl_objects_);
}
238 
// Releases the GL objects acquired in Start() and blocks the host until the
// release is complete, so GL can safely use the objects afterwards.
absl::Status GlInteropFabric::Finish() {
  if (!is_enabled()) {
    return absl::OkStatus();
  }
  CLEvent outbound_event;
  RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event));

  // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
  //   EglSync egl_outbound_sync;
  //   RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event.event(),
  //                                            egl_display_,
  //                                            &egl_outbound_sync));
  //   // Instruct GL pipeline to wait until corresponding CL event is signaled.
  //   RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
  //   glFlush();
  // } else {
  //   // Slower option if proper sync is not supported. It is equivalent to
  //   // clFinish, but, hopefully, faster.
  //   outbound_event.Wait();
  // }

  // This slow sync is the only working solution right now. We have to debug why
  // above version is not working fast and reliable.
  // NOTE: Wait() stalls the host until the CL release event signals.
  outbound_event.Wait();
  return absl::OkStatus();
}
265 
GlClBufferCopier(const TensorObjectDef & input_def,const TensorObjectDef & output_def,Environment * environment)266 GlClBufferCopier::GlClBufferCopier(const TensorObjectDef& input_def,
267                                    const TensorObjectDef& output_def,
268                                    Environment* environment) {
269   queue_ = environment->queue();
270   size_in_bytes_ =
271       NumElements(input_def) * SizeOf(input_def.object_def.data_type);
272 }
273 
Convert(const TensorObject & input_obj,const TensorObject & output_obj)274 absl::Status GlClBufferCopier::Convert(const TensorObject& input_obj,
275                                        const TensorObject& output_obj) {
276   if (std::holds_alternative<OpenGlBuffer>(input_obj)) {
277     auto ssbo = std::get_if<OpenGlBuffer>(&input_obj);
278     auto cl_mem = std::get_if<OpenClBuffer>(&output_obj);
279     RETURN_IF_ERROR(
280         TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
281     void* ptr;
282     RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
283                                        GL_SHADER_STORAGE_BUFFER, 0,
284                                        size_in_bytes_, GL_MAP_READ_BIT));
285     RETURN_IF_ERROR(
286         queue_->EnqueueWriteBuffer(cl_mem->memobj, size_in_bytes_, ptr));
287     RETURN_IF_ERROR(
288         TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
289   } else {
290     auto cl_mem = std::get_if<OpenClBuffer>(&input_obj);
291     auto ssbo = std::get_if<OpenGlBuffer>(&output_obj);
292     RETURN_IF_ERROR(
293         TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
294     void* ptr;
295     RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
296                                        GL_SHADER_STORAGE_BUFFER, 0,
297                                        size_in_bytes_, GL_MAP_WRITE_BIT));
298     RETURN_IF_ERROR(
299         queue_->EnqueueReadBuffer(cl_mem->memobj, size_in_bytes_, ptr));
300     RETURN_IF_ERROR(
301         TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
302   }
303   return absl::OkStatus();
304 }
305 
306 }  // namespace cl
307 }  // namespace gpu
308 }  // namespace tflite
309