1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
17
18 #include <algorithm>
19 #include <variant>
20
21 #include "absl/strings/str_cat.h"
22 #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
23 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
24 #include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
25
26 namespace tflite {
27 namespace gpu {
28 namespace cl {
29 namespace {
30
#ifndef EGL_VERSION_1_5
// Minimal fallback definitions when building against pre-1.5 EGL headers so
// the CL<->EGL sync plumbing below still compiles. Constants mirror the
// EGL 1.5 / KHR_cl_event2 definitions.
typedef void* EGLSync;
#define EGL_SYNC_CL_EVENT 0x30FE
#define EGL_CL_EVENT_HANDLE 0x309C
#define EGL_NO_SYNC 0
#endif /* EGL_VERSION_1_5 */

// TODO(b/131897059): replace with 64 version when EGL 1.5 is available.
// it should use KHR_cl_event2 extension. More details are in b/129974818.
// Signature of eglCreateSync (EGL 1.5), resolved at runtime via
// eglGetProcAddress rather than linked statically.
using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
    EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);

// Lazily resolved in IsEglSyncFromClEventSupported(); stays nullptr when the
// fast EGL<->CL sync mapping is unavailable on this platform.
PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;
44
45 } // namespace
46
CreateEglSyncFromClEvent(cl_event event,EGLDisplay display,EglSync * sync)47 absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
48 EglSync* sync) {
49 if (!IsEglSyncFromClEventSupported()) {
50 return absl::UnimplementedError(
51 "CreateEglSyncFromClEvent is not supported");
52 }
53 EGLSync egl_sync;
54 const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
55 reinterpret_cast<EGLAttrib>(event), EGL_NONE};
56 RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
57 EGL_SYNC_CL_EVENT, attributes));
58 if (egl_sync == EGL_NO_SYNC) {
59 return absl::InternalError("Returned empty EGL sync");
60 }
61 *sync = EglSync(display, egl_sync);
62 return absl::OkStatus();
63 }
64
IsEglSyncFromClEventSupported()65 bool IsEglSyncFromClEventSupported() {
66 // In C++11, static initializers are guaranteed to be evaluated only once.
67 static bool supported = []() -> bool {
68 // This function requires EGL 1.5 to work
69 g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
70 eglGetProcAddress("eglCreateSync"));
71 // eglQueryString accepts EGL_NO_DISPLAY only starting EGL 1.5
72 if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
73 g_eglCreateSync = nullptr;
74 }
75 return (g_eglCreateSync != nullptr);
76 }();
77 return supported;
78 }
79
CreateClEventFromEglSync(cl_context context,const EglSync & egl_sync,CLEvent * event)80 absl::Status CreateClEventFromEglSync(cl_context context,
81 const EglSync& egl_sync, CLEvent* event) {
82 cl_int error_code;
83 cl_event new_event = clCreateEventFromEGLSyncKHR(
84 context, egl_sync.sync(), egl_sync.display(), &error_code);
85 if (error_code != CL_SUCCESS) {
86 return absl::InternalError(
87 absl::StrCat("Unable to create CL sync from EGL sync. ",
88 CLErrorCodeToString(error_code)));
89 }
90 *event = CLEvent(new_event);
91 return absl::OkStatus();
92 }
93
IsClEventFromEglSyncSupported(const CLDevice & device)94 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
95 return device.GetInfo().SupportsExtension("cl_khr_egl_event");
96 }
97
CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,AccessType access_type,CLContext * context,CLMemory * memory)98 absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
99 AccessType access_type,
100 CLContext* context, CLMemory* memory) {
101 cl_int error_code;
102 auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
103 gl_ssbo_id, &error_code);
104 if (error_code != CL_SUCCESS) {
105 return absl::InternalError(
106 absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
107 CLErrorCodeToString(error_code)));
108 }
109 *memory = CLMemory(mem, true);
110 return absl::OkStatus();
111 }
112
CreateClMemoryFromGlTexture(GLenum texture_target,GLuint texture_id,AccessType access_type,CLContext * context,CLMemory * memory)113 absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
114 GLuint texture_id,
115 AccessType access_type,
116 CLContext* context, CLMemory* memory) {
117 cl_int error_code;
118 auto mem =
119 clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
120 texture_target, 0, texture_id, &error_code);
121 if (error_code != CL_SUCCESS) {
122 return absl::InternalError(
123 absl::StrCat("Unable to create CL buffer from GL texture. ",
124 CLErrorCodeToString(error_code)));
125 }
126 *memory = CLMemory(mem, true);
127 return absl::OkStatus();
128 }
129
IsGlSharingSupported(const CLDevice & device)130 bool IsGlSharingSupported(const CLDevice& device) {
131 return clCreateFromGLBuffer && clCreateFromGLTexture &&
132 device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
133 }
134
~AcquiredGlObjects()135 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
136
Acquire(const std::vector<cl_mem> & memory,cl_command_queue queue,const std::vector<cl_event> & wait_events,CLEvent * acquire_event,AcquiredGlObjects * objects)137 absl::Status AcquiredGlObjects::Acquire(
138 const std::vector<cl_mem>& memory, cl_command_queue queue,
139 const std::vector<cl_event>& wait_events, CLEvent* acquire_event,
140 AcquiredGlObjects* objects) {
141 if (!memory.empty()) {
142 cl_event new_event;
143 cl_int error_code = clEnqueueAcquireGLObjects(
144 queue, memory.size(), memory.data(), wait_events.size(),
145 wait_events.data(), acquire_event ? &new_event : nullptr);
146 if (error_code != CL_SUCCESS) {
147 return absl::InternalError(absl::StrCat("Unable to acquire GL object. ",
148 CLErrorCodeToString(error_code)));
149 }
150 if (acquire_event) {
151 *acquire_event = CLEvent(new_event);
152 }
153 clFlush(queue);
154 }
155 *objects = AcquiredGlObjects(memory, queue);
156 return absl::OkStatus();
157 }
158
Release(const std::vector<cl_event> & wait_events,CLEvent * release_event)159 absl::Status AcquiredGlObjects::Release(
160 const std::vector<cl_event>& wait_events, CLEvent* release_event) {
161 if (queue_ && !memory_.empty()) {
162 cl_event new_event;
163 cl_int error_code = clEnqueueReleaseGLObjects(
164 queue_, memory_.size(), memory_.data(), wait_events.size(),
165 wait_events.data(), release_event ? &new_event : nullptr);
166 if (error_code != CL_SUCCESS) {
167 return absl::InternalError(absl::StrCat("Unable to release GL object. ",
168 CLErrorCodeToString(error_code)));
169 }
170 if (release_event) {
171 *release_event = CLEvent(new_event);
172 }
173 clFlush(queue_);
174 queue_ = nullptr;
175 }
176 return absl::OkStatus();
177 }
178
// Caches sync capabilities and raw CL handles from the environment.
// NOTE(review): is_egl_sync_supported_ is hard-coded to true here —
// presumably actual EGL fence support is assumed or probed elsewhere; confirm.
GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
                                 Environment* environment)
    : is_egl_sync_supported_(true),
      // EGL sync -> CL event mapping needs cl_khr_egl_event on the device.
      is_egl_to_cl_mapping_supported_(
          IsClEventFromEglSyncSupported(environment->device())),
      // CL event -> EGL sync mapping needs EGL 1.5 eglCreateSync.
      is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
      egl_display_(egl_display),
      context_(environment->context().context()),
      queue_(environment->queue()->queue()) {}
188
RegisterMemory(cl_mem memory)189 void GlInteropFabric::RegisterMemory(cl_mem memory) {
190 memory_.push_back(memory);
191 }
192
UnregisterMemory(cl_mem memory)193 void GlInteropFabric::UnregisterMemory(cl_mem memory) {
194 auto it = std::find(memory_.begin(), memory_.end(), memory);
195 if (it != memory_.end()) {
196 memory_.erase(it);
197 }
198 }
199
// Synchronizes with the GL pipeline using the fastest mechanism available,
// then acquires all registered GL-shared objects on the CL queue so CL
// kernels may safely use them until Finish() is called.
absl::Status GlInteropFabric::Start() {
  if (!is_enabled()) {
    return absl::OkStatus();
  }

  // In GL-CL interoperability, we need to make sure GL finished processing of
  // all commands that might affect GL objects. There are a few ways:
  // a) glFinish
  //    slow, but portable
  // b) EglSync + ClientWait
  //    faster alternative for glFinish, but still slow as it stalls GPU
  //    pipeline.
  // c) EglSync->CLEvent or GlSync->CLEvent mapping
  //    Fast, as it allows to map sync to CL event and use it as a dependency
  //    later without stalling GPU pipeline.
  CLEvent inbound_event;
  std::vector<cl_event> inbound_events;
  if (is_egl_sync_supported_) {
    EglSync sync;
    RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
    if (is_egl_to_cl_mapping_supported_) {
      // (c) EglSync->CLEvent or GlSync->CLEvent mapping
      // glFlush submits the fence to the GL command stream before CL is
      // asked to wait on it — presumably required for the mapping to signal.
      glFlush();
      RETURN_IF_ERROR(CreateClEventFromEglSync(context_, sync, &inbound_event));
      // inbound_event stays alive until Acquire() below consumes the handle.
      inbound_events.push_back(inbound_event.event());
    } else {
      // (b) EglSync + ClientWait: blocks this thread until GL is done.
      RETURN_IF_ERROR(sync.ClientWait());
    }
  } else {
    // (a) glFinish / GL fence sync
    RETURN_IF_ERROR(gl::GlActiveSyncWait());
  }

  // Acquire all GL objects needed while processing.
  return AcquiredGlObjects::Acquire(memory_, queue_, inbound_events, nullptr,
                                    &gl_objects_);
}
238
// Releases the GL objects acquired in Start() and waits until the release is
// complete so GL may safely use them again. Uses a blocking host-side wait;
// the faster CL->EGL sync hand-off below is kept commented out until it is
// proven reliable.
absl::Status GlInteropFabric::Finish() {
  if (!is_enabled()) {
    return absl::OkStatus();
  }
  CLEvent outbound_event;
  RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event));

  // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
  //   EglSync egl_outbound_sync;
  //   RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event.event(),
  //                                            egl_display_,
  //                                            &egl_outbound_sync));
  //   // Instruct GL pipeline to wait until corresponding CL event is signaled.
  //   RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
  //   glFlush();
  // } else {
  //   // Slower option if proper sync is not supported. It is equivalent to
  //   // clFinish, but, hopefully, faster.
  //   outbound_event.Wait();
  // }

  // This slow sync is the only working solution right now. We have to debug
  // why the version above is not working fast and reliably.
  outbound_event.Wait();
  return absl::OkStatus();
}
265
// Copies between a GL SSBO and a CL buffer (see Convert()). The transfer
// size is derived solely from the *input* definition; output_def is currently
// unused — presumably input and output describe equally-sized buffers.
GlClBufferCopier::GlClBufferCopier(const TensorObjectDef& input_def,
                                   const TensorObjectDef& output_def,
                                   Environment* environment) {
  queue_ = environment->queue();
  // Total transfer size in bytes: element count times element width.
  size_in_bytes_ =
      NumElements(input_def) * SizeOf(input_def.object_def.data_type);
}
273
Convert(const TensorObject & input_obj,const TensorObject & output_obj)274 absl::Status GlClBufferCopier::Convert(const TensorObject& input_obj,
275 const TensorObject& output_obj) {
276 if (std::holds_alternative<OpenGlBuffer>(input_obj)) {
277 auto ssbo = std::get_if<OpenGlBuffer>(&input_obj);
278 auto cl_mem = std::get_if<OpenClBuffer>(&output_obj);
279 RETURN_IF_ERROR(
280 TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
281 void* ptr;
282 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
283 GL_SHADER_STORAGE_BUFFER, 0,
284 size_in_bytes_, GL_MAP_READ_BIT));
285 RETURN_IF_ERROR(
286 queue_->EnqueueWriteBuffer(cl_mem->memobj, size_in_bytes_, ptr));
287 RETURN_IF_ERROR(
288 TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
289 } else {
290 auto cl_mem = std::get_if<OpenClBuffer>(&input_obj);
291 auto ssbo = std::get_if<OpenGlBuffer>(&output_obj);
292 RETURN_IF_ERROR(
293 TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
294 void* ptr;
295 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
296 GL_SHADER_STORAGE_BUFFER, 0,
297 size_in_bytes_, GL_MAP_WRITE_BIT));
298 RETURN_IF_ERROR(
299 queue_->EnqueueReadBuffer(cl_mem->memobj, size_in_bytes_, ptr));
300 RETURN_IF_ERROR(
301 TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
302 }
303 return absl::OkStatus();
304 }
305
306 } // namespace cl
307 } // namespace gpu
308 } // namespace tflite
309