xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/vulkan/test_vulkan_interop_buffer.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <vulkan_interop_common.hpp>
18 #include <vulkan_wrapper.hpp>
19 #include <CL/cl.h>
20 #include <CL/cl_ext.h>
21 #include <assert.h>
22 #include <vector>
23 #include <iostream>
24 #include <memory>
25 #include <string.h>
26 #include "harness/errorHelpers.h"
27 
28 #define MAX_BUFFERS 5
29 #define MAX_IMPORTS 5
30 #define BUFFERSIZE 3000
31 static cl_uchar uuid[CL_UUID_SIZE_KHR];
32 static cl_device_id deviceId = NULL;
33 
34 namespace {
35 struct Params
36 {
37     uint32_t numBuffers;
38     uint32_t bufferSize;
39     uint32_t interBufferOffset;
40 };
41 }
42 
43 const char *kernel_text_numbuffer_1 = " \
44 __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) {  \n\
45     int gid = get_global_id(0); \n\
46     if (gid < bufferSize) { \n\
47         a[gid]++; \n\
48     } \n\
49 }";
50 
51 const char *kernel_text_numbuffer_2 = " \
52 __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) {  \n\
53     int gid = get_global_id(0); \n\
54     if (gid < bufferSize) { \n\
55         a[gid]++; \n\
56         b[gid]++;\n\
57     } \n\
58 }";
59 
60 const char *kernel_text_numbuffer_4 = " \
61 __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) {  \n\
62     int gid = get_global_id(0); \n\
63     if (gid < bufferSize) { \n\
64         a[gid]++;\n\
65         b[gid]++; \n\
66         c[gid]++; \n\
67         d[gid]++; \n\
68     } \n\
69 }";
70 
71 
72 const char *kernel_text_verify = " \
73 __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err)     \n\
74 {                                                                                         \n\
75     int idx = get_global_id(0);                                                           \n\
76     if ((idx < size) && (*err == 0)) { \n\
77     if (ptr[idx] != expVal){           \n\
78             *err = 1;                  \n\
79            }                           \n\
80         }                              \n\
81 }";
82 
run_test_with_two_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,bool use_fence)83 int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
84                             cl_command_queue &cmd_queue2, cl_kernel *kernel,
85                             cl_kernel &verify_kernel, VulkanDevice &vkDevice,
86                             uint32_t numBuffers, uint32_t bufferSize,
87                             bool use_fence)
88 {
89     int err = CL_SUCCESS;
90     size_t global_work_size[1];
91     uint8_t *error_2;
92     cl_mem error_1;
93     cl_kernel update_buffer_kernel;
94     cl_kernel kernel_cq;
95     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
96     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
97     const char *program_source_const = kernel_text_numbuffer_2;
98     size_t program_source_length = strlen(program_source_const);
99     cl_program program = clCreateProgramWithSource(
100         context, 1, &program_source_const, &program_source_length, &err);
101     err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
102     if (err != CL_SUCCESS)
103     {
104         print_error(err, "Error: Failed to build program \n");
105         return err;
106     }
107     // create the kernel
108     kernel_cq = clCreateKernel(program, "clUpdateBuffer", &err);
109     if (err != CL_SUCCESS)
110     {
111         print_error(err, "clCreateKernel failed \n");
112         return err;
113     }
114 
115     const std::vector<VulkanExternalMemoryHandleType>
116         vkExternalMemoryHandleTypeList =
117             getSupportedVulkanExternalMemoryHandleTypeList();
118     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
119         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
120     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
121     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
122     std::shared_ptr<VulkanFence> fence = nullptr;
123 
124     VulkanQueue &vkQueue = vkDevice.getQueue();
125 
126     std::vector<char> vkBufferShader = readFile("buffer.spv");
127 
128     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
129     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
130     vkDescriptorSetLayoutBindingList.addBinding(
131         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
132     vkDescriptorSetLayoutBindingList.addBinding(
133         1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
134     VulkanDescriptorSetLayout vkDescriptorSetLayout(
135         vkDevice, vkDescriptorSetLayoutBindingList);
136     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
137     VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
138                                             vkBufferShaderModule);
139 
140     VulkanDescriptorPool vkDescriptorPool(vkDevice,
141                                           vkDescriptorSetLayoutBindingList);
142     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
143                                         vkDescriptorSetLayout);
144 
145     if (use_fence)
146     {
147         fence = std::make_shared<VulkanFence>(vkDevice);
148     }
149     else
150     {
151         clVk2CLExternalSemaphore = new clExternalSemaphore(
152             vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
153         clCl2VkExternalSemaphore = new clExternalSemaphore(
154             vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
155     }
156 
157     const uint32_t maxIter = innerIterations;
158     VulkanCommandPool vkCommandPool(vkDevice);
159     VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
160 
161     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
162     VulkanDeviceMemory vkParamsDeviceMemory(
163         vkDevice, vkParamsBuffer.getSize(),
164         getVulkanMemoryType(vkDevice,
165                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
166     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
167     std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
168     std::vector<clExternalMemory *> externalMemory;
169     for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
170          emhtIdx++)
171     {
172         VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
173             vkExternalMemoryHandleTypeList[emhtIdx];
174         log_info("External memory handle type: %d\n",
175                  vkExternalMemoryHandleType);
176 
177         VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
178                                    vkExternalMemoryHandleType);
179         const VulkanMemoryTypeList &memoryTypeList =
180             vkDummyBuffer.getMemoryTypeList();
181 
182         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
183         {
184             const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
185 
186             log_info("Memory type index: %d\n", (uint32_t)memoryType);
187             log_info("Memory type property: %d\n",
188                      memoryType.getMemoryTypeProperty());
189 
190             VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
191                                           vkExternalMemoryHandleType);
192 
193             for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
194             {
195                 vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
196                     vkDevice, vkBufferList[bIdx], memoryType,
197                     vkExternalMemoryHandleType));
198                 externalMemory.push_back(new clExternalMemory(
199                     vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
200                     0, bufferSize, context, deviceId));
201             }
202             cl_mem buffers[MAX_BUFFERS];
203             clFinish(cmd_queue1);
204             Params *params = (Params *)vkParamsDeviceMemory.map();
205             params->numBuffers = numBuffers;
206             params->bufferSize = bufferSize;
207             params->interBufferOffset = 0;
208             vkParamsDeviceMemory.unmap();
209             vkDescriptorSet.update(0, vkParamsBuffer);
210             for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
211             {
212                 size_t buffer_size = vkBufferList[bIdx].getSize();
213                 vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
214                                                            0);
215                 buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
216             }
217             vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
218             vkCommandBuffer.begin();
219             vkCommandBuffer.bindPipeline(vkComputePipeline);
220             vkCommandBuffer.bindDescriptorSets(
221                 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
222             vkCommandBuffer.dispatch(512, 1, 1);
223             vkCommandBuffer.end();
224 
225             if (vkBufferList.size() == 2)
226             {
227                 update_buffer_kernel = kernel[0];
228             }
229             else if (vkBufferList.size() == 3)
230             {
231                 update_buffer_kernel = kernel[1];
232             }
233             else if (vkBufferList.size() == 5)
234             {
235                 update_buffer_kernel = kernel[2];
236             }
237             // global work size should be less than or equal to
238             // bufferSizeList[i]
239             global_work_size[0] = bufferSize;
240             for (uint32_t iter = 0; iter < maxIter; iter++)
241             {
242 
243                 if (use_fence)
244                 {
245                     fence->reset();
246                     vkQueue.submit(vkCommandBuffer, fence);
247                     fence->wait();
248                 }
249                 else
250                 {
251                     if (iter == 0)
252                     {
253                         vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
254                     }
255                     else
256                     {
257                         vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
258                                        vkVk2CLSemaphore);
259                     }
260 
261                     clVk2CLExternalSemaphore->wait(cmd_queue1);
262                 }
263 
264 
265                 err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
266                                      (void *)&bufferSize);
267                 err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t),
268                                       (void *)&bufferSize);
269                 err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem),
270                                       (void *)&(buffers[0]));
271 
272                 for (int i = 0; i < vkBufferList.size() - 1; i++)
273                 {
274                     err |=
275                         clSetKernelArg(update_buffer_kernel, i + 1,
276                                        sizeof(cl_mem), (void *)&(buffers[i]));
277                 }
278 
279                 err |=
280                     clSetKernelArg(kernel_cq, 2, sizeof(cl_mem),
281                                    (void *)&(buffers[vkBufferList.size() - 1]));
282 
283                 if (err != CL_SUCCESS)
284                 {
285                     print_error(err,
286                                 "Error: Failed to set arg values for kernel\n");
287                     goto CLEANUP;
288                 }
289                 cl_event first_launch;
290 
291                 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
292                                              1, NULL, global_work_size, NULL, 0,
293                                              NULL, &first_launch);
294                 if (err != CL_SUCCESS)
295                 {
296                     print_error(err,
297                                 "Error: Failed to launch update_buffer_kernel,"
298                                 "error\n");
299                     goto CLEANUP;
300                 }
301 
302                 err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL,
303                                              global_work_size, NULL, 1,
304                                              &first_launch, NULL);
305                 if (err != CL_SUCCESS)
306                 {
307                     print_error(err,
308                                 "Error: Failed to launch update_buffer_kernel,"
309                                 "error\n");
310                     goto CLEANUP;
311                 }
312 
313                 if (use_fence)
314                 {
315                     clFlush(cmd_queue1);
316                     clFlush(cmd_queue2);
317                     clFinish(cmd_queue1);
318                     clFinish(cmd_queue2);
319                 }
320                 else if (!use_fence && iter != (maxIter - 1))
321                 {
322                     clCl2VkExternalSemaphore->signal(cmd_queue2);
323                 }
324             }
325             error_2 = (uint8_t *)malloc(sizeof(uint8_t));
326             if (NULL == error_2)
327             {
328                 log_error("Not able to allocate memory\n");
329                 goto CLEANUP;
330             }
331             clFinish(cmd_queue2);
332             error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
333                                      sizeof(uint8_t), NULL, &err);
334             if (CL_SUCCESS != err)
335             {
336                 print_error(err, "Error: clCreateBuffer \n");
337                 goto CLEANUP;
338             }
339             uint8_t val = 0;
340             err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
341                                        sizeof(uint8_t), &val, 0, NULL, NULL);
342             if (err != CL_SUCCESS)
343             {
344                 print_error(err, "Error: Failed read output, error\n");
345                 goto CLEANUP;
346             }
347 
348             int calc_max_iter;
349             for (int i = 0; i < vkBufferList.size(); i++)
350             {
351                 if (i == 0)
352                     calc_max_iter = (maxIter * 3);
353                 else
354                     calc_max_iter = (maxIter * 2);
355                 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
356                                      (void *)&(buffers[i]));
357                 err |=
358                     clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
359                 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
360                                       &calc_max_iter);
361                 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
362                                       (void *)&error_1);
363                 if (err != CL_SUCCESS)
364                 {
365                     print_error(err,
366                                 "Error: Failed to set arg values for "
367                                 "verify_kernel \n");
368                     goto CLEANUP;
369                 }
370                 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
371                                              global_work_size, NULL, 0, NULL,
372                                              NULL);
373 
374                 if (err != CL_SUCCESS)
375                 {
376                     print_error(err,
377                                 "Error: Failed to launch verify_kernel,"
378                                 "error \n");
379                     goto CLEANUP;
380                 }
381                 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
382                                           sizeof(uint8_t), error_2, 0, NULL,
383                                           NULL);
384                 if (err != CL_SUCCESS)
385                 {
386                     print_error(err, "Error: Failed read output, error \n ");
387                     goto CLEANUP;
388                 }
389                 if (*error_2 == 1)
390                 {
391                     log_error("&&&& vulkan_opencl_buffer test FAILED\n");
392                     goto CLEANUP;
393                 }
394             }
395             for (size_t i = 0; i < vkBufferList.size(); i++)
396             {
397                 delete vkBufferListDeviceMemory[i];
398                 delete externalMemory[i];
399             }
400             vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
401                                            vkBufferListDeviceMemory.begin()
402                                                + numBuffers);
403             externalMemory.erase(externalMemory.begin(),
404                                  externalMemory.begin() + numBuffers);
405         }
406     }
407 CLEANUP:
408     for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
409     {
410         if (vkBufferListDeviceMemory[i])
411         {
412             delete vkBufferListDeviceMemory[i];
413         }
414         if (externalMemory[i])
415         {
416             delete externalMemory[i];
417         }
418     }
419     if (program) clReleaseProgram(program);
420     if (kernel_cq) clReleaseKernel(kernel_cq);
421     if (!use_fence)
422     {
423         if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
424         if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
425     }
426     if (error_2) free(error_2);
427     if (error_1) clReleaseMemObject(error_1);
428 
429     return err;
430 }
431 
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,bool use_fence)432 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
433                             cl_kernel *kernel, cl_kernel &verify_kernel,
434                             VulkanDevice &vkDevice, uint32_t numBuffers,
435                             uint32_t bufferSize, bool use_fence)
436 {
437     log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
438     size_t global_work_size[1];
439     uint8_t *error_2;
440     cl_mem error_1;
441     cl_kernel update_buffer_kernel;
442     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
443     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
444     int err = CL_SUCCESS;
445 
446     const std::vector<VulkanExternalMemoryHandleType>
447         vkExternalMemoryHandleTypeList =
448             getSupportedVulkanExternalMemoryHandleTypeList();
449     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
450         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
451     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
452     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
453     std::shared_ptr<VulkanFence> fence = nullptr;
454 
455     VulkanQueue &vkQueue = vkDevice.getQueue();
456 
457     std::vector<char> vkBufferShader = readFile("buffer.spv");
458     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
459     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
460     vkDescriptorSetLayoutBindingList.addBinding(
461         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
462     vkDescriptorSetLayoutBindingList.addBinding(
463         1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
464     VulkanDescriptorSetLayout vkDescriptorSetLayout(
465         vkDevice, vkDescriptorSetLayoutBindingList);
466     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
467     VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
468                                             vkBufferShaderModule);
469 
470     VulkanDescriptorPool vkDescriptorPool(vkDevice,
471                                           vkDescriptorSetLayoutBindingList);
472     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
473                                         vkDescriptorSetLayout);
474 
475     if (use_fence)
476     {
477         fence = std::make_shared<VulkanFence>(vkDevice);
478     }
479     else
480     {
481         clVk2CLExternalSemaphore = new clExternalSemaphore(
482             vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
483         clCl2VkExternalSemaphore = new clExternalSemaphore(
484             vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
485     }
486 
487     const uint32_t maxIter = innerIterations;
488     VulkanCommandPool vkCommandPool(vkDevice);
489     VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
490 
491     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
492     VulkanDeviceMemory vkParamsDeviceMemory(
493         vkDevice, vkParamsBuffer.getSize(),
494         getVulkanMemoryType(vkDevice,
495                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
496     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
497     std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
498     std::vector<clExternalMemory *> externalMemory;
499 
500     for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
501          emhtIdx++)
502     {
503         VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
504             vkExternalMemoryHandleTypeList[emhtIdx];
505         log_info("External memory handle type: %d\n",
506                  vkExternalMemoryHandleType);
507 
508         VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
509                                    vkExternalMemoryHandleType);
510         const VulkanMemoryTypeList &memoryTypeList =
511             vkDummyBuffer.getMemoryTypeList();
512 
513         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
514         {
515             const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
516 
517             log_info("Memory type index: %d\n", (uint32_t)memoryType);
518             log_info("Memory type property: %d\n",
519                      memoryType.getMemoryTypeProperty());
520 
521             VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
522                                           vkExternalMemoryHandleType);
523 
524             for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
525             {
526                 vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
527                     vkDevice, vkBufferList[bIdx], memoryType,
528                     vkExternalMemoryHandleType));
529                 externalMemory.push_back(new clExternalMemory(
530                     vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
531                     0, bufferSize, context, deviceId));
532             }
533             cl_mem buffers[4];
534             clFinish(cmd_queue1);
535             Params *params = (Params *)vkParamsDeviceMemory.map();
536             params->numBuffers = numBuffers;
537             params->bufferSize = bufferSize;
538             params->interBufferOffset = 0;
539             vkParamsDeviceMemory.unmap();
540             vkDescriptorSet.update(0, vkParamsBuffer);
541             for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
542             {
543                 size_t buffer_size = vkBufferList[bIdx].getSize();
544                 vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
545                                                            0);
546                 buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
547             }
548             vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList);
549 
550             vkCommandBuffer.begin();
551             vkCommandBuffer.bindPipeline(vkComputePipeline);
552             vkCommandBuffer.bindDescriptorSets(
553                 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
554             vkCommandBuffer.dispatch(512, 1, 1);
555             vkCommandBuffer.end();
556 
557             if (vkBufferList.size() == 1)
558             {
559                 update_buffer_kernel = kernel[0];
560             }
561             else if (vkBufferList.size() == 2)
562             {
563                 update_buffer_kernel = kernel[1];
564             }
565             else if (vkBufferList.size() == 4)
566             {
567                 update_buffer_kernel = kernel[2];
568             }
569 
570             // global work size should be less than or equal to
571             // bufferSizeList[i]
572             global_work_size[0] = bufferSize;
573 
574             for (uint32_t iter = 0; iter < maxIter; iter++)
575             {
576                 if (use_fence)
577                 {
578                     fence->reset();
579                     vkQueue.submit(vkCommandBuffer, fence);
580                     fence->wait();
581                 }
582                 else
583                 {
584                     if (iter == 0)
585                     {
586                         vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
587                     }
588                     else
589                     {
590                         vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
591                                        vkVk2CLSemaphore);
592                     }
593 
594                     clVk2CLExternalSemaphore->wait(cmd_queue1);
595                 }
596 
597                 err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
598                                      (void *)&bufferSize);
599                 for (int i = 0; i < vkBufferList.size(); i++)
600                 {
601                     err |=
602                         clSetKernelArg(update_buffer_kernel, i + 1,
603                                        sizeof(cl_mem), (void *)&(buffers[i]));
604                 }
605 
606                 if (err != CL_SUCCESS)
607                 {
608                     print_error(err,
609                                 "Error: Failed to set arg values for kernel\n");
610                     goto CLEANUP;
611                 }
612                 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
613                                              1, NULL, global_work_size, NULL, 0,
614                                              NULL, NULL);
615                 if (err != CL_SUCCESS)
616                 {
617                     print_error(err,
618                                 "Error: Failed to launch update_buffer_kernel,"
619                                 " error\n");
620                     goto CLEANUP;
621                 }
622                 if (use_fence)
623                 {
624                     clFlush(cmd_queue1);
625                     clFinish(cmd_queue1);
626                 }
627                 else if (!use_fence && (iter != (maxIter - 1)))
628                 {
629                     clCl2VkExternalSemaphore->signal(cmd_queue1);
630                 }
631             }
632             error_2 = (uint8_t *)malloc(sizeof(uint8_t));
633             if (NULL == error_2)
634             {
635                 log_error("Not able to allocate memory\n");
636                 goto CLEANUP;
637             }
638 
639             error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
640                                      sizeof(uint8_t), NULL, &err);
641             if (CL_SUCCESS != err)
642             {
643                 print_error(err, "Error: clCreateBuffer \n");
644                 goto CLEANUP;
645             }
646             uint8_t val = 0;
647             err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
648                                        sizeof(uint8_t), &val, 0, NULL, NULL);
649             if (CL_SUCCESS != err)
650             {
651                 print_error(err, "Error: clEnqueueWriteBuffer \n");
652                 goto CLEANUP;
653             }
654 
655             int calc_max_iter = (maxIter * 2);
656             for (int i = 0; i < vkBufferList.size(); i++)
657             {
658                 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
659                                      (void *)&(buffers[i]));
660                 err |=
661                     clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
662                 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
663                                       &calc_max_iter);
664                 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
665                                       (void *)&error_1);
666                 if (err != CL_SUCCESS)
667                 {
668                     print_error(
669                         err,
670                         "Error: Failed to set arg values for verify_kernel \n");
671                     goto CLEANUP;
672                 }
673                 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
674                                              global_work_size, NULL, 0, NULL,
675                                              NULL);
676                 if (err != CL_SUCCESS)
677                 {
678                     print_error(
679                         err, "Error: Failed to launch verify_kernel, error\n");
680                     goto CLEANUP;
681                 }
682 
683                 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
684                                           sizeof(uint8_t), error_2, 0, NULL,
685                                           NULL);
686                 if (err != CL_SUCCESS)
687                 {
688                     print_error(err, "Error: Failed read output, error  \n");
689                     goto CLEANUP;
690                 }
691                 if (*error_2 == 1)
692                 {
693                     log_error("&&&& vulkan_opencl_buffer test FAILED\n");
694                     goto CLEANUP;
695                 }
696             }
697             for (size_t i = 0; i < vkBufferList.size(); i++)
698             {
699                 delete vkBufferListDeviceMemory[i];
700                 delete externalMemory[i];
701             }
702             vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
703                                            vkBufferListDeviceMemory.begin()
704                                                + numBuffers);
705             externalMemory.erase(externalMemory.begin(),
706                                  externalMemory.begin() + numBuffers);
707         }
708     }
709 CLEANUP:
710     for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
711     {
712         if (vkBufferListDeviceMemory[i])
713         {
714             delete vkBufferListDeviceMemory[i];
715         }
716         if (externalMemory[i])
717         {
718             delete externalMemory[i];
719         }
720     }
721 
722     if (!use_fence)
723     {
724         if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
725         if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
726     }
727 
728     if (error_2) free(error_2);
729     if (error_1) clReleaseMemObject(error_1);
730     return err;
731 }
732 
run_test_with_multi_import_same_ctx(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,uint32_t bufferSizeForOffset,float use_fence)733 int run_test_with_multi_import_same_ctx(
734     cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
735     cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
736     uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence)
737 {
738     size_t global_work_size[1];
739     uint8_t *error_2;
740     cl_mem error_1;
741     int numImports = numBuffers;
742     cl_kernel update_buffer_kernel[MAX_IMPORTS];
743     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
744     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
745     int err = CL_SUCCESS;
746     int calc_max_iter;
747     bool withOffset;
748     uint32_t pBufferSize;
749 
750     const std::vector<VulkanExternalMemoryHandleType>
751         vkExternalMemoryHandleTypeList =
752             getSupportedVulkanExternalMemoryHandleTypeList();
753     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
754         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
755     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
756     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
757     std::shared_ptr<VulkanFence> fence = nullptr;
758 
759     VulkanQueue &vkQueue = vkDevice.getQueue();
760 
761     std::vector<char> vkBufferShader = readFile("buffer.spv");
762 
763     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
764     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
765     vkDescriptorSetLayoutBindingList.addBinding(
766         0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
767     vkDescriptorSetLayoutBindingList.addBinding(
768         1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
769     VulkanDescriptorSetLayout vkDescriptorSetLayout(
770         vkDevice, vkDescriptorSetLayoutBindingList);
771     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
772     VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
773                                             vkBufferShaderModule);
774 
775     VulkanDescriptorPool vkDescriptorPool(vkDevice,
776                                           vkDescriptorSetLayoutBindingList);
777     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
778                                         vkDescriptorSetLayout);
779 
780     if (use_fence)
781     {
782         fence = std::make_shared<VulkanFence>(vkDevice);
783     }
784     else
785     {
786         clVk2CLExternalSemaphore = new clExternalSemaphore(
787             vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
788         clCl2VkExternalSemaphore = new clExternalSemaphore(
789             vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
790     }
791 
792     const uint32_t maxIter = innerIterations;
793     VulkanCommandPool vkCommandPool(vkDevice);
794     VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
795 
796     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
797     VulkanDeviceMemory vkParamsDeviceMemory(
798         vkDevice, vkParamsBuffer.getSize(),
799         getVulkanMemoryType(vkDevice,
800                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
801     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
802     std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
803     std::vector<std::vector<clExternalMemory *>> externalMemory;
804 
805 
806     for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
807          emhtIdx++)
808     {
809         VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
810             vkExternalMemoryHandleTypeList[emhtIdx];
811         log_info("External memory handle type: %d\n",
812                  vkExternalMemoryHandleType);
813 
814         VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
815                                    vkExternalMemoryHandleType);
816         const VulkanMemoryTypeList &memoryTypeList =
817             vkDummyBuffer.getMemoryTypeList();
818 
819         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
820         {
821             const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
822 
823             log_info("Memory type index: %d\n", (uint32_t)memoryType);
824             log_info("Memory type property: %d\n",
825                      memoryType.getMemoryTypeProperty());
826             for (unsigned int withOffset = 0;
827                  withOffset <= (unsigned int)enableOffset; withOffset++)
828             {
829                 log_info("Running withOffset case %d\n", (uint32_t)withOffset);
830                 if (withOffset)
831                 {
832                     pBufferSize = bufferSizeForOffset;
833                 }
834                 else
835                 {
836                     pBufferSize = bufferSize;
837                 }
838                 cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS];
839                 VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
840                                               vkExternalMemoryHandleType);
841                 uint32_t interBufferOffset =
842                     (uint32_t)(vkBufferList[0].getSize());
843 
844                 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
845                 {
846                     if (withOffset == 0)
847                     {
848                         vkBufferListDeviceMemory.push_back(
849                             new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx],
850                                                    memoryType,
851                                                    vkExternalMemoryHandleType));
852                     }
853                     if (withOffset == 1)
854                     {
855                         uint32_t totalSize =
856                             (uint32_t)(vkBufferList.size() * interBufferOffset);
857                         vkBufferListDeviceMemory.push_back(
858                             new VulkanDeviceMemory(vkDevice, totalSize,
859                                                    memoryType,
860                                                    vkExternalMemoryHandleType));
861                     }
862                     std::vector<clExternalMemory *> pExternalMemory;
863                     for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
864                     {
865                         pExternalMemory.push_back(new clExternalMemory(
866                             vkBufferListDeviceMemory[bIdx],
867                             vkExternalMemoryHandleType,
868                             withOffset * bIdx * interBufferOffset, pBufferSize,
869                             context, deviceId));
870                     }
871                     externalMemory.push_back(pExternalMemory);
872                 }
873 
874                 clFinish(cmd_queue1);
875                 Params *params = (Params *)vkParamsDeviceMemory.map();
876                 params->numBuffers = numBuffers;
877                 params->bufferSize = pBufferSize;
878                 params->interBufferOffset = interBufferOffset * withOffset;
879                 vkParamsDeviceMemory.unmap();
880                 vkDescriptorSet.update(0, vkParamsBuffer);
881                 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
882                 {
883                     size_t buffer_size = vkBufferList[bIdx].getSize();
884                     vkBufferListDeviceMemory[bIdx]->bindBuffer(
885                         vkBufferList[bIdx],
886                         bIdx * interBufferOffset * withOffset);
887                     for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
888                     {
889                         buffers[bIdx][cl_bIdx] =
890                             externalMemory[bIdx][cl_bIdx]
891                                 ->getExternalMemoryBuffer();
892                     }
893                 }
894                 vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
895                 vkCommandBuffer.begin();
896                 vkCommandBuffer.bindPipeline(vkComputePipeline);
897                 vkCommandBuffer.bindDescriptorSets(
898                     vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
899                 vkCommandBuffer.dispatch(512, 1, 1);
900                 vkCommandBuffer.end();
901                 for (int i = 0; i < numImports; i++)
902                 {
903                     update_buffer_kernel[i] = (numBuffers == 1)
904                         ? kernel[0]
905                         : ((numBuffers == 2) ? kernel[1] : kernel[2]);
906                 }
907                 // global work size should be less than or equal to
908                 // bufferSizeList[i]
909                 global_work_size[0] = pBufferSize;
910 
911                 for (uint32_t iter = 0; iter < maxIter; iter++)
912                 {
913                     if (use_fence)
914                     {
915                         fence->reset();
916                         vkQueue.submit(vkCommandBuffer, fence);
917                         fence->wait();
918                     }
919                     else
920                     {
921                         if (iter == 0)
922                         {
923                             vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
924                         }
925                         else
926                         {
927                             vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
928                                            vkVk2CLSemaphore);
929                         }
930                     }
931 
932                     if (use_fence)
933                     {
934                         fence->wait();
935                     }
936                     else
937                     {
938                         clVk2CLExternalSemaphore->wait(cmd_queue1);
939                     }
940 
941                     for (uint8_t launchIter = 0; launchIter < numImports;
942                          launchIter++)
943                     {
944                         err = clSetKernelArg(update_buffer_kernel[launchIter],
945                                              0, sizeof(uint32_t),
946                                              (void *)&pBufferSize);
947                         for (int i = 0; i < numBuffers; i++)
948                         {
949                             err |= clSetKernelArg(
950                                 update_buffer_kernel[launchIter], i + 1,
951                                 sizeof(cl_mem),
952                                 (void *)&(buffers[i][launchIter]));
953                         }
954 
955                         if (err != CL_SUCCESS)
956                         {
957                             print_error(err,
958                                         "Error: Failed to set arg values for "
959                                         "kernel\n ");
960                             goto CLEANUP;
961                         }
962                         err = clEnqueueNDRangeKernel(
963                             cmd_queue1, update_buffer_kernel[launchIter], 1,
964                             NULL, global_work_size, NULL, 0, NULL, NULL);
965                         if (err != CL_SUCCESS)
966                         {
967                             print_error(err,
968                                         "Error: Failed to launch "
969                                         "update_buffer_kernel, error\n ");
970                             goto CLEANUP;
971                         }
972                     }
973                     if (use_fence)
974                     {
975                         clFinish(cmd_queue1);
976                     }
977                     else if (!use_fence && iter != (maxIter - 1))
978                     {
979                         clCl2VkExternalSemaphore->signal(cmd_queue1);
980                     }
981                 }
982                 error_2 = (uint8_t *)malloc(sizeof(uint8_t));
983                 if (NULL == error_2)
984                 {
985                     log_error("Not able to allocate memory\n");
986                     goto CLEANUP;
987                 }
988 
989                 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
990                                          sizeof(uint8_t), NULL, &err);
991                 if (CL_SUCCESS != err)
992                 {
993                     print_error(err, "Error: clCreateBuffer \n");
994                     goto CLEANUP;
995                 }
996                 uint8_t val = 0;
997                 err =
998                     clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
999                                          sizeof(uint8_t), &val, 0, NULL, NULL);
1000                 if (CL_SUCCESS != err)
1001                 {
1002                     print_error(err, "Error: clEnqueueWriteBuffer \n");
1003                     goto CLEANUP;
1004                 }
1005                 calc_max_iter = maxIter * (numBuffers + 1);
1006 
1007                 for (int i = 0; i < vkBufferList.size(); i++)
1008                 {
1009                     err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
1010                                          (void *)&(buffers[i][0]));
1011                     err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
1012                                           &pBufferSize);
1013                     err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
1014                                           &calc_max_iter);
1015                     err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
1016                                           (void *)&error_1);
1017                     if (err != CL_SUCCESS)
1018                     {
1019                         print_error(err,
1020                                     "Error: Failed to set arg values for "
1021                                     "verify_kernel \n");
1022                         goto CLEANUP;
1023                     }
1024                     err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
1025                                                  NULL, global_work_size, NULL,
1026                                                  0, NULL, NULL);
1027                     if (err != CL_SUCCESS)
1028                     {
1029                         print_error(
1030                             err,
1031                             "Error: Failed to launch verify_kernel, error\n");
1032                         goto CLEANUP;
1033                     }
1034 
1035                     err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1036                                               sizeof(uint8_t), error_2, 0, NULL,
1037                                               NULL);
1038                     if (err != CL_SUCCESS)
1039                     {
1040                         print_error(err, "Error: Failed read output, error \n");
1041                         goto CLEANUP;
1042                     }
1043                     if (*error_2 == 1)
1044                     {
1045                         log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1046                         goto CLEANUP;
1047                     }
1048                 }
1049                 for (size_t i = 0; i < vkBufferList.size(); i++)
1050                 {
1051                     for (size_t j = 0; j < numImports; j++)
1052                     {
1053                         delete externalMemory[i][j];
1054                     }
1055                 }
1056                 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1057                 {
1058                     delete vkBufferListDeviceMemory[i];
1059                 }
1060                 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
1061                                                vkBufferListDeviceMemory.end());
1062                 for (size_t i = 0; i < externalMemory.size(); i++)
1063                 {
1064                     externalMemory[i].erase(externalMemory[i].begin(),
1065                                             externalMemory[i].begin()
1066                                                 + numBuffers);
1067                 }
1068                 externalMemory.clear();
1069             }
1070         }
1071     }
1072 CLEANUP:
1073     for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1074     {
1075         if (vkBufferListDeviceMemory[i])
1076         {
1077             delete vkBufferListDeviceMemory[i];
1078         }
1079     }
1080     for (size_t i = 0; i < externalMemory.size(); i++)
1081     {
1082         for (size_t j = 0; j < externalMemory[i].size(); j++)
1083         {
1084             if (externalMemory[i][j])
1085             {
1086                 delete externalMemory[i][j];
1087             }
1088         }
1089     }
1090 
1091     if (!use_fence)
1092     {
1093         if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1094         if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1095     }
1096 
1097     if (error_2) free(error_2);
1098     if (error_1) clReleaseMemObject(error_1);
1099     return err;
1100 }
1101 
run_test_with_multi_import_diff_ctx(cl_context & context,cl_context & context2,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel1,cl_kernel * kernel2,cl_kernel & verify_kernel,cl_kernel verify_kernel2,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,uint32_t bufferSizeForOffset,float use_fence)1102 int run_test_with_multi_import_diff_ctx(
1103     cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
1104     cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
1105     cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
1106     uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset,
1107     float use_fence)
1108 {
1109     size_t global_work_size[1];
1110     uint8_t *error_3;
1111     cl_mem error_1;
1112     cl_mem error_2;
1113     int numImports = numBuffers;
1114     cl_kernel update_buffer_kernel1[MAX_IMPORTS];
1115     cl_kernel update_buffer_kernel2[MAX_IMPORTS];
1116     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
1117     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
1118     clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL;
1119     clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL;
1120     int err = CL_SUCCESS;
1121     int calc_max_iter;
1122     bool withOffset;
1123     uint32_t pBufferSize;
1124 
1125     const std::vector<VulkanExternalMemoryHandleType>
1126         vkExternalMemoryHandleTypeList =
1127             getSupportedVulkanExternalMemoryHandleTypeList();
1128     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
1129         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
1130     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
1131     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
1132     std::shared_ptr<VulkanFence> fence = nullptr;
1133 
1134     VulkanQueue &vkQueue = vkDevice.getQueue();
1135 
1136     std::vector<char> vkBufferShader = readFile("buffer.spv");
1137 
1138     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
1139     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
1140         MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1141     VulkanDescriptorSetLayout vkDescriptorSetLayout(
1142         vkDevice, vkDescriptorSetLayoutBindingList);
1143     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
1144     VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
1145                                             vkBufferShaderModule);
1146 
1147     VulkanDescriptorPool vkDescriptorPool(vkDevice,
1148                                           vkDescriptorSetLayoutBindingList);
1149     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
1150                                         vkDescriptorSetLayout);
1151 
1152     if (use_fence)
1153     {
1154         fence = std::make_shared<VulkanFence>(vkDevice);
1155     }
1156     else
1157     {
1158         clVk2CLExternalSemaphore = new clExternalSemaphore(
1159             vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
1160         clCl2VkExternalSemaphore = new clExternalSemaphore(
1161             vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
1162 
1163         clVk2CLExternalSemaphore2 =
1164             new clExternalSemaphore(vkVk2CLSemaphore, context2,
1165                                     vkExternalSemaphoreHandleType, deviceId);
1166         clCl2VkExternalSemaphore2 =
1167             new clExternalSemaphore(vkCl2VkSemaphore, context2,
1168                                     vkExternalSemaphoreHandleType, deviceId);
1169     }
1170 
1171     const uint32_t maxIter = innerIterations;
1172     VulkanCommandPool vkCommandPool(vkDevice);
1173     VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
1174 
1175     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
1176     VulkanDeviceMemory vkParamsDeviceMemory(
1177         vkDevice, vkParamsBuffer.getSize(),
1178         getVulkanMemoryType(vkDevice,
1179                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
1180     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
1181     std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
1182     std::vector<std::vector<clExternalMemory *>> externalMemory1;
1183     std::vector<std::vector<clExternalMemory *>> externalMemory2;
1184 
1185     for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
1186          emhtIdx++)
1187     {
1188         VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
1189             vkExternalMemoryHandleTypeList[emhtIdx];
1190         log_info("External memory handle type:%d\n",
1191                  vkExternalMemoryHandleType);
1192 
1193         VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
1194                                    vkExternalMemoryHandleType);
1195         const VulkanMemoryTypeList &memoryTypeList =
1196             vkDummyBuffer.getMemoryTypeList();
1197 
1198         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
1199         {
1200             const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
1201 
1202             log_info("Memory type index: %d\n", (uint32_t)memoryType);
1203             log_info("Memory type property: %d\n",
1204                      memoryType.getMemoryTypeProperty());
1205 
1206             for (unsigned int withOffset = 0;
1207                  withOffset <= (unsigned int)enableOffset; withOffset++)
1208             {
1209                 log_info("Running withOffset case %d\n", (uint32_t)withOffset);
1210                 cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS];
1211                 cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS];
1212                 if (withOffset)
1213                 {
1214                     pBufferSize = bufferSizeForOffset;
1215                 }
1216                 else
1217                 {
1218                     pBufferSize = bufferSize;
1219                 }
1220                 VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
1221                                               vkExternalMemoryHandleType);
1222                 uint32_t interBufferOffset =
1223                     (uint32_t)(vkBufferList[0].getSize());
1224 
1225                 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
1226                 {
1227                     if (withOffset == 0)
1228                     {
1229                         vkBufferListDeviceMemory.push_back(
1230                             new VulkanDeviceMemory(vkDevice, pBufferSize,
1231                                                    memoryType,
1232                                                    vkExternalMemoryHandleType));
1233                     }
1234                     if (withOffset == 1)
1235                     {
1236                         uint32_t totalSize =
1237                             (uint32_t)(vkBufferList.size() * interBufferOffset);
1238                         vkBufferListDeviceMemory.push_back(
1239                             new VulkanDeviceMemory(vkDevice, totalSize,
1240                                                    memoryType,
1241                                                    vkExternalMemoryHandleType));
1242                     }
1243                     std::vector<clExternalMemory *> pExternalMemory1;
1244                     std::vector<clExternalMemory *> pExternalMemory2;
1245                     for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
1246                     {
1247                         pExternalMemory1.push_back(new clExternalMemory(
1248                             vkBufferListDeviceMemory[bIdx],
1249                             vkExternalMemoryHandleType,
1250                             withOffset * bIdx * interBufferOffset, pBufferSize,
1251                             context, deviceId));
1252                         pExternalMemory2.push_back(new clExternalMemory(
1253                             vkBufferListDeviceMemory[bIdx],
1254                             vkExternalMemoryHandleType,
1255                             withOffset * bIdx * interBufferOffset, pBufferSize,
1256                             context2, deviceId));
1257                     }
1258                     externalMemory1.push_back(pExternalMemory1);
1259                     externalMemory2.push_back(pExternalMemory2);
1260                 }
1261 
1262                 clFinish(cmd_queue1);
1263                 Params *params = (Params *)vkParamsDeviceMemory.map();
1264                 params->numBuffers = numBuffers;
1265                 params->bufferSize = pBufferSize;
1266                 params->interBufferOffset = interBufferOffset * withOffset;
1267                 vkParamsDeviceMemory.unmap();
1268                 vkDescriptorSet.update(0, vkParamsBuffer);
1269                 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
1270                 {
1271                     size_t buffer_size = vkBufferList[bIdx].getSize();
1272                     vkBufferListDeviceMemory[bIdx]->bindBuffer(
1273                         vkBufferList[bIdx],
1274                         bIdx * interBufferOffset * withOffset);
1275                     for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
1276                     {
1277                         buffers1[bIdx][cl_bIdx] =
1278                             externalMemory1[bIdx][cl_bIdx]
1279                                 ->getExternalMemoryBuffer();
1280                         buffers2[bIdx][cl_bIdx] =
1281                             externalMemory2[bIdx][cl_bIdx]
1282                                 ->getExternalMemoryBuffer();
1283                     }
1284                     vkDescriptorSet.update((uint32_t)bIdx + 1,
1285                                            vkBufferList[bIdx]);
1286                 }
1287 
1288                 vkCommandBuffer.begin();
1289                 vkCommandBuffer.bindPipeline(vkComputePipeline);
1290                 vkCommandBuffer.bindDescriptorSets(
1291                     vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
1292                 vkCommandBuffer.dispatch(512, 1, 1);
1293                 vkCommandBuffer.end();
1294 
1295                 for (int i = 0; i < numImports; i++)
1296                 {
1297                     update_buffer_kernel1[i] = (numBuffers == 1)
1298                         ? kernel1[0]
1299                         : ((numBuffers == 2) ? kernel1[1] : kernel1[2]);
1300                     update_buffer_kernel2[i] = (numBuffers == 1)
1301                         ? kernel2[0]
1302                         : ((numBuffers == 2) ? kernel2[1] : kernel2[2]);
1303                 }
1304 
1305                 // global work size should be less than or equal
1306                 // to bufferSizeList[i]
1307                 global_work_size[0] = pBufferSize;
1308 
1309                 for (uint32_t iter = 0; iter < maxIter; iter++)
1310                 {
1311                     if (use_fence)
1312                     {
1313                         fence->reset();
1314                         vkQueue.submit(vkCommandBuffer, fence);
1315                         fence->wait();
1316                     }
1317                     else
1318                     {
1319                         if (iter == 0)
1320                         {
1321                             vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
1322                         }
1323                         else
1324                         {
1325                             vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
1326                                            vkVk2CLSemaphore);
1327                         }
1328                     }
1329 
1330                     if (use_fence)
1331                     {
1332                         fence->wait();
1333                     }
1334                     else
1335                     {
1336                         clVk2CLExternalSemaphore->wait(cmd_queue1);
1337                     }
1338 
1339                     for (uint8_t launchIter = 0; launchIter < numImports;
1340                          launchIter++)
1341                     {
1342                         err = clSetKernelArg(update_buffer_kernel1[launchIter],
1343                                              0, sizeof(uint32_t),
1344                                              (void *)&pBufferSize);
1345                         for (int i = 0; i < numBuffers; i++)
1346                         {
1347                             err |= clSetKernelArg(
1348                                 update_buffer_kernel1[launchIter], i + 1,
1349                                 sizeof(cl_mem),
1350                                 (void *)&(buffers1[i][launchIter]));
1351                         }
1352 
1353                         if (err != CL_SUCCESS)
1354                         {
1355                             print_error(err,
1356                                         "Error: Failed to set arg values for "
1357                                         "kernel\n ");
1358                             goto CLEANUP;
1359                         }
1360                         err = clEnqueueNDRangeKernel(
1361                             cmd_queue1, update_buffer_kernel1[launchIter], 1,
1362                             NULL, global_work_size, NULL, 0, NULL, NULL);
1363                         if (err != CL_SUCCESS)
1364                         {
1365                             print_error(err,
1366                                         "Error: Failed to launch "
1367                                         "update_buffer_kernel, error\n");
1368                             goto CLEANUP;
1369                         }
1370                     }
1371                     if (use_fence)
1372                     {
1373                         clFinish(cmd_queue1);
1374                     }
1375                     else if (!use_fence && iter != (maxIter - 1))
1376                     {
1377                         clCl2VkExternalSemaphore->signal(cmd_queue1);
1378                     }
1379                 }
1380                 clFinish(cmd_queue1);
1381                 for (uint32_t iter = 0; iter < maxIter; iter++)
1382                 {
1383                     if (use_fence)
1384                     {
1385                         fence->reset();
1386                         vkQueue.submit(vkCommandBuffer, fence);
1387                         fence->wait();
1388                     }
1389                     else
1390                     {
1391                         if (iter == 0)
1392                         {
1393                             vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
1394                         }
1395                         else
1396                         {
1397                             vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
1398                                            vkVk2CLSemaphore);
1399                         }
1400                     }
1401 
1402                     if (use_fence)
1403                     {
1404                         fence->wait();
1405                     }
1406                     else
1407                     {
1408                         clVk2CLExternalSemaphore2->wait(cmd_queue2);
1409                     }
1410 
1411                     for (uint8_t launchIter = 0; launchIter < numImports;
1412                          launchIter++)
1413                     {
1414                         err = clSetKernelArg(update_buffer_kernel2[launchIter],
1415                                              0, sizeof(uint32_t),
1416                                              (void *)&bufferSize);
1417                         for (int i = 0; i < numBuffers; i++)
1418                         {
1419                             err |= clSetKernelArg(
1420                                 update_buffer_kernel2[launchIter], i + 1,
1421                                 sizeof(cl_mem),
1422                                 (void *)&(buffers2[i][launchIter]));
1423                         }
1424 
1425                         if (err != CL_SUCCESS)
1426                         {
1427                             print_error(err,
1428                                         "Error: Failed to set arg values for "
1429                                         "kernel\n ");
1430                             goto CLEANUP;
1431                         }
1432                         err = clEnqueueNDRangeKernel(
1433                             cmd_queue2, update_buffer_kernel2[launchIter], 1,
1434                             NULL, global_work_size, NULL, 0, NULL, NULL);
1435                         if (err != CL_SUCCESS)
1436                         {
1437                             print_error(err,
1438                                         "Error: Failed to launch "
1439                                         "update_buffer_kernel, error\n ");
1440                             goto CLEANUP;
1441                         }
1442                     }
1443                     if (use_fence)
1444                     {
1445                         clFinish(cmd_queue2);
1446                     }
1447                     else if (!use_fence && iter != (maxIter - 1))
1448                     {
1449                         clCl2VkExternalSemaphore2->signal(cmd_queue2);
1450                     }
1451                 }
1452                 clFinish(cmd_queue2);
1453                 error_3 = (uint8_t *)malloc(sizeof(uint8_t));
1454                 if (NULL == error_3)
1455                 {
1456                     log_error("Not able to allocate memory\n");
1457                     goto CLEANUP;
1458                 }
1459 
1460                 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
1461                                          sizeof(uint8_t), NULL, &err);
1462                 if (CL_SUCCESS != err)
1463                 {
1464                     print_error(err, "Error: clCreateBuffer \n");
1465                     goto CLEANUP;
1466                 }
1467                 error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY,
1468                                          sizeof(uint8_t), NULL, &err);
1469                 if (CL_SUCCESS != err)
1470                 {
1471                     print_error(err, "Error: clCreateBuffer \n");
1472                     goto CLEANUP;
1473                 }
1474                 uint8_t val = 0;
1475                 err =
1476                     clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1477                                          sizeof(uint8_t), &val, 0, NULL, NULL);
1478                 if (err != CL_SUCCESS)
1479                 {
1480                     print_error(err, "Error: Failed read output, error  \n");
1481                     goto CLEANUP;
1482                 }
1483 
1484                 err =
1485                     clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0,
1486                                          sizeof(uint8_t), &val, 0, NULL, NULL);
1487                 if (err != CL_SUCCESS)
1488                 {
1489                     print_error(err, "Error: Failed read output, error  \n");
1490                     goto CLEANUP;
1491                 }
1492 
1493                 calc_max_iter = maxIter * 2 * (numBuffers + 1);
1494                 for (int i = 0; i < numBuffers; i++)
1495                 {
1496                     err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
1497                                          (void *)&(buffers1[i][0]));
1498                     err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
1499                                           &pBufferSize);
1500                     err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
1501                                           &calc_max_iter);
1502                     err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
1503                                           (void *)&error_1);
1504                     if (err != CL_SUCCESS)
1505                     {
1506                         print_error(err,
1507                                     "Error: Failed to set arg values for "
1508                                     "verify_kernel \n");
1509                         goto CLEANUP;
1510                     }
1511                     err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
1512                                                  NULL, global_work_size, NULL,
1513                                                  0, NULL, NULL);
1514                     if (err != CL_SUCCESS)
1515                     {
1516                         print_error(err,
1517                                     "Error: Failed to launch verify_kernel,"
1518                                     "error\n");
1519                         goto CLEANUP;
1520                     }
1521 
1522                     err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1523                                               sizeof(uint8_t), error_3, 0, NULL,
1524                                               NULL);
1525                     if (err != CL_SUCCESS)
1526                     {
1527                         print_error(err, "Error: Failed read output, error\n");
1528                         goto CLEANUP;
1529                     }
1530                     if (*error_3 == 1)
1531                     {
1532                         log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1533                         goto CLEANUP;
1534                     }
1535                 }
1536                 *error_3 = 0;
1537                 for (int i = 0; i < vkBufferList.size(); i++)
1538                 {
1539                     err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem),
1540                                          (void *)&(buffers2[i][0]));
1541                     err |= clSetKernelArg(verify_kernel2, 1, sizeof(int),
1542                                           &pBufferSize);
1543                     err |= clSetKernelArg(verify_kernel2, 2, sizeof(int),
1544                                           &calc_max_iter);
1545                     err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem),
1546                                           (void *)&error_2);
1547                     if (err != CL_SUCCESS)
1548                     {
1549                         print_error(err,
1550                                     "Error: Failed to set arg values for "
1551                                     "verify_kernel \n");
1552                         goto CLEANUP;
1553                     }
1554                     err = clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1,
1555                                                  NULL, global_work_size, NULL,
1556                                                  0, NULL, NULL);
1557                     if (err != CL_SUCCESS)
1558                     {
1559                         print_error(err,
1560                                     "Error: Failed to launch verify_kernel,"
1561                                     "error\n");
1562                         goto CLEANUP;
1563                     }
1564 
1565                     err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0,
1566                                               sizeof(uint8_t), error_3, 0, NULL,
1567                                               NULL);
1568                     if (err != CL_SUCCESS)
1569                     {
1570                         print_error(err, "Error: Failed read output, error\n");
1571                         goto CLEANUP;
1572                     }
1573                     if (*error_3 == 1)
1574                     {
1575                         log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1576                         goto CLEANUP;
1577                     }
1578                 }
1579                 for (size_t i = 0; i < vkBufferList.size(); i++)
1580                 {
1581                     for (size_t j = 0; j < numImports; j++)
1582                     {
1583                         delete externalMemory1[i][j];
1584                         delete externalMemory2[i][j];
1585                     }
1586                 }
1587                 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1588                 {
1589                     delete vkBufferListDeviceMemory[i];
1590                 }
1591                 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
1592                                                vkBufferListDeviceMemory.end());
1593                 for (size_t i = 0; i < externalMemory1.size(); i++)
1594                 {
1595                     externalMemory1[i].erase(externalMemory1[i].begin(),
1596                                              externalMemory1[i].begin()
1597                                                  + numBuffers);
1598                     externalMemory2[i].erase(externalMemory2[i].begin(),
1599                                              externalMemory2[i].begin()
1600                                                  + numBuffers);
1601                 }
1602                 externalMemory1.clear();
1603                 externalMemory2.clear();
1604             }
1605         }
1606     }
1607 CLEANUP:
1608     for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1609     {
1610         if (vkBufferListDeviceMemory[i])
1611         {
1612             delete vkBufferListDeviceMemory[i];
1613         }
1614     }
1615     for (size_t i = 0; i < externalMemory1.size(); i++)
1616     {
1617         for (size_t j = 0; j < externalMemory1[i].size(); j++)
1618         {
1619             if (externalMemory1[i][j])
1620             {
1621                 delete externalMemory1[i][j];
1622             }
1623         }
1624     }
1625     for (size_t i = 0; i < externalMemory2.size(); i++)
1626     {
1627         for (size_t j = 0; j < externalMemory2[i].size(); j++)
1628         {
1629             if (externalMemory2[i][j])
1630             {
1631                 delete externalMemory2[i][j];
1632             }
1633         }
1634     }
1635 
1636     if (!use_fence)
1637     {
1638         if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1639         if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1640         if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
1641         if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
1642     }
1643 
1644     if (error_3) free(error_3);
1645     if (error_1) clReleaseMemObject(error_1);
1646     if (error_2) clReleaseMemObject(error_2);
1647     return err;
1648 }
1649 
test_buffer_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_,float use_fence)1650 int test_buffer_common(cl_device_id device_, cl_context context_,
1651                        cl_command_queue queue_, int numElements_,
1652                        float use_fence)
1653 {
1654 
1655     int current_device = 0;
1656     int device_count = 0;
1657     int devices_prohibited = 0;
1658     cl_int errNum = CL_SUCCESS;
1659     cl_platform_id platform = NULL;
1660     size_t extensionSize = 0;
1661     cl_uint num_devices = 0;
1662     cl_uint device_no = 0;
1663     const size_t bufsize = BUFFERSIZE;
1664     char buf[BUFFERSIZE];
1665     cl_device_id *devices;
1666     char *extensions = NULL;
1667     cl_kernel verify_kernel;
1668     cl_kernel verify_kernel2;
1669     cl_kernel kernel[3] = { NULL, NULL, NULL };
1670     cl_kernel kernel2[3] = { NULL, NULL, NULL };
1671     const char *program_source_const[3] = { kernel_text_numbuffer_1,
1672                                             kernel_text_numbuffer_2,
1673                                             kernel_text_numbuffer_4 };
1674     const char *program_source_const_verify;
1675     size_t program_source_length;
1676     cl_command_queue cmd_queue1 = NULL;
1677     cl_command_queue cmd_queue2 = NULL;
1678     cl_command_queue cmd_queue3 = NULL;
1679     cl_context context = NULL;
1680     cl_program program[3] = { NULL, NULL, NULL };
1681     cl_program program_verify, program_verify2;
1682     cl_context context2 = NULL;
1683 
1684 
1685     VulkanDevice vkDevice;
1686     uint32_t numBuffersList[] = { 1, 2, 4 };
1687     uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 };
1688     uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 };
1689 
1690     cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1691     errNum = clGetPlatformIDs(1, &platform, NULL);
1692     if (errNum != CL_SUCCESS)
1693     {
1694         print_error(errNum, "Error: Failed to get platform\n");
1695         goto CLEANUP;
1696     }
1697 
1698     errNum =
1699         clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1700     if (CL_SUCCESS != errNum)
1701     {
1702         print_error(errNum, "clGetDeviceIDs failed in returning of devices\n");
1703         goto CLEANUP;
1704     }
1705     devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1706     if (NULL == devices)
1707     {
1708         errNum = CL_OUT_OF_HOST_MEMORY;
1709         print_error(errNum, "Unable to allocate memory for devices\n");
1710         goto CLEANUP;
1711     }
1712     errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1713                             NULL);
1714     if (CL_SUCCESS != errNum)
1715     {
1716         print_error(errNum, "Failed to get deviceID.\n");
1717         goto CLEANUP;
1718     }
1719     contextProperties[1] = (cl_context_properties)platform;
1720     log_info("Assigned contextproperties for platform\n");
1721     for (device_no = 0; device_no < num_devices; device_no++)
1722     {
1723         errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
1724                                  NULL, &extensionSize);
1725         if (CL_SUCCESS != errNum)
1726         {
1727             print_error(errNum,
1728                         "Error in clGetDeviceInfo for getting device_extension "
1729                         "size....\n");
1730             goto CLEANUP;
1731         }
1732         extensions = (char *)malloc(extensionSize);
1733         if (NULL == extensions)
1734         {
1735             print_error(errNum, "Unable to allocate memory for extensions\n");
1736             errNum = CL_OUT_OF_HOST_MEMORY;
1737             goto CLEANUP;
1738         }
1739         errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1740                                  extensionSize, extensions, NULL);
1741         if (CL_SUCCESS != errNum)
1742         {
1743             print_error(errNum,
1744                         "Error in clGetDeviceInfo for device_extension\n");
1745             goto CLEANUP;
1746         }
1747         errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1748                                  CL_UUID_SIZE_KHR, uuid, &extensionSize);
1749         if (CL_SUCCESS != errNum)
1750         {
1751             print_error(errNum, "clGetDeviceInfo failed\n");
1752             goto CLEANUP;
1753         }
1754         errNum =
1755             memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1756         if (errNum == 0)
1757         {
1758             break;
1759         }
1760     }
1761     if (device_no >= num_devices)
1762     {
1763         errNum = EXIT_FAILURE;
1764         print_error(errNum,
1765                     "OpenCL error: "
1766                     "No Vulkan-OpenCL Interop capable GPU found.\n");
1767         goto CLEANUP;
1768     }
1769     deviceId = devices[device_no];
1770     context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1771                                       NULL, NULL, &errNum);
1772     if (CL_SUCCESS != errNum)
1773     {
1774         print_error(errNum, "error creating context\n");
1775         goto CLEANUP;
1776     }
1777     log_info("Successfully created context !!!\n");
1778 
1779     cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
1780     if (CL_SUCCESS != errNum)
1781     {
1782         errNum = CL_INVALID_COMMAND_QUEUE;
1783         print_error(errNum, "Error: Failed to create command queue!\n");
1784         goto CLEANUP;
1785     }
1786     cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
1787     if (CL_SUCCESS != errNum)
1788     {
1789         errNum = CL_INVALID_COMMAND_QUEUE;
1790         print_error(errNum, "Error: Failed to create command queue!\n");
1791         goto CLEANUP;
1792     }
1793     log_info("clCreateCommandQueue successful\n");
1794     for (int i = 0; i < 3; i++)
1795     {
1796         program_source_length = strlen(program_source_const[i]);
1797         program[i] =
1798             clCreateProgramWithSource(context, 1, &program_source_const[i],
1799                                       &program_source_length, &errNum);
1800         errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
1801         if (errNum != CL_SUCCESS)
1802         {
1803             print_error(errNum, "Error: Failed to build program \n");
1804             return errNum;
1805         }
1806         // create the kernel
1807         kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
1808         if (errNum != CL_SUCCESS)
1809         {
1810             print_error(errNum, "clCreateKernel failed \n");
1811             return errNum;
1812         }
1813     }
1814 
1815     program_source_const_verify = kernel_text_verify;
1816     program_source_length = strlen(program_source_const_verify);
1817     program_verify =
1818         clCreateProgramWithSource(context, 1, &program_source_const_verify,
1819                                   &program_source_length, &errNum);
1820     errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
1821     if (errNum != CL_SUCCESS)
1822     {
1823         log_error("Error: Failed to build program2\n");
1824         return errNum;
1825     }
1826     verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum);
1827     if (errNum != CL_SUCCESS)
1828     {
1829         print_error(errNum, "clCreateKernel failed \n");
1830         return errNum;
1831     }
1832 
1833     if (multiCtx) // different context guard
1834     {
1835         context2 = clCreateContextFromType(
1836             contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
1837         if (CL_SUCCESS != errNum)
1838         {
1839             print_error(errNum, "error creating context\n");
1840             goto CLEANUP;
1841         }
1842         cmd_queue3 =
1843             clCreateCommandQueue(context2, devices[device_no], 0, &errNum);
1844         if (CL_SUCCESS != errNum)
1845         {
1846             errNum = CL_INVALID_COMMAND_QUEUE;
1847             print_error(errNum, "Error: Failed to create command queue!\n");
1848             goto CLEANUP;
1849         }
1850         for (int i = 0; i < 3; i++)
1851         {
1852             program_source_length = strlen(program_source_const[i]);
1853             program[i] =
1854                 clCreateProgramWithSource(context2, 1, &program_source_const[i],
1855                                           &program_source_length, &errNum);
1856             errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
1857             if (errNum != CL_SUCCESS)
1858             {
1859                 print_error(errNum, "Error: Failed to build program \n");
1860                 return errNum;
1861             }
1862             // create the kernel
1863             kernel2[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
1864             if (errNum != CL_SUCCESS)
1865             {
1866                 print_error(errNum, "clCreateKernel failed \n");
1867                 return errNum;
1868             }
1869         }
1870         program_source_length = strlen(program_source_const_verify);
1871         program_verify =
1872             clCreateProgramWithSource(context2, 1, &program_source_const_verify,
1873                                       &program_source_length, &errNum);
1874         errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
1875         if (errNum != CL_SUCCESS)
1876         {
1877             log_error("Error: Failed to build program2\n");
1878             return errNum;
1879         }
1880         verify_kernel2 = clCreateKernel(program_verify, "checkKernel", &errNum);
1881         if (errNum != CL_SUCCESS)
1882         {
1883             print_error(errNum, "clCreateKernel failed \n");
1884             return errNum;
1885         }
1886     }
1887 
1888     for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList);
1889          numBuffersIdx++)
1890     {
1891         uint32_t numBuffers = numBuffersList[numBuffersIdx];
1892         log_info("Number of buffers: %d\n", numBuffers);
1893         for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList);
1894              sizeIdx++)
1895         {
1896             uint32_t bufferSize = bufferSizeList[sizeIdx];
1897             uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx];
1898             log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: "
1899                      "%d\n",
1900                      bufferSize);
1901             if (multiImport && !multiCtx)
1902             {
1903                 errNum = run_test_with_multi_import_same_ctx(
1904                     context, cmd_queue1, kernel, verify_kernel, vkDevice,
1905                     numBuffers, bufferSize, bufferSizeForOffset, use_fence);
1906             }
1907             else if (multiImport && multiCtx)
1908             {
1909                 errNum = run_test_with_multi_import_diff_ctx(
1910                     context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
1911                     verify_kernel, verify_kernel2, vkDevice, numBuffers,
1912                     bufferSize, bufferSizeForOffset, use_fence);
1913             }
1914             else if (numCQ == 2)
1915             {
1916                 errNum = run_test_with_two_queue(
1917                     context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
1918                     vkDevice, numBuffers + 1, bufferSize, use_fence);
1919             }
1920             else
1921             {
1922                 errNum = run_test_with_one_queue(
1923                     context, cmd_queue1, kernel, verify_kernel, vkDevice,
1924                     numBuffers, bufferSize, use_fence);
1925             }
1926             if (errNum != CL_SUCCESS)
1927             {
1928                 print_error(errNum, "func_name failed \n");
1929                 goto CLEANUP;
1930             }
1931         }
1932     }
1933 
1934 CLEANUP:
1935     for (int i = 0; i < 3; i++)
1936     {
1937         if (program[i]) clReleaseProgram(program[i]);
1938         if (kernel[i]) clReleaseKernel(kernel[i]);
1939     }
1940     if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1941     if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1942     if (cmd_queue3) clReleaseCommandQueue(cmd_queue3);
1943     if (context) clReleaseContext(context);
1944     if (context2) clReleaseContext(context2);
1945 
1946     if (devices) free(devices);
1947     if (extensions) free(extensions);
1948 
1949     return errNum;
1950 }
1951