1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <vulkan_interop_common.hpp>
18 #include <vulkan_wrapper.hpp>
19 #include <CL/cl.h>
20 #include <CL/cl_ext.h>
21 #include <assert.h>
22 #include <vector>
23 #include <iostream>
24 #include <memory>
25 #include <string.h>
26 #include "harness/errorHelpers.h"
27
28 #define MAX_BUFFERS 5
29 #define MAX_IMPORTS 5
30 #define BUFFERSIZE 3000
31 static cl_uchar uuid[CL_UUID_SIZE_KHR];
32 static cl_device_id deviceId = NULL;
33
namespace {
// Host-side mirror of the parameter block consumed by the "buffer.spv"
// compute shader; written through vkParamsDeviceMemory and bound at
// descriptor binding 0. Field order/size must match the shader layout.
struct Params
{
    uint32_t numBuffers; // number of storage buffers bound at binding 1
    uint32_t bufferSize; // size in bytes of each buffer
    uint32_t interBufferOffset; // byte stride between buffers when they share
                                // one allocation; 0 for dedicated allocations
};
}
42
// OpenCL C sources for the host-side "update" kernels. Each variant
// increments every byte of its buffer argument(s); the variant chosen at
// runtime depends on how many buffers the test iteration binds.

// Single-buffer variant.
const char *kernel_text_numbuffer_1 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\
    int gid = get_global_id(0); \n\
    if (gid < bufferSize) { \n\
        a[gid]++; \n\
    } \n\
}";

// Two-buffer variant (also built standalone as "kernel_cq" in the
// two-queue test).
const char *kernel_text_numbuffer_2 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) { \n\
    int gid = get_global_id(0); \n\
    if (gid < bufferSize) { \n\
        a[gid]++; \n\
        b[gid]++;\n\
    } \n\
}";

// Four-buffer variant.
const char *kernel_text_numbuffer_4 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) { \n\
    int gid = get_global_id(0); \n\
    if (gid < bufferSize) { \n\
        a[gid]++;\n\
        b[gid]++; \n\
        c[gid]++; \n\
        d[gid]++; \n\
    } \n\
}";


// Verification kernel: sets *err to 1 if any byte of ptr differs from the
// expected per-byte counter value expVal.
const char *kernel_text_verify = " \
__kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err) \n\
{ \n\
    int idx = get_global_id(0); \n\
    if ((idx < size) && (*err == 0)) { \n\
        if (ptr[idx] != expVal){ \n\
            *err = 1; \n\
        } \n\
    } \n\
}";
82
run_test_with_two_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,bool use_fence)83 int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
84 cl_command_queue &cmd_queue2, cl_kernel *kernel,
85 cl_kernel &verify_kernel, VulkanDevice &vkDevice,
86 uint32_t numBuffers, uint32_t bufferSize,
87 bool use_fence)
88 {
89 int err = CL_SUCCESS;
90 size_t global_work_size[1];
91 uint8_t *error_2;
92 cl_mem error_1;
93 cl_kernel update_buffer_kernel;
94 cl_kernel kernel_cq;
95 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
96 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
97 const char *program_source_const = kernel_text_numbuffer_2;
98 size_t program_source_length = strlen(program_source_const);
99 cl_program program = clCreateProgramWithSource(
100 context, 1, &program_source_const, &program_source_length, &err);
101 err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
102 if (err != CL_SUCCESS)
103 {
104 print_error(err, "Error: Failed to build program \n");
105 return err;
106 }
107 // create the kernel
108 kernel_cq = clCreateKernel(program, "clUpdateBuffer", &err);
109 if (err != CL_SUCCESS)
110 {
111 print_error(err, "clCreateKernel failed \n");
112 return err;
113 }
114
115 const std::vector<VulkanExternalMemoryHandleType>
116 vkExternalMemoryHandleTypeList =
117 getSupportedVulkanExternalMemoryHandleTypeList();
118 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
119 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
120 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
121 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
122 std::shared_ptr<VulkanFence> fence = nullptr;
123
124 VulkanQueue &vkQueue = vkDevice.getQueue();
125
126 std::vector<char> vkBufferShader = readFile("buffer.spv");
127
128 VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
129 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
130 vkDescriptorSetLayoutBindingList.addBinding(
131 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
132 vkDescriptorSetLayoutBindingList.addBinding(
133 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
134 VulkanDescriptorSetLayout vkDescriptorSetLayout(
135 vkDevice, vkDescriptorSetLayoutBindingList);
136 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
137 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
138 vkBufferShaderModule);
139
140 VulkanDescriptorPool vkDescriptorPool(vkDevice,
141 vkDescriptorSetLayoutBindingList);
142 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
143 vkDescriptorSetLayout);
144
145 if (use_fence)
146 {
147 fence = std::make_shared<VulkanFence>(vkDevice);
148 }
149 else
150 {
151 clVk2CLExternalSemaphore = new clExternalSemaphore(
152 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
153 clCl2VkExternalSemaphore = new clExternalSemaphore(
154 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
155 }
156
157 const uint32_t maxIter = innerIterations;
158 VulkanCommandPool vkCommandPool(vkDevice);
159 VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
160
161 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
162 VulkanDeviceMemory vkParamsDeviceMemory(
163 vkDevice, vkParamsBuffer.getSize(),
164 getVulkanMemoryType(vkDevice,
165 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
166 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
167 std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
168 std::vector<clExternalMemory *> externalMemory;
169 for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
170 emhtIdx++)
171 {
172 VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
173 vkExternalMemoryHandleTypeList[emhtIdx];
174 log_info("External memory handle type: %d\n",
175 vkExternalMemoryHandleType);
176
177 VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
178 vkExternalMemoryHandleType);
179 const VulkanMemoryTypeList &memoryTypeList =
180 vkDummyBuffer.getMemoryTypeList();
181
182 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
183 {
184 const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
185
186 log_info("Memory type index: %d\n", (uint32_t)memoryType);
187 log_info("Memory type property: %d\n",
188 memoryType.getMemoryTypeProperty());
189
190 VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
191 vkExternalMemoryHandleType);
192
193 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
194 {
195 vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
196 vkDevice, vkBufferList[bIdx], memoryType,
197 vkExternalMemoryHandleType));
198 externalMemory.push_back(new clExternalMemory(
199 vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
200 0, bufferSize, context, deviceId));
201 }
202 cl_mem buffers[MAX_BUFFERS];
203 clFinish(cmd_queue1);
204 Params *params = (Params *)vkParamsDeviceMemory.map();
205 params->numBuffers = numBuffers;
206 params->bufferSize = bufferSize;
207 params->interBufferOffset = 0;
208 vkParamsDeviceMemory.unmap();
209 vkDescriptorSet.update(0, vkParamsBuffer);
210 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
211 {
212 size_t buffer_size = vkBufferList[bIdx].getSize();
213 vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
214 0);
215 buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
216 }
217 vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
218 vkCommandBuffer.begin();
219 vkCommandBuffer.bindPipeline(vkComputePipeline);
220 vkCommandBuffer.bindDescriptorSets(
221 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
222 vkCommandBuffer.dispatch(512, 1, 1);
223 vkCommandBuffer.end();
224
225 if (vkBufferList.size() == 2)
226 {
227 update_buffer_kernel = kernel[0];
228 }
229 else if (vkBufferList.size() == 3)
230 {
231 update_buffer_kernel = kernel[1];
232 }
233 else if (vkBufferList.size() == 5)
234 {
235 update_buffer_kernel = kernel[2];
236 }
237 // global work size should be less than or equal to
238 // bufferSizeList[i]
239 global_work_size[0] = bufferSize;
240 for (uint32_t iter = 0; iter < maxIter; iter++)
241 {
242
243 if (use_fence)
244 {
245 fence->reset();
246 vkQueue.submit(vkCommandBuffer, fence);
247 fence->wait();
248 }
249 else
250 {
251 if (iter == 0)
252 {
253 vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
254 }
255 else
256 {
257 vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
258 vkVk2CLSemaphore);
259 }
260
261 clVk2CLExternalSemaphore->wait(cmd_queue1);
262 }
263
264
265 err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
266 (void *)&bufferSize);
267 err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t),
268 (void *)&bufferSize);
269 err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem),
270 (void *)&(buffers[0]));
271
272 for (int i = 0; i < vkBufferList.size() - 1; i++)
273 {
274 err |=
275 clSetKernelArg(update_buffer_kernel, i + 1,
276 sizeof(cl_mem), (void *)&(buffers[i]));
277 }
278
279 err |=
280 clSetKernelArg(kernel_cq, 2, sizeof(cl_mem),
281 (void *)&(buffers[vkBufferList.size() - 1]));
282
283 if (err != CL_SUCCESS)
284 {
285 print_error(err,
286 "Error: Failed to set arg values for kernel\n");
287 goto CLEANUP;
288 }
289 cl_event first_launch;
290
291 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
292 1, NULL, global_work_size, NULL, 0,
293 NULL, &first_launch);
294 if (err != CL_SUCCESS)
295 {
296 print_error(err,
297 "Error: Failed to launch update_buffer_kernel,"
298 "error\n");
299 goto CLEANUP;
300 }
301
302 err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL,
303 global_work_size, NULL, 1,
304 &first_launch, NULL);
305 if (err != CL_SUCCESS)
306 {
307 print_error(err,
308 "Error: Failed to launch update_buffer_kernel,"
309 "error\n");
310 goto CLEANUP;
311 }
312
313 if (use_fence)
314 {
315 clFlush(cmd_queue1);
316 clFlush(cmd_queue2);
317 clFinish(cmd_queue1);
318 clFinish(cmd_queue2);
319 }
320 else if (!use_fence && iter != (maxIter - 1))
321 {
322 clCl2VkExternalSemaphore->signal(cmd_queue2);
323 }
324 }
325 error_2 = (uint8_t *)malloc(sizeof(uint8_t));
326 if (NULL == error_2)
327 {
328 log_error("Not able to allocate memory\n");
329 goto CLEANUP;
330 }
331 clFinish(cmd_queue2);
332 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
333 sizeof(uint8_t), NULL, &err);
334 if (CL_SUCCESS != err)
335 {
336 print_error(err, "Error: clCreateBuffer \n");
337 goto CLEANUP;
338 }
339 uint8_t val = 0;
340 err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
341 sizeof(uint8_t), &val, 0, NULL, NULL);
342 if (err != CL_SUCCESS)
343 {
344 print_error(err, "Error: Failed read output, error\n");
345 goto CLEANUP;
346 }
347
348 int calc_max_iter;
349 for (int i = 0; i < vkBufferList.size(); i++)
350 {
351 if (i == 0)
352 calc_max_iter = (maxIter * 3);
353 else
354 calc_max_iter = (maxIter * 2);
355 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
356 (void *)&(buffers[i]));
357 err |=
358 clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
359 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
360 &calc_max_iter);
361 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
362 (void *)&error_1);
363 if (err != CL_SUCCESS)
364 {
365 print_error(err,
366 "Error: Failed to set arg values for "
367 "verify_kernel \n");
368 goto CLEANUP;
369 }
370 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
371 global_work_size, NULL, 0, NULL,
372 NULL);
373
374 if (err != CL_SUCCESS)
375 {
376 print_error(err,
377 "Error: Failed to launch verify_kernel,"
378 "error \n");
379 goto CLEANUP;
380 }
381 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
382 sizeof(uint8_t), error_2, 0, NULL,
383 NULL);
384 if (err != CL_SUCCESS)
385 {
386 print_error(err, "Error: Failed read output, error \n ");
387 goto CLEANUP;
388 }
389 if (*error_2 == 1)
390 {
391 log_error("&&&& vulkan_opencl_buffer test FAILED\n");
392 goto CLEANUP;
393 }
394 }
395 for (size_t i = 0; i < vkBufferList.size(); i++)
396 {
397 delete vkBufferListDeviceMemory[i];
398 delete externalMemory[i];
399 }
400 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
401 vkBufferListDeviceMemory.begin()
402 + numBuffers);
403 externalMemory.erase(externalMemory.begin(),
404 externalMemory.begin() + numBuffers);
405 }
406 }
407 CLEANUP:
408 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
409 {
410 if (vkBufferListDeviceMemory[i])
411 {
412 delete vkBufferListDeviceMemory[i];
413 }
414 if (externalMemory[i])
415 {
416 delete externalMemory[i];
417 }
418 }
419 if (program) clReleaseProgram(program);
420 if (kernel_cq) clReleaseKernel(kernel_cq);
421 if (!use_fence)
422 {
423 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
424 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
425 }
426 if (error_2) free(error_2);
427 if (error_1) clReleaseMemObject(error_1);
428
429 return err;
430 }
431
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,bool use_fence)432 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
433 cl_kernel *kernel, cl_kernel &verify_kernel,
434 VulkanDevice &vkDevice, uint32_t numBuffers,
435 uint32_t bufferSize, bool use_fence)
436 {
437 log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
438 size_t global_work_size[1];
439 uint8_t *error_2;
440 cl_mem error_1;
441 cl_kernel update_buffer_kernel;
442 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
443 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
444 int err = CL_SUCCESS;
445
446 const std::vector<VulkanExternalMemoryHandleType>
447 vkExternalMemoryHandleTypeList =
448 getSupportedVulkanExternalMemoryHandleTypeList();
449 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
450 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
451 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
452 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
453 std::shared_ptr<VulkanFence> fence = nullptr;
454
455 VulkanQueue &vkQueue = vkDevice.getQueue();
456
457 std::vector<char> vkBufferShader = readFile("buffer.spv");
458 VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
459 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
460 vkDescriptorSetLayoutBindingList.addBinding(
461 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
462 vkDescriptorSetLayoutBindingList.addBinding(
463 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
464 VulkanDescriptorSetLayout vkDescriptorSetLayout(
465 vkDevice, vkDescriptorSetLayoutBindingList);
466 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
467 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
468 vkBufferShaderModule);
469
470 VulkanDescriptorPool vkDescriptorPool(vkDevice,
471 vkDescriptorSetLayoutBindingList);
472 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
473 vkDescriptorSetLayout);
474
475 if (use_fence)
476 {
477 fence = std::make_shared<VulkanFence>(vkDevice);
478 }
479 else
480 {
481 clVk2CLExternalSemaphore = new clExternalSemaphore(
482 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
483 clCl2VkExternalSemaphore = new clExternalSemaphore(
484 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
485 }
486
487 const uint32_t maxIter = innerIterations;
488 VulkanCommandPool vkCommandPool(vkDevice);
489 VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
490
491 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
492 VulkanDeviceMemory vkParamsDeviceMemory(
493 vkDevice, vkParamsBuffer.getSize(),
494 getVulkanMemoryType(vkDevice,
495 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
496 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
497 std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
498 std::vector<clExternalMemory *> externalMemory;
499
500 for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
501 emhtIdx++)
502 {
503 VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
504 vkExternalMemoryHandleTypeList[emhtIdx];
505 log_info("External memory handle type: %d\n",
506 vkExternalMemoryHandleType);
507
508 VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
509 vkExternalMemoryHandleType);
510 const VulkanMemoryTypeList &memoryTypeList =
511 vkDummyBuffer.getMemoryTypeList();
512
513 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
514 {
515 const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
516
517 log_info("Memory type index: %d\n", (uint32_t)memoryType);
518 log_info("Memory type property: %d\n",
519 memoryType.getMemoryTypeProperty());
520
521 VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize,
522 vkExternalMemoryHandleType);
523
524 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
525 {
526 vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
527 vkDevice, vkBufferList[bIdx], memoryType,
528 vkExternalMemoryHandleType));
529 externalMemory.push_back(new clExternalMemory(
530 vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
531 0, bufferSize, context, deviceId));
532 }
533 cl_mem buffers[4];
534 clFinish(cmd_queue1);
535 Params *params = (Params *)vkParamsDeviceMemory.map();
536 params->numBuffers = numBuffers;
537 params->bufferSize = bufferSize;
538 params->interBufferOffset = 0;
539 vkParamsDeviceMemory.unmap();
540 vkDescriptorSet.update(0, vkParamsBuffer);
541 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
542 {
543 size_t buffer_size = vkBufferList[bIdx].getSize();
544 vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
545 0);
546 buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
547 }
548 vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList);
549
550 vkCommandBuffer.begin();
551 vkCommandBuffer.bindPipeline(vkComputePipeline);
552 vkCommandBuffer.bindDescriptorSets(
553 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
554 vkCommandBuffer.dispatch(512, 1, 1);
555 vkCommandBuffer.end();
556
557 if (vkBufferList.size() == 1)
558 {
559 update_buffer_kernel = kernel[0];
560 }
561 else if (vkBufferList.size() == 2)
562 {
563 update_buffer_kernel = kernel[1];
564 }
565 else if (vkBufferList.size() == 4)
566 {
567 update_buffer_kernel = kernel[2];
568 }
569
570 // global work size should be less than or equal to
571 // bufferSizeList[i]
572 global_work_size[0] = bufferSize;
573
574 for (uint32_t iter = 0; iter < maxIter; iter++)
575 {
576 if (use_fence)
577 {
578 fence->reset();
579 vkQueue.submit(vkCommandBuffer, fence);
580 fence->wait();
581 }
582 else
583 {
584 if (iter == 0)
585 {
586 vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
587 }
588 else
589 {
590 vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
591 vkVk2CLSemaphore);
592 }
593
594 clVk2CLExternalSemaphore->wait(cmd_queue1);
595 }
596
597 err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
598 (void *)&bufferSize);
599 for (int i = 0; i < vkBufferList.size(); i++)
600 {
601 err |=
602 clSetKernelArg(update_buffer_kernel, i + 1,
603 sizeof(cl_mem), (void *)&(buffers[i]));
604 }
605
606 if (err != CL_SUCCESS)
607 {
608 print_error(err,
609 "Error: Failed to set arg values for kernel\n");
610 goto CLEANUP;
611 }
612 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
613 1, NULL, global_work_size, NULL, 0,
614 NULL, NULL);
615 if (err != CL_SUCCESS)
616 {
617 print_error(err,
618 "Error: Failed to launch update_buffer_kernel,"
619 " error\n");
620 goto CLEANUP;
621 }
622 if (use_fence)
623 {
624 clFlush(cmd_queue1);
625 clFinish(cmd_queue1);
626 }
627 else if (!use_fence && (iter != (maxIter - 1)))
628 {
629 clCl2VkExternalSemaphore->signal(cmd_queue1);
630 }
631 }
632 error_2 = (uint8_t *)malloc(sizeof(uint8_t));
633 if (NULL == error_2)
634 {
635 log_error("Not able to allocate memory\n");
636 goto CLEANUP;
637 }
638
639 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
640 sizeof(uint8_t), NULL, &err);
641 if (CL_SUCCESS != err)
642 {
643 print_error(err, "Error: clCreateBuffer \n");
644 goto CLEANUP;
645 }
646 uint8_t val = 0;
647 err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
648 sizeof(uint8_t), &val, 0, NULL, NULL);
649 if (CL_SUCCESS != err)
650 {
651 print_error(err, "Error: clEnqueueWriteBuffer \n");
652 goto CLEANUP;
653 }
654
655 int calc_max_iter = (maxIter * 2);
656 for (int i = 0; i < vkBufferList.size(); i++)
657 {
658 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
659 (void *)&(buffers[i]));
660 err |=
661 clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize);
662 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
663 &calc_max_iter);
664 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
665 (void *)&error_1);
666 if (err != CL_SUCCESS)
667 {
668 print_error(
669 err,
670 "Error: Failed to set arg values for verify_kernel \n");
671 goto CLEANUP;
672 }
673 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL,
674 global_work_size, NULL, 0, NULL,
675 NULL);
676 if (err != CL_SUCCESS)
677 {
678 print_error(
679 err, "Error: Failed to launch verify_kernel, error\n");
680 goto CLEANUP;
681 }
682
683 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
684 sizeof(uint8_t), error_2, 0, NULL,
685 NULL);
686 if (err != CL_SUCCESS)
687 {
688 print_error(err, "Error: Failed read output, error \n");
689 goto CLEANUP;
690 }
691 if (*error_2 == 1)
692 {
693 log_error("&&&& vulkan_opencl_buffer test FAILED\n");
694 goto CLEANUP;
695 }
696 }
697 for (size_t i = 0; i < vkBufferList.size(); i++)
698 {
699 delete vkBufferListDeviceMemory[i];
700 delete externalMemory[i];
701 }
702 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
703 vkBufferListDeviceMemory.begin()
704 + numBuffers);
705 externalMemory.erase(externalMemory.begin(),
706 externalMemory.begin() + numBuffers);
707 }
708 }
709 CLEANUP:
710 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
711 {
712 if (vkBufferListDeviceMemory[i])
713 {
714 delete vkBufferListDeviceMemory[i];
715 }
716 if (externalMemory[i])
717 {
718 delete externalMemory[i];
719 }
720 }
721
722 if (!use_fence)
723 {
724 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
725 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
726 }
727
728 if (error_2) free(error_2);
729 if (error_1) clReleaseMemObject(error_1);
730 return err;
731 }
732
run_test_with_multi_import_same_ctx(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel,cl_kernel & verify_kernel,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,uint32_t bufferSizeForOffset,float use_fence)733 int run_test_with_multi_import_same_ctx(
734 cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
735 cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
736 uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence)
737 {
738 size_t global_work_size[1];
739 uint8_t *error_2;
740 cl_mem error_1;
741 int numImports = numBuffers;
742 cl_kernel update_buffer_kernel[MAX_IMPORTS];
743 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
744 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
745 int err = CL_SUCCESS;
746 int calc_max_iter;
747 bool withOffset;
748 uint32_t pBufferSize;
749
750 const std::vector<VulkanExternalMemoryHandleType>
751 vkExternalMemoryHandleTypeList =
752 getSupportedVulkanExternalMemoryHandleTypeList();
753 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
754 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
755 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
756 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
757 std::shared_ptr<VulkanFence> fence = nullptr;
758
759 VulkanQueue &vkQueue = vkDevice.getQueue();
760
761 std::vector<char> vkBufferShader = readFile("buffer.spv");
762
763 VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
764 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
765 vkDescriptorSetLayoutBindingList.addBinding(
766 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
767 vkDescriptorSetLayoutBindingList.addBinding(
768 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
769 VulkanDescriptorSetLayout vkDescriptorSetLayout(
770 vkDevice, vkDescriptorSetLayoutBindingList);
771 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
772 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
773 vkBufferShaderModule);
774
775 VulkanDescriptorPool vkDescriptorPool(vkDevice,
776 vkDescriptorSetLayoutBindingList);
777 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
778 vkDescriptorSetLayout);
779
780 if (use_fence)
781 {
782 fence = std::make_shared<VulkanFence>(vkDevice);
783 }
784 else
785 {
786 clVk2CLExternalSemaphore = new clExternalSemaphore(
787 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
788 clCl2VkExternalSemaphore = new clExternalSemaphore(
789 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
790 }
791
792 const uint32_t maxIter = innerIterations;
793 VulkanCommandPool vkCommandPool(vkDevice);
794 VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
795
796 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
797 VulkanDeviceMemory vkParamsDeviceMemory(
798 vkDevice, vkParamsBuffer.getSize(),
799 getVulkanMemoryType(vkDevice,
800 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
801 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
802 std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
803 std::vector<std::vector<clExternalMemory *>> externalMemory;
804
805
806 for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
807 emhtIdx++)
808 {
809 VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
810 vkExternalMemoryHandleTypeList[emhtIdx];
811 log_info("External memory handle type: %d\n",
812 vkExternalMemoryHandleType);
813
814 VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
815 vkExternalMemoryHandleType);
816 const VulkanMemoryTypeList &memoryTypeList =
817 vkDummyBuffer.getMemoryTypeList();
818
819 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
820 {
821 const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
822
823 log_info("Memory type index: %d\n", (uint32_t)memoryType);
824 log_info("Memory type property: %d\n",
825 memoryType.getMemoryTypeProperty());
826 for (unsigned int withOffset = 0;
827 withOffset <= (unsigned int)enableOffset; withOffset++)
828 {
829 log_info("Running withOffset case %d\n", (uint32_t)withOffset);
830 if (withOffset)
831 {
832 pBufferSize = bufferSizeForOffset;
833 }
834 else
835 {
836 pBufferSize = bufferSize;
837 }
838 cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS];
839 VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
840 vkExternalMemoryHandleType);
841 uint32_t interBufferOffset =
842 (uint32_t)(vkBufferList[0].getSize());
843
844 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
845 {
846 if (withOffset == 0)
847 {
848 vkBufferListDeviceMemory.push_back(
849 new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx],
850 memoryType,
851 vkExternalMemoryHandleType));
852 }
853 if (withOffset == 1)
854 {
855 uint32_t totalSize =
856 (uint32_t)(vkBufferList.size() * interBufferOffset);
857 vkBufferListDeviceMemory.push_back(
858 new VulkanDeviceMemory(vkDevice, totalSize,
859 memoryType,
860 vkExternalMemoryHandleType));
861 }
862 std::vector<clExternalMemory *> pExternalMemory;
863 for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
864 {
865 pExternalMemory.push_back(new clExternalMemory(
866 vkBufferListDeviceMemory[bIdx],
867 vkExternalMemoryHandleType,
868 withOffset * bIdx * interBufferOffset, pBufferSize,
869 context, deviceId));
870 }
871 externalMemory.push_back(pExternalMemory);
872 }
873
874 clFinish(cmd_queue1);
875 Params *params = (Params *)vkParamsDeviceMemory.map();
876 params->numBuffers = numBuffers;
877 params->bufferSize = pBufferSize;
878 params->interBufferOffset = interBufferOffset * withOffset;
879 vkParamsDeviceMemory.unmap();
880 vkDescriptorSet.update(0, vkParamsBuffer);
881 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
882 {
883 size_t buffer_size = vkBufferList[bIdx].getSize();
884 vkBufferListDeviceMemory[bIdx]->bindBuffer(
885 vkBufferList[bIdx],
886 bIdx * interBufferOffset * withOffset);
887 for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
888 {
889 buffers[bIdx][cl_bIdx] =
890 externalMemory[bIdx][cl_bIdx]
891 ->getExternalMemoryBuffer();
892 }
893 }
894 vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
895 vkCommandBuffer.begin();
896 vkCommandBuffer.bindPipeline(vkComputePipeline);
897 vkCommandBuffer.bindDescriptorSets(
898 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
899 vkCommandBuffer.dispatch(512, 1, 1);
900 vkCommandBuffer.end();
901 for (int i = 0; i < numImports; i++)
902 {
903 update_buffer_kernel[i] = (numBuffers == 1)
904 ? kernel[0]
905 : ((numBuffers == 2) ? kernel[1] : kernel[2]);
906 }
907 // global work size should be less than or equal to
908 // bufferSizeList[i]
909 global_work_size[0] = pBufferSize;
910
911 for (uint32_t iter = 0; iter < maxIter; iter++)
912 {
913 if (use_fence)
914 {
915 fence->reset();
916 vkQueue.submit(vkCommandBuffer, fence);
917 fence->wait();
918 }
919 else
920 {
921 if (iter == 0)
922 {
923 vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
924 }
925 else
926 {
927 vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
928 vkVk2CLSemaphore);
929 }
930 }
931
932 if (use_fence)
933 {
934 fence->wait();
935 }
936 else
937 {
938 clVk2CLExternalSemaphore->wait(cmd_queue1);
939 }
940
941 for (uint8_t launchIter = 0; launchIter < numImports;
942 launchIter++)
943 {
944 err = clSetKernelArg(update_buffer_kernel[launchIter],
945 0, sizeof(uint32_t),
946 (void *)&pBufferSize);
947 for (int i = 0; i < numBuffers; i++)
948 {
949 err |= clSetKernelArg(
950 update_buffer_kernel[launchIter], i + 1,
951 sizeof(cl_mem),
952 (void *)&(buffers[i][launchIter]));
953 }
954
955 if (err != CL_SUCCESS)
956 {
957 print_error(err,
958 "Error: Failed to set arg values for "
959 "kernel\n ");
960 goto CLEANUP;
961 }
962 err = clEnqueueNDRangeKernel(
963 cmd_queue1, update_buffer_kernel[launchIter], 1,
964 NULL, global_work_size, NULL, 0, NULL, NULL);
965 if (err != CL_SUCCESS)
966 {
967 print_error(err,
968 "Error: Failed to launch "
969 "update_buffer_kernel, error\n ");
970 goto CLEANUP;
971 }
972 }
973 if (use_fence)
974 {
975 clFinish(cmd_queue1);
976 }
977 else if (!use_fence && iter != (maxIter - 1))
978 {
979 clCl2VkExternalSemaphore->signal(cmd_queue1);
980 }
981 }
982 error_2 = (uint8_t *)malloc(sizeof(uint8_t));
983 if (NULL == error_2)
984 {
985 log_error("Not able to allocate memory\n");
986 goto CLEANUP;
987 }
988
989 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
990 sizeof(uint8_t), NULL, &err);
991 if (CL_SUCCESS != err)
992 {
993 print_error(err, "Error: clCreateBuffer \n");
994 goto CLEANUP;
995 }
996 uint8_t val = 0;
997 err =
998 clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
999 sizeof(uint8_t), &val, 0, NULL, NULL);
1000 if (CL_SUCCESS != err)
1001 {
1002 print_error(err, "Error: clEnqueueWriteBuffer \n");
1003 goto CLEANUP;
1004 }
1005 calc_max_iter = maxIter * (numBuffers + 1);
1006
1007 for (int i = 0; i < vkBufferList.size(); i++)
1008 {
1009 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
1010 (void *)&(buffers[i][0]));
1011 err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
1012 &pBufferSize);
1013 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
1014 &calc_max_iter);
1015 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
1016 (void *)&error_1);
1017 if (err != CL_SUCCESS)
1018 {
1019 print_error(err,
1020 "Error: Failed to set arg values for "
1021 "verify_kernel \n");
1022 goto CLEANUP;
1023 }
1024 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
1025 NULL, global_work_size, NULL,
1026 0, NULL, NULL);
1027 if (err != CL_SUCCESS)
1028 {
1029 print_error(
1030 err,
1031 "Error: Failed to launch verify_kernel, error\n");
1032 goto CLEANUP;
1033 }
1034
1035 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1036 sizeof(uint8_t), error_2, 0, NULL,
1037 NULL);
1038 if (err != CL_SUCCESS)
1039 {
1040 print_error(err, "Error: Failed read output, error \n");
1041 goto CLEANUP;
1042 }
1043 if (*error_2 == 1)
1044 {
1045 log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1046 goto CLEANUP;
1047 }
1048 }
1049 for (size_t i = 0; i < vkBufferList.size(); i++)
1050 {
1051 for (size_t j = 0; j < numImports; j++)
1052 {
1053 delete externalMemory[i][j];
1054 }
1055 }
1056 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1057 {
1058 delete vkBufferListDeviceMemory[i];
1059 }
1060 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
1061 vkBufferListDeviceMemory.end());
1062 for (size_t i = 0; i < externalMemory.size(); i++)
1063 {
1064 externalMemory[i].erase(externalMemory[i].begin(),
1065 externalMemory[i].begin()
1066 + numBuffers);
1067 }
1068 externalMemory.clear();
1069 }
1070 }
1071 }
1072 CLEANUP:
1073 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1074 {
1075 if (vkBufferListDeviceMemory[i])
1076 {
1077 delete vkBufferListDeviceMemory[i];
1078 }
1079 }
1080 for (size_t i = 0; i < externalMemory.size(); i++)
1081 {
1082 for (size_t j = 0; j < externalMemory[i].size(); j++)
1083 {
1084 if (externalMemory[i][j])
1085 {
1086 delete externalMemory[i][j];
1087 }
1088 }
1089 }
1090
1091 if (!use_fence)
1092 {
1093 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1094 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1095 }
1096
1097 if (error_2) free(error_2);
1098 if (error_1) clReleaseMemObject(error_1);
1099 return err;
1100 }
1101
run_test_with_multi_import_diff_ctx(cl_context & context,cl_context & context2,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel1,cl_kernel * kernel2,cl_kernel & verify_kernel,cl_kernel verify_kernel2,VulkanDevice & vkDevice,uint32_t numBuffers,uint32_t bufferSize,uint32_t bufferSizeForOffset,float use_fence)1102 int run_test_with_multi_import_diff_ctx(
1103 cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
1104 cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
1105 cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
1106 uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset,
1107 float use_fence)
1108 {
1109 size_t global_work_size[1];
1110 uint8_t *error_3;
1111 cl_mem error_1;
1112 cl_mem error_2;
1113 int numImports = numBuffers;
1114 cl_kernel update_buffer_kernel1[MAX_IMPORTS];
1115 cl_kernel update_buffer_kernel2[MAX_IMPORTS];
1116 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
1117 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
1118 clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL;
1119 clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL;
1120 int err = CL_SUCCESS;
1121 int calc_max_iter;
1122 bool withOffset;
1123 uint32_t pBufferSize;
1124
1125 const std::vector<VulkanExternalMemoryHandleType>
1126 vkExternalMemoryHandleTypeList =
1127 getSupportedVulkanExternalMemoryHandleTypeList();
1128 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
1129 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
1130 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
1131 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
1132 std::shared_ptr<VulkanFence> fence = nullptr;
1133
1134 VulkanQueue &vkQueue = vkDevice.getQueue();
1135
1136 std::vector<char> vkBufferShader = readFile("buffer.spv");
1137
1138 VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
1139 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
1140 MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
1141 VulkanDescriptorSetLayout vkDescriptorSetLayout(
1142 vkDevice, vkDescriptorSetLayoutBindingList);
1143 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
1144 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
1145 vkBufferShaderModule);
1146
1147 VulkanDescriptorPool vkDescriptorPool(vkDevice,
1148 vkDescriptorSetLayoutBindingList);
1149 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
1150 vkDescriptorSetLayout);
1151
1152 if (use_fence)
1153 {
1154 fence = std::make_shared<VulkanFence>(vkDevice);
1155 }
1156 else
1157 {
1158 clVk2CLExternalSemaphore = new clExternalSemaphore(
1159 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
1160 clCl2VkExternalSemaphore = new clExternalSemaphore(
1161 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
1162
1163 clVk2CLExternalSemaphore2 =
1164 new clExternalSemaphore(vkVk2CLSemaphore, context2,
1165 vkExternalSemaphoreHandleType, deviceId);
1166 clCl2VkExternalSemaphore2 =
1167 new clExternalSemaphore(vkCl2VkSemaphore, context2,
1168 vkExternalSemaphoreHandleType, deviceId);
1169 }
1170
1171 const uint32_t maxIter = innerIterations;
1172 VulkanCommandPool vkCommandPool(vkDevice);
1173 VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
1174
1175 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
1176 VulkanDeviceMemory vkParamsDeviceMemory(
1177 vkDevice, vkParamsBuffer.getSize(),
1178 getVulkanMemoryType(vkDevice,
1179 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
1180 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
1181 std::vector<VulkanDeviceMemory *> vkBufferListDeviceMemory;
1182 std::vector<std::vector<clExternalMemory *>> externalMemory1;
1183 std::vector<std::vector<clExternalMemory *>> externalMemory2;
1184
1185 for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size();
1186 emhtIdx++)
1187 {
1188 VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
1189 vkExternalMemoryHandleTypeList[emhtIdx];
1190 log_info("External memory handle type:%d\n",
1191 vkExternalMemoryHandleType);
1192
1193 VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024,
1194 vkExternalMemoryHandleType);
1195 const VulkanMemoryTypeList &memoryTypeList =
1196 vkDummyBuffer.getMemoryTypeList();
1197
1198 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
1199 {
1200 const VulkanMemoryType &memoryType = memoryTypeList[mtIdx];
1201
1202 log_info("Memory type index: %d\n", (uint32_t)memoryType);
1203 log_info("Memory type property: %d\n",
1204 memoryType.getMemoryTypeProperty());
1205
1206 for (unsigned int withOffset = 0;
1207 withOffset <= (unsigned int)enableOffset; withOffset++)
1208 {
1209 log_info("Running withOffset case %d\n", (uint32_t)withOffset);
1210 cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS];
1211 cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS];
1212 if (withOffset)
1213 {
1214 pBufferSize = bufferSizeForOffset;
1215 }
1216 else
1217 {
1218 pBufferSize = bufferSize;
1219 }
1220 VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize,
1221 vkExternalMemoryHandleType);
1222 uint32_t interBufferOffset =
1223 (uint32_t)(vkBufferList[0].getSize());
1224
1225 for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
1226 {
1227 if (withOffset == 0)
1228 {
1229 vkBufferListDeviceMemory.push_back(
1230 new VulkanDeviceMemory(vkDevice, pBufferSize,
1231 memoryType,
1232 vkExternalMemoryHandleType));
1233 }
1234 if (withOffset == 1)
1235 {
1236 uint32_t totalSize =
1237 (uint32_t)(vkBufferList.size() * interBufferOffset);
1238 vkBufferListDeviceMemory.push_back(
1239 new VulkanDeviceMemory(vkDevice, totalSize,
1240 memoryType,
1241 vkExternalMemoryHandleType));
1242 }
1243 std::vector<clExternalMemory *> pExternalMemory1;
1244 std::vector<clExternalMemory *> pExternalMemory2;
1245 for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
1246 {
1247 pExternalMemory1.push_back(new clExternalMemory(
1248 vkBufferListDeviceMemory[bIdx],
1249 vkExternalMemoryHandleType,
1250 withOffset * bIdx * interBufferOffset, pBufferSize,
1251 context, deviceId));
1252 pExternalMemory2.push_back(new clExternalMemory(
1253 vkBufferListDeviceMemory[bIdx],
1254 vkExternalMemoryHandleType,
1255 withOffset * bIdx * interBufferOffset, pBufferSize,
1256 context2, deviceId));
1257 }
1258 externalMemory1.push_back(pExternalMemory1);
1259 externalMemory2.push_back(pExternalMemory2);
1260 }
1261
1262 clFinish(cmd_queue1);
1263 Params *params = (Params *)vkParamsDeviceMemory.map();
1264 params->numBuffers = numBuffers;
1265 params->bufferSize = pBufferSize;
1266 params->interBufferOffset = interBufferOffset * withOffset;
1267 vkParamsDeviceMemory.unmap();
1268 vkDescriptorSet.update(0, vkParamsBuffer);
1269 for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++)
1270 {
1271 size_t buffer_size = vkBufferList[bIdx].getSize();
1272 vkBufferListDeviceMemory[bIdx]->bindBuffer(
1273 vkBufferList[bIdx],
1274 bIdx * interBufferOffset * withOffset);
1275 for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++)
1276 {
1277 buffers1[bIdx][cl_bIdx] =
1278 externalMemory1[bIdx][cl_bIdx]
1279 ->getExternalMemoryBuffer();
1280 buffers2[bIdx][cl_bIdx] =
1281 externalMemory2[bIdx][cl_bIdx]
1282 ->getExternalMemoryBuffer();
1283 }
1284 vkDescriptorSet.update((uint32_t)bIdx + 1,
1285 vkBufferList[bIdx]);
1286 }
1287
1288 vkCommandBuffer.begin();
1289 vkCommandBuffer.bindPipeline(vkComputePipeline);
1290 vkCommandBuffer.bindDescriptorSets(
1291 vkComputePipeline, vkPipelineLayout, vkDescriptorSet);
1292 vkCommandBuffer.dispatch(512, 1, 1);
1293 vkCommandBuffer.end();
1294
1295 for (int i = 0; i < numImports; i++)
1296 {
1297 update_buffer_kernel1[i] = (numBuffers == 1)
1298 ? kernel1[0]
1299 : ((numBuffers == 2) ? kernel1[1] : kernel1[2]);
1300 update_buffer_kernel2[i] = (numBuffers == 1)
1301 ? kernel2[0]
1302 : ((numBuffers == 2) ? kernel2[1] : kernel2[2]);
1303 }
1304
1305 // global work size should be less than or equal
1306 // to bufferSizeList[i]
1307 global_work_size[0] = pBufferSize;
1308
1309 for (uint32_t iter = 0; iter < maxIter; iter++)
1310 {
1311 if (use_fence)
1312 {
1313 fence->reset();
1314 vkQueue.submit(vkCommandBuffer, fence);
1315 fence->wait();
1316 }
1317 else
1318 {
1319 if (iter == 0)
1320 {
1321 vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
1322 }
1323 else
1324 {
1325 vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
1326 vkVk2CLSemaphore);
1327 }
1328 }
1329
1330 if (use_fence)
1331 {
1332 fence->wait();
1333 }
1334 else
1335 {
1336 clVk2CLExternalSemaphore->wait(cmd_queue1);
1337 }
1338
1339 for (uint8_t launchIter = 0; launchIter < numImports;
1340 launchIter++)
1341 {
1342 err = clSetKernelArg(update_buffer_kernel1[launchIter],
1343 0, sizeof(uint32_t),
1344 (void *)&pBufferSize);
1345 for (int i = 0; i < numBuffers; i++)
1346 {
1347 err |= clSetKernelArg(
1348 update_buffer_kernel1[launchIter], i + 1,
1349 sizeof(cl_mem),
1350 (void *)&(buffers1[i][launchIter]));
1351 }
1352
1353 if (err != CL_SUCCESS)
1354 {
1355 print_error(err,
1356 "Error: Failed to set arg values for "
1357 "kernel\n ");
1358 goto CLEANUP;
1359 }
1360 err = clEnqueueNDRangeKernel(
1361 cmd_queue1, update_buffer_kernel1[launchIter], 1,
1362 NULL, global_work_size, NULL, 0, NULL, NULL);
1363 if (err != CL_SUCCESS)
1364 {
1365 print_error(err,
1366 "Error: Failed to launch "
1367 "update_buffer_kernel, error\n");
1368 goto CLEANUP;
1369 }
1370 }
1371 if (use_fence)
1372 {
1373 clFinish(cmd_queue1);
1374 }
1375 else if (!use_fence && iter != (maxIter - 1))
1376 {
1377 clCl2VkExternalSemaphore->signal(cmd_queue1);
1378 }
1379 }
1380 clFinish(cmd_queue1);
1381 for (uint32_t iter = 0; iter < maxIter; iter++)
1382 {
1383 if (use_fence)
1384 {
1385 fence->reset();
1386 vkQueue.submit(vkCommandBuffer, fence);
1387 fence->wait();
1388 }
1389 else
1390 {
1391 if (iter == 0)
1392 {
1393 vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
1394 }
1395 else
1396 {
1397 vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
1398 vkVk2CLSemaphore);
1399 }
1400 }
1401
1402 if (use_fence)
1403 {
1404 fence->wait();
1405 }
1406 else
1407 {
1408 clVk2CLExternalSemaphore2->wait(cmd_queue2);
1409 }
1410
1411 for (uint8_t launchIter = 0; launchIter < numImports;
1412 launchIter++)
1413 {
1414 err = clSetKernelArg(update_buffer_kernel2[launchIter],
1415 0, sizeof(uint32_t),
1416 (void *)&bufferSize);
1417 for (int i = 0; i < numBuffers; i++)
1418 {
1419 err |= clSetKernelArg(
1420 update_buffer_kernel2[launchIter], i + 1,
1421 sizeof(cl_mem),
1422 (void *)&(buffers2[i][launchIter]));
1423 }
1424
1425 if (err != CL_SUCCESS)
1426 {
1427 print_error(err,
1428 "Error: Failed to set arg values for "
1429 "kernel\n ");
1430 goto CLEANUP;
1431 }
1432 err = clEnqueueNDRangeKernel(
1433 cmd_queue2, update_buffer_kernel2[launchIter], 1,
1434 NULL, global_work_size, NULL, 0, NULL, NULL);
1435 if (err != CL_SUCCESS)
1436 {
1437 print_error(err,
1438 "Error: Failed to launch "
1439 "update_buffer_kernel, error\n ");
1440 goto CLEANUP;
1441 }
1442 }
1443 if (use_fence)
1444 {
1445 clFinish(cmd_queue2);
1446 }
1447 else if (!use_fence && iter != (maxIter - 1))
1448 {
1449 clCl2VkExternalSemaphore2->signal(cmd_queue2);
1450 }
1451 }
1452 clFinish(cmd_queue2);
1453 error_3 = (uint8_t *)malloc(sizeof(uint8_t));
1454 if (NULL == error_3)
1455 {
1456 log_error("Not able to allocate memory\n");
1457 goto CLEANUP;
1458 }
1459
1460 error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
1461 sizeof(uint8_t), NULL, &err);
1462 if (CL_SUCCESS != err)
1463 {
1464 print_error(err, "Error: clCreateBuffer \n");
1465 goto CLEANUP;
1466 }
1467 error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY,
1468 sizeof(uint8_t), NULL, &err);
1469 if (CL_SUCCESS != err)
1470 {
1471 print_error(err, "Error: clCreateBuffer \n");
1472 goto CLEANUP;
1473 }
1474 uint8_t val = 0;
1475 err =
1476 clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1477 sizeof(uint8_t), &val, 0, NULL, NULL);
1478 if (err != CL_SUCCESS)
1479 {
1480 print_error(err, "Error: Failed read output, error \n");
1481 goto CLEANUP;
1482 }
1483
1484 err =
1485 clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0,
1486 sizeof(uint8_t), &val, 0, NULL, NULL);
1487 if (err != CL_SUCCESS)
1488 {
1489 print_error(err, "Error: Failed read output, error \n");
1490 goto CLEANUP;
1491 }
1492
1493 calc_max_iter = maxIter * 2 * (numBuffers + 1);
1494 for (int i = 0; i < numBuffers; i++)
1495 {
1496 err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem),
1497 (void *)&(buffers1[i][0]));
1498 err |= clSetKernelArg(verify_kernel, 1, sizeof(int),
1499 &pBufferSize);
1500 err |= clSetKernelArg(verify_kernel, 2, sizeof(int),
1501 &calc_max_iter);
1502 err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem),
1503 (void *)&error_1);
1504 if (err != CL_SUCCESS)
1505 {
1506 print_error(err,
1507 "Error: Failed to set arg values for "
1508 "verify_kernel \n");
1509 goto CLEANUP;
1510 }
1511 err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1,
1512 NULL, global_work_size, NULL,
1513 0, NULL, NULL);
1514 if (err != CL_SUCCESS)
1515 {
1516 print_error(err,
1517 "Error: Failed to launch verify_kernel,"
1518 "error\n");
1519 goto CLEANUP;
1520 }
1521
1522 err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0,
1523 sizeof(uint8_t), error_3, 0, NULL,
1524 NULL);
1525 if (err != CL_SUCCESS)
1526 {
1527 print_error(err, "Error: Failed read output, error\n");
1528 goto CLEANUP;
1529 }
1530 if (*error_3 == 1)
1531 {
1532 log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1533 goto CLEANUP;
1534 }
1535 }
1536 *error_3 = 0;
1537 for (int i = 0; i < vkBufferList.size(); i++)
1538 {
1539 err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem),
1540 (void *)&(buffers2[i][0]));
1541 err |= clSetKernelArg(verify_kernel2, 1, sizeof(int),
1542 &pBufferSize);
1543 err |= clSetKernelArg(verify_kernel2, 2, sizeof(int),
1544 &calc_max_iter);
1545 err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem),
1546 (void *)&error_2);
1547 if (err != CL_SUCCESS)
1548 {
1549 print_error(err,
1550 "Error: Failed to set arg values for "
1551 "verify_kernel \n");
1552 goto CLEANUP;
1553 }
1554 err = clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1,
1555 NULL, global_work_size, NULL,
1556 0, NULL, NULL);
1557 if (err != CL_SUCCESS)
1558 {
1559 print_error(err,
1560 "Error: Failed to launch verify_kernel,"
1561 "error\n");
1562 goto CLEANUP;
1563 }
1564
1565 err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0,
1566 sizeof(uint8_t), error_3, 0, NULL,
1567 NULL);
1568 if (err != CL_SUCCESS)
1569 {
1570 print_error(err, "Error: Failed read output, error\n");
1571 goto CLEANUP;
1572 }
1573 if (*error_3 == 1)
1574 {
1575 log_error("&&&& vulkan_opencl_buffer test FAILED\n");
1576 goto CLEANUP;
1577 }
1578 }
1579 for (size_t i = 0; i < vkBufferList.size(); i++)
1580 {
1581 for (size_t j = 0; j < numImports; j++)
1582 {
1583 delete externalMemory1[i][j];
1584 delete externalMemory2[i][j];
1585 }
1586 }
1587 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1588 {
1589 delete vkBufferListDeviceMemory[i];
1590 }
1591 vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(),
1592 vkBufferListDeviceMemory.end());
1593 for (size_t i = 0; i < externalMemory1.size(); i++)
1594 {
1595 externalMemory1[i].erase(externalMemory1[i].begin(),
1596 externalMemory1[i].begin()
1597 + numBuffers);
1598 externalMemory2[i].erase(externalMemory2[i].begin(),
1599 externalMemory2[i].begin()
1600 + numBuffers);
1601 }
1602 externalMemory1.clear();
1603 externalMemory2.clear();
1604 }
1605 }
1606 }
1607 CLEANUP:
1608 for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++)
1609 {
1610 if (vkBufferListDeviceMemory[i])
1611 {
1612 delete vkBufferListDeviceMemory[i];
1613 }
1614 }
1615 for (size_t i = 0; i < externalMemory1.size(); i++)
1616 {
1617 for (size_t j = 0; j < externalMemory1[i].size(); j++)
1618 {
1619 if (externalMemory1[i][j])
1620 {
1621 delete externalMemory1[i][j];
1622 }
1623 }
1624 }
1625 for (size_t i = 0; i < externalMemory2.size(); i++)
1626 {
1627 for (size_t j = 0; j < externalMemory2[i].size(); j++)
1628 {
1629 if (externalMemory2[i][j])
1630 {
1631 delete externalMemory2[i][j];
1632 }
1633 }
1634 }
1635
1636 if (!use_fence)
1637 {
1638 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1639 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1640 if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
1641 if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
1642 }
1643
1644 if (error_3) free(error_3);
1645 if (error_1) clReleaseMemObject(error_1);
1646 if (error_2) clReleaseMemObject(error_2);
1647 return err;
1648 }
1649
test_buffer_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_,float use_fence)1650 int test_buffer_common(cl_device_id device_, cl_context context_,
1651 cl_command_queue queue_, int numElements_,
1652 float use_fence)
1653 {
1654
1655 int current_device = 0;
1656 int device_count = 0;
1657 int devices_prohibited = 0;
1658 cl_int errNum = CL_SUCCESS;
1659 cl_platform_id platform = NULL;
1660 size_t extensionSize = 0;
1661 cl_uint num_devices = 0;
1662 cl_uint device_no = 0;
1663 const size_t bufsize = BUFFERSIZE;
1664 char buf[BUFFERSIZE];
1665 cl_device_id *devices;
1666 char *extensions = NULL;
1667 cl_kernel verify_kernel;
1668 cl_kernel verify_kernel2;
1669 cl_kernel kernel[3] = { NULL, NULL, NULL };
1670 cl_kernel kernel2[3] = { NULL, NULL, NULL };
1671 const char *program_source_const[3] = { kernel_text_numbuffer_1,
1672 kernel_text_numbuffer_2,
1673 kernel_text_numbuffer_4 };
1674 const char *program_source_const_verify;
1675 size_t program_source_length;
1676 cl_command_queue cmd_queue1 = NULL;
1677 cl_command_queue cmd_queue2 = NULL;
1678 cl_command_queue cmd_queue3 = NULL;
1679 cl_context context = NULL;
1680 cl_program program[3] = { NULL, NULL, NULL };
1681 cl_program program_verify, program_verify2;
1682 cl_context context2 = NULL;
1683
1684
1685 VulkanDevice vkDevice;
1686 uint32_t numBuffersList[] = { 1, 2, 4 };
1687 uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 };
1688 uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 };
1689
1690 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1691 errNum = clGetPlatformIDs(1, &platform, NULL);
1692 if (errNum != CL_SUCCESS)
1693 {
1694 print_error(errNum, "Error: Failed to get platform\n");
1695 goto CLEANUP;
1696 }
1697
1698 errNum =
1699 clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1700 if (CL_SUCCESS != errNum)
1701 {
1702 print_error(errNum, "clGetDeviceIDs failed in returning of devices\n");
1703 goto CLEANUP;
1704 }
1705 devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1706 if (NULL == devices)
1707 {
1708 errNum = CL_OUT_OF_HOST_MEMORY;
1709 print_error(errNum, "Unable to allocate memory for devices\n");
1710 goto CLEANUP;
1711 }
1712 errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1713 NULL);
1714 if (CL_SUCCESS != errNum)
1715 {
1716 print_error(errNum, "Failed to get deviceID.\n");
1717 goto CLEANUP;
1718 }
1719 contextProperties[1] = (cl_context_properties)platform;
1720 log_info("Assigned contextproperties for platform\n");
1721 for (device_no = 0; device_no < num_devices; device_no++)
1722 {
1723 errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0,
1724 NULL, &extensionSize);
1725 if (CL_SUCCESS != errNum)
1726 {
1727 print_error(errNum,
1728 "Error in clGetDeviceInfo for getting device_extension "
1729 "size....\n");
1730 goto CLEANUP;
1731 }
1732 extensions = (char *)malloc(extensionSize);
1733 if (NULL == extensions)
1734 {
1735 print_error(errNum, "Unable to allocate memory for extensions\n");
1736 errNum = CL_OUT_OF_HOST_MEMORY;
1737 goto CLEANUP;
1738 }
1739 errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1740 extensionSize, extensions, NULL);
1741 if (CL_SUCCESS != errNum)
1742 {
1743 print_error(errNum,
1744 "Error in clGetDeviceInfo for device_extension\n");
1745 goto CLEANUP;
1746 }
1747 errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1748 CL_UUID_SIZE_KHR, uuid, &extensionSize);
1749 if (CL_SUCCESS != errNum)
1750 {
1751 print_error(errNum, "clGetDeviceInfo failed\n");
1752 goto CLEANUP;
1753 }
1754 errNum =
1755 memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1756 if (errNum == 0)
1757 {
1758 break;
1759 }
1760 }
1761 if (device_no >= num_devices)
1762 {
1763 errNum = EXIT_FAILURE;
1764 print_error(errNum,
1765 "OpenCL error: "
1766 "No Vulkan-OpenCL Interop capable GPU found.\n");
1767 goto CLEANUP;
1768 }
1769 deviceId = devices[device_no];
1770 context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1771 NULL, NULL, &errNum);
1772 if (CL_SUCCESS != errNum)
1773 {
1774 print_error(errNum, "error creating context\n");
1775 goto CLEANUP;
1776 }
1777 log_info("Successfully created context !!!\n");
1778
1779 cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
1780 if (CL_SUCCESS != errNum)
1781 {
1782 errNum = CL_INVALID_COMMAND_QUEUE;
1783 print_error(errNum, "Error: Failed to create command queue!\n");
1784 goto CLEANUP;
1785 }
1786 cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum);
1787 if (CL_SUCCESS != errNum)
1788 {
1789 errNum = CL_INVALID_COMMAND_QUEUE;
1790 print_error(errNum, "Error: Failed to create command queue!\n");
1791 goto CLEANUP;
1792 }
1793 log_info("clCreateCommandQueue successful\n");
1794 for (int i = 0; i < 3; i++)
1795 {
1796 program_source_length = strlen(program_source_const[i]);
1797 program[i] =
1798 clCreateProgramWithSource(context, 1, &program_source_const[i],
1799 &program_source_length, &errNum);
1800 errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
1801 if (errNum != CL_SUCCESS)
1802 {
1803 print_error(errNum, "Error: Failed to build program \n");
1804 return errNum;
1805 }
1806 // create the kernel
1807 kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
1808 if (errNum != CL_SUCCESS)
1809 {
1810 print_error(errNum, "clCreateKernel failed \n");
1811 return errNum;
1812 }
1813 }
1814
1815 program_source_const_verify = kernel_text_verify;
1816 program_source_length = strlen(program_source_const_verify);
1817 program_verify =
1818 clCreateProgramWithSource(context, 1, &program_source_const_verify,
1819 &program_source_length, &errNum);
1820 errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
1821 if (errNum != CL_SUCCESS)
1822 {
1823 log_error("Error: Failed to build program2\n");
1824 return errNum;
1825 }
1826 verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum);
1827 if (errNum != CL_SUCCESS)
1828 {
1829 print_error(errNum, "clCreateKernel failed \n");
1830 return errNum;
1831 }
1832
1833 if (multiCtx) // different context guard
1834 {
1835 context2 = clCreateContextFromType(
1836 contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
1837 if (CL_SUCCESS != errNum)
1838 {
1839 print_error(errNum, "error creating context\n");
1840 goto CLEANUP;
1841 }
1842 cmd_queue3 =
1843 clCreateCommandQueue(context2, devices[device_no], 0, &errNum);
1844 if (CL_SUCCESS != errNum)
1845 {
1846 errNum = CL_INVALID_COMMAND_QUEUE;
1847 print_error(errNum, "Error: Failed to create command queue!\n");
1848 goto CLEANUP;
1849 }
1850 for (int i = 0; i < 3; i++)
1851 {
1852 program_source_length = strlen(program_source_const[i]);
1853 program[i] =
1854 clCreateProgramWithSource(context2, 1, &program_source_const[i],
1855 &program_source_length, &errNum);
1856 errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL);
1857 if (errNum != CL_SUCCESS)
1858 {
1859 print_error(errNum, "Error: Failed to build program \n");
1860 return errNum;
1861 }
1862 // create the kernel
1863 kernel2[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum);
1864 if (errNum != CL_SUCCESS)
1865 {
1866 print_error(errNum, "clCreateKernel failed \n");
1867 return errNum;
1868 }
1869 }
1870 program_source_length = strlen(program_source_const_verify);
1871 program_verify =
1872 clCreateProgramWithSource(context2, 1, &program_source_const_verify,
1873 &program_source_length, &errNum);
1874 errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL);
1875 if (errNum != CL_SUCCESS)
1876 {
1877 log_error("Error: Failed to build program2\n");
1878 return errNum;
1879 }
1880 verify_kernel2 = clCreateKernel(program_verify, "checkKernel", &errNum);
1881 if (errNum != CL_SUCCESS)
1882 {
1883 print_error(errNum, "clCreateKernel failed \n");
1884 return errNum;
1885 }
1886 }
1887
1888 for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList);
1889 numBuffersIdx++)
1890 {
1891 uint32_t numBuffers = numBuffersList[numBuffersIdx];
1892 log_info("Number of buffers: %d\n", numBuffers);
1893 for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList);
1894 sizeIdx++)
1895 {
1896 uint32_t bufferSize = bufferSizeList[sizeIdx];
1897 uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx];
1898 log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: "
1899 "%d\n",
1900 bufferSize);
1901 if (multiImport && !multiCtx)
1902 {
1903 errNum = run_test_with_multi_import_same_ctx(
1904 context, cmd_queue1, kernel, verify_kernel, vkDevice,
1905 numBuffers, bufferSize, bufferSizeForOffset, use_fence);
1906 }
1907 else if (multiImport && multiCtx)
1908 {
1909 errNum = run_test_with_multi_import_diff_ctx(
1910 context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
1911 verify_kernel, verify_kernel2, vkDevice, numBuffers,
1912 bufferSize, bufferSizeForOffset, use_fence);
1913 }
1914 else if (numCQ == 2)
1915 {
1916 errNum = run_test_with_two_queue(
1917 context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
1918 vkDevice, numBuffers + 1, bufferSize, use_fence);
1919 }
1920 else
1921 {
1922 errNum = run_test_with_one_queue(
1923 context, cmd_queue1, kernel, verify_kernel, vkDevice,
1924 numBuffers, bufferSize, use_fence);
1925 }
1926 if (errNum != CL_SUCCESS)
1927 {
1928 print_error(errNum, "func_name failed \n");
1929 goto CLEANUP;
1930 }
1931 }
1932 }
1933
1934 CLEANUP:
1935 for (int i = 0; i < 3; i++)
1936 {
1937 if (program[i]) clReleaseProgram(program[i]);
1938 if (kernel[i]) clReleaseKernel(kernel[i]);
1939 }
1940 if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1941 if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1942 if (cmd_queue3) clReleaseCommandQueue(cmd_queue3);
1943 if (context) clReleaseContext(context);
1944 if (context2) clReleaseContext(context2);
1945
1946 if (devices) free(devices);
1947 if (extensions) free(extensions);
1948
1949 return errNum;
1950 }
1951