//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <string.h>
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"

#include <vector>

#include "procs.h"
#include "utils.h"
#include <time.h>


#ifdef CL_VERSION_2_0
extern int gWimpyMode;

// clang-format off
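// Each work-item enqueues a trivial child block on the default device queue;
// with multiplier == 3 the block writes 0 (3 * 7 - 21) into res[tid].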
static const char* enqueue_simple_block[] = { R"(
    void block_fn(size_t tid, int mul, __global int* res)
    {
      res[tid] = mul * 7 - 21;
    }

    kernel void enqueue_simple_block(__global int* res)
    {
      int multiplier = 3;
      size_t tid = get_global_id(0);

      void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

      res[tid] = -1;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

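// The child block takes one __local buffer; its size in bytes is passed as the
// trailing uint argument to enqueue_kernel.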
static const char* enqueue_block_with_local_arg1[] = { R"(
    #define LOCAL_MEM_SIZE 10

    void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)
    {
      for (int i = 0; i < LOCAL_MEM_SIZE; i++)
      {
        tmp[i] = mul * 7 - 21;
        res[tid] += tmp[i];
      }
      res[tid] += 2;
    }

    kernel void enqueue_block_with_local_arg1(__global int* res)
    {
      int multiplier = 3;
      size_t tid = get_global_id(0);

      void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };

      res[tid] = -2;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

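// Same pattern with two __local buffers (int and float4); one size argument is
// passed per __local parameter of the block.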
static const char* enqueue_block_with_local_arg2[] = { R"(
    #define LOCAL_MEM_SIZE 10

    void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)
    {
      for (int i = 0; i < LOCAL_MEM_SIZE; i++)
      {
        tmp1[i]   = mul * 7 - 21;
        tmp2[i].x = (float)(mul * 7 - 21);
        tmp2[i].y = (float)(mul * 7 - 21);
        tmp2[i].z = (float)(mul * 7 - 21);
        tmp2[i].w = (float)(mul * 7 - 21);

        res[tid] += tmp1[i];
        res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);
      }
      res[tid] += 2;
    }

    kernel void enqueue_block_with_local_arg2(__global int* res)
    {
      int multiplier = 3;
      size_t tid = get_global_id(0);

      void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)
        { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };

      res[tid] = -2;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

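// The child block waits on a user event, so it cannot start until the parent
// calls set_user_event_status; a second block chained on block_evt then
// verifies that the first block completed.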
static const char* enqueue_block_with_wait_list[] = { R"(
    #define BLOCK_SUBMITTED 1
    #define BLOCK_COMPLETED 2
    #define CHECK_SUCCESS   0

    kernel void enqueue_block_with_wait_list(__global int* res)
    {
      size_t tid = get_global_id(0);

      clk_event_t user_evt = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      clk_event_t block_evt;
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
      ^{
          res[tid] = BLOCK_COMPLETED;
       });
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

      retain_event(block_evt);
      release_event(block_evt);

      //check block is not started
      if (res[tid] == BLOCK_SUBMITTED)
      {
        clk_event_t my_evt;
        enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
        ^{
           //check block is completed
           if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
         });
        release_event(my_evt);
      }

      set_user_event_status(user_evt, CL_COMPLETE);

      release_event(user_evt);
      release_event(block_evt);
    }
)" };

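// Combines an event wait list with a __local buffer argument on the same
// enqueue_kernel call.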
static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"(
    #define LOCAL_MEM_SIZE 10
    #define BLOCK_COMPLETED 1
    #define BLOCK_SUBMITTED 2
    #define BLOCK_STARTED   3
    #define CHECK_SUCCESS   0

    void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)
    {
      res[tid] = BLOCK_STARTED;
      for (int i = 0; i < LOCAL_MEM_SIZE; i++)
      {
        tmp[i] = mul * 7 - 21;
        res[tid] += tmp[i];
      }
      if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;
    }

    kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)
    {
      int multiplier = 3;
      size_t tid = get_global_id(0);
      clk_event_t user_evt = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      clk_event_t block_evt;
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
        ^(__local void* buf) {
           block_fn_local_arg(tid, multiplier, res, (__local int*)buf);
         }, LOCAL_MEM_SIZE*sizeof(int));
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

      retain_event(block_evt);
      release_event(block_evt);

      //check block is not started
      if (res[tid] == BLOCK_SUBMITTED)
      {
        clk_event_t my_evt;
        enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
        ^{
           //check block is completed
           if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
         });
        release_event(my_evt);
      }

      set_user_event_status(user_evt, CL_COMPLETE);

      release_event(user_evt);
      release_event(block_evt);
    }
)" };

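// Uses get_kernel_work_group_size to pick a legal local size for the child
// block before enqueuing it over local_work_size * 4 work-items.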
static const char* enqueue_block_get_kernel_work_group_size[] = { R"(
    void block_fn(size_t tid, int mul, __global int* res)
    {
      res[tid] = mul * 7 - 21;
    }

    kernel void enqueue_block_get_kernel_work_group_size(__global int* res)
    {
        int multiplier = 3;
        size_t tid = get_global_id(0);

        void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

        size_t local_work_size = get_kernel_work_group_size(kernelBlock);
        if (local_work_size <= 0){ res[tid] = -1; return; }
        size_t global_work_size = local_work_size * 4;

        res[tid] = -1;
        queue_t q1 = get_default_queue();
        ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);

        int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

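// Same as above, but sizes the ND-range from
// get_kernel_preferred_work_group_size_multiple.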
static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"(
    void block_fn(size_t tid, int mul, __global int* res)
    {
      res[tid] = mul * 7 - 21;
    }

    kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)
    {
        int multiplier = 3;
        size_t tid = get_global_id(0);

        void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

        size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);
        if (local_work_size <= 0){ res[tid] = -1; return; }
        size_t global_work_size = local_work_size * 4;

        res[tid] = -1;
        queue_t q1 = get_default_queue();
        ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);

        int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

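// Captures CLK_PROFILING_COMMAND_EXEC_TIME from a block that has already run:
// the profiling query happens in a second block that waits on the first one.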
static const char* enqueue_block_capture_event_profiling_info_after_execution[] = {
    "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
    , R"(
    __global ulong value[MAX_GWS*2] = {0};

    void block_fn(size_t tid, __global int* res)
    {
        res[tid] = -2;
    }

    void check_res(size_t tid, const clk_event_t evt, __global int* res)
    {
        capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);

        if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] =  0;
        else                                        res[tid] = -4;
        release_event(evt);
    }

    kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)
    {
        size_t tid = get_global_id(0);

        res[tid] = -1;
        queue_t def_q = get_default_queue();
        ndrange_t ndrange = ndrange_1D(1);
        clk_event_t block_evt1;

        void (^kernelBlock)(void)  = ^{ block_fn (tid, res);                   };

        int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

        void (^checkBlock) (void)  = ^{ check_res(tid, block_evt1, res);      };

        enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
    }
)" };

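// Calls capture_event_profiling_info while the gated block is still waiting on
// a user event (i.e. before it executes); the captured values are validated
// later by a block that waits on the first one.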
static const char* enqueue_block_capture_event_profiling_info_before_execution[] = {
    "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
    , R"(
    __global ulong value[MAX_GWS*2] = {0};

    void block_fn(size_t tid, __global int* res)
    {
        res[tid] = -2;
    }

    void check_res(size_t tid, const ulong *value, __global int* res)
    {
        if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] =  0;
        else                                        res[tid] = -4;
    }

    kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)
    {
        int multiplier = 3;
        size_t tid = get_global_id(0);
        clk_event_t user_evt = create_user_event();

        res[tid] = -1;
        queue_t def_q = get_default_queue();
        ndrange_t ndrange = ndrange_1D(1);
        clk_event_t block_evt1;
        clk_event_t block_evt2;

        void (^kernelBlock)(void)  = ^{ block_fn (tid, res);                   };

        int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

        capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);

        set_user_event_status(user_evt, CL_COMPLETE);

        void (^checkBlock) (void)  = ^{ check_res(tid, value, res); };

        enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
        if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

        release_event(user_evt);
        release_event(block_evt1);
        release_event(block_evt2);
    }
)" };

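// Child blocks that execute barrier(): one conditional barrier and one barrier
// inside a loop, each enqueued over 256 work-items.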
static const char* enqueue_block_with_barrier[] = { R"(
    void block_fn(size_t tid, int mul, __global int* res)
    {
      if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);
      res[tid] = mul * 7 - 21;
    }

    void loop_fn(size_t tid, int n, __global int* res)
    {
      while (n > 0)
      {
        barrier(CLK_GLOBAL_MEM_FENCE);
        res[tid] = 0;
        --n;
      }
    }

    kernel void enqueue_block_with_barrier(__global int* res)
    {
      int multiplier = 3;
      size_t tid = get_global_id(0);
      queue_t def_q = get_default_queue();
      res[tid] = -1;
      size_t n = 256;

      void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

      ndrange_t ndrange = ndrange_1D(n);
      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

      void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };

      enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
    }
)" };

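// enqueue_marker chained on a block event: the marker completes only after the
// gated block does, which a follow-up block verifies.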
static const char* enqueue_marker_with_block_event[] = { R"(
    #define BLOCK_COMPLETED 1
    #define BLOCK_SUBMITTED 2
    #define CHECK_SUCCESS   0

    kernel void enqueue_marker_with_block_event(__global int* res)
    {
      size_t tid = get_global_id(0);

      clk_event_t user_evt = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);

      clk_event_t block_evt1;
      clk_event_t marker_evt;

      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,
      ^{
         res[tid] = BLOCK_COMPLETED;
       });
      if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }

      enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);
      if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

      retain_event(marker_evt);
      release_event(marker_evt);

      //check block is not started
      if (res[tid] == BLOCK_SUBMITTED)
      {
        clk_event_t my_evt;
        enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
        ^{
           //check block is completed
           if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
         });
        release_event(my_evt);
      }

      set_user_event_status(user_evt, CL_COMPLETE);

      release_event(block_evt1);
      release_event(marker_evt);
      release_event(user_evt);
    }
)" };

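// enqueue_marker chained on a user event; the block gated on the marker must
// not run until the kernel calls set_user_event_status(CL_COMPLETE).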
static const char* enqueue_marker_with_user_event[] = { R"(
    #define BLOCK_COMPLETED 1
    #define BLOCK_SUBMITTED 2
    #define CHECK_SUCCESS   0

    kernel void enqueue_marker_with_user_event(__global int* res)
    {
      size_t tid = get_global_id(0);
      uint multiplier = 7;

      clk_event_t user_evt = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);

      clk_event_t marker_evt;
      clk_event_t block_evt;

      int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

      retain_event(marker_evt);
      release_event(marker_evt);

      enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt,
      ^{
         if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;
       });

      //check block is not started
      if (res[tid] != BLOCK_SUBMITTED)  { res[tid] = -2; return; }

      set_user_event_status(user_evt, CL_COMPLETE);

      release_event(block_evt);
      release_event(marker_evt);
      release_event(user_evt);
    }
)" };

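// enqueue_marker waiting on a mixed list (user event plus block event).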
static const char* enqueue_marker_with_mixed_events[] = { R"(
    #define BLOCK_COMPLETED 1
    #define BLOCK_SUBMITTED 2
    #define CHECK_SUCCESS   0

    kernel void enqueue_marker_with_mixed_events(__global int* res)
    {
      size_t tid = get_global_id(0);

      clk_event_t mix_ev[2];
      mix_ev[0] = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);

      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],
      ^{
         res[tid] = BLOCK_COMPLETED;
       });
      if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }

      clk_event_t marker_evt;

      enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);
      if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

      retain_event(marker_evt);
      release_event(marker_evt);

      //check block is not started
      if (res[tid] == BLOCK_SUBMITTED)
      {
        clk_event_t my_evt;
        enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
        ^{
           //check block is completed
           if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
         });
        release_event(my_evt);
      }

      set_user_event_status(mix_ev[0], CL_COMPLETE);

      release_event(mix_ev[1]);
      release_event(marker_evt);
      release_event(mix_ev[0]);
    }
)" };

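// enqueue_kernel waiting on a mixed list of user, block and marker events;
// success leaves res[tid] == 0 after the two increments from -2.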
static const char* enqueue_block_with_mixed_events[] = { R"(
    kernel void enqueue_block_with_mixed_events(__global int* res)
    {
      int enq_res;
      size_t tid = get_global_id(0);
      clk_event_t mix_ev[3];
      mix_ev[0] = create_user_event();
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);
      res[tid] = -2;

      enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });
      if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

      enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);
      if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

      enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });
      if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; }

      set_user_event_status(mix_ev[0], CL_COMPLETE);

      release_event(mix_ev[0]);
      release_event(mix_ev[1]);
      release_event(mix_ev[2]);
    }
)" };
// clang-format on

static const kernel_src sources_enqueue_block[] =
{
    KERNEL(enqueue_simple_block),
    // Block with local mem
    KERNEL(enqueue_block_with_local_arg1),
    KERNEL(enqueue_block_with_local_arg2),
    KERNEL(enqueue_block_with_wait_list),
    KERNEL(enqueue_block_with_wait_list_and_local_arg),
    // WG size built-ins
    KERNEL(enqueue_block_get_kernel_work_group_size),
    KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple),
    // Event profiling info
    KERNEL(enqueue_block_capture_event_profiling_info_after_execution),
    KERNEL(enqueue_block_capture_event_profiling_info_before_execution),
    // Marker
    KERNEL(enqueue_marker_with_block_event),
    KERNEL(enqueue_marker_with_user_event),
    // Mixed events
    KERNEL(enqueue_marker_with_mixed_events),
    KERNEL(enqueue_block_with_mixed_events),
    // Barrier
    KERNEL(enqueue_block_with_barrier),
};
static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block);

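// Returns the index of the first non-zero entry in results, or -1 if every
// work-item reported 0 (success).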
static int check_kernel_results(cl_int* results, cl_int len)
{
    for(cl_int i = 0; i < len; ++i)
    {
        if(results[i] != 0) return i;
    }
    return -1;
}

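// Creates a default on-device queue, then builds and runs each kernel above on
// the host queue, checking that every work-item wrote 0 to its result slot.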
int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_uint i;
    cl_int n, err_ret, res = 0;
    clCommandQueueWrapper dev_queue;
    cl_int kernel_results[MAX_GWS] = {0};

    size_t ret_len;
    cl_uint max_queues = 1;
    cl_uint maxQueueSize = 0;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");

    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");

    size_t max_local_size = 1;
    err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
    test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");

    cl_queue_properties queue_prop_def[] =
    {
        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE,
        CL_QUEUE_SIZE, maxQueueSize,
        0
    };

    dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
    test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");

    size_t global_size = MAX_GWS;
    size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
    if(gWimpyMode)
    {
        global_size = 4;
        local_size = 2;
    }

    size_t failCnt = 0;
    for(i = 0; i < num_kernels_enqueue_block; ++i)
    {
        if (!gKernelName.empty() && gKernelName != sources_enqueue_block[i].kernel_name)
            continue;

        log_info("Running '%s' kernel (%u of %zu) ...\n", sources_enqueue_block[i].kernel_name, i + 1, num_kernels_enqueue_block);
        err_ret = run_n_kernel_args(context, queue, sources_enqueue_block[i].lines, sources_enqueue_block[i].num_lines,
                                    sources_enqueue_block[i].kernel_name, local_size, global_size,
                                    kernel_results, sizeof(kernel_results), 0, NULL);
        if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block[i].kernel_name)) { ++failCnt; res = -1; }
        else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0
                && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_enqueue_block[i].kernel_name, n, kernel_results[n])) res = -1;
        else log_info("'%s' kernel is OK.\n", sources_enqueue_block[i].kernel_name);
    }

    if (failCnt > 0)
    {
      log_error("ERROR: %zu of %zu kernels failed.\n", failCnt, num_kernels_enqueue_block);
    }

    return res;
}


#endif