1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30
31 // clang-format off
// OpenCL-C program: the parent kernel enqueues a trivial child block on the
// default device queue with CLK_ENQUEUE_FLAGS_WAIT_KERNEL. The child writes
// mul*7-21 == 0 into res[tid]; res[tid] stays -1 (failure marker) if
// enqueue_kernel does not return CLK_SUCCESS, which the host then detects.
static const char* enqueue_simple_block[] = { R"(
void block_fn(size_t tid, int mul, __global int* res)
{
res[tid] = mul * 7 - 21;
}

kernel void enqueue_simple_block(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);

void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

res[tid] = -1;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
52
// OpenCL-C program: child block takes one __local void* parameter; the size of
// the local buffer (LOCAL_MEM_SIZE ints) is passed as the trailing uint
// argument to enqueue_kernel. The child accumulates LOCAL_MEM_SIZE zeros plus
// 2 on top of the initial -2, so a successful run leaves res[tid] == 0.
static const char* enqueue_block_with_local_arg1[] = { R"(
#define LOCAL_MEM_SIZE 10

void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)
{
for (int i = 0; i < LOCAL_MEM_SIZE; i++)
{
tmp[i] = mul * 7 - 21;
res[tid] += tmp[i];
}
res[tid] += 2;
}

kernel void enqueue_block_with_local_arg1(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);

void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };

res[tid] = -2;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
80
// OpenCL-C program: same as enqueue_block_with_local_arg1 but the child block
// takes TWO __local void* parameters (an int buffer and a float4 buffer), so
// enqueue_kernel receives two trailing uint size arguments, one per local
// pointer. All per-element contributions are zero; success => res[tid] == 0.
static const char* enqueue_block_with_local_arg2[] = { R"(
#define LOCAL_MEM_SIZE 10

void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)
{
for (int i = 0; i < LOCAL_MEM_SIZE; i++)
{
tmp1[i] = mul * 7 - 21;
tmp2[i].x = (float)(mul * 7 - 21);
tmp2[i].y = (float)(mul * 7 - 21);
tmp2[i].z = (float)(mul * 7 - 21);
tmp2[i].w = (float)(mul * 7 - 21);

res[tid] += tmp1[i];
res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);
}
res[tid] += 2;
}

kernel void enqueue_block_with_local_arg2(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);

void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)
{ block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };

res[tid] = -2;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
115
// OpenCL-C program: validates event wait-list ordering for device-side
// enqueue. A user event gates the first child block; while it is still
// un-signaled the parent verifies the child has not run (res == SUBMITTED),
// then chains a second child on the first child's event which checks the
// first actually completed. Success path ends with res[tid] == CHECK_SUCCESS.
static const char* enqueue_block_with_wait_list[] = { R"(
#define BLOCK_SUBMITTED 1
#define BLOCK_COMPLETED 2
#define CHECK_SUCCESS   0

kernel void enqueue_block_with_wait_list(__global int* res)
{
size_t tid = get_global_id(0);

clk_event_t user_evt = create_user_event();

res[tid] = BLOCK_SUBMITTED;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
clk_event_t block_evt;
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
^{
res[tid] = BLOCK_COMPLETED;
});
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

retain_event(block_evt);
release_event(block_evt);

//check block is not started
if (res[tid] == BLOCK_SUBMITTED)
{
clk_event_t my_evt;
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
^{
//check block is completed
if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
});
release_event(my_evt);
}

set_user_event_status(user_evt, CL_COMPLETE);

release_event(user_evt);
release_event(block_evt);
}
)" };
158
// OpenCL-C program: combines the wait-list ordering check with a __local
// argument on the gated child block (local buffer size passed as trailing
// argument to enqueue_kernel). The chained verification block confirms the
// first child reached BLOCK_COMPLETED before setting CHECK_SUCCESS.
static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"(
#define LOCAL_MEM_SIZE 10
#define BLOCK_COMPLETED 1
#define BLOCK_SUBMITTED 2
#define BLOCK_STARTED   3
#define CHECK_SUCCESS   0

void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)
{
res[tid] = BLOCK_STARTED;
for (int i = 0; i < LOCAL_MEM_SIZE; i++)
{
tmp[i] = mul * 7 - 21;
res[tid] += tmp[i];
}
if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;
}

kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);
clk_event_t user_evt = create_user_event();

res[tid] = BLOCK_SUBMITTED;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
clk_event_t block_evt;
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
^(__local void* buf) {
block_fn_local_arg(tid, multiplier, res, (__local int*)buf);
}, LOCAL_MEM_SIZE*sizeof(int));
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

retain_event(block_evt);
release_event(block_evt);

//check block is not started
if (res[tid] == BLOCK_SUBMITTED)
{
clk_event_t my_evt;
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
^{
//check block is completed
if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
});
release_event(my_evt);
}

set_user_event_status(user_evt, CL_COMPLETE);

release_event(user_evt);
release_event(block_evt);
}
)" };
214
// OpenCL-C program: exercises the get_kernel_work_group_size() built-in on a
// block, then uses the returned value as the local size of the child ndrange
// (global = 4x local). res[tid] is set to -1 first and overwritten with 0 by
// the child block (3*7-21) only on a successful enqueue + execution.
static const char* enqueue_block_get_kernel_work_group_size[] = { R"(
void block_fn(size_t tid, int mul, __global int* res)
{
res[tid] = mul * 7 - 21;
}

kernel void enqueue_block_get_kernel_work_group_size(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);

void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

size_t local_work_size = get_kernel_work_group_size(kernelBlock);
if (local_work_size <= 0){ res[tid] = -1; return; }
size_t global_work_size = local_work_size * 4;

res[tid] = -1;
queue_t q1 = get_default_queue();
ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);

int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
240
// OpenCL-C program: same shape as the work-group-size test above, but
// exercises the get_kernel_preferred_work_group_size_multiple() built-in and
// uses its result as the child ndrange's local size.
static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"(
void block_fn(size_t tid, int mul, __global int* res)
{
res[tid] = mul * 7 - 21;
}

kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);

void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);
if (local_work_size <= 0){ res[tid] = -1; return; }
size_t global_work_size = local_work_size * 4;

res[tid] = -1;
queue_t q1 = get_default_queue();
ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);

int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
266
// OpenCL-C program (two source fragments: a host-generated "#define MAX_GWS"
// line, then the kernel body). Captures CLK_PROFILING_COMMAND_EXEC_TIME for a
// child block's event AFTER that block has executed: the profiling query runs
// inside a second block gated on the first block's event. Both captured
// ulongs must be > 0 for success (res[tid] == 0).
static const char* enqueue_block_capture_event_profiling_info_after_execution[] = {
"#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
, R"(
__global ulong value[MAX_GWS*2] = {0};

void block_fn(size_t tid, __global int* res)
{
res[tid] = -2;
}

void check_res(size_t tid, const clk_event_t evt, __global int* res)
{
capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);

if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;
else res[tid] = -4;
release_event(evt);
}

kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)
{
size_t tid = get_global_id(0);

res[tid] = -1;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
clk_event_t block_evt1;

void (^kernelBlock)(void) = ^{ block_fn (tid, res); };

int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };

enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
}
)" };
306
// OpenCL-C program: captures CLK_PROFILING_COMMAND_EXEC_TIME on a child
// block's event BEFORE that block executes (the block is held by a user
// event while capture_event_profiling_info is called); a chained check block
// later validates the captured values are non-zero.
// NOTE(review): checkBlock passes `&value` (pointer-to-array) where check_res
// takes `const ulong*`; the address is the same but the pointer types differ —
// confirm against the upstream CTS whether `value` / `&value[0]` was intended.
static const char* enqueue_block_capture_event_profiling_info_before_execution[] = {
"#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
, R"(
__global ulong value[MAX_GWS*2] = {0};

void block_fn(size_t tid, __global int* res)
{
res[tid] = -2;
}

void check_res(size_t tid, const ulong *value, __global int* res)
{
if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;
else res[tid] = -4;
}

kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);
clk_event_t user_evt = create_user_event();

res[tid] = -1;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
clk_event_t block_evt1;
clk_event_t block_evt2;

void (^kernelBlock)(void) = ^{ block_fn (tid, res); };

int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);

set_user_event_status(user_evt, CL_COMPLETE);

void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };

enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

release_event(user_evt);
release_event(block_evt1);
release_event(block_evt2);
}
)" };
354
// OpenCL-C program: enqueues child blocks that execute barrier() inside their
// bodies — once behind a condition and once inside a 256-iteration loop — to
// verify work-group barriers work in device-enqueued blocks. Both children
// run over an ndrange of 256 work-items; success leaves res[tid] == 0.
static const char* enqueue_block_with_barrier[] = { R"(
void block_fn(size_t tid, int mul, __global int* res)
{
if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);
res[tid] = mul * 7 -21;
}

void loop_fn(size_t tid, int n, __global int* res)
{
while (n > 0)
{
barrier(CLK_GLOBAL_MEM_FENCE);
res[tid] = 0;
--n;
}
}

kernel void enqueue_block_with_barrier(__global int* res)
{
int multiplier = 3;
size_t tid = get_global_id(0);
queue_t def_q = get_default_queue();
res[tid] = -1;
size_t n = 256;

void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };

ndrange_t ndrange = ndrange_1D(n);
int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };

enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
}
)" };
392
// OpenCL-C program: validates enqueue_marker() waiting on a block event. A
// user event holds the first child; a marker depends on the child's event,
// and the verification block depends on the marker — so it must observe
// BLOCK_COMPLETED. Also exercises retain/release on the marker event.
static const char* enqueue_marker_with_block_event[] = { R"(
#define BLOCK_COMPLETED 1
#define BLOCK_SUBMITTED 2
#define CHECK_SUCCESS   0

kernel void enqueue_marker_with_block_event(__global int* res)
{
size_t tid = get_global_id(0);

clk_event_t user_evt = create_user_event();

res[tid] = BLOCK_SUBMITTED;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);

clk_event_t block_evt1;
clk_event_t marker_evt;

int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,
^{
res[tid] = BLOCK_COMPLETED;
});
if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }

enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

retain_event(marker_evt);
release_event(marker_evt);

//check block is not started
if (res[tid] == BLOCK_SUBMITTED)
{
clk_event_t my_evt;
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
^{
//check block is completed
if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
});
release_event(my_evt);
}

set_user_event_status(user_evt, CL_COMPLETE);

release_event(block_evt1);
release_event(marker_evt);
release_event(user_evt);
}
)" };
442
// OpenCL-C program: validates enqueue_marker() waiting directly on a user
// event. The gated block must not run until set_user_event_status(); when it
// does run it confirms res was still BLOCK_SUBMITTED and flips it to
// CHECK_SUCCESS. (The local `multiplier` variable is unused — harmless.)
static const char* enqueue_marker_with_user_event[] = { R"(
#define BLOCK_COMPLETED 1
#define BLOCK_SUBMITTED 2
#define CHECK_SUCCESS   0

kernel void enqueue_marker_with_user_event(__global int* res)
{
size_t tid = get_global_id(0);
uint multiplier = 7;

clk_event_t user_evt = create_user_event();

res[tid] = BLOCK_SUBMITTED;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);

clk_event_t marker_evt;
clk_event_t block_evt;

int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

retain_event(marker_evt);
release_event(marker_evt);

enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt,
^{
if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;
});

//check block is not started
if (res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }

set_user_event_status(user_evt, CL_COMPLETE);

release_event(block_evt);
release_event(marker_evt);
release_event(user_evt);
}
)" };
483
// OpenCL-C program: validates enqueue_marker() waiting on a MIXED wait list —
// one user event plus one block event. The verification block chained on the
// marker must observe the first block's completion.
static const char* enqueue_marker_with_mixed_events[] = { R"(
#define BLOCK_COMPLETED 1
#define BLOCK_SUBMITTED 2
#define CHECK_SUCCESS   0

kernel void enqueue_marker_with_mixed_events(__global int* res)
{
size_t tid = get_global_id(0);

clk_event_t mix_ev[2];
mix_ev[0] = create_user_event();

res[tid] = BLOCK_SUBMITTED;
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);

int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],
^{
res[tid] = BLOCK_COMPLETED;
});
if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }

clk_event_t marker_evt;

enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

retain_event(marker_evt);
release_event(marker_evt);

//check block is not started
if (res[tid] == BLOCK_SUBMITTED)
{
clk_event_t my_evt;
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
^{
//check block is completed
if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
});
release_event(my_evt);
}

set_user_event_status(mix_ev[0], CL_COMPLETE);

release_event(mix_ev[1]);
release_event(marker_evt);
release_event(mix_ev[0]);
}
)" };
533
// OpenCL-C program: validates enqueue_kernel() with a mixed wait list of a
// user event, a block event, and a marker event (all three in mix_ev). The
// final block depends on all of them; starting from -2, the two increments
// leave res[tid] == 0 on success.
static const char* enqueue_block_with_mixed_events[] = { R"(
kernel void enqueue_block_with_mixed_events(__global int* res)
{
int enq_res;
size_t tid = get_global_id(0);
clk_event_t mix_ev[3];
mix_ev[0] = create_user_event();
queue_t def_q = get_default_queue();
ndrange_t ndrange = ndrange_1D(1);
res[tid] = -2;

enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });
if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }

enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);
if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });
if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; }

set_user_event_status(mix_ev[0], CL_COMPLETE);

release_event(mix_ev[0]);
release_event(mix_ev[1]);
release_event(mix_ev[2]);
}
)" };
561 // clang-format on
562
// Table of every device-enqueue kernel source above, wrapped by the KERNEL()
// macro (from utils.h) into kernel_src entries {lines, num_lines, name}.
// test_enqueue_block() iterates this table; entry order only affects log
// output, and gKernelName can restrict the run to a single entry.
static const kernel_src sources_enqueue_block[] =
{
    KERNEL(enqueue_simple_block),
    // Block with local mem
    KERNEL(enqueue_block_with_local_arg1),
    KERNEL(enqueue_block_with_local_arg2),
    KERNEL(enqueue_block_with_wait_list),
    KERNEL(enqueue_block_with_wait_list_and_local_arg),
    // WG size built-ins
    KERNEL(enqueue_block_get_kernel_work_group_size),
    KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple),
    // Event profiling info
    KERNEL(enqueue_block_capture_event_profiling_info_after_execution),
    KERNEL(enqueue_block_capture_event_profiling_info_before_execution),
    // Marker
    KERNEL(enqueue_marker_with_block_event),
    KERNEL(enqueue_marker_with_user_event),
    // Mixed events
    KERNEL(enqueue_marker_with_mixed_events),
    KERNEL(enqueue_block_with_mixed_events),
    // Barrier
    KERNEL(enqueue_block_with_barrier),

};
// Number of entries in the table above (arr_size is a harness helper).
static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block);
588
check_kernel_results(cl_int * results,cl_int len)589 static int check_kernel_results(cl_int* results, cl_int len)
590 {
591 for(cl_int i = 0; i < len; ++i)
592 {
593 if(results[i] != 0) return i;
594 }
595 return -1;
596 }
597
test_enqueue_block(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)598 int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
599 {
600 cl_uint i;
601 cl_int n, err_ret, res = 0;
602 clCommandQueueWrapper dev_queue;
603 cl_int kernel_results[MAX_GWS] = {0};
604
605 size_t ret_len;
606 cl_uint max_queues = 1;
607 cl_uint maxQueueSize = 0;
608 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
609 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
610
611 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
612 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
613
614 size_t max_local_size = 1;
615 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
616 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
617
618 cl_queue_properties queue_prop_def[] =
619 {
620 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE,
621 CL_QUEUE_SIZE, maxQueueSize,
622 0
623 };
624
625 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
626 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
627
628 size_t global_size = MAX_GWS;
629 size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
630 if(gWimpyMode)
631 {
632 global_size = 4;
633 local_size = 2;
634 }
635
636 size_t failCnt = 0;
637 for(i = 0; i < num_kernels_enqueue_block; ++i)
638 {
639 if (!gKernelName.empty() && gKernelName != sources_enqueue_block[i].kernel_name)
640 continue;
641
642 log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block[i].kernel_name, i + 1, num_kernels_enqueue_block);
643 err_ret = run_n_kernel_args(context, queue, sources_enqueue_block[i].lines, sources_enqueue_block[i].num_lines, sources_enqueue_block[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL);
644 if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block[i].kernel_name)) { ++failCnt; res = -1; }
645 else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_enqueue_block[i].kernel_name, n, kernel_results[n])) res = -1;
646 else log_info("'%s' kernel is OK.\n", sources_enqueue_block[i].kernel_name);
647 }
648
649 if (failCnt > 0)
650 {
651 log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_enqueue_block);
652 }
653
654 return res;
655 }
656
657
658
659 #endif
660
661
662