1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 #define BITS_DEPTH 28
31
32 static const char* enqueue_flags_wait_kernel_simple[] =
33 {
34 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
35 NL, ""
36 NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
37 NL, "{"
38 NL, " int val = 0;"
39 NL, " size_t lid = get_local_id(0);"
40 NL, " size_t tid = get_global_id(0);"
41 NL, ""
42 NL, " if(tid == 0)"
43 NL, " {"
44 NL, " if((index + 1) < BITS_DEPTH)"
45 NL, " {"
46 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
47 NL, " ^{"
48 NL, " block_fn(array, index + 1, ls, gs, res);"
49 NL, " });"
50 NL, " }"
51 NL, " }"
52 NL, ""
53 NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
54 NL, ""
55 NL, " if((index + 1) == BITS_DEPTH)"
56 NL, " {"
57 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
58 NL, ""
59 NL, " if(lid == 0)"
60 NL, " {"
61 NL, " size_t gid = get_group_id(0);"
62 NL, " res[gid] = 1;"
63 NL, ""
64 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
65 NL, " {"
66 NL, " for(int i = 0; i < ls; i++)"
67 NL, " {"
68 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
69 NL, " {"
70 NL, " res[gid] = 2;"
71 NL, " break;"
72 NL, " }"
73 NL, " }"
74 NL, " }"
75 NL, " }"
76 NL, " }"
77 NL, "}"
78 NL, ""
79 NL, "kernel void enqueue_flags_wait_kernel_simple(__global int* res, __global int* array)"
80 NL, "{"
81 NL, " size_t ls = get_local_size(0);"
82 NL, " size_t gs = get_global_size(0);"
83 NL, " size_t tid = get_global_id(0);"
84 NL, ""
85 NL, " res[tid] = 0;"
86 NL, " array[tid] = tid;"
87 NL, ""
88 NL, " if(tid == 0)"
89 NL, " {"
90 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
91 NL, " ^{"
92 NL, " block_fn(array, 1, ls, gs, res);"
93 NL, " });"
94 NL, " }"
95 NL, "}"
96 NL
97 };
98
99 static const char* enqueue_flags_wait_kernel_event[] =
100 {
101 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
102 NL, ""
103 NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
104 NL, "{"
105 NL, " int val = 0;"
106 NL, " size_t lid = get_local_id(0);"
107 NL, " size_t tid = get_global_id(0);"
108 NL, ""
109 NL, " if(tid == 0)"
110 NL, " {"
111 NL, " if((index + 1) < BITS_DEPTH)"
112 NL, " {"
113 NL, " clk_event_t block_evt;"
114 NL, " clk_event_t user_evt = create_user_event();"
115 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
116 NL, " ^{"
117 NL, " block_fn(array, index + 1, ls, gs, res);"
118 NL, " });"
119 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
120 NL, " release_event(user_evt);"
121 NL, " release_event(block_evt);"
122 NL, " }"
123 NL, " }"
124 NL, ""
125 NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
126 NL, ""
127 NL, " if((index + 1) == BITS_DEPTH)"
128 NL, " {"
129 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
130 NL, ""
131 NL, " if(lid == 0)"
132 NL, " {"
133 NL, " size_t gid = get_group_id(0);"
134 NL, " res[gid] = 1;"
135 NL, ""
136 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
137 NL, " {"
138 NL, " for(int i = 0; i < ls; i++)"
139 NL, " {"
140 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
141 NL, " {"
142 NL, " res[gid] = 2;"
143 NL, " break;"
144 NL, " }"
145 NL, " }"
146 NL, " }"
147 NL, " }"
148 NL, " }"
149 NL, "}"
150 NL, ""
151 NL, "kernel void enqueue_flags_wait_kernel_event(__global int* res, __global int* array)"
152 NL, "{"
153 NL, " size_t tid = get_global_id(0);"
154 NL, " size_t gs = get_global_size(0);"
155 NL, " size_t ls = get_local_size(0);"
156 NL, ""
157 NL, " res[tid] = 0;"
158 NL, " array[tid] = tid;"
159 NL, ""
160 NL, " if(tid == 0)"
161 NL, " {"
162 NL, " clk_event_t block_evt;"
163 NL, " clk_event_t user_evt = create_user_event();"
164 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
165 NL, " ^{"
166 NL, " block_fn(array, 1, ls, gs, res);"
167 NL, " });"
168 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
169 NL, " release_event(user_evt);"
170 NL, " release_event(block_evt);"
171 NL, " }"
172 NL, "}"
173 NL
174 };
175
176 static const char* enqueue_flags_wait_kernel_local[] =
177 {
178 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
179 NL, ""
180 NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
181 NL, "{"
182 NL, " int val = 0;"
183 NL, " size_t gid = get_group_id(0);"
184 NL, " size_t lid = get_local_id(0);"
185 NL, " size_t tid = get_global_id(0);"
186 NL, ""
187 NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
188 NL, " barrier(CLK_LOCAL_MEM_FENCE);"
189 NL, ""
190 NL, " for(int i = 0; i < ls; i++)"
191 NL, " {"
192 NL, " int id = gid * ls + i;"
193 NL, " val += sub_array[i];"
194 NL, " val -= (tid == id)? 0: (id + index - 1);"
195 NL, " }"
196 NL, ""
197 NL, " if(tid == 0)"
198 NL, " {"
199 NL, " if((index + 1) < BITS_DEPTH)"
200 NL, " {"
201 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
202 NL, " ^(__local void* sub_array){"
203 NL, " block_fn(array, index + 1, ls, gs, res, sub_array);"
204 NL, " }, ls * sizeof(int));"
205 NL, " }"
206 NL, " }"
207 NL, ""
208 NL, " array[index * gs + tid] = val + 1;"
209 NL, ""
210 NL, " if((index + 1) == BITS_DEPTH)"
211 NL, " {"
212 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
213 NL, ""
214 NL, " if(lid == 0)"
215 NL, " {"
216 NL, " res[gid] = 1;"
217 NL, ""
218 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
219 NL, " {"
220 NL, " for(int i = 0; i < ls; i++)"
221 NL, " {"
222 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
223 NL, " {"
224 NL, " res[gid] = 2;"
225 NL, " break;"
226 NL, " }"
227 NL, " }"
228 NL, " }"
229 NL, " }"
230 NL, " }"
231 NL, "}"
232 NL, ""
233 NL, "kernel void enqueue_flags_wait_kernel_local(__global int* res, __global int* array)"
234 NL, "{"
235 NL, " size_t ls = get_local_size(0);"
236 NL, " size_t gs = get_global_size(0);"
237 NL, " size_t tid = get_global_id(0);"
238 NL, ""
239 NL, " res[tid] = 0;"
240 NL, " array[tid] = tid;"
241 NL, ""
242 NL, " if(tid == 0)"
243 NL, " {"
244 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
245 NL, " ^(__local void* sub_array){"
246 NL, " block_fn(array, 1, ls, gs, res, sub_array);"
247 NL, " }, ls * sizeof(int));"
248 NL, " }"
249 NL, "}"
250 NL
251 };
252
253 static const char* enqueue_flags_wait_kernel_event_local[] =
254 {
255 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
256 NL, ""
257 NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
258 NL, "{"
259 NL, " int val = 0;"
260 NL, " size_t gid = get_group_id(0);"
261 NL, " size_t lid = get_local_id(0);"
262 NL, " size_t tid = get_global_id(0);"
263 NL, ""
264 NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
265 NL, " barrier(CLK_LOCAL_MEM_FENCE);"
266 NL, ""
267 NL, " for(int i = 0; i < ls; i++)"
268 NL, " {"
269 NL, " int id = gid * ls + i;"
270 NL, " val += sub_array[i];"
271 NL, " val -= (tid == id)? 0: (id + index - 1);"
272 NL, " }"
273 NL, ""
274 NL, " if(tid == 0)"
275 NL, " {"
276 NL, " if((index + 1) < BITS_DEPTH)"
277 NL, " {"
278 NL, " clk_event_t block_evt;"
279 NL, " clk_event_t user_evt = create_user_event();"
280 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
281 NL, " ^(__local void* sub_array){"
282 NL, " block_fn(array, index + 1, ls, gs, res, sub_array);"
283 NL, " }, ls * sizeof(int));"
284 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
285 NL, " release_event(user_evt);"
286 NL, " release_event(block_evt);"
287 NL, " }"
288 NL, " }"
289 NL, ""
290 NL, " array[index * gs + tid] = val + 1;"
291 NL, ""
292 NL, " if((index + 1) == BITS_DEPTH)"
293 NL, " {"
294 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
295 NL, ""
296 NL, " if(lid == 0)"
297 NL, " {"
298 NL, " res[gid] = 1;"
299 NL, ""
300 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
301 NL, " {"
302 NL, " for(int i = 0; i < ls; i++)"
303 NL, " {"
304 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
305 NL, " {"
306 NL, " res[gid] = 2;"
307 NL, " break;"
308 NL, " }"
309 NL, " }"
310 NL, " }"
311 NL, " }"
312 NL, " }"
313 NL, "}"
314 NL, ""
315 NL, "kernel void enqueue_flags_wait_kernel_event_local(__global int* res, __global int* array)"
316 NL, "{"
317 NL, " size_t ls = get_local_size(0);"
318 NL, " size_t gs = get_global_size(0);"
319 NL, " size_t tid = get_global_id(0);"
320 NL, ""
321 NL, " res[tid] = 0;"
322 NL, " array[tid] = tid;"
323 NL, ""
324 NL, " if(tid == 0)"
325 NL, " {"
326 NL, " clk_event_t block_evt;"
327 NL, " clk_event_t user_evt = create_user_event();"
328 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
329 NL, " ^(__local void* sub_array){"
330 NL, " block_fn(array, 1, ls, gs, res, sub_array);"
331 NL, " }, ls * sizeof(int));"
332 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
333 NL, " release_event(user_evt);"
334 NL, " release_event(block_evt);"
335 NL, " }"
336 NL, "}"
337 NL
338 };
339
340 static const char* enqueue_flags_wait_work_group_simple[] =
341 {
342 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
343 NL, ""
344 NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
345 NL, "{"
346 NL, " size_t tid = get_global_id(0);"
347 NL, " size_t lid = get_local_id(0);"
348 NL, " size_t gs = get_global_size(0);"
349 NL, " size_t gid = get_group_id(0);"
350 NL, " "
351 NL, " if(gid == group_id)"
352 NL, " {"
353 NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
354 NL, " {"
355 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
356 NL, " ^{"
357 NL, " block_fn(array, index + 1, ls, res, gid);"
358 NL, " });"
359 NL, " }"
360 NL, " "
361 NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
362 NL, " }"
363 NL, ""
364 NL, " if((index + 1) == BITS_DEPTH)"
365 NL, " {"
366 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
367 NL, ""
368 NL, " if(lid == 0 && gid == group_id)"
369 NL, " {"
370 NL, " res[gid] = 1;"
371 NL, ""
372 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
373 NL, " {"
374 NL, " for(int i = 0; i < ls; i++)"
375 NL, " {"
376 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
377 NL, " {"
378 NL, " res[gid] = 2;"
379 NL, " break;"
380 NL, " }"
381 NL, " }"
382 NL, " }"
383 NL, " }"
384 NL, " }"
385 NL, "}"
386 NL, ""
387 NL, "kernel void enqueue_flags_wait_work_group_simple(__global int* res, __global int* array)"
388 NL, "{"
389 NL, " size_t ls = get_local_size(0);"
390 NL, " size_t gs = get_global_size(0);"
391 NL, " size_t tid = get_global_id(0);"
392 NL, " size_t gid = get_group_id(0);"
393 NL, " size_t lid = get_local_id(0);"
394 NL, ""
395 NL, " res[tid] = 0;"
396 NL, " array[tid] = tid;"
397 NL, ""
398 NL, " if(lid == 0)"
399 NL, " {"
400 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
401 NL, " ^{"
402 NL, " block_fn(array, 1, ls, res, gid);"
403 NL, " });"
404 NL, " }"
405 NL, "}"
406 NL
407 };
408
409 static const char* enqueue_flags_wait_work_group_event[] =
410 {
411 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
412 NL, ""
413 NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
414 NL, "{"
415 NL, " size_t tid = get_global_id(0);"
416 NL, " size_t lid = get_local_id(0);"
417 NL, " size_t gs = get_global_size(0);"
418 NL, " size_t gid = get_group_id(0);"
419 NL, " "
420 NL, " if(gid == group_id)"
421 NL, " {"
422 NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
423 NL, " {"
424 NL, " clk_event_t block_evt;"
425 NL, " clk_event_t user_evt = create_user_event();"
426 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
427 NL, " ^{"
428 NL, " block_fn(array, index + 1, ls, res, gid);"
429 NL, " });"
430 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
431 NL, " release_event(user_evt);"
432 NL, " release_event(block_evt);"
433 NL, " }"
434 NL, " "
435 NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
436 NL, " }"
437 NL, ""
438 NL, ""
439 NL, " if((index + 1) == BITS_DEPTH)"
440 NL, " {"
441 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
442 NL, ""
443 NL, " if(lid == 0 && gid == group_id)"
444 NL, " {"
445 NL, " res[gid] = 1;"
446 NL, ""
447 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
448 NL, " {"
449 NL, " for(int i = 0; i < ls; i++)"
450 NL, " {"
451 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
452 NL, " {"
453 NL, " res[gid] = 2;"
454 NL, " break;"
455 NL, " }"
456 NL, " }"
457 NL, " }"
458 NL, " }"
459 NL, " }"
460 NL, "}"
461 NL, ""
462 NL, "kernel void enqueue_flags_wait_work_group_event(__global int* res, __global int* array)"
463 NL, "{"
464 NL, " size_t ls = get_local_size(0);"
465 NL, " size_t gs = get_global_size(0);"
466 NL, " size_t tid = get_global_id(0);"
467 NL, " size_t gid = get_group_id(0);"
468 NL, " size_t lid = get_local_id(0);"
469 NL, ""
470 NL, " res[tid] = 0;"
471 NL, " array[tid] = tid;"
472 NL, ""
473 NL, " if(lid == 0)"
474 NL, " {"
475 NL, " clk_event_t block_evt;"
476 NL, " clk_event_t user_evt = create_user_event();"
477 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
478 NL, " ^{"
479 NL, " block_fn(array, 1, ls, res, gid);"
480 NL, " });"
481 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
482 NL, " release_event(user_evt);"
483 NL, " release_event(block_evt);"
484 NL, " }"
485 NL, "}"
486 NL
487 };
488
489 static const char* enqueue_flags_wait_work_group_local[] =
490 {
491 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
492 NL, ""
493 NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
494 NL, "{"
495 NL, " int val = 0;"
496 NL, " size_t gid = get_group_id(0);"
497 NL, " size_t lid = get_local_id(0);"
498 NL, " size_t tid = get_global_id(0);"
499 NL, " size_t gs = get_global_size(0);"
500 NL, ""
501 NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
502 NL, " barrier(CLK_LOCAL_MEM_FENCE);"
503 NL, ""
504 NL, " for(int i = 0; i < ls; i++)"
505 NL, " {"
506 NL, " int id = gid * ls + i;"
507 NL, " val += sub_array[i];"
508 NL, " val -= (tid == id)? 0: (id + index - 1);"
509 NL, " }"
510 NL, " "
511 NL, " if(gid == group_id)"
512 NL, " {"
513 NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
514 NL, " {"
515 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
516 NL, " ^(__local void* sub_array){"
517 NL, " block_fn(array, index + 1, ls, res, sub_array, gid);"
518 NL, " }, ls * sizeof(int));"
519 NL, " }"
520 NL, " "
521 NL, " array[index * gs + tid] = val + 1;"
522 NL, " }"
523 NL, ""
524 NL, ""
525 NL, " if((index + 1) == BITS_DEPTH)"
526 NL, " {"
527 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
528 NL, ""
529 NL, " if(lid == 0 && gid == group_id)"
530 NL, " {"
531 NL, " res[gid] = 1;"
532 NL, ""
533 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
534 NL, " {"
535 NL, " for(int i = 0; i < ls; i++)"
536 NL, " {"
537 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
538 NL, " {"
539 NL, " res[gid] = 2;"
540 NL, " break;"
541 NL, " }"
542 NL, " }"
543 NL, " }"
544 NL, " }"
545 NL, " }"
546 NL, "}"
547 NL, ""
548 NL, "kernel void enqueue_flags_wait_work_group_local(__global int* res, __global int* array)"
549 NL, "{"
550 NL, " size_t ls = get_local_size(0);"
551 NL, " size_t gs = get_global_size(0);"
552 NL, " size_t tid = get_global_id(0);"
553 NL, " size_t gid = get_group_id(0);"
554 NL, " size_t lid = get_local_id(0);"
555 NL, ""
556 NL, " res[tid] = 0;"
557 NL, " array[tid] = tid;"
558 NL, ""
559 NL, " if(lid == 0)"
560 NL, " {"
561 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
562 NL, " ^(__local void* sub_array){"
563 NL, " block_fn(array, 1, ls, res, sub_array, gid);"
564 NL, " }, ls * sizeof(int));"
565 NL, " }"
566 NL, "}"
567 NL
568 };
569
570 static const char* enqueue_flags_wait_work_group_event_local[] =
571 {
572 NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
573 NL, ""
574 NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
575 NL, "{"
576 NL, " int val = 0;"
577 NL, " size_t gid = get_group_id(0);"
578 NL, " size_t lid = get_local_id(0);"
579 NL, " size_t tid = get_global_id(0);"
580 NL, " size_t gs = get_global_size(0);"
581 NL, ""
582 NL, " sub_array[lid] = array[(index - 1) * gs + tid];"
583 NL, " barrier(CLK_LOCAL_MEM_FENCE);"
584 NL, ""
585 NL, " for(int i = 0; i < ls; i++)"
586 NL, " {"
587 NL, " int id = gid * ls + i;"
588 NL, " val += sub_array[i];"
589 NL, " val -= (tid == id)? 0: (id + index - 1);"
590 NL, " }"
591 NL, ""
592 NL, " if(gid == group_id)"
593 NL, " {"
594 NL, " if((index + 1) < BITS_DEPTH && lid == 0)"
595 NL, " {"
596 NL, " clk_event_t block_evt;"
597 NL, " clk_event_t user_evt = create_user_event();"
598 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
599 NL, " ^(__local void* sub_array){"
600 NL, " block_fn(array, index + 1, ls, res, sub_array, gid);"
601 NL, " }, ls * sizeof(int));"
602 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
603 NL, " release_event(user_evt);"
604 NL, " release_event(block_evt);"
605 NL, " }"
606 NL, " "
607 NL, " array[index * gs + tid] = val + 1;"
608 NL, " }"
609 NL, ""
610 NL, " if((index + 1) == BITS_DEPTH)"
611 NL, " {"
612 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
613 NL, ""
614 NL, " if(lid == 0 && gid == group_id)"
615 NL, " {"
616 NL, " res[gid] = 1;"
617 NL, ""
618 NL, " for(int j = 0; j < BITS_DEPTH; j++)"
619 NL, " {"
620 NL, " for(int i = 0; i < ls; i++)"
621 NL, " {"
622 NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
623 NL, " {"
624 NL, " res[gid] = 2;"
625 NL, " break;"
626 NL, " }"
627 NL, " }"
628 NL, " }"
629 NL, " }"
630 NL, " }"
631 NL, "}"
632 NL, ""
633 NL, "kernel void enqueue_flags_wait_work_group_event_local(__global int* res, __global int* array)"
634 NL, "{"
635 NL, " size_t ls = get_local_size(0);"
636 NL, " size_t gs = get_global_size(0);"
637 NL, " size_t tid = get_global_id(0);"
638 NL, " size_t gid = get_group_id(0);"
639 NL, " size_t lid = get_local_id(0);"
640 NL, ""
641 NL, " res[tid] = 0;"
642 NL, " array[tid] = tid;"
643 NL, ""
644 NL, " if(lid == 0)"
645 NL, " {"
646 NL, " clk_event_t block_evt;"
647 NL, " clk_event_t user_evt = create_user_event();"
648 NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
649 NL, " ^(__local void* sub_array){"
650 NL, " block_fn(array, 1, ls, res, sub_array, gid);"
651 NL, " }, ls * sizeof(int));"
652 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
653 NL, " release_event(user_evt);"
654 NL, " release_event(block_evt);"
655 NL, " }"
656 NL, "}"
657 NL
658 };
659
660 static const kernel_src sources_enqueue_block_flags[] =
661 {
662 KERNEL(enqueue_flags_wait_kernel_simple),
663 KERNEL(enqueue_flags_wait_kernel_event),
664 KERNEL(enqueue_flags_wait_kernel_local),
665 KERNEL(enqueue_flags_wait_kernel_event_local),
666 KERNEL(enqueue_flags_wait_work_group_simple),
667 KERNEL(enqueue_flags_wait_work_group_event),
668 KERNEL(enqueue_flags_wait_work_group_local),
669 KERNEL(enqueue_flags_wait_work_group_event_local)
670 };
671 static const size_t num_enqueue_block_flags = arr_size(sources_enqueue_block_flags);
672
673
test_enqueue_flags(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)674 int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
675 {
676 cl_uint i;
677 cl_int err_ret, res = 0;
678 clCommandQueueWrapper dev_queue;
679 cl_int kernel_results[MAX_GWS] = { -1 };
680 int buff[MAX_GWS * BITS_DEPTH] = { 0 };
681
682 size_t ret_len;
683 size_t max_local_size = 1;
684 cl_uint maxQueueSize = 0;
685 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
686 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
687
688 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
689 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
690
691 cl_queue_properties queue_prop_def[] =
692 {
693 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
694 CL_QUEUE_SIZE, maxQueueSize,
695 0
696 };
697
698 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
699 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
700
701 size_t global_size = MAX_GWS;
702 size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
703 if(gWimpyMode)
704 {
705 global_size = 4;
706 local_size = 2;
707 }
708
709 size_t failCnt = 0;
710 for(i = 0; i < num_enqueue_block_flags; ++i)
711 {
712 if (!gKernelName.empty() && gKernelName != sources_enqueue_block_flags[i].kernel_name)
713 continue;
714
715 log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block_flags[i].kernel_name, i + 1, num_enqueue_block_flags);
716
717 clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, global_size * BITS_DEPTH * sizeof(cl_int), buff, &err_ret);
718 test_error(err_ret, "clCreateBuffer() failed");
719
720 kernel_arg args[] =
721 {
722 { sizeof(cl_mem), &mem }
723 };
724
725 err_ret = run_n_kernel_args(context, queue, sources_enqueue_block_flags[i].lines, sources_enqueue_block_flags[i].num_lines, sources_enqueue_block_flags[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args);
726 if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block_flags[i].kernel_name)) { ++failCnt; res = -1; }
727 else
728 {
729 int r = 0;
730 for (int j=0; j<global_size; j++)
731 {
732 if (kernel_results[j] != 1 && j < (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 1", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j]))
733 {
734 r = -1;
735 break;
736 }
737 else if (kernel_results[j] != 0 && j >= (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 0", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j]))
738 {
739 r = -1;
740 break;
741 }
742 }
743 if(r == 0) log_info("'%s' kernel is OK.\n", sources_enqueue_block_flags[i].kernel_name);
744 else res = -1;
745 }
746 }
747
748 if (failCnt > 0)
749 {
750 log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_enqueue_block_flags);
751 }
752
753 return res;
754 }
755
756
757
758 #endif
759