xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/device_execution/enqueue_flags.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20 
21 #include <vector>
22 
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26 
27 
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 #define BITS_DEPTH 28
31 
32 static const char* enqueue_flags_wait_kernel_simple[] =
33 {
34     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
35     NL, ""
36     NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
37     NL, "{"
38     NL, "  int val = 0;"
39     NL, "  size_t lid = get_local_id(0);"
40     NL, "  size_t tid = get_global_id(0);"
41     NL, ""
42     NL, "  if(tid == 0)"
43     NL, "  {"
44     NL, "    if((index + 1) < BITS_DEPTH)"
45     NL, "    {"
46     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
47     NL, "      ^{"
48     NL, "         block_fn(array, index + 1, ls, gs, res);"
49     NL, "       });"
50     NL, "    }"
51     NL, "  }"
52     NL, ""
53     NL, "  array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
54     NL, ""
55     NL, "  if((index + 1) == BITS_DEPTH)"
56     NL, "  {"
57     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
58     NL, ""
59     NL, "    if(lid == 0)"
60     NL, "    {"
61     NL, "      size_t gid = get_group_id(0);"
62     NL, "      res[gid] = 1;"
63     NL, ""
64     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
65     NL, "      {"
66     NL, "        for(int i = 0; i < ls; i++)"
67     NL, "        {"
68     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
69     NL, "          {"
70     NL, "            res[gid] = 2;"
71     NL, "            break;"
72     NL, "          }"
73     NL, "        }"
74     NL, "      }"
75     NL, "    }"
76     NL, "  }"
77     NL, "}"
78     NL, ""
79     NL, "kernel void enqueue_flags_wait_kernel_simple(__global int* res, __global int* array)"
80     NL, "{"
81     NL, "  size_t ls  = get_local_size(0);"
82     NL, "  size_t gs  = get_global_size(0);"
83     NL, "  size_t tid  = get_global_id(0);"
84     NL, ""
85     NL, "  res[tid] = 0;"
86     NL, "  array[tid] = tid;"
87     NL, ""
88     NL, "  if(tid == 0)"
89     NL, "  {"
90     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
91     NL, "    ^{"
92     NL, "       block_fn(array, 1, ls, gs, res);"
93     NL, "     });"
94     NL, "  }"
95     NL, "}"
96     NL
97 };
98 
99 static const char* enqueue_flags_wait_kernel_event[] =
100 {
101     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
102     NL, ""
103     NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)"
104     NL, "{"
105     NL, "  int val = 0;"
106     NL, "  size_t lid = get_local_id(0);"
107     NL, "  size_t tid = get_global_id(0);"
108     NL, ""
109     NL, "  if(tid == 0)"
110     NL, "  {"
111     NL, "    if((index + 1) < BITS_DEPTH)"
112     NL, "    {"
113     NL, "      clk_event_t block_evt;"
114     NL, "      clk_event_t user_evt = create_user_event();"
115     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
116     NL, "      ^{"
117     NL, "         block_fn(array, index + 1, ls, gs, res);"
118     NL, "       });"
119     NL, "      set_user_event_status(user_evt, CL_COMPLETE);"
120     NL, "      release_event(user_evt);"
121     NL, "      release_event(block_evt);"
122     NL, "    }"
123     NL, "  }"
124     NL, ""
125     NL, "  array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
126     NL, ""
127     NL, "  if((index + 1) == BITS_DEPTH)"
128     NL, "  {"
129     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
130     NL, ""
131     NL, "    if(lid == 0)"
132     NL, "    {"
133     NL, "      size_t gid = get_group_id(0);"
134     NL, "      res[gid] = 1;"
135     NL, ""
136     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
137     NL, "      {"
138     NL, "        for(int i = 0; i < ls; i++)"
139     NL, "        {"
140     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
141     NL, "          {"
142     NL, "            res[gid] = 2;"
143     NL, "            break;"
144     NL, "          }"
145     NL, "        }"
146     NL, "      }"
147     NL, "    }"
148     NL, "  }"
149     NL, "}"
150     NL, ""
151     NL, "kernel void enqueue_flags_wait_kernel_event(__global int* res, __global int* array)"
152     NL, "{"
153     NL, "  size_t tid  = get_global_id(0);"
154     NL, "  size_t gs = get_global_size(0);"
155     NL, "  size_t ls = get_local_size(0);"
156     NL, ""
157     NL, "  res[tid] = 0;"
158     NL, "  array[tid] = tid;"
159     NL, ""
160     NL, "  if(tid == 0)"
161     NL, "  {"
162     NL, "    clk_event_t block_evt;"
163     NL, "    clk_event_t user_evt = create_user_event();"
164     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
165     NL, "    ^{"
166     NL, "       block_fn(array, 1, ls, gs, res);"
167     NL, "     });"
168     NL, "    set_user_event_status(user_evt, CL_COMPLETE);"
169     NL, "    release_event(user_evt);"
170     NL, "    release_event(block_evt);"
171     NL, "  }"
172     NL, "}"
173     NL
174 };
175 
176 static const char* enqueue_flags_wait_kernel_local[] =
177 {
178     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
179     NL, ""
180     NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
181     NL, "{"
182     NL, "  int val = 0;"
183     NL, "  size_t gid = get_group_id(0);"
184     NL, "  size_t lid = get_local_id(0);"
185     NL, "  size_t tid = get_global_id(0);"
186     NL, ""
187     NL, "  sub_array[lid] = array[(index - 1) * gs + tid];"
188     NL, "  barrier(CLK_LOCAL_MEM_FENCE);"
189     NL, ""
190     NL, "  for(int i = 0; i < ls; i++)"
191     NL, "  {"
192     NL, "    int id = gid * ls + i;"
193     NL, "    val += sub_array[i];"
194     NL, "    val -= (tid == id)? 0: (id + index - 1);"
195     NL, "  }"
196     NL, ""
197     NL, "  if(tid == 0)"
198     NL, "  {"
199     NL, "    if((index + 1) < BITS_DEPTH)"
200     NL, "    {"
201     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
202     NL, "      ^(__local void* sub_array){"
203     NL, "        block_fn(array, index + 1, ls, gs, res, sub_array);"
204     NL, "      }, ls * sizeof(int));"
205     NL, "    }"
206     NL, "  }"
207     NL, ""
208     NL, "  array[index * gs + tid] = val + 1;"
209     NL, ""
210     NL, "  if((index + 1) == BITS_DEPTH)"
211     NL, "  {"
212     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
213     NL, ""
214     NL, "    if(lid == 0)"
215     NL, "    {"
216     NL, "      res[gid] = 1;"
217     NL, ""
218     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
219     NL, "      {"
220     NL, "        for(int i = 0; i < ls; i++)"
221     NL, "        {"
222     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
223     NL, "          {"
224     NL, "            res[gid] = 2;"
225     NL, "            break;"
226     NL, "          }"
227     NL, "        }"
228     NL, "      }"
229     NL, "    }"
230     NL, "  }"
231     NL, "}"
232     NL, ""
233     NL, "kernel void enqueue_flags_wait_kernel_local(__global int* res, __global int* array)"
234     NL, "{"
235     NL, "  size_t ls  = get_local_size(0);"
236     NL, "  size_t gs  = get_global_size(0);"
237     NL, "  size_t tid  = get_global_id(0);"
238     NL, ""
239     NL, "  res[tid] = 0;"
240     NL, "  array[tid] = tid;"
241     NL, ""
242     NL, "  if(tid == 0)"
243     NL, "  {"
244     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), "
245     NL, "    ^(__local void* sub_array){"
246     NL, "      block_fn(array, 1, ls, gs, res, sub_array);"
247     NL, "    }, ls * sizeof(int));"
248     NL, "  }"
249     NL, "}"
250     NL
251 };
252 
253 static const char* enqueue_flags_wait_kernel_event_local[] =
254 {
255     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
256     NL, ""
257     NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)"
258     NL, "{"
259     NL, "  int val = 0;"
260     NL, "  size_t gid = get_group_id(0);"
261     NL, "  size_t lid = get_local_id(0);"
262     NL, "  size_t tid = get_global_id(0);"
263     NL, ""
264     NL, "  sub_array[lid] = array[(index - 1) * gs + tid];"
265     NL, "  barrier(CLK_LOCAL_MEM_FENCE);"
266     NL, ""
267     NL, "  for(int i = 0; i < ls; i++)"
268     NL, "  {"
269     NL, "    int id = gid * ls + i;"
270     NL, "    val += sub_array[i];"
271     NL, "    val -= (tid == id)? 0: (id + index - 1);"
272     NL, "  }"
273     NL, ""
274     NL, "  if(tid == 0)"
275     NL, "  {"
276     NL, "    if((index + 1) < BITS_DEPTH)"
277     NL, "    {"
278     NL, "      clk_event_t block_evt;"
279     NL, "      clk_event_t user_evt = create_user_event();"
280     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
281     NL, "      ^(__local void* sub_array){"
282     NL, "        block_fn(array, index + 1, ls, gs, res, sub_array);"
283     NL, "      }, ls * sizeof(int));"
284     NL, "      set_user_event_status(user_evt, CL_COMPLETE);"
285     NL, "      release_event(user_evt);"
286     NL, "      release_event(block_evt);"
287     NL, "    }"
288     NL, "  }"
289     NL, ""
290     NL, "  array[index * gs + tid] = val + 1;"
291     NL, ""
292     NL, "  if((index + 1) == BITS_DEPTH)"
293     NL, "  {"
294     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
295     NL, ""
296     NL, "    if(lid == 0)"
297     NL, "    {"
298     NL, "      res[gid] = 1;"
299     NL, ""
300     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
301     NL, "      {"
302     NL, "        for(int i = 0; i < ls; i++)"
303     NL, "        {"
304     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
305     NL, "          {"
306     NL, "            res[gid] = 2;"
307     NL, "            break;"
308     NL, "          }"
309     NL, "        }"
310     NL, "      }"
311     NL, "    }"
312     NL, "  }"
313     NL, "}"
314     NL, ""
315     NL, "kernel void enqueue_flags_wait_kernel_event_local(__global int* res, __global int* array)"
316     NL, "{"
317     NL, "  size_t ls  = get_local_size(0);"
318     NL, "  size_t gs  = get_global_size(0);"
319     NL, "  size_t tid  = get_global_id(0);"
320     NL, ""
321     NL, "  res[tid] = 0;"
322     NL, "  array[tid] = tid;"
323     NL, ""
324     NL, "  if(tid == 0)"
325     NL, "  {"
326     NL, "    clk_event_t block_evt;"
327     NL, "    clk_event_t user_evt = create_user_event();"
328     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
329     NL, "    ^(__local void* sub_array){"
330     NL, "      block_fn(array, 1, ls, gs, res, sub_array);"
331     NL, "    }, ls * sizeof(int));"
332     NL, "    set_user_event_status(user_evt, CL_COMPLETE);"
333     NL, "    release_event(user_evt);"
334     NL, "    release_event(block_evt);"
335     NL, "  }"
336     NL, "}"
337     NL
338 };
339 
340 static const char* enqueue_flags_wait_work_group_simple[] =
341 {
342     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
343     NL, ""
344     NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
345     NL, "{"
346     NL, "  size_t tid = get_global_id(0);"
347     NL, "  size_t lid = get_local_id(0);"
348     NL, "  size_t gs = get_global_size(0);"
349     NL, "  size_t gid = get_group_id(0);"
350     NL, "  "
351     NL, "  if(gid == group_id)"
352     NL, "  {"
353     NL, "    if((index + 1) < BITS_DEPTH && lid == 0)"
354     NL, "    {"
355     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
356     NL, "      ^{"
357     NL, "         block_fn(array, index + 1, ls, res, gid);"
358     NL, "       });"
359     NL, "    }"
360     NL, "   "
361     NL, "    array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
362     NL, "  }"
363     NL, ""
364     NL, "  if((index + 1) == BITS_DEPTH)"
365     NL, "  {"
366     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
367     NL, ""
368     NL, "    if(lid == 0 && gid == group_id)"
369     NL, "    {"
370     NL, "      res[gid] = 1;"
371     NL, ""
372     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
373     NL, "      {"
374     NL, "        for(int i = 0; i < ls; i++)"
375     NL, "        {"
376     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
377     NL, "          {"
378     NL, "            res[gid] = 2;"
379     NL, "            break;"
380     NL, "          }"
381     NL, "        }"
382     NL, "      }"
383     NL, "    }"
384     NL, "  }"
385     NL, "}"
386     NL, ""
387     NL, "kernel void enqueue_flags_wait_work_group_simple(__global int* res, __global int* array)"
388     NL, "{"
389     NL, "  size_t ls = get_local_size(0);"
390     NL, "  size_t gs = get_global_size(0);"
391     NL, "  size_t tid = get_global_id(0);"
392     NL, "  size_t gid = get_group_id(0);"
393     NL, "  size_t lid = get_local_id(0);"
394     NL, ""
395     NL, "  res[tid] = 0;"
396     NL, "  array[tid] = tid;"
397     NL, ""
398     NL, "  if(lid == 0)"
399     NL, "  {"
400     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
401     NL, "    ^{"
402     NL, "       block_fn(array, 1, ls, res, gid);"
403     NL, "     });"
404     NL, "  }"
405     NL, "}"
406     NL
407 };
408 
409 static const char* enqueue_flags_wait_work_group_event[] =
410 {
411     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
412     NL, ""
413     NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)"
414     NL, "{"
415     NL, "  size_t tid = get_global_id(0);"
416     NL, "  size_t lid = get_local_id(0);"
417     NL, "  size_t gs = get_global_size(0);"
418     NL, "  size_t gid = get_group_id(0);"
419     NL, "  "
420     NL, "  if(gid == group_id)"
421     NL, "  {"
422     NL, "    if((index + 1) < BITS_DEPTH && lid == 0)"
423     NL, "    {"
424     NL, "      clk_event_t block_evt;"
425     NL, "      clk_event_t user_evt = create_user_event();"
426     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
427     NL, "      ^{"
428     NL, "         block_fn(array, index + 1, ls, res, gid);"
429     NL, "       });"
430     NL, "      set_user_event_status(user_evt, CL_COMPLETE);"
431     NL, "      release_event(user_evt);"
432     NL, "      release_event(block_evt);"
433     NL, "    }"
434     NL, "   "
435     NL, "    array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;"
436     NL, "  }"
437     NL, ""
438     NL, ""
439     NL, "  if((index + 1) == BITS_DEPTH)"
440     NL, "  {"
441     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
442     NL, ""
443     NL, "    if(lid == 0 && gid == group_id)"
444     NL, "    {"
445     NL, "      res[gid] = 1;"
446     NL, ""
447     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
448     NL, "      {"
449     NL, "        for(int i = 0; i < ls; i++)"
450     NL, "        {"
451     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
452     NL, "          {"
453     NL, "            res[gid] = 2;"
454     NL, "            break;"
455     NL, "          }"
456     NL, "        }"
457     NL, "      }"
458     NL, "    }"
459     NL, "  }"
460     NL, "}"
461     NL, ""
462     NL, "kernel void enqueue_flags_wait_work_group_event(__global int* res, __global int* array)"
463     NL, "{"
464     NL, "  size_t ls = get_local_size(0);"
465     NL, "  size_t gs = get_global_size(0);"
466     NL, "  size_t tid = get_global_id(0);"
467     NL, "  size_t gid = get_group_id(0);"
468     NL, "  size_t lid = get_local_id(0);"
469     NL, ""
470     NL, "  res[tid] = 0;"
471     NL, "  array[tid] = tid;"
472     NL, ""
473     NL, "  if(lid == 0)"
474     NL, "  {"
475     NL, "    clk_event_t block_evt;"
476     NL, "    clk_event_t user_evt = create_user_event();"
477     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
478     NL, "    ^{"
479     NL, "       block_fn(array, 1, ls, res, gid);"
480     NL, "     });"
481     NL, "    set_user_event_status(user_evt, CL_COMPLETE);"
482     NL, "    release_event(user_evt);"
483     NL, "    release_event(block_evt);"
484     NL, "  }"
485     NL, "}"
486     NL
487 };
488 
489 static const char* enqueue_flags_wait_work_group_local[] =
490 {
491     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
492     NL, ""
493     NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
494     NL, "{"
495     NL, "  int val = 0;"
496     NL, "  size_t gid = get_group_id(0);"
497     NL, "  size_t lid = get_local_id(0);"
498     NL, "  size_t tid = get_global_id(0);"
499     NL, "  size_t gs = get_global_size(0);"
500     NL, ""
501     NL, "  sub_array[lid] = array[(index - 1) * gs + tid];"
502     NL, "  barrier(CLK_LOCAL_MEM_FENCE);"
503     NL, ""
504     NL, "  for(int i = 0; i < ls; i++)"
505     NL, "  {"
506     NL, "    int id = gid * ls + i;"
507     NL, "    val += sub_array[i];"
508     NL, "    val -= (tid == id)? 0: (id + index - 1);"
509     NL, "  }"
510     NL, " "
511     NL, "  if(gid == group_id)"
512     NL, "  {"
513     NL, "    if((index + 1) < BITS_DEPTH && lid == 0)"
514     NL, "    {"
515     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
516     NL, "      ^(__local void* sub_array){"
517     NL, "        block_fn(array, index + 1, ls, res, sub_array, gid);"
518     NL, "      }, ls * sizeof(int));"
519     NL, "    }"
520     NL, " "
521     NL, "    array[index * gs + tid] = val + 1;"
522     NL, "  }"
523     NL, ""
524     NL, ""
525     NL, "  if((index + 1) == BITS_DEPTH)"
526     NL, "  {"
527     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
528     NL, ""
529     NL, "    if(lid == 0 && gid == group_id)"
530     NL, "    {"
531     NL, "      res[gid] = 1;"
532     NL, ""
533     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
534     NL, "      {"
535     NL, "        for(int i = 0; i < ls; i++)"
536     NL, "        {"
537     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
538     NL, "          {"
539     NL, "            res[gid] = 2;"
540     NL, "            break;"
541     NL, "          }"
542     NL, "        }"
543     NL, "      }"
544     NL, "    }"
545     NL, "  }"
546     NL, "}"
547     NL, ""
548     NL, "kernel void enqueue_flags_wait_work_group_local(__global int* res, __global int* array)"
549     NL, "{"
550     NL, "  size_t ls  = get_local_size(0);"
551     NL, "  size_t gs = get_global_size(0);"
552     NL, "  size_t tid  = get_global_id(0);"
553     NL, "  size_t gid = get_group_id(0);"
554     NL, "  size_t lid = get_local_id(0);"
555     NL, ""
556     NL, "  res[tid] = 0;"
557     NL, "  array[tid] = tid;"
558     NL, ""
559     NL, "  if(lid == 0)"
560     NL, "  {"
561     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), "
562     NL, "    ^(__local void* sub_array){"
563     NL, "      block_fn(array, 1, ls, res, sub_array, gid);"
564     NL, "    }, ls * sizeof(int));"
565     NL, "  }"
566     NL, "}"
567     NL
568 };
569 
570 static const char* enqueue_flags_wait_work_group_event_local[] =
571 {
572     NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH)
573     NL, ""
574     NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)"
575     NL, "{"
576     NL, "  int val = 0;"
577     NL, "  size_t gid = get_group_id(0);"
578     NL, "  size_t lid = get_local_id(0);"
579     NL, "  size_t tid = get_global_id(0);"
580     NL, "  size_t gs = get_global_size(0);"
581     NL, ""
582     NL, "  sub_array[lid] = array[(index - 1) * gs + tid];"
583     NL, "  barrier(CLK_LOCAL_MEM_FENCE);"
584     NL, ""
585     NL, "  for(int i = 0; i < ls; i++)"
586     NL, "  {"
587     NL, "    int id = gid * ls + i;"
588     NL, "    val += sub_array[i];"
589     NL, "    val -= (tid == id)? 0: (id + index - 1);"
590     NL, "  }"
591     NL, ""
592     NL, "  if(gid == group_id)"
593     NL, "  {"
594     NL, "    if((index + 1) < BITS_DEPTH && lid == 0)"
595     NL, "    {"
596     NL, "      clk_event_t block_evt;"
597     NL, "      clk_event_t user_evt = create_user_event();"
598     NL, "      enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
599     NL, "      ^(__local void* sub_array){"
600     NL, "        block_fn(array, index + 1, ls, res, sub_array, gid);"
601     NL, "      }, ls * sizeof(int));"
602     NL, "      set_user_event_status(user_evt, CL_COMPLETE);"
603     NL, "      release_event(user_evt);"
604     NL, "      release_event(block_evt);"
605     NL, "    }"
606     NL, " "
607     NL, "    array[index * gs + tid] = val + 1;"
608     NL, "  }"
609     NL, ""
610     NL, "  if((index + 1) == BITS_DEPTH)"
611     NL, "  {"
612     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
613     NL, ""
614     NL, "    if(lid == 0 && gid == group_id)"
615     NL, "    {"
616     NL, "      res[gid] = 1;"
617     NL, ""
618     NL, "      for(int j = 0; j < BITS_DEPTH; j++)"
619     NL, "      {"
620     NL, "        for(int i = 0; i < ls; i++)"
621     NL, "        {"
622     NL, "          if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))"
623     NL, "          {"
624     NL, "            res[gid] = 2;"
625     NL, "            break;"
626     NL, "          }"
627     NL, "        }"
628     NL, "      }"
629     NL, "    }"
630     NL, "  }"
631     NL, "}"
632     NL, ""
633     NL, "kernel void enqueue_flags_wait_work_group_event_local(__global int* res, __global int* array)"
634     NL, "{"
635     NL, "  size_t ls  = get_local_size(0);"
636     NL, "  size_t gs  = get_global_size(0);"
637     NL, "  size_t tid  = get_global_id(0);"
638     NL, "  size_t gid = get_group_id(0);"
639     NL, "  size_t lid  = get_local_id(0);"
640     NL, ""
641     NL, "  res[tid] = 0;"
642     NL, "  array[tid] = tid;"
643     NL, ""
644     NL, "  if(lid == 0)"
645     NL, "  {"
646     NL, "    clk_event_t block_evt;"
647     NL, "    clk_event_t user_evt = create_user_event();"
648     NL, "    enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, "
649     NL, "    ^(__local void* sub_array){"
650     NL, "      block_fn(array, 1, ls, res, sub_array, gid);"
651     NL, "    }, ls * sizeof(int));"
652     NL, "    set_user_event_status(user_evt, CL_COMPLETE);"
653     NL, "    release_event(user_evt);"
654     NL, "    release_event(block_evt);"
655     NL, "  }"
656     NL, "}"
657     NL
658 };
659 
660 static const kernel_src sources_enqueue_block_flags[] =
661 {
662     KERNEL(enqueue_flags_wait_kernel_simple),
663     KERNEL(enqueue_flags_wait_kernel_event),
664     KERNEL(enqueue_flags_wait_kernel_local),
665     KERNEL(enqueue_flags_wait_kernel_event_local),
666     KERNEL(enqueue_flags_wait_work_group_simple),
667     KERNEL(enqueue_flags_wait_work_group_event),
668     KERNEL(enqueue_flags_wait_work_group_local),
669     KERNEL(enqueue_flags_wait_work_group_event_local)
670 };
671 static const size_t num_enqueue_block_flags = arr_size(sources_enqueue_block_flags);
672 
673 
test_enqueue_flags(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)674 int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
675 {
676     cl_uint i;
677     cl_int err_ret, res = 0;
678     clCommandQueueWrapper dev_queue;
679     cl_int kernel_results[MAX_GWS] = { -1 };
680     int buff[MAX_GWS * BITS_DEPTH] = { 0 };
681 
682     size_t ret_len;
683     size_t max_local_size = 1;
684     cl_uint maxQueueSize = 0;
685     err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
686     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
687 
688     err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
689     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
690 
691     cl_queue_properties queue_prop_def[] =
692     {
693         CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
694         CL_QUEUE_SIZE, maxQueueSize,
695         0
696     };
697 
698     dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
699     test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
700 
701     size_t global_size = MAX_GWS;
702     size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
703     if(gWimpyMode)
704     {
705         global_size = 4;
706         local_size = 2;
707     }
708 
709     size_t failCnt = 0;
710     for(i = 0; i < num_enqueue_block_flags; ++i)
711     {
712         if (!gKernelName.empty() && gKernelName != sources_enqueue_block_flags[i].kernel_name)
713             continue;
714 
715         log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block_flags[i].kernel_name, i + 1, num_enqueue_block_flags);
716 
717         clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, global_size * BITS_DEPTH * sizeof(cl_int), buff, &err_ret);
718         test_error(err_ret, "clCreateBuffer() failed");
719 
720         kernel_arg args[] =
721         {
722             { sizeof(cl_mem),  &mem }
723         };
724 
725         err_ret = run_n_kernel_args(context, queue, sources_enqueue_block_flags[i].lines, sources_enqueue_block_flags[i].num_lines, sources_enqueue_block_flags[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args);
726         if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block_flags[i].kernel_name)) { ++failCnt; res = -1; }
727         else
728         {
729             int r = 0;
730             for (int j=0; j<global_size; j++)
731             {
732                 if (kernel_results[j] != 1 && j < (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 1", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j]))
733                 {
734                     r = -1;
735                     break;
736                 }
737                 else if (kernel_results[j] != 0 && j >= (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 0", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j]))
738                 {
739                     r = -1;
740                     break;
741                 }
742             }
743             if(r == 0) log_info("'%s' kernel is OK.\n", sources_enqueue_block_flags[i].kernel_name);
744             else res = -1;
745         }
746     }
747 
748     if (failCnt > 0)
749     {
750         log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_enqueue_block_flags);
751     }
752 
753     return res;
754 }
755 
756 
757 
758 #endif
759