xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_progvar.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017, 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 // Bug: Missing in spec: atomic_intptr_t is always supported if device is
19 // 32-bits.
20 // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
21 
22 #define FLUSH fflush(stdout)
23 
24 #define MAX_STR 16 * 1024
25 
26 #define ALIGNMENT 128
27 
28 
29 // NUM_ROUNDS must be at least 1.
30 // It determines how many sets of random data we push through the global
31 // variables.
32 #define NUM_ROUNDS 1
33 
34 // This is a shared property of the writer and reader kernels.
35 #define NUM_TESTED_VALUES 5
36 
37 // TODO: pointer-to-half (and its vectors)
38 // TODO: union of...
39 
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <cstring>
44 #include <string>
45 #include <vector>
46 #include <cassert>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include "harness/typeWrappers.h"
50 #include "harness/errorHelpers.h"
51 #include "harness/mt19937.h"
52 #include "procs.h"
53 
54 
55 ////////////////////
56 // Device capabilities
57 static int l_has_double = 0;
58 static int l_has_half = 0;
59 static int l_64bit_device = 0;
60 static int l_has_int64_atomics = 0;
61 static int l_has_intptr_atomics = 0;
62 static int l_has_cles_int64 = 0;
63 
64 static int l_host_is_big_endian = 1;
65 
66 static size_t l_max_global_id0 = 0;
67 static cl_bool l_linker_available = false;
68 
69 #define check_error(errCode, msg, ...)                                         \
70     ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n",         \
71                                           ##__VA_ARGS__, __FILE__, __LINE__),  \
72                                 1)                                             \
73                              : 0)
74 
75 ////////////////////
76 // Info about types we can use for program scope variables.
77 
78 
79 class TypeInfo {
80 
81 public:
TypeInfo()82     TypeInfo()
83         : name(""), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
84           m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
85           m_size(0), m_value_size(0), m_buf_elem_type("")
86     {}
TypeInfo(const char * name_arg)87     TypeInfo(const char* name_arg)
88         : name(name_arg), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
89           m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
90           m_size(0), m_value_size(0), m_buf_elem_type(name_arg)
91     {}
92 
93     // Vectors
TypeInfo(TypeInfo * elem_type,int num_elem)94     TypeInfo(TypeInfo* elem_type, int num_elem)
95         : m_elem_type(elem_type), m_num_elem(num_elem), m_is_vecbase(false),
96           m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false)
97     {
98         char
99             the_name[10]; // long enough for longest vector type name "double16"
100         snprintf(the_name, sizeof(the_name), "%s%d",
101                  elem_type->get_name_c_str(), m_num_elem);
102         this->name = std::string(the_name);
103         this->m_buf_elem_type = std::string(the_name);
104         this->m_value_size = num_elem * elem_type->get_size();
105         if (m_num_elem == 3)
106         {
107             this->m_size = 4 * elem_type->get_size();
108         }
109         else
110         {
111             this->m_size = num_elem * elem_type->get_size();
112         }
113     }
get_name(void) const114     const std::string& get_name(void) const { return name; }
get_name_c_str(void) const115     const char* get_name_c_str(void) const { return name.c_str(); }
set_vecbase(void)116     TypeInfo& set_vecbase(void)
117     {
118         this->m_is_vecbase = true;
119         return *this;
120     }
set_atomic(void)121     TypeInfo& set_atomic(void)
122     {
123         this->m_is_atomic = true;
124         return *this;
125     }
set_like_size_t(void)126     TypeInfo& set_like_size_t(void)
127     {
128         this->m_is_like_size_t = true;
129         this->set_size(l_64bit_device ? 8 : 4);
130         this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
131         return *this;
132     }
set_bool(void)133     TypeInfo& set_bool(void)
134     {
135         this->m_is_bool = true;
136         return *this;
137     }
set_size(size_t n)138     TypeInfo& set_size(size_t n)
139     {
140         this->m_value_size = this->m_size = n;
141         return *this;
142     }
set_buf_elem_type(const char * name)143     TypeInfo& set_buf_elem_type(const char* name)
144     {
145         this->m_buf_elem_type = std::string(name);
146         return *this;
147     }
148 
elem_type(void) const149     const TypeInfo* elem_type(void) const { return m_elem_type; }
num_elem(void) const150     int num_elem(void) const { return m_num_elem; }
151 
is_vecbase(void) const152     bool is_vecbase(void) const { return m_is_vecbase; }
is_atomic(void) const153     bool is_atomic(void) const { return m_is_atomic; }
is_atomic_64bit(void) const154     bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; }
is_like_size_t(void) const155     bool is_like_size_t(void) const { return m_is_like_size_t; }
is_bool(void) const156     bool is_bool(void) const { return m_is_bool; }
get_size(void) const157     size_t get_size(void) const { return m_size; }
get_value_size(void) const158     size_t get_value_size(void) const { return m_value_size; }
159 
160     // When passing values of this type to a kernel, what buffer type
161     // should be used?
get_buf_elem_type(void) const162     const char* get_buf_elem_type(void) const
163     {
164         return m_buf_elem_type.c_str();
165     }
166 
as_string(const cl_uchar * value_ptr) const167     std::string as_string(const cl_uchar* value_ptr) const
168     {
169         // This method would be shorter if I had a real handle to element
170         // vector type.
171         if (this->is_bool())
172         {
173             std::string result(name);
174             result += "<";
175             result += (*value_ptr ? "true" : "false");
176             result += ", ";
177             char buf[10];
178             sprintf(buf, "%02x", *value_ptr);
179             result += buf;
180             result += ">";
181             return result;
182         }
183         else if (this->num_elem())
184         {
185             std::string result(name);
186             result += "<";
187             for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
188             {
189                 char buf[MAX_STR];
190                 if (ielem) result += ", ";
191                 for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size();
192                      ibyte++)
193                 {
194                     sprintf(buf + 2 * ibyte, "%02x",
195                             value_ptr[ielem * this->m_elem_type->get_size()
196                                       + ibyte]);
197                 }
198                 result += buf;
199             }
200             result += ">";
201             return result;
202         }
203         else
204         {
205             std::string result(name);
206             result += "<";
207             char buf[MAX_STR];
208             for (unsigned ibyte = 0; ibyte < this->get_size(); ibyte++)
209             {
210                 sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]);
211             }
212             result += buf;
213             result += ">";
214             return result;
215         }
216     }
217 
218     // Initialize the given buffer to a constant value initialized as if it
219     // were from the INIT_VAR macro below.
220     // Only needs to support values 0 and 1.
init(cl_uchar * buf,cl_uchar val) const221     void init(cl_uchar* buf, cl_uchar val) const
222     {
223         if (this->num_elem())
224         {
225             for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
226             {
227                 // Delegate!
228                 this->init_elem(
229                     buf + ielem * this->get_value_size() / this->num_elem(),
230                     val);
231             }
232         }
233         else
234         {
235             init_elem(buf, val);
236         }
237     }
238 
239 private:
init_elem(cl_uchar * buf,cl_uchar val) const240     void init_elem(cl_uchar* buf, cl_uchar val) const
241     {
242         size_t elem_size = this->num_elem()
243             ? this->get_value_size() / this->num_elem()
244             : this->get_size();
245         memset(buf, 0, elem_size);
246         if (val)
247         {
248             if (strstr(name.c_str(), "float"))
249             {
250                 *(float*)buf = (float)val;
251                 return;
252             }
253             if (strstr(name.c_str(), "double"))
254             {
255                 *(double*)buf = (double)val;
256                 return;
257             }
258             if (this->is_bool())
259             {
260                 *buf = (bool)val;
261                 return;
262             }
263 
264             // Write a single character value to the correct spot,
265             // depending on host endianness.
266             if (l_host_is_big_endian)
267                 *(buf + elem_size - 1) = (cl_uchar)val;
268             else
269                 *buf = (cl_uchar)val;
270         }
271     }
272 
273 public:
dump(FILE * fp) const274     void dump(FILE* fp) const
275     {
276         fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size,
277                 (int)m_value_size, m_buf_elem_type.c_str());
278         if (this->m_elem_type)
279             fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(),
280                     this->num_elem());
281         if (this->m_is_vecbase) fprintf(fp, " vecbase");
282         if (this->m_is_bool) fprintf(fp, " bool");
283         if (this->m_is_like_size_t) fprintf(fp, " like-size_t");
284         if (this->m_is_atomic) fprintf(fp, " atomic");
285         fprintf(fp, "\n");
286         fflush(fp);
287     }
288 
289 private:
290     std::string name;
291     TypeInfo* m_elem_type;
292     int m_num_elem;
293     bool m_is_vecbase;
294     bool m_is_atomic;
295     bool m_is_like_size_t;
296     bool m_is_bool;
297     size_t m_size; // Number of bytes of storage occupied by this type.
298     size_t m_value_size; // Number of bytes of value significant for this type.
299                          // Differs for vec3.
300 
301     // When passing values of this type to a kernel, what buffer type
302     // should be used?
303     // For most types, it's just itself.
304     // Use a std::string so I don't have to make a copy constructor.
305     std::string m_buf_elem_type;
306 };
307 
308 
309 #define NUM_SCALAR_TYPES                                                       \
310     (8 + 2) // signed and unsigned integral types, float and double
311 #define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
312 #define NUM_PLAIN_TYPES                                                        \
313     5 /*boolean and size_t family */                                           \
314         + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES                \
315         + 10 /* atomic types */
316 
317 // Need room for plain, array, pointer, struct
318 #define MAX_TYPES (4 * NUM_PLAIN_TYPES)
319 
320 static TypeInfo type_info[MAX_TYPES];
321 static int num_type_info = 0; // Number of valid entries in type_info[]
322 
323 
324 // A helper class to form kernel source arguments for clCreateProgramWithSource.
325 class StringTable {
326 public:
StringTable()327     StringTable(): m_strings(), m_c_strs(NULL), m_lengths(NULL), m_frozen(false)
328     {}
~StringTable()329     ~StringTable() { release_frozen(); }
330 
add(std::string s)331     void add(std::string s)
332     {
333         release_frozen();
334         m_strings.push_back(s);
335     }
336 
num_str()337     const size_t num_str()
338     {
339         freeze();
340         return m_strings.size();
341     }
strs()342     const char** strs()
343     {
344         freeze();
345         return m_c_strs;
346     }
lengths()347     const size_t* lengths()
348     {
349         freeze();
350         return m_lengths;
351     }
352 
353 private:
freeze(void)354     void freeze(void)
355     {
356         if (!m_frozen)
357         {
358             release_frozen();
359 
360             m_c_strs =
361                 (const char**)malloc(sizeof(const char*) * m_strings.size());
362             m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size());
363             assert(m_c_strs);
364             assert(m_lengths);
365 
366             for (size_t i = 0; i < m_strings.size(); i++)
367             {
368                 m_c_strs[i] = m_strings[i].c_str();
369                 m_lengths[i] = strlen(m_c_strs[i]);
370             }
371 
372             m_frozen = true;
373         }
374     }
release_frozen(void)375     void release_frozen(void)
376     {
377         if (m_c_strs)
378         {
379             free(m_c_strs);
380             m_c_strs = 0;
381         }
382         if (m_lengths)
383         {
384             free(m_lengths);
385             m_lengths = 0;
386         }
387         m_frozen = false;
388     }
389 
390     typedef std::vector<std::string> strlist_t;
391     strlist_t m_strings;
392     const char** m_c_strs;
393     size_t* m_lengths;
394     bool m_frozen;
395 };
396 
397 
398 ////////////////////
399 // File scope function declarations
400 
401 static void l_load_abilities(cl_device_id device);
402 static const char* l_get_fp64_pragma(void);
403 static const char* l_get_cles_int64_pragma(void);
404 static int l_build_type_table(cl_device_id device);
405 
406 static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
407                              size_t* pref_size_ret);
408 
409 static void l_set_randomly(cl_uchar* buf, size_t buf_size,
410                            RandomSeed& rand_state);
411 static int l_compare(const char* test_name, const cl_uchar* expected,
412                      const cl_uchar* received, size_t num_values,
413                      const TypeInfo& ti);
414 static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
415                   unsigned src_idx, const TypeInfo& ti);
416 
417 static std::string conversion_functions(const TypeInfo& ti);
418 static std::string global_decls(const TypeInfo& ti, bool with_init);
419 static std::string global_check_function(const TypeInfo& ti);
420 static std::string writer_function(const TypeInfo& ti);
421 static std::string reader_function(const TypeInfo& ti);
422 
423 static int l_write_read(cl_device_id device, cl_context context,
424                         cl_command_queue queue);
425 static int l_write_read_for_type(cl_device_id device, cl_context context,
426                                  cl_command_queue queue, const TypeInfo& ti,
427                                  RandomSeed& rand_state);
428 
429 static int l_init_write_read(cl_device_id device, cl_context context,
430                              cl_command_queue queue);
431 static int l_init_write_read_for_type(cl_device_id device, cl_context context,
432                                       cl_command_queue queue,
433                                       const TypeInfo& ti,
434                                       RandomSeed& rand_state);
435 
436 static int l_capacity(cl_device_id device, cl_context context,
437                       cl_command_queue queue, size_t max_size);
438 static int l_user_type(cl_device_id device, cl_context context,
439                        cl_command_queue queue, bool separate_compile);
440 
441 static std::string get_build_options(cl_device_id device);
442 
443 ////////////////////
444 // File scope function definitions
445 
print_build_log(cl_program program,cl_uint num_devices,cl_device_id * device_list,cl_uint count,const char ** strings,const size_t * lengths,const char * options)446 static cl_int print_build_log(cl_program program, cl_uint num_devices,
447                               cl_device_id* device_list, cl_uint count,
448                               const char** strings, const size_t* lengths,
449                               const char* options)
450 {
451     cl_uint i;
452     cl_int error;
453     BufferOwningPtr<cl_device_id> devices;
454 
455     if (num_devices == 0 || device_list == NULL)
456     {
457         error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
458                                  sizeof(num_devices), &num_devices, NULL);
459         test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
460 
461         device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
462         devices.reset(device_list);
463 
464         memset(device_list, 0, sizeof(cl_device_id) * num_devices);
465 
466         error = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
467                                  sizeof(cl_device_id) * num_devices,
468                                  device_list, NULL);
469         test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
470     }
471 
472     cl_uint z;
473     bool sourcePrinted = false;
474 
475     for (z = 0; z < num_devices; z++)
476     {
477         char deviceName[4096] = "";
478         error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME,
479                                 sizeof(deviceName), deviceName, NULL);
480         check_error(error,
481                     "Device \"%d\" failed to return a name. clGetDeviceInfo "
482                     "CL_DEVICE_NAME failed",
483                     z);
484 
485         cl_build_status buildStatus;
486         error = clGetProgramBuildInfo(program, device_list[z],
487                                       CL_PROGRAM_BUILD_STATUS,
488                                       sizeof(buildStatus), &buildStatus, NULL);
489         check_error(error,
490                     "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
491 
492         if (buildStatus != CL_BUILD_SUCCESS)
493         {
494             if (!sourcePrinted)
495             {
496                 log_error("Build options: %s\n", options);
497                 if (count && strings)
498                 {
499                     log_error("Original source is: ------------\n");
500                     for (i = 0; i < count; i++) log_error("%s", strings[i]);
501                 }
502                 sourcePrinted = true;
503             }
504 
505             char statusString[64] = "";
506             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
507                 sprintf(statusString, "CL_BUILD_SUCCESS");
508             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
509                 sprintf(statusString, "CL_BUILD_NONE");
510             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
511                 sprintf(statusString, "CL_BUILD_ERROR");
512             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
513                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
514             else
515                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
516 
517             log_error("Build not successful for device \"%s\", status: %s\n",
518                       deviceName, statusString);
519 
520             size_t paramSize = 0;
521             error = clGetProgramBuildInfo(program, device_list[z],
522                                           CL_PROGRAM_BUILD_LOG, 0, NULL,
523                                           &paramSize);
524             if (check_error(
525                     error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"))
526                 break;
527 
528             std::string log;
529             log.resize(paramSize / sizeof(char));
530 
531             error = clGetProgramBuildInfo(program, device_list[z],
532                                           CL_PROGRAM_BUILD_LOG, paramSize,
533                                           &log[0], NULL);
534             if (check_error(error,
535                             "Device %d (%s) failed to return a build log", z,
536                             deviceName))
537                 break;
538             if (log[0] == 0)
539                 log_error("clGetProgramBuildInfo returned an empty log.\n");
540             else
541             {
542                 log_error("Build log for device \"%s\":\n", deviceName);
543                 log_error("%s\n", log.c_str());
544             }
545         }
546     }
547     return error;
548 }
549 
l_load_abilities(cl_device_id device)550 static void l_load_abilities(cl_device_id device)
551 {
552     l_has_half = is_extension_available(device, "cl_khr_fp16");
553     l_has_double = is_extension_available(device, "cl_khr_fp64");
554     l_has_cles_int64 = is_extension_available(device, "cles_khr_int64");
555 
556     l_has_int64_atomics =
557         is_extension_available(device, "cl_khr_int64_base_atomics")
558         && is_extension_available(device, "cl_khr_int64_extended_atomics");
559 
560     {
561         int status = CL_SUCCESS;
562         cl_uint addr_bits = 32;
563         status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
564                                  sizeof(addr_bits), &addr_bits, 0);
565         l_64bit_device = (status == CL_SUCCESS && addr_bits == 64);
566     }
567 
568     // 32-bit devices always have intptr atomics.
569     l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
570 
571     union {
572         char c[4];
573         int i;
574     } probe;
575     probe.i = 1;
576     l_host_is_big_endian = !probe.c[0];
577 
578     // Determine max global id.
579     {
580         int status = CL_SUCCESS;
581         cl_uint max_dim = 0;
582         status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
583                                  sizeof(max_dim), &max_dim, 0);
584         if (check_error(status,
585                         "clGetDeviceInfo for "
586                         "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
587             return;
588         assert(max_dim > 0);
589         size_t max_id[3];
590         max_id[0] = 0;
591         status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
592                                  max_dim * sizeof(size_t), &max_id[0], 0);
593         if (check_error(status,
594                         "clGetDeviceInfo for "
595                         "CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
596             return;
597         l_max_global_id0 = max_id[0];
598     }
599 
600     { // Is separate compilation supported?
601         int status = CL_SUCCESS;
602         l_linker_available = false;
603         status =
604             clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
605                             sizeof(l_linker_available), &l_linker_available, 0);
606         if (check_error(status,
607                         "clGetDeviceInfo for "
608                         "CL_DEVICE_LINKER_AVAILABLE failed."))
609             return;
610     }
611 }
612 
613 
l_get_fp64_pragma(void)614 static const char* l_get_fp64_pragma(void)
615 {
616     return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
617                         : "";
618 }
619 
l_get_cles_int64_pragma(void)620 static const char* l_get_cles_int64_pragma(void)
621 {
622     return l_has_cles_int64
623         ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n"
624         : "";
625 }
626 
l_get_int64_atomic_pragma(void)627 static const char* l_get_int64_atomic_pragma(void)
628 {
629     return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
630            "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
631 }
632 
l_build_type_table(cl_device_id device)633 static int l_build_type_table(cl_device_id device)
634 {
635     int status = CL_SUCCESS;
636     size_t iscalar = 0;
637     size_t ivecsize = 0;
638     int vecsizes[] = { 2, 3, 4, 8, 16 };
639     const char* vecbase[] = { "uchar", "char",  "ushort", "short", "uint",
640                               "int",   "ulong", "long",   "float", "double" };
641     int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
642     const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t",
643                                   "ptrdiff_t" };
644     const char* atomics[] = {
645         "atomic_int",   "atomic_uint",  "atomic_long",
646         "atomic_ulong", "atomic_float", "atomic_double",
647     };
648     int atomics_size[] = { 4, 4, 8, 8, 4, 8 };
649     const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t",
650                                      "atomic_size_t", "atomic_ptrdiff_t" };
651 
652     l_load_abilities(device);
653     num_type_info = 0;
654 
655     // Boolean.
656     type_info[num_type_info++] =
657         TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar");
658 
659     // Vector types, and the related scalar element types.
660     for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar)
661     {
662         if (!gHasLong && strstr(vecbase[iscalar], "long")) continue;
663         if (!l_has_double && strstr(vecbase[iscalar], "double")) continue;
664 
665         // Scalar
666         TypeInfo* elem_type = type_info + num_type_info++;
667         *elem_type = TypeInfo(vecbase[iscalar])
668                          .set_vecbase()
669                          .set_size(vecbase_size[iscalar]);
670 
671         // Vector
672         for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]);
673              ivecsize++)
674         {
675             type_info[num_type_info++] =
676                 TypeInfo(elem_type, vecsizes[ivecsize]);
677         }
678     }
679 
680     // Size_t-like types
681     for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]);
682          ++iscalar)
683     {
684         type_info[num_type_info++] =
685             TypeInfo(like_size_t[iscalar]).set_like_size_t();
686     }
687 
688     // Atomic types.
689     for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar)
690     {
691         if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue;
692         if (!(l_has_int64_atomics && l_has_double)
693             && strstr(atomics[iscalar], "double"))
694             continue;
695 
696         // The +7 is used to skip over the "atomic_" prefix.
697         const char* buf_type = atomics[iscalar] + 7;
698         type_info[num_type_info++] = TypeInfo(atomics[iscalar])
699                                          .set_atomic()
700                                          .set_size(atomics_size[iscalar])
701                                          .set_buf_elem_type(buf_type);
702     }
703     if (l_has_intptr_atomics)
704     {
705         for (iscalar = 0;
706              iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]);
707              ++iscalar)
708         {
709             type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar])
710                                              .set_atomic()
711                                              .set_like_size_t();
712         }
713     }
714 
715     assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES
716 
717 #if 0
718     for ( size_t i = 0 ; i < num_type_info ; i++ ) {
719         type_info[ i ].dump(stdout);
720     }
721     exit(0);
722 #endif
723 
724     return status;
725 }
726 
l_find_type(const char * name)727 static const TypeInfo& l_find_type(const char* name)
728 {
729     auto itr =
730         std::find_if(type_info, type_info + num_type_info,
731                      [name](TypeInfo& ti) { return ti.get_name() == name; });
732     assert(itr != type_info + num_type_info);
733     return *itr;
734 }
735 
736 
737 // Populate return parameters for max program variable size, preferred program
738 // variable size.
739 
l_get_device_info(cl_device_id device,size_t * max_size_ret,size_t * pref_size_ret)740 static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
741                              size_t* pref_size_ret)
742 {
743     int err = CL_SUCCESS;
744     size_t return_size = 0;
745 
746     err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
747                           sizeof(*max_size_ret), max_size_ret, &return_size);
748     if (err != CL_SUCCESS)
749     {
750         log_error("Error: Failed to get device info for "
751                   "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
752         return err;
753     }
754     if (return_size != sizeof(size_t))
755     {
756         log_error("Error: Invalid size %d returned for "
757                   "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
758                   (int)return_size);
759         return 1;
760     }
761     if (return_size != sizeof(size_t))
762     {
763         log_error("Error: Invalid size %d returned for "
764                   "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
765                   (int)return_size);
766         return 1;
767     }
768 
769     return_size = 0;
770     err =
771         clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
772                         sizeof(*pref_size_ret), pref_size_ret, &return_size);
773     if (err != CL_SUCCESS)
774     {
775         log_error("Error: Failed to get device info for "
776                   "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",
777                   err);
778         return err;
779     }
780     if (return_size != sizeof(size_t))
781     {
782         log_error("Error: Invalid size %d returned for "
783                   "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n",
784                   (int)return_size);
785         return 1;
786     }
787 
788     return CL_SUCCESS;
789 }
790 
791 
l_set_randomly(cl_uchar * buf,size_t buf_size,RandomSeed & rand_state)792 static void l_set_randomly(cl_uchar* buf, size_t buf_size,
793                            RandomSeed& rand_state)
794 {
795     assert(0 == (buf_size % sizeof(cl_uint)));
796     for (size_t i = 0; i < buf_size; i += sizeof(cl_uint))
797     {
798         *((cl_uint*)(buf + i)) = genrand_int32(rand_state);
799     }
800 #if 0
801     for ( size_t i = 0; i < buf_size ; i++ ) {
802         printf("%02x",buf[i]);
803     }
804     printf("\n");
805 #endif
806 }
807 
808 // Return num_value values of the given type.
809 // Returns CL_SUCCESS if they compared as equal.
l_compare(const char * test_name,const cl_uchar * expected,const cl_uchar * received,size_t num_values,const TypeInfo & ti)810 static int l_compare(const char* test_name, const cl_uchar* expected,
811                      const cl_uchar* received, size_t num_values,
812                      const TypeInfo& ti)
813 {
814     // Compare only the valid returned bytes.
815     for (unsigned value_idx = 0; value_idx < num_values; value_idx++)
816     {
817         const cl_uchar* expv = expected + value_idx * ti.get_size();
818         const cl_uchar* gotv = received + value_idx * ti.get_size();
819         if (memcmp(expv, gotv, ti.get_value_size()))
820         {
821             std::string exp_str = ti.as_string(expv);
822             std::string got_str = ti.as_string(gotv);
823             log_error(
824                 "Error: %s test for type %s, at index %d: Expected %s got %s\n",
825                 test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(),
826                 got_str.c_str());
827             return 1;
828         }
829     }
830     return CL_SUCCESS;
831 }
832 
833 // Copy a target value from src[idx] to dest[idx]
l_copy(cl_uchar * dest,unsigned dest_idx,const cl_uchar * src,unsigned src_idx,const TypeInfo & ti)834 static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
835                   unsigned src_idx, const TypeInfo& ti)
836 {
837     cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
838     const cl_uchar* raw_src = src + src_idx * ti.get_size();
839     memcpy(raw_dest, raw_src, ti.get_value_size());
840 
841     return 0;
842 }
843 
844 
conversion_functions(const TypeInfo & ti)845 static std::string conversion_functions(const TypeInfo& ti)
846 {
847     std::string result;
848     static char buf[MAX_STR];
849     int num_printed = 0;
850     // The atomic types just use the base type.
851     if (ti.is_atomic()
852         || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str()))
853     {
854         // The type is represented in a buffer by itself.
855         num_printed = snprintf(buf, MAX_STR,
856                                "%s from_buf(%s a) { return a; }\n"
857                                "%s to_buf(%s a) { return a; }\n",
858                                ti.get_buf_elem_type(), ti.get_buf_elem_type(),
859                                ti.get_buf_elem_type(), ti.get_buf_elem_type());
860     }
861     else
862     {
863         // Just use C-style cast.
864         num_printed = snprintf(buf, MAX_STR,
865                                "%s from_buf(%s a) { return (%s)a; }\n"
866                                "%s to_buf(%s a) { return (%s)a; }\n",
867                                ti.get_name_c_str(), ti.get_buf_elem_type(),
868                                ti.get_name_c_str(), ti.get_buf_elem_type(),
869                                ti.get_name_c_str(), ti.get_buf_elem_type());
870     }
871     // Add initializations.
872     if (ti.is_atomic())
873     {
874         num_printed += snprintf(buf + num_printed, MAX_STR - num_printed,
875                                 "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n");
876     }
877     else
878     {
879         // This cast works even if the target type is a vector type.
880         num_printed +=
881             snprintf(buf + num_printed, MAX_STR - num_printed,
882                      "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
883     }
884     assert(num_printed < MAX_STR); // or increase MAX_STR
885     result = buf;
886     return result;
887 }
888 
global_decls(const TypeInfo & ti,bool with_init)889 static std::string global_decls(const TypeInfo& ti, bool with_init)
890 {
891     const char* tn = ti.get_name_c_str();
892     const char* vol = (ti.is_atomic() ? " volatile " : " ");
893     static char decls[MAX_STR];
894     int num_printed = 0;
895     if (with_init)
896     {
897         const char* decls_template_with_init =
898             "%s %s var = INIT_VAR(0);\n"
899             "global %s %s g_var = INIT_VAR(1);\n"
900             "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
901             "volatile global %s %s* p_var = &a_var[1];\n\n";
902         num_printed = snprintf(decls, sizeof(decls), decls_template_with_init,
903                                vol, tn, vol, tn, vol, tn, vol, tn);
904     }
905     else
906     {
907         const char* decls_template_no_init = "%s %s var;\n"
908                                              "global %s %s g_var;\n"
909                                              "%s %s a_var[2];\n"
910                                              "global %s %s* p_var;\n\n";
911         num_printed = snprintf(decls, sizeof(decls), decls_template_no_init,
912                                vol, tn, vol, tn, vol, tn, vol, tn);
913     }
914     assert(num_printed < sizeof(decls));
915     (void)num_printed;
916     return std::string(decls);
917 }
918 
919 // Return the source code for the "global_check" function for the given type.
920 // This function checks that all program-scope variables have appropriate
921 // initial values when no explicit initializer is used. If all tests pass the
922 // kernel writes a non-zero value to its output argument, otherwise it writes
923 // zero.
global_check_function(const TypeInfo & ti)924 static std::string global_check_function(const TypeInfo& ti)
925 {
926     const std::string type_name = ti.get_buf_elem_type();
927 
928     // all() should only be used on vector inputs. For scalar comparison, the
929     // result of the equality operator can be used as a bool value.
930     const bool is_scalar =
931         ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
932     const std::string is_equality_true = is_scalar ? "" : "all";
933 
934     std::string code = "kernel void global_check(global int* out) {\n";
935     code += "  const " + type_name + " zero = ((" + type_name + ")0);\n";
936     code += "  bool status = true;\n";
937     if (ti.is_atomic())
938     {
939         code += "  status &= " + is_equality_true
940             + "(atomic_load(&var) == zero);\n";
941         code += "  status &= " + is_equality_true
942             + "(atomic_load(&g_var) == zero);\n";
943         code += "  status &= " + is_equality_true
944             + "(atomic_load(&a_var[0]) == zero);\n";
945         code += "  status &= " + is_equality_true
946             + "(atomic_load(&a_var[1]) == zero);\n";
947     }
948     else
949     {
950         code += "  status &= " + is_equality_true + "(var == zero);\n";
951         code += "  status &= " + is_equality_true + "(g_var == zero);\n";
952         code += "  status &= " + is_equality_true + "(a_var[0] == zero);\n";
953         code += "  status &= " + is_equality_true + "(a_var[1] == zero);\n";
954     }
955     code += "  status &= (p_var == NULL);\n";
956     code += "  *out = status ? 1 : 0;\n";
957     code += "}\n\n";
958 
959     return code;
960 }
961 
962 // Return the source text for the writer function for the given type.
963 // For types that can't be passed as pointer-to-type as a kernel argument,
964 // use a substitute base type of the same size.
writer_function(const TypeInfo & ti)965 static std::string writer_function(const TypeInfo& ti)
966 {
967     static char writer_src[MAX_STR];
968     int num_printed = 0;
969     if (!ti.is_atomic())
970     {
971         const char* writer_template_normal =
972             "kernel void writer( global %s* src, uint idx ) {\n"
973             "  var = from_buf(src[0]);\n"
974             "  g_var = from_buf(src[1]);\n"
975             "  a_var[0] = from_buf(src[2]);\n"
976             "  a_var[1] = from_buf(src[3]);\n"
977             "  p_var = a_var + idx;\n"
978             "}\n\n";
979         num_printed = snprintf(writer_src, sizeof(writer_src),
980                                writer_template_normal, ti.get_buf_elem_type());
981     }
982     else
983     {
984         const char* writer_template_atomic =
985             "kernel void writer( global %s* src, uint idx ) {\n"
986             "  atomic_store( &var, from_buf(src[0]) );\n"
987             "  atomic_store( &g_var, from_buf(src[1]) );\n"
988             "  atomic_store( &a_var[0], from_buf(src[2]) );\n"
989             "  atomic_store( &a_var[1], from_buf(src[3]) );\n"
990             "  p_var = a_var + idx;\n"
991             "}\n\n";
992         num_printed = snprintf(writer_src, sizeof(writer_src),
993                                writer_template_atomic, ti.get_buf_elem_type());
994     }
995     assert(num_printed < sizeof(writer_src));
996     (void)num_printed;
997     std::string result = writer_src;
998     return result;
999 }
1000 
1001 
1002 // Return source text for teh reader function for the given type.
1003 // For types that can't be passed as pointer-to-type as a kernel argument,
1004 // use a substitute base type of the same size.
reader_function(const TypeInfo & ti)1005 static std::string reader_function(const TypeInfo& ti)
1006 {
1007     static char reader_src[MAX_STR];
1008     int num_printed = 0;
1009     if (!ti.is_atomic())
1010     {
1011         const char* reader_template_normal =
1012             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
1013             "  *p_var = from_buf(ptr_write_val);\n"
1014             "  dest[0] = to_buf(var);\n"
1015             "  dest[1] = to_buf(g_var);\n"
1016             "  dest[2] = to_buf(a_var[0]);\n"
1017             "  dest[3] = to_buf(a_var[1]);\n"
1018             "}\n\n";
1019         num_printed =
1020             snprintf(reader_src, sizeof(reader_src), reader_template_normal,
1021                      ti.get_buf_elem_type(), ti.get_buf_elem_type());
1022     }
1023     else
1024     {
1025         const char* reader_template_atomic =
1026             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
1027             "  atomic_store( p_var, from_buf(ptr_write_val) );\n"
1028             "  dest[0] = to_buf( atomic_load( &var ) );\n"
1029             "  dest[1] = to_buf( atomic_load( &g_var ) );\n"
1030             "  dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
1031             "  dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
1032             "}\n\n";
1033         num_printed =
1034             snprintf(reader_src, sizeof(reader_src), reader_template_atomic,
1035                      ti.get_buf_elem_type(), ti.get_buf_elem_type());
1036     }
1037     assert(num_printed < sizeof(reader_src));
1038     (void)num_printed;
1039     std::string result = reader_src;
1040     return result;
1041 }
1042 
1043 // Check that all globals where appropriately default-initialized.
check_global_initialization(cl_context context,cl_program program,cl_command_queue queue)1044 static int check_global_initialization(cl_context context, cl_program program,
1045                                        cl_command_queue queue)
1046 {
1047     int status = CL_SUCCESS;
1048 
1049     // Create a buffer on device to store a unique integer.
1050     cl_int is_init_valid = 0;
1051     clMemWrapper buffer(
1052         clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
1053                        sizeof(is_init_valid), &is_init_valid, &status));
1054     test_error_ret(status, "Failed to allocate buffer", status);
1055 
1056     // Create, setup and invoke kernel.
1057     clKernelWrapper global_check(
1058         clCreateKernel(program, "global_check", &status));
1059     test_error_ret(status, "Failed to create global_check kernel", status);
1060     status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
1061     test_error_ret(status,
1062                    "Failed to set up argument for the global_check kernel",
1063                    status);
1064     const cl_uint work_dim = 1;
1065     const size_t global_work_offset[] = { 0 };
1066     const size_t global_work_size[] = { 1 };
1067     status = clEnqueueNDRangeKernel(queue, global_check, work_dim,
1068                                     global_work_offset, global_work_size,
1069                                     nullptr, 0, nullptr, nullptr);
1070     test_error_ret(status, "Failed to run global_check kernel", status);
1071     status = clFinish(queue);
1072     test_error_ret(status, "clFinish() failed", status);
1073 
1074     // Read back the memory buffer from the device.
1075     status =
1076         clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid),
1077                             &is_init_valid, 0, nullptr, nullptr);
1078     test_error_ret(status, "Failed to read buffer from device", status);
1079     if (is_init_valid == 0)
1080     {
1081         log_error("Unexpected default values were detected");
1082         return 1;
1083     }
1084 
1085     return CL_SUCCESS;
1086 }
1087 
1088 // Check write-then-read.
l_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1089 static int l_write_read(cl_device_id device, cl_context context,
1090                         cl_command_queue queue)
1091 {
1092     int status = CL_SUCCESS;
1093     int itype;
1094 
1095     RandomSeed rand_state(gRandomSeed);
1096 
1097     for (itype = 0; itype < num_type_info; itype++)
1098     {
1099         status = status
1100             | l_write_read_for_type(device, context, queue, type_info[itype],
1101                                     rand_state);
1102         FLUSH;
1103     }
1104 
1105     return status;
1106 }
1107 
l_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1108 static int l_write_read_for_type(cl_device_id device, cl_context context,
1109                                  cl_command_queue queue, const TypeInfo& ti,
1110                                  RandomSeed& rand_state)
1111 {
1112     int err = CL_SUCCESS;
1113     std::string type_name(ti.get_name());
1114     const char* tn = type_name.c_str();
1115     log_info("  %s ", tn);
1116 
1117     StringTable ksrc;
1118     ksrc.add(l_get_fp64_pragma());
1119     ksrc.add(l_get_cles_int64_pragma());
1120     if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
1121     ksrc.add(conversion_functions(ti));
1122     ksrc.add(global_decls(ti, false));
1123     ksrc.add(global_check_function(ti));
1124     ksrc.add(writer_function(ti));
1125     ksrc.add(reader_function(ti));
1126 
1127     int status = CL_SUCCESS;
1128     clProgramWrapper program;
1129     clKernelWrapper writer;
1130 
1131     status = create_single_kernel_helper(context, &program, &writer,
1132                                          ksrc.num_str(), ksrc.strs(), "writer");
1133     test_error_ret(status, "Failed to create program for read-after-write test",
1134                    status);
1135 
1136     clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1137     test_error_ret(status,
1138                    "Failed to create reader kernel for read-after-write test",
1139                    status);
1140 
1141     // Check size query.
1142     size_t used_bytes = 0;
1143     status = clGetProgramBuildInfo(program, device,
1144                                    CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1145                                    sizeof(used_bytes), &used_bytes, 0);
1146     test_error_ret(status, "Failed to query global variable total size",
1147                    status);
1148     size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
1149             * ti.get_size() // Two regular variables and an array of 2 elements.
1150         + (l_64bit_device ? 8 : 4); // The pointer
1151     if (used_bytes < expected_used_bytes)
1152     {
1153         log_error("Error program query for global variable total size query "
1154                   "failed: Expected at least %llu but got %llu\n",
1155                   (unsigned long long)expected_used_bytes,
1156                   (unsigned long long)used_bytes);
1157         err |= 1;
1158     }
1159 
1160     err |= check_global_initialization(context, program, queue);
1161 
1162     // We need to create 5 random values of the given type,
1163     // and read 4 of them back.
1164     const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1165     const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
1166     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1167     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1168 
1169     clMemWrapper write_mem(clCreateBuffer(
1170         context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
1171     test_error_ret(status, "Failed to allocate write buffer", status);
1172     clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1173                                          read_data_size, read_data, &status));
1174     test_error_ret(status, "Failed to allocate read buffer", status);
1175 
1176     status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
1177     test_error_ret(status, "set arg", status);
1178     status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
1179     test_error_ret(status, "set arg", status);
1180 
1181     // Boolean random data needs to be massaged a bit more.
1182     const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
1183     unsigned bool_iter = 0;
1184 
1185     for (int iround = 0; iround < num_rounds; iround++)
1186     {
1187         for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
1188         { // Index into array, to write via pointer
1189             // Generate new random data to push through.
1190             // Generate 5 * 128 bytes all the time, even though the test for
1191             // many types use less than all that.
1192 
1193             cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
1194                 queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
1195                 0, 0, 0);
1196 
1197             if (ti.is_bool())
1198             {
1199                 // For boolean, random data cast to bool isn't very random.
1200                 // So use the bottom bit of bool_value_iter to get true
1201                 // diversity.
1202                 for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
1203                      value_idx++)
1204                 {
1205                     write_data[value_idx] = (1 << value_idx) & bool_iter;
1206                     // printf(" %s", (write_data[value_idx] ? "true" : "false"
1207                     // ));
1208                 }
1209                 bool_iter++;
1210             }
1211             else
1212             {
1213                 l_set_randomly(write_data, write_data_size, rand_state);
1214             }
1215             status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
1216             test_error_ret(status, "set arg", status);
1217 
1218             // The value to write via the pointer should be taken from the
1219             // 5th typed slot of the write_data.
1220             status = clSetKernelArg(
1221                 reader, 1, ti.get_size(),
1222                 write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
1223             test_error_ret(status, "set arg", status);
1224 
1225             // Determine the expected values.
1226             cl_uchar expected[read_data_size];
1227             memset(expected, -1, sizeof(expected));
1228             l_copy(expected, 0, write_data, 0, ti);
1229             l_copy(expected, 1, write_data, 1, ti);
1230             l_copy(expected, 2, write_data, 2, ti);
1231             l_copy(expected, 3, write_data, 3, ti);
1232             // But we need to take into account the value from the pointer
1233             // write. The 2 represents where the "a" array values begin in our
1234             // read-back.
1235             l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
1236 
1237             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1238 
1239             if (ti.is_bool())
1240             {
1241                 // Collapse down to one bit.
1242                 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1243                     expected[i] = (bool)expected[i];
1244             }
1245 
1246             cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1247                 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1248                 0, 0);
1249             memset(read_data, -1, read_data_size);
1250             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1251 
1252             // Now run the kernel
1253             const size_t one = 1;
1254             status =
1255                 clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
1256             test_error_ret(status, "enqueue writer", status);
1257             status =
1258                 clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1259             test_error_ret(status, "enqueue reader", status);
1260             status = clFinish(queue);
1261             test_error_ret(status, "finish", status);
1262 
1263             read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1264                 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1265                 0, 0);
1266 
1267             if (ti.is_bool())
1268             {
1269                 // Collapse down to one bit.
1270                 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1271                     read_data[i] = (bool)read_data[i];
1272             }
1273 
1274             // Compare only the valid returned bytes.
1275             int compare_result =
1276                 l_compare("read-after-write", expected, read_data,
1277                           NUM_TESTED_VALUES - 1, ti);
1278             // log_info("Compared %d values each of size %llu. Result %d\n",
1279             // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
1280             // compare_result );
1281             err |= compare_result;
1282 
1283             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1284 
1285             if (err) break;
1286         }
1287     }
1288 
1289     if (CL_SUCCESS == err)
1290     {
1291         log_info("OK\n");
1292         FLUSH;
1293     }
1294     align_free(write_data);
1295     align_free(read_data);
1296     return err;
1297 }
1298 
1299 
1300 // Check initialization, then, read, then write, then read.
l_init_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1301 static int l_init_write_read(cl_device_id device, cl_context context,
1302                              cl_command_queue queue)
1303 {
1304     int status = CL_SUCCESS;
1305     int itype;
1306 
1307     RandomSeed rand_state(gRandomSeed);
1308 
1309     for (itype = 0; itype < num_type_info; itype++)
1310     {
1311         status = status
1312             | l_init_write_read_for_type(device, context, queue,
1313                                          type_info[itype], rand_state);
1314     }
1315     return status;
1316 }
l_init_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1317 static int l_init_write_read_for_type(cl_device_id device, cl_context context,
1318                                       cl_command_queue queue,
1319                                       const TypeInfo& ti,
1320                                       RandomSeed& rand_state)
1321 {
1322     int err = CL_SUCCESS;
1323     std::string type_name(ti.get_name());
1324     const char* tn = type_name.c_str();
1325     log_info("  %s ", tn);
1326 
1327     StringTable ksrc;
1328     ksrc.add(l_get_fp64_pragma());
1329     ksrc.add(l_get_cles_int64_pragma());
1330     if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
1331     ksrc.add(conversion_functions(ti));
1332     ksrc.add(global_decls(ti, true));
1333     ksrc.add(writer_function(ti));
1334     ksrc.add(reader_function(ti));
1335 
1336     int status = CL_SUCCESS;
1337     clProgramWrapper program;
1338     clKernelWrapper writer;
1339 
1340     status = create_single_kernel_helper(context, &program, &writer,
1341                                          ksrc.num_str(), ksrc.strs(), "writer");
1342     test_error_ret(status,
1343                    "Failed to create program for init-read-after-write test",
1344                    status);
1345 
1346     clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1347     test_error_ret(
1348         status, "Failed to create reader kernel for init-read-after-write test",
1349         status);
1350 
1351     // Check size query.
1352     size_t used_bytes = 0;
1353     status = clGetProgramBuildInfo(program, device,
1354                                    CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1355                                    sizeof(used_bytes), &used_bytes, 0);
1356     test_error_ret(status, "Failed to query global variable total size",
1357                    status);
1358     size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
1359             * ti.get_size() // Two regular variables and an array of 2 elements.
1360         + (l_64bit_device ? 8 : 4); // The pointer
1361     if (used_bytes < expected_used_bytes)
1362     {
1363         log_error("Error: program query for global variable total size query "
1364                   "failed: Expected at least %llu but got %llu\n",
1365                   (unsigned long long)expected_used_bytes,
1366                   (unsigned long long)used_bytes);
1367         err |= 1;
1368     }
1369 
1370     // We need to create 5 random values of the given type,
1371     // and read 4 of them back.
1372     const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1373     const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
1374 
1375     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1376     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1377     clMemWrapper write_mem(clCreateBuffer(
1378         context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
1379     test_error_ret(status, "Failed to allocate write buffer", status);
1380     clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1381                                          read_data_size, read_data, &status));
1382     test_error_ret(status, "Failed to allocate read buffer", status);
1383 
1384     status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
1385     test_error_ret(status, "set arg", status);
1386     status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
1387     test_error_ret(status, "set arg", status);
1388 
1389     // Boolean random data needs to be massaged a bit more.
1390     const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
1391     unsigned bool_iter = 0;
1392 
1393     // We need to count iterations.  We do something *different on the
1394     // first iteration, to ensure we actually pick up the initialized
1395     // values.
1396     unsigned iteration = 0;
1397 
1398     for (int iround = 0; iround < num_rounds; iround++)
1399     {
1400         for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
1401         { // Index into array, to write via pointer
1402             // Generate new random data to push through.
1403             // Generate 5 * 128 bytes all the time, even though the test for
1404             // many types use less than all that.
1405 
1406             cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
1407                 queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
1408                 0, 0, 0);
1409 
1410             if (ti.is_bool())
1411             {
1412                 // For boolean, random data cast to bool isn't very random.
1413                 // So use the bottom bit of bool_value_iter to get true
1414                 // diversity.
1415                 for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
1416                      value_idx++)
1417                 {
1418                     write_data[value_idx] = (1 << value_idx) & bool_iter;
1419                     // printf(" %s", (write_data[value_idx] ? "true" : "false"
1420                     // ));
1421                 }
1422                 bool_iter++;
1423             }
1424             else
1425             {
1426                 l_set_randomly(write_data, write_data_size, rand_state);
1427             }
1428             status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
1429             test_error_ret(status, "set arg", status);
1430 
1431             if (!iteration)
1432             {
1433                 // On first iteration, the value we write via the last arg
1434                 // to the "reader" function is 0.
1435                 // It's way easier to code the test this way.
1436                 ti.init(write_data + (NUM_TESTED_VALUES - 1) * ti.get_size(),
1437                         0);
1438             }
1439 
1440             // The value to write via the pointer should be taken from the
1441             // 5th typed slot of the write_data.
1442             status = clSetKernelArg(
1443                 reader, 1, ti.get_size(),
1444                 write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
1445             test_error_ret(status, "set arg", status);
1446 
1447             // Determine the expected values.
1448             cl_uchar expected[read_data_size];
1449             memset(expected, -1, sizeof(expected));
1450             if (iteration)
1451             {
1452                 l_copy(expected, 0, write_data, 0, ti);
1453                 l_copy(expected, 1, write_data, 1, ti);
1454                 l_copy(expected, 2, write_data, 2, ti);
1455                 l_copy(expected, 3, write_data, 3, ti);
1456                 // But we need to take into account the value from the pointer
1457                 // write. The 2 represents where the "a" array values begin in
1458                 // our read-back. But we need to take into account the value
1459                 // from the pointer write.
1460                 l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
1461             }
1462             else
1463             {
1464                 // On first iteration, expect these initialized values!
1465                 // See the decls_template_with_init above.
1466                 ti.init(expected, 0);
1467                 ti.init(expected + ti.get_size(), 1);
1468                 ti.init(expected + 2 * ti.get_size(), 1);
1469                 // Emulate the effect of the write via the pointer.
1470                 // The value is 0, not 1 (see above).
1471                 // The pointer is always initialized to the second element
1472                 // of the array. So it goes into slot 3 of the "expected" array.
1473                 ti.init(expected + 3 * ti.get_size(), 0);
1474             }
1475 
1476             if (ti.is_bool())
1477             {
1478                 // Collapse down to one bit.
1479                 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1480                     expected[i] = (bool)expected[i];
1481             }
1482 
1483             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1484 
1485             cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1486                 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1487                 0, 0);
1488             memset(read_data, -1, read_data_size);
1489             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1490 
1491             // Now run the kernel
1492             const size_t one = 1;
1493             if (iteration)
1494             {
1495                 status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0,
1496                                                 0, 0);
1497                 test_error_ret(status, "enqueue writer", status);
1498             }
1499             else
1500             {
1501                 // On first iteration, we should be picking up the
1502                 // initialized value. So don't enqueue the writer.
1503             }
1504             status =
1505                 clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1506             test_error_ret(status, "enqueue reader", status);
1507             status = clFinish(queue);
1508             test_error_ret(status, "finish", status);
1509 
1510             read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1511                 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1512                 0, 0);
1513 
1514             if (ti.is_bool())
1515             {
1516                 // Collapse down to one bit.
1517                 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1518                     read_data[i] = (bool)read_data[i];
1519             }
1520 
1521             // Compare only the valid returned bytes.
1522             // log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
1523             int compare_result =
1524                 l_compare("init-write-read", expected, read_data,
1525                           NUM_TESTED_VALUES - 1, ti);
1526             // log_info("Compared %d values each of size %llu. Result %d\n",
1527             // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
1528             // compare_result );
1529             err |= compare_result;
1530 
1531             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1532 
1533             if (err) break;
1534 
1535             iteration++;
1536         }
1537     }
1538 
1539     if (CL_SUCCESS == err)
1540     {
1541         log_info("OK\n");
1542         FLUSH;
1543     }
1544     align_free(write_data);
1545     align_free(read_data);
1546 
1547     return err;
1548 }
1549 
1550 
1551 // Check that we can make at least one variable with size
1552 // max_size which is returned from the device info property :
1553 // CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
l_capacity(cl_device_id device,cl_context context,cl_command_queue queue,size_t max_size)1554 static int l_capacity(cl_device_id device, cl_context context,
1555                       cl_command_queue queue, size_t max_size)
1556 {
1557     int err = CL_SUCCESS;
1558     // Just test one type.
1559     const TypeInfo ti(l_find_type("uchar"));
1560     log_info(" l_capacity...");
1561 
1562     const char prog_src_template[] =
1563 #if defined(_WIN32)
1564         "uchar var[%Iu];\n\n"
1565 #else
1566         "uchar var[%zu];\n\n"
1567 #endif
1568         "kernel void get_max_size( global ulong* size_ret ) {\n"
1569 #if defined(_WIN32)
1570         "  *size_ret = (ulong)%Iu;\n"
1571 #else
1572         "  *size_ret = (ulong)%zu;\n"
1573 #endif
1574         "}\n\n"
1575         "kernel void writer( global uchar* src ) {\n"
1576         "  var[get_global_id(0)] = src[get_global_linear_id()];\n"
1577         "}\n\n"
1578         "kernel void reader( global uchar* dest ) {\n"
1579         "  dest[get_global_linear_id()] = var[get_global_id(0)];\n"
1580         "}\n\n";
1581     char prog_src[MAX_STR];
1582     int num_printed = snprintf(prog_src, sizeof(prog_src), prog_src_template,
1583                                max_size, max_size);
1584     assert(num_printed < MAX_STR); // or increase MAX_STR
1585     (void)num_printed;
1586 
1587     StringTable ksrc;
1588     ksrc.add(prog_src);
1589 
1590     int status = CL_SUCCESS;
1591     clProgramWrapper program;
1592     clKernelWrapper get_max_size;
1593 
1594     status = create_single_kernel_helper(context, &program, &get_max_size,
1595                                          ksrc.num_str(), ksrc.strs(),
1596                                          "get_max_size");
1597     test_error_ret(status, "Failed to create program for capacity test",
1598                    status);
1599 
1600     // Check size query.
1601     size_t used_bytes = 0;
1602     status = clGetProgramBuildInfo(program, device,
1603                                    CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1604                                    sizeof(used_bytes), &used_bytes, 0);
1605     test_error_ret(status, "Failed to query global variable total size",
1606                    status);
1607     if (used_bytes < max_size)
1608     {
1609         log_error("Error: program query for global variable total size query "
1610                   "failed: Expected at least %llu but got %llu\n",
1611                   (unsigned long long)max_size, (unsigned long long)used_bytes);
1612         err |= 1;
1613     }
1614 
1615     // Prepare to execute
1616     clKernelWrapper writer(clCreateKernel(program, "writer", &status));
1617     test_error_ret(status, "Failed to create writer kernel for capacity test",
1618                    status);
1619     clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1620     test_error_ret(status, "Failed to create reader kernel for capacity test",
1621                    status);
1622 
1623     cl_ulong max_size_ret = 0;
1624     const size_t arr_size = 10 * 1024 * 1024;
1625     cl_uchar* buffer = (cl_uchar*)align_malloc(arr_size, ALIGNMENT);
1626 
1627     if (!buffer)
1628     {
1629         log_error("Failed to allocate buffer\n");
1630         return 1;
1631     }
1632 
1633     clMemWrapper max_size_ret_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1634                                                  sizeof(max_size_ret),
1635                                                  &max_size_ret, &status));
1636     test_error_ret(status, "Failed to allocate size query buffer", status);
1637     clMemWrapper buffer_mem(
1638         clCreateBuffer(context, CL_MEM_READ_WRITE, arr_size, 0, &status));
1639     test_error_ret(status, "Failed to allocate write buffer", status);
1640 
1641     status = clSetKernelArg(get_max_size, 0, sizeof(cl_mem), &max_size_ret_mem);
1642     test_error_ret(status, "set arg", status);
1643     status = clSetKernelArg(writer, 0, sizeof(cl_mem), &buffer_mem);
1644     test_error_ret(status, "set arg", status);
1645     status = clSetKernelArg(reader, 0, sizeof(cl_mem), &buffer_mem);
1646     test_error_ret(status, "set arg", status);
1647 
1648     // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
1649     const size_t one = 1;
1650     status =
1651         clEnqueueNDRangeKernel(queue, get_max_size, 1, 0, &one, 0, 0, 0, 0);
1652     test_error_ret(status, "enqueue size query", status);
1653     status = clFinish(queue);
1654     test_error_ret(status, "finish", status);
1655 
1656     cl_uchar* max_size_ret_ptr = (cl_uchar*)clEnqueueMapBuffer(
1657         queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret),
1658         0, 0, 0, 0);
1659     if (max_size_ret != max_size)
1660     {
1661         log_error("Error: preprocessor definition for "
1662                   "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not "
1663                   "match device query value %llu\n",
1664                   (unsigned long long)max_size_ret,
1665                   (unsigned long long)max_size);
1666         err |= 1;
1667     }
1668     clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
1669 
1670     RandomSeed rand_state_write(gRandomSeed);
1671     for (size_t offset = 0; offset < max_size; offset += arr_size)
1672     {
1673         size_t curr_size =
1674             (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1675         l_set_randomly(buffer, curr_size, rand_state_write);
1676         status = clEnqueueWriteBuffer(queue, buffer_mem, CL_TRUE, 0, curr_size,
1677                                       buffer, 0, 0, 0);
1678         test_error_ret(status, "populate buffer_mem object", status);
1679         status = clEnqueueNDRangeKernel(queue, writer, 1, &offset, &curr_size,
1680                                         0, 0, 0, 0);
1681         test_error_ret(status, "enqueue writer", status);
1682         status = clFinish(queue);
1683         test_error_ret(status, "finish", status);
1684     }
1685 
1686     RandomSeed rand_state_read(gRandomSeed);
1687     for (size_t offset = 0; offset < max_size; offset += arr_size)
1688     {
1689         size_t curr_size =
1690             (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1691         status = clEnqueueNDRangeKernel(queue, reader, 1, &offset, &curr_size,
1692                                         0, 0, 0, 0);
1693         test_error_ret(status, "enqueue reader", status);
1694         cl_uchar* read_mem_ptr = (cl_uchar*)clEnqueueMapBuffer(
1695             queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0,
1696             &status);
1697         test_error_ret(status, "map read data", status);
1698         l_set_randomly(buffer, curr_size, rand_state_read);
1699         err |= l_compare("capacity", buffer, read_mem_ptr, curr_size, ti);
1700         clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
1701     }
1702 
1703     if (CL_SUCCESS == err)
1704     {
1705         log_info("OK\n");
1706         FLUSH;
1707     }
1708     align_free(buffer);
1709 
1710     return err;
1711 }
1712 
1713 
1714 // Check operation on a user type.
l_user_type(cl_device_id device,cl_context context,cl_command_queue queue,bool separate_compile)1715 static int l_user_type(cl_device_id device, cl_context context,
1716                        cl_command_queue queue, bool separate_compile)
1717 {
1718     int err = CL_SUCCESS;
1719     // Just test one type.
1720     const TypeInfo ti(l_find_type("uchar"));
1721     log_info(" l_user_type %s...",
1722              separate_compile ? "separate compilation"
1723                               : "single source compilation");
1724 
1725     if (separate_compile && !l_linker_available)
1726     {
1727         log_info("Separate compilation is not supported. Skipping test\n");
1728         return err;
1729     }
1730 
1731     const char type_src[] =
1732         "typedef struct { uchar c; uint i; } my_struct_t;\n\n";
1733     const char def_src[] = "my_struct_t var = { 'a', 42 };\n\n";
1734     const char decl_src[] = "extern my_struct_t var;\n\n";
1735 
1736     // Don't use a host struct. We can't guarantee that the host
1737     // compiler has the same structure layout as the device compiler.
1738     const char writer_src[] = "kernel void writer( uchar c, uint i ) {\n"
1739                               "  var.c = c;\n"
1740                               "  var.i = i;\n"
1741                               "}\n\n";
1742     const char reader_src[] =
1743         "kernel void reader( global uchar* C, global uint* I ) {\n"
1744         "  *C = var.c;\n"
1745         "  *I = var.i;\n"
1746         "}\n\n";
1747 
1748     clProgramWrapper program;
1749 
1750     const std::string options = get_build_options(device);
1751 
1752     if (separate_compile)
1753     {
1754         // Separate compilation flow.
1755         StringTable wksrc;
1756         wksrc.add(type_src);
1757         wksrc.add(def_src);
1758         wksrc.add(writer_src);
1759 
1760         StringTable rksrc;
1761         rksrc.add(type_src);
1762         rksrc.add(decl_src);
1763         rksrc.add(reader_src);
1764 
1765         int status = CL_SUCCESS;
1766         clProgramWrapper writer_program(clCreateProgramWithSource(
1767             context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status));
1768         test_error_ret(status,
1769                        "Failed to create writer program for user type test",
1770                        status);
1771 
1772         status = clCompileProgram(writer_program, 1, &device, options.c_str(),
1773                                   0, 0, 0, 0, 0);
1774         if (check_error(
1775                 status,
1776                 "Failed to compile writer program for user type test (%s)",
1777                 IGetErrorString(status)))
1778         {
1779             print_build_log(writer_program, 1, &device, wksrc.num_str(),
1780                             wksrc.strs(), wksrc.lengths(), options.c_str());
1781             return status;
1782         }
1783 
1784         clProgramWrapper reader_program(clCreateProgramWithSource(
1785             context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status));
1786         test_error_ret(status,
1787                        "Failed to create reader program for user type test",
1788                        status);
1789 
1790         status = clCompileProgram(reader_program, 1, &device, options.c_str(),
1791                                   0, 0, 0, 0, 0);
1792         if (check_error(
1793                 status,
1794                 "Failed to compile reader program for user type test (%s)",
1795                 IGetErrorString(status)))
1796         {
1797             print_build_log(reader_program, 1, &device, rksrc.num_str(),
1798                             rksrc.strs(), rksrc.lengths(), options.c_str());
1799             return status;
1800         }
1801 
1802         cl_program progs[2];
1803         progs[0] = writer_program;
1804         progs[1] = reader_program;
1805 
1806         program =
1807             clLinkProgram(context, 1, &device, "", 2, progs, 0, 0, &status);
1808         if (check_error(status,
1809                         "Failed to link program for user type test (%s)",
1810                         IGetErrorString(status)))
1811         {
1812             print_build_log(program, 1, &device, 0, NULL, NULL, "");
1813             return status;
1814         }
1815     }
1816     else
1817     {
1818         // Single compilation flow.
1819         StringTable ksrc;
1820         ksrc.add(type_src);
1821         ksrc.add(def_src);
1822         ksrc.add(writer_src);
1823         ksrc.add(reader_src);
1824 
1825         int status = CL_SUCCESS;
1826 
1827         status = create_single_kernel_helper_create_program(
1828             context, &program, ksrc.num_str(), ksrc.strs(), options.c_str());
1829         if (check_error(status,
1830                         "Failed to build program for user type test (%s)",
1831                         IGetErrorString(status)))
1832         {
1833             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
1834                             ksrc.lengths(), options.c_str());
1835             return status;
1836         }
1837 
1838         status = clBuildProgram(program, 1, &device, options.c_str(), 0, 0);
1839         if (check_error(status,
1840                         "Failed to compile program for user type test (%s)",
1841                         IGetErrorString(status)))
1842         {
1843             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
1844                             ksrc.lengths(), options.c_str());
1845             return status;
1846         }
1847     }
1848 
1849 
1850     // Check size query.
1851     size_t used_bytes = 0;
1852     int status = clGetProgramBuildInfo(
1853         program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1854         sizeof(used_bytes), &used_bytes, 0);
1855     test_error_ret(status, "Failed to query global variable total size",
1856                    status);
1857     size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
1858     if (used_bytes < expected_size)
1859     {
1860         log_error("Error: program query for global variable total size query "
1861                   "failed: Expected at least %llu but got %llu\n",
1862                   (unsigned long long)expected_size,
1863                   (unsigned long long)used_bytes);
1864         err |= 1;
1865     }
1866 
1867     // Prepare to execute
1868     clKernelWrapper writer(clCreateKernel(program, "writer", &status));
1869     test_error_ret(status, "Failed to create writer kernel for user type test",
1870                    status);
1871     clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1872     test_error_ret(status, "Failed to create reader kernel for user type test",
1873                    status);
1874 
1875     // Set up data.
1876     cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
1877     cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
1878 
1879     clMemWrapper uchar_mem(clCreateBuffer(
1880         context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status));
1881     test_error_ret(status, "Failed to allocate uchar buffer", status);
1882     clMemWrapper uint_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1883                                          sizeof(cl_uint), uint_data, &status));
1884     test_error_ret(status, "Failed to allocate uint buffer", status);
1885 
1886     status = clSetKernelArg(reader, 0, sizeof(cl_mem), &uchar_mem);
1887     test_error_ret(status, "set arg", status);
1888     status = clSetKernelArg(reader, 1, sizeof(cl_mem), &uint_mem);
1889     test_error_ret(status, "set arg", status);
1890 
1891     cl_uchar expected_uchar = 'a';
1892     cl_uint expected_uint = 42;
1893     for (unsigned iter = 0; iter < 5; iter++)
1894     { // Must go around at least twice
1895         // Read back data
1896         *uchar_data = -1;
1897         *uint_data = -1;
1898         const size_t one = 1;
1899         status = clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1900         test_error_ret(status, "enqueue reader", status);
1901         status = clFinish(queue);
1902         test_error_ret(status, "finish", status);
1903 
1904         cl_uchar* uint_data_ptr =
1905             (cl_uchar*)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ,
1906                                           0, sizeof(cl_uint), 0, 0, 0, 0);
1907         cl_uchar* uchar_data_ptr = (cl_uchar*)clEnqueueMapBuffer(
1908             queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0,
1909             0, 0);
1910 
1911         if (expected_uchar != *uchar_data || expected_uint != *uint_data)
1912         {
1913             log_error(
1914                 "FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
1915                 iter, (int)*uchar_data, *uint_data, (int)expected_uchar,
1916                 expected_uint);
1917             err |= 1;
1918         }
1919 
1920         clEnqueueUnmapMemObject(queue, uint_mem, uint_data_ptr, 0, 0, 0);
1921         clEnqueueUnmapMemObject(queue, uchar_mem, uchar_data_ptr, 0, 0, 0);
1922 
1923         // Mutate the data.
1924         expected_uchar++;
1925         expected_uint++;
1926 
1927         // Write the new values into persistent store.
1928         *uchar_data = expected_uchar;
1929         *uint_data = expected_uint;
1930         status = clSetKernelArg(writer, 0, sizeof(cl_uchar), uchar_data);
1931         test_error_ret(status, "set arg", status);
1932         status = clSetKernelArg(writer, 1, sizeof(cl_uint), uint_data);
1933         test_error_ret(status, "set arg", status);
1934         status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
1935         test_error_ret(status, "enqueue writer", status);
1936         status = clFinish(queue);
1937         test_error_ret(status, "finish", status);
1938     }
1939 
1940     if (CL_SUCCESS == err)
1941     {
1942         log_info("OK\n");
1943         FLUSH;
1944     }
1945     align_free(uchar_data);
1946     align_free(uint_data);
1947     return err;
1948 }
1949 
get_build_options(cl_device_id device)1950 static std::string get_build_options(cl_device_id device)
1951 {
1952     std::string options = "-cl-std=CL";
1953     Version latest_cl_c_version = get_device_latest_cl_c_version(device);
1954     options += latest_cl_c_version.to_string();
1955     return options;
1956 }
1957 
1958 // Determines whether its valid to skip this test based on the driver version
1959 // and the features it optionally supports.
1960 // Whether the test should be skipped is writen into the out paramter skip.
1961 // The check returns an error code for the clDeviceInfo query.
should_skip(cl_device_id device,cl_bool & skip)1962 static cl_int should_skip(cl_device_id device, cl_bool& skip)
1963 {
1964     // Assume we can't skip to begin with.
1965     skip = CL_FALSE;
1966 
1967     // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
1968     // to test for 3.0 since that is when program scope global variables become
1969     // optional.
1970     if (get_device_cl_version(device) >= Version(3, 0))
1971     {
1972         size_t max_global_variable_size{};
1973         test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
1974                                    sizeof(max_global_variable_size),
1975                                    &max_global_variable_size, nullptr),
1976                    "clGetDeviceInfo failed");
1977         skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
1978     }
1979     return CL_SUCCESS;
1980 }
1981 
1982 ////////////////////
1983 // Global functions
1984 
1985 
1986 // Test support for variables at program scope. Miscellaneous
test_progvar_prog_scope_misc(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1987 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context,
1988                                  cl_command_queue queue, int num_elements)
1989 {
1990     cl_bool skip{ CL_FALSE };
1991     auto error = should_skip(device, skip);
1992     if (CL_SUCCESS != error)
1993     {
1994         return TEST_FAIL;
1995     }
1996     if (skip)
1997     {
1998         log_info("Skipping progvar_prog_scope_misc since it is optionally not "
1999                  "supported on this device\n");
2000         return TEST_SKIPPED_ITSELF;
2001     }
2002     size_t max_size = 0;
2003     size_t pref_size = 0;
2004 
2005     cl_int err = CL_SUCCESS;
2006 
2007     err = l_get_device_info(device, &max_size, &pref_size);
2008     err |= l_build_type_table(device);
2009 
2010     err |= l_capacity(device, context, queue, max_size);
2011     err |= l_user_type(device, context, queue, false);
2012     err |= l_user_type(device, context, queue, true);
2013 
2014     return err;
2015 }
2016 
2017 
2018 // Test support for variables at program scope. Unitialized data
test_progvar_prog_scope_uninit(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2019 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context,
2020                                    cl_command_queue queue, int num_elements)
2021 {
2022     cl_bool skip{ CL_FALSE };
2023     auto error = should_skip(device, skip);
2024     if (CL_SUCCESS != error)
2025     {
2026         return TEST_FAIL;
2027     }
2028     if (skip)
2029     {
2030         log_info(
2031             "Skipping progvar_prog_scope_uninit since it is optionally not "
2032             "supported on this device\n");
2033         return TEST_SKIPPED_ITSELF;
2034     }
2035     size_t max_size = 0;
2036     size_t pref_size = 0;
2037 
2038     cl_int err = CL_SUCCESS;
2039 
2040     err = l_get_device_info(device, &max_size, &pref_size);
2041     err |= l_build_type_table(device);
2042 
2043     err |= l_write_read(device, context, queue);
2044 
2045     return err;
2046 }
2047 
2048 // Test support for variables at program scope. Initialized data.
test_progvar_prog_scope_init(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2049 int test_progvar_prog_scope_init(cl_device_id device, cl_context context,
2050                                  cl_command_queue queue, int num_elements)
2051 {
2052     cl_bool skip{ CL_FALSE };
2053     auto error = should_skip(device, skip);
2054     if (CL_SUCCESS != error)
2055     {
2056         return TEST_FAIL;
2057     }
2058     if (skip)
2059     {
2060         log_info("Skipping progvar_prog_scope_init since it is optionally not "
2061                  "supported on this device\n");
2062         return TEST_SKIPPED_ITSELF;
2063     }
2064     size_t max_size = 0;
2065     size_t pref_size = 0;
2066 
2067     cl_int err = CL_SUCCESS;
2068 
2069     err = l_get_device_info(device, &max_size, &pref_size);
2070     err |= l_build_type_table(device);
2071 
2072     err |= l_init_write_read(device, context, queue);
2073 
2074     return err;
2075 }
2076 
2077 
2078 // A simple test for support of static variables inside a kernel.
test_progvar_func_scope(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2079 int test_progvar_func_scope(cl_device_id device, cl_context context,
2080                             cl_command_queue queue, int num_elements)
2081 {
2082     cl_bool skip{ CL_FALSE };
2083     auto error = should_skip(device, skip);
2084     if (CL_SUCCESS != error)
2085     {
2086         return TEST_FAIL;
2087     }
2088     if (skip)
2089     {
2090         log_info("Skipping progvar_func_scope since it is optionally not "
2091                  "supported on this device\n");
2092         return TEST_SKIPPED_ITSELF;
2093     }
2094 
2095     cl_int err = CL_SUCCESS;
2096 
2097     // Deliberately have two variables with the same name but in different
2098     // scopes.
2099     // Also, use a large initialized structure in both cases.
2100     // clang-format off
2101     const char prog_src[] =
2102         "typedef struct { char c; int16 i; } mystruct_t;\n"
2103         "kernel void test_bump(global int* value, int which) {\n"
2104         "  if (which) {\n"
2105         // Explicit address space.
2106         // Last element set to 0
2107         "     static global mystruct_t persistent = { 'a', (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
2108         "     *value = persistent.i.sf++;\n"
2109         "  } else {\n"
2110         // Implicitly global
2111         // Last element set to 100
2112         "     static mystruct_t persistent = { 'b' , (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
2113         "     *value = persistent.i.sf++;\n"
2114         "  }\n"
2115         "}\n";
2116     // clang-format on
2117 
2118     StringTable ksrc;
2119     ksrc.add(prog_src);
2120 
2121     int status = CL_SUCCESS;
2122     clProgramWrapper program;
2123     clKernelWrapper test_bump;
2124 
2125     status =
2126         create_single_kernel_helper(context, &program, &test_bump,
2127                                     ksrc.num_str(), ksrc.strs(), "test_bump");
2128     test_error_ret(status,
2129                    "Failed to create program for function static variable test",
2130                    status);
2131 
2132     // Check size query.
2133     size_t used_bytes = 0;
2134     status = clGetProgramBuildInfo(program, device,
2135                                    CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
2136                                    sizeof(used_bytes), &used_bytes, 0);
2137     test_error_ret(status, "Failed to query global variable total size",
2138                    status);
2139     size_t expected_size = 2 * sizeof(cl_int); // Two ints.
2140     if (used_bytes < expected_size)
2141     {
2142         log_error("Error: program query for global variable total size query "
2143                   "failed: Expected at least %llu but got %llu\n",
2144                   (unsigned long long)expected_size,
2145                   (unsigned long long)used_bytes);
2146         err |= 1;
2147     }
2148 
2149     // Prepare the data.
2150     cl_int counter_value = 0;
2151     clMemWrapper counter_value_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
2152                                                   sizeof(counter_value),
2153                                                   &counter_value, &status));
2154     test_error_ret(status, "Failed to allocate counter query buffer", status);
2155 
2156     status = clSetKernelArg(test_bump, 0, sizeof(cl_mem), &counter_value_mem);
2157     test_error_ret(status, "set arg", status);
2158 
2159     // Go a few rounds, alternating between the two counters in the kernel.
2160 
2161     // Same as initial values in kernel.
2162     // But "true" which increments the 0-based counter, and "false" which
2163     // increments the 100-based counter.
2164     cl_int expected_counter[2] = { 100, 0 };
2165 
2166     const size_t one = 1;
2167     for (int iround = 0; iround < 5; iround++)
2168     { // Must go at least twice around
2169         for (int iwhich = 0; iwhich < 2; iwhich++)
2170         { // Cover both counters
2171             status = clSetKernelArg(test_bump, 1, sizeof(iwhich), &iwhich);
2172             test_error_ret(status, "set arg", status);
2173             status = clEnqueueNDRangeKernel(queue, test_bump, 1, 0, &one, 0, 0,
2174                                             0, 0);
2175             test_error_ret(status, "enqueue test_bump", status);
2176             status = clFinish(queue);
2177             test_error_ret(status, "finish", status);
2178 
2179             cl_uchar* counter_value_ptr = (cl_uchar*)clEnqueueMapBuffer(
2180                 queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0,
2181                 sizeof(counter_value), 0, 0, 0, 0);
2182 
2183             if (counter_value != expected_counter[iwhich])
2184             {
2185                 log_error(
2186                     "Error: Round %d on counter %d: Expected %d but got %d\n",
2187                     iround, iwhich, expected_counter[iwhich], counter_value);
2188                 err |= 1;
2189             }
2190             expected_counter[iwhich]++; // Emulate behaviour of the kernel.
2191 
2192             clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr,
2193                                     0, 0, 0);
2194         }
2195     }
2196 
2197     if (CL_SUCCESS == err)
2198     {
2199         log_info("OK\n");
2200         FLUSH;
2201     }
2202 
2203     return err;
2204 }
2205