1 //
2 // Copyright (c) 2017, 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 // Bug: Missing in spec: atomic_intptr_t is always supported if device is
19 // 32-bits.
20 // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
21
22 #define FLUSH fflush(stdout)
23
24 #define MAX_STR 16 * 1024
25
26 #define ALIGNMENT 128
27
28
29 // NUM_ROUNDS must be at least 1.
30 // It determines how many sets of random data we push through the global
31 // variables.
32 #define NUM_ROUNDS 1
33
34 // This is a shared property of the writer and reader kernels.
35 #define NUM_TESTED_VALUES 5
36
37 // TODO: pointer-to-half (and its vectors)
38 // TODO: union of...
39
40 #include <algorithm>
41 #include <cstdio>
42 #include <cstdlib>
43 #include <cstring>
44 #include <string>
45 #include <vector>
46 #include <cassert>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include "harness/typeWrappers.h"
50 #include "harness/errorHelpers.h"
51 #include "harness/mt19937.h"
52 #include "procs.h"
53
54
55 ////////////////////
56 // Device capabilities
57 static int l_has_double = 0;
58 static int l_has_half = 0;
59 static int l_64bit_device = 0;
60 static int l_has_int64_atomics = 0;
61 static int l_has_intptr_atomics = 0;
62 static int l_has_cles_int64 = 0;
63
64 static int l_host_is_big_endian = 1;
65
66 static size_t l_max_global_id0 = 0;
67 static cl_bool l_linker_available = false;
68
69 #define check_error(errCode, msg, ...) \
70 ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", \
71 ##__VA_ARGS__, __FILE__, __LINE__), \
72 1) \
73 : 0)
74
75 ////////////////////
76 // Info about types we can use for program scope variables.
77
78
79 class TypeInfo {
80
81 public:
TypeInfo()82 TypeInfo()
83 : name(""), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
84 m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
85 m_size(0), m_value_size(0), m_buf_elem_type("")
86 {}
TypeInfo(const char * name_arg)87 TypeInfo(const char* name_arg)
88 : name(name_arg), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
89 m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
90 m_size(0), m_value_size(0), m_buf_elem_type(name_arg)
91 {}
92
93 // Vectors
TypeInfo(TypeInfo * elem_type,int num_elem)94 TypeInfo(TypeInfo* elem_type, int num_elem)
95 : m_elem_type(elem_type), m_num_elem(num_elem), m_is_vecbase(false),
96 m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false)
97 {
98 char
99 the_name[10]; // long enough for longest vector type name "double16"
100 snprintf(the_name, sizeof(the_name), "%s%d",
101 elem_type->get_name_c_str(), m_num_elem);
102 this->name = std::string(the_name);
103 this->m_buf_elem_type = std::string(the_name);
104 this->m_value_size = num_elem * elem_type->get_size();
105 if (m_num_elem == 3)
106 {
107 this->m_size = 4 * elem_type->get_size();
108 }
109 else
110 {
111 this->m_size = num_elem * elem_type->get_size();
112 }
113 }
get_name(void) const114 const std::string& get_name(void) const { return name; }
get_name_c_str(void) const115 const char* get_name_c_str(void) const { return name.c_str(); }
set_vecbase(void)116 TypeInfo& set_vecbase(void)
117 {
118 this->m_is_vecbase = true;
119 return *this;
120 }
set_atomic(void)121 TypeInfo& set_atomic(void)
122 {
123 this->m_is_atomic = true;
124 return *this;
125 }
set_like_size_t(void)126 TypeInfo& set_like_size_t(void)
127 {
128 this->m_is_like_size_t = true;
129 this->set_size(l_64bit_device ? 8 : 4);
130 this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
131 return *this;
132 }
set_bool(void)133 TypeInfo& set_bool(void)
134 {
135 this->m_is_bool = true;
136 return *this;
137 }
set_size(size_t n)138 TypeInfo& set_size(size_t n)
139 {
140 this->m_value_size = this->m_size = n;
141 return *this;
142 }
set_buf_elem_type(const char * name)143 TypeInfo& set_buf_elem_type(const char* name)
144 {
145 this->m_buf_elem_type = std::string(name);
146 return *this;
147 }
148
elem_type(void) const149 const TypeInfo* elem_type(void) const { return m_elem_type; }
num_elem(void) const150 int num_elem(void) const { return m_num_elem; }
151
is_vecbase(void) const152 bool is_vecbase(void) const { return m_is_vecbase; }
is_atomic(void) const153 bool is_atomic(void) const { return m_is_atomic; }
is_atomic_64bit(void) const154 bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; }
is_like_size_t(void) const155 bool is_like_size_t(void) const { return m_is_like_size_t; }
is_bool(void) const156 bool is_bool(void) const { return m_is_bool; }
get_size(void) const157 size_t get_size(void) const { return m_size; }
get_value_size(void) const158 size_t get_value_size(void) const { return m_value_size; }
159
160 // When passing values of this type to a kernel, what buffer type
161 // should be used?
get_buf_elem_type(void) const162 const char* get_buf_elem_type(void) const
163 {
164 return m_buf_elem_type.c_str();
165 }
166
as_string(const cl_uchar * value_ptr) const167 std::string as_string(const cl_uchar* value_ptr) const
168 {
169 // This method would be shorter if I had a real handle to element
170 // vector type.
171 if (this->is_bool())
172 {
173 std::string result(name);
174 result += "<";
175 result += (*value_ptr ? "true" : "false");
176 result += ", ";
177 char buf[10];
178 sprintf(buf, "%02x", *value_ptr);
179 result += buf;
180 result += ">";
181 return result;
182 }
183 else if (this->num_elem())
184 {
185 std::string result(name);
186 result += "<";
187 for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
188 {
189 char buf[MAX_STR];
190 if (ielem) result += ", ";
191 for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size();
192 ibyte++)
193 {
194 sprintf(buf + 2 * ibyte, "%02x",
195 value_ptr[ielem * this->m_elem_type->get_size()
196 + ibyte]);
197 }
198 result += buf;
199 }
200 result += ">";
201 return result;
202 }
203 else
204 {
205 std::string result(name);
206 result += "<";
207 char buf[MAX_STR];
208 for (unsigned ibyte = 0; ibyte < this->get_size(); ibyte++)
209 {
210 sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]);
211 }
212 result += buf;
213 result += ">";
214 return result;
215 }
216 }
217
218 // Initialize the given buffer to a constant value initialized as if it
219 // were from the INIT_VAR macro below.
220 // Only needs to support values 0 and 1.
init(cl_uchar * buf,cl_uchar val) const221 void init(cl_uchar* buf, cl_uchar val) const
222 {
223 if (this->num_elem())
224 {
225 for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
226 {
227 // Delegate!
228 this->init_elem(
229 buf + ielem * this->get_value_size() / this->num_elem(),
230 val);
231 }
232 }
233 else
234 {
235 init_elem(buf, val);
236 }
237 }
238
239 private:
init_elem(cl_uchar * buf,cl_uchar val) const240 void init_elem(cl_uchar* buf, cl_uchar val) const
241 {
242 size_t elem_size = this->num_elem()
243 ? this->get_value_size() / this->num_elem()
244 : this->get_size();
245 memset(buf, 0, elem_size);
246 if (val)
247 {
248 if (strstr(name.c_str(), "float"))
249 {
250 *(float*)buf = (float)val;
251 return;
252 }
253 if (strstr(name.c_str(), "double"))
254 {
255 *(double*)buf = (double)val;
256 return;
257 }
258 if (this->is_bool())
259 {
260 *buf = (bool)val;
261 return;
262 }
263
264 // Write a single character value to the correct spot,
265 // depending on host endianness.
266 if (l_host_is_big_endian)
267 *(buf + elem_size - 1) = (cl_uchar)val;
268 else
269 *buf = (cl_uchar)val;
270 }
271 }
272
273 public:
dump(FILE * fp) const274 void dump(FILE* fp) const
275 {
276 fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size,
277 (int)m_value_size, m_buf_elem_type.c_str());
278 if (this->m_elem_type)
279 fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(),
280 this->num_elem());
281 if (this->m_is_vecbase) fprintf(fp, " vecbase");
282 if (this->m_is_bool) fprintf(fp, " bool");
283 if (this->m_is_like_size_t) fprintf(fp, " like-size_t");
284 if (this->m_is_atomic) fprintf(fp, " atomic");
285 fprintf(fp, "\n");
286 fflush(fp);
287 }
288
289 private:
290 std::string name;
291 TypeInfo* m_elem_type;
292 int m_num_elem;
293 bool m_is_vecbase;
294 bool m_is_atomic;
295 bool m_is_like_size_t;
296 bool m_is_bool;
297 size_t m_size; // Number of bytes of storage occupied by this type.
298 size_t m_value_size; // Number of bytes of value significant for this type.
299 // Differs for vec3.
300
301 // When passing values of this type to a kernel, what buffer type
302 // should be used?
303 // For most types, it's just itself.
304 // Use a std::string so I don't have to make a copy constructor.
305 std::string m_buf_elem_type;
306 };
307
308
309 #define NUM_SCALAR_TYPES \
310 (8 + 2) // signed and unsigned integral types, float and double
311 #define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
312 #define NUM_PLAIN_TYPES \
313 5 /*boolean and size_t family */ \
314 + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES \
315 + 10 /* atomic types */
316
317 // Need room for plain, array, pointer, struct
318 #define MAX_TYPES (4 * NUM_PLAIN_TYPES)
319
320 static TypeInfo type_info[MAX_TYPES];
321 static int num_type_info = 0; // Number of valid entries in type_info[]
322
323
324 // A helper class to form kernel source arguments for clCreateProgramWithSource.
325 class StringTable {
326 public:
StringTable()327 StringTable(): m_strings(), m_c_strs(NULL), m_lengths(NULL), m_frozen(false)
328 {}
~StringTable()329 ~StringTable() { release_frozen(); }
330
add(std::string s)331 void add(std::string s)
332 {
333 release_frozen();
334 m_strings.push_back(s);
335 }
336
num_str()337 const size_t num_str()
338 {
339 freeze();
340 return m_strings.size();
341 }
strs()342 const char** strs()
343 {
344 freeze();
345 return m_c_strs;
346 }
lengths()347 const size_t* lengths()
348 {
349 freeze();
350 return m_lengths;
351 }
352
353 private:
freeze(void)354 void freeze(void)
355 {
356 if (!m_frozen)
357 {
358 release_frozen();
359
360 m_c_strs =
361 (const char**)malloc(sizeof(const char*) * m_strings.size());
362 m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size());
363 assert(m_c_strs);
364 assert(m_lengths);
365
366 for (size_t i = 0; i < m_strings.size(); i++)
367 {
368 m_c_strs[i] = m_strings[i].c_str();
369 m_lengths[i] = strlen(m_c_strs[i]);
370 }
371
372 m_frozen = true;
373 }
374 }
release_frozen(void)375 void release_frozen(void)
376 {
377 if (m_c_strs)
378 {
379 free(m_c_strs);
380 m_c_strs = 0;
381 }
382 if (m_lengths)
383 {
384 free(m_lengths);
385 m_lengths = 0;
386 }
387 m_frozen = false;
388 }
389
390 typedef std::vector<std::string> strlist_t;
391 strlist_t m_strings;
392 const char** m_c_strs;
393 size_t* m_lengths;
394 bool m_frozen;
395 };
396
397
398 ////////////////////
399 // File scope function declarations
400
401 static void l_load_abilities(cl_device_id device);
402 static const char* l_get_fp64_pragma(void);
403 static const char* l_get_cles_int64_pragma(void);
404 static int l_build_type_table(cl_device_id device);
405
406 static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
407 size_t* pref_size_ret);
408
409 static void l_set_randomly(cl_uchar* buf, size_t buf_size,
410 RandomSeed& rand_state);
411 static int l_compare(const char* test_name, const cl_uchar* expected,
412 const cl_uchar* received, size_t num_values,
413 const TypeInfo& ti);
414 static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
415 unsigned src_idx, const TypeInfo& ti);
416
417 static std::string conversion_functions(const TypeInfo& ti);
418 static std::string global_decls(const TypeInfo& ti, bool with_init);
419 static std::string global_check_function(const TypeInfo& ti);
420 static std::string writer_function(const TypeInfo& ti);
421 static std::string reader_function(const TypeInfo& ti);
422
423 static int l_write_read(cl_device_id device, cl_context context,
424 cl_command_queue queue);
425 static int l_write_read_for_type(cl_device_id device, cl_context context,
426 cl_command_queue queue, const TypeInfo& ti,
427 RandomSeed& rand_state);
428
429 static int l_init_write_read(cl_device_id device, cl_context context,
430 cl_command_queue queue);
431 static int l_init_write_read_for_type(cl_device_id device, cl_context context,
432 cl_command_queue queue,
433 const TypeInfo& ti,
434 RandomSeed& rand_state);
435
436 static int l_capacity(cl_device_id device, cl_context context,
437 cl_command_queue queue, size_t max_size);
438 static int l_user_type(cl_device_id device, cl_context context,
439 cl_command_queue queue, bool separate_compile);
440
441 static std::string get_build_options(cl_device_id device);
442
443 ////////////////////
444 // File scope function definitions
445
print_build_log(cl_program program,cl_uint num_devices,cl_device_id * device_list,cl_uint count,const char ** strings,const size_t * lengths,const char * options)446 static cl_int print_build_log(cl_program program, cl_uint num_devices,
447 cl_device_id* device_list, cl_uint count,
448 const char** strings, const size_t* lengths,
449 const char* options)
450 {
451 cl_uint i;
452 cl_int error;
453 BufferOwningPtr<cl_device_id> devices;
454
455 if (num_devices == 0 || device_list == NULL)
456 {
457 error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
458 sizeof(num_devices), &num_devices, NULL);
459 test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
460
461 device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
462 devices.reset(device_list);
463
464 memset(device_list, 0, sizeof(cl_device_id) * num_devices);
465
466 error = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
467 sizeof(cl_device_id) * num_devices,
468 device_list, NULL);
469 test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
470 }
471
472 cl_uint z;
473 bool sourcePrinted = false;
474
475 for (z = 0; z < num_devices; z++)
476 {
477 char deviceName[4096] = "";
478 error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME,
479 sizeof(deviceName), deviceName, NULL);
480 check_error(error,
481 "Device \"%d\" failed to return a name. clGetDeviceInfo "
482 "CL_DEVICE_NAME failed",
483 z);
484
485 cl_build_status buildStatus;
486 error = clGetProgramBuildInfo(program, device_list[z],
487 CL_PROGRAM_BUILD_STATUS,
488 sizeof(buildStatus), &buildStatus, NULL);
489 check_error(error,
490 "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
491
492 if (buildStatus != CL_BUILD_SUCCESS)
493 {
494 if (!sourcePrinted)
495 {
496 log_error("Build options: %s\n", options);
497 if (count && strings)
498 {
499 log_error("Original source is: ------------\n");
500 for (i = 0; i < count; i++) log_error("%s", strings[i]);
501 }
502 sourcePrinted = true;
503 }
504
505 char statusString[64] = "";
506 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
507 sprintf(statusString, "CL_BUILD_SUCCESS");
508 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
509 sprintf(statusString, "CL_BUILD_NONE");
510 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
511 sprintf(statusString, "CL_BUILD_ERROR");
512 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
513 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
514 else
515 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
516
517 log_error("Build not successful for device \"%s\", status: %s\n",
518 deviceName, statusString);
519
520 size_t paramSize = 0;
521 error = clGetProgramBuildInfo(program, device_list[z],
522 CL_PROGRAM_BUILD_LOG, 0, NULL,
523 ¶mSize);
524 if (check_error(
525 error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"))
526 break;
527
528 std::string log;
529 log.resize(paramSize / sizeof(char));
530
531 error = clGetProgramBuildInfo(program, device_list[z],
532 CL_PROGRAM_BUILD_LOG, paramSize,
533 &log[0], NULL);
534 if (check_error(error,
535 "Device %d (%s) failed to return a build log", z,
536 deviceName))
537 break;
538 if (log[0] == 0)
539 log_error("clGetProgramBuildInfo returned an empty log.\n");
540 else
541 {
542 log_error("Build log for device \"%s\":\n", deviceName);
543 log_error("%s\n", log.c_str());
544 }
545 }
546 }
547 return error;
548 }
549
l_load_abilities(cl_device_id device)550 static void l_load_abilities(cl_device_id device)
551 {
552 l_has_half = is_extension_available(device, "cl_khr_fp16");
553 l_has_double = is_extension_available(device, "cl_khr_fp64");
554 l_has_cles_int64 = is_extension_available(device, "cles_khr_int64");
555
556 l_has_int64_atomics =
557 is_extension_available(device, "cl_khr_int64_base_atomics")
558 && is_extension_available(device, "cl_khr_int64_extended_atomics");
559
560 {
561 int status = CL_SUCCESS;
562 cl_uint addr_bits = 32;
563 status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
564 sizeof(addr_bits), &addr_bits, 0);
565 l_64bit_device = (status == CL_SUCCESS && addr_bits == 64);
566 }
567
568 // 32-bit devices always have intptr atomics.
569 l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
570
571 union {
572 char c[4];
573 int i;
574 } probe;
575 probe.i = 1;
576 l_host_is_big_endian = !probe.c[0];
577
578 // Determine max global id.
579 {
580 int status = CL_SUCCESS;
581 cl_uint max_dim = 0;
582 status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
583 sizeof(max_dim), &max_dim, 0);
584 if (check_error(status,
585 "clGetDeviceInfo for "
586 "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
587 return;
588 assert(max_dim > 0);
589 size_t max_id[3];
590 max_id[0] = 0;
591 status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
592 max_dim * sizeof(size_t), &max_id[0], 0);
593 if (check_error(status,
594 "clGetDeviceInfo for "
595 "CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
596 return;
597 l_max_global_id0 = max_id[0];
598 }
599
600 { // Is separate compilation supported?
601 int status = CL_SUCCESS;
602 l_linker_available = false;
603 status =
604 clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
605 sizeof(l_linker_available), &l_linker_available, 0);
606 if (check_error(status,
607 "clGetDeviceInfo for "
608 "CL_DEVICE_LINKER_AVAILABLE failed."))
609 return;
610 }
611 }
612
613
l_get_fp64_pragma(void)614 static const char* l_get_fp64_pragma(void)
615 {
616 return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
617 : "";
618 }
619
l_get_cles_int64_pragma(void)620 static const char* l_get_cles_int64_pragma(void)
621 {
622 return l_has_cles_int64
623 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n"
624 : "";
625 }
626
l_get_int64_atomic_pragma(void)627 static const char* l_get_int64_atomic_pragma(void)
628 {
629 return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"
630 "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n";
631 }
632
l_build_type_table(cl_device_id device)633 static int l_build_type_table(cl_device_id device)
634 {
635 int status = CL_SUCCESS;
636 size_t iscalar = 0;
637 size_t ivecsize = 0;
638 int vecsizes[] = { 2, 3, 4, 8, 16 };
639 const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint",
640 "int", "ulong", "long", "float", "double" };
641 int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
642 const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t",
643 "ptrdiff_t" };
644 const char* atomics[] = {
645 "atomic_int", "atomic_uint", "atomic_long",
646 "atomic_ulong", "atomic_float", "atomic_double",
647 };
648 int atomics_size[] = { 4, 4, 8, 8, 4, 8 };
649 const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t",
650 "atomic_size_t", "atomic_ptrdiff_t" };
651
652 l_load_abilities(device);
653 num_type_info = 0;
654
655 // Boolean.
656 type_info[num_type_info++] =
657 TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar");
658
659 // Vector types, and the related scalar element types.
660 for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar)
661 {
662 if (!gHasLong && strstr(vecbase[iscalar], "long")) continue;
663 if (!l_has_double && strstr(vecbase[iscalar], "double")) continue;
664
665 // Scalar
666 TypeInfo* elem_type = type_info + num_type_info++;
667 *elem_type = TypeInfo(vecbase[iscalar])
668 .set_vecbase()
669 .set_size(vecbase_size[iscalar]);
670
671 // Vector
672 for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]);
673 ivecsize++)
674 {
675 type_info[num_type_info++] =
676 TypeInfo(elem_type, vecsizes[ivecsize]);
677 }
678 }
679
680 // Size_t-like types
681 for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]);
682 ++iscalar)
683 {
684 type_info[num_type_info++] =
685 TypeInfo(like_size_t[iscalar]).set_like_size_t();
686 }
687
688 // Atomic types.
689 for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar)
690 {
691 if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue;
692 if (!(l_has_int64_atomics && l_has_double)
693 && strstr(atomics[iscalar], "double"))
694 continue;
695
696 // The +7 is used to skip over the "atomic_" prefix.
697 const char* buf_type = atomics[iscalar] + 7;
698 type_info[num_type_info++] = TypeInfo(atomics[iscalar])
699 .set_atomic()
700 .set_size(atomics_size[iscalar])
701 .set_buf_elem_type(buf_type);
702 }
703 if (l_has_intptr_atomics)
704 {
705 for (iscalar = 0;
706 iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]);
707 ++iscalar)
708 {
709 type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar])
710 .set_atomic()
711 .set_like_size_t();
712 }
713 }
714
715 assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES
716
717 #if 0
718 for ( size_t i = 0 ; i < num_type_info ; i++ ) {
719 type_info[ i ].dump(stdout);
720 }
721 exit(0);
722 #endif
723
724 return status;
725 }
726
l_find_type(const char * name)727 static const TypeInfo& l_find_type(const char* name)
728 {
729 auto itr =
730 std::find_if(type_info, type_info + num_type_info,
731 [name](TypeInfo& ti) { return ti.get_name() == name; });
732 assert(itr != type_info + num_type_info);
733 return *itr;
734 }
735
736
737 // Populate return parameters for max program variable size, preferred program
738 // variable size.
739
l_get_device_info(cl_device_id device,size_t * max_size_ret,size_t * pref_size_ret)740 static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
741 size_t* pref_size_ret)
742 {
743 int err = CL_SUCCESS;
744 size_t return_size = 0;
745
746 err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
747 sizeof(*max_size_ret), max_size_ret, &return_size);
748 if (err != CL_SUCCESS)
749 {
750 log_error("Error: Failed to get device info for "
751 "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
752 return err;
753 }
754 if (return_size != sizeof(size_t))
755 {
756 log_error("Error: Invalid size %d returned for "
757 "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
758 (int)return_size);
759 return 1;
760 }
761 if (return_size != sizeof(size_t))
762 {
763 log_error("Error: Invalid size %d returned for "
764 "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
765 (int)return_size);
766 return 1;
767 }
768
769 return_size = 0;
770 err =
771 clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
772 sizeof(*pref_size_ret), pref_size_ret, &return_size);
773 if (err != CL_SUCCESS)
774 {
775 log_error("Error: Failed to get device info for "
776 "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",
777 err);
778 return err;
779 }
780 if (return_size != sizeof(size_t))
781 {
782 log_error("Error: Invalid size %d returned for "
783 "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n",
784 (int)return_size);
785 return 1;
786 }
787
788 return CL_SUCCESS;
789 }
790
791
l_set_randomly(cl_uchar * buf,size_t buf_size,RandomSeed & rand_state)792 static void l_set_randomly(cl_uchar* buf, size_t buf_size,
793 RandomSeed& rand_state)
794 {
795 assert(0 == (buf_size % sizeof(cl_uint)));
796 for (size_t i = 0; i < buf_size; i += sizeof(cl_uint))
797 {
798 *((cl_uint*)(buf + i)) = genrand_int32(rand_state);
799 }
800 #if 0
801 for ( size_t i = 0; i < buf_size ; i++ ) {
802 printf("%02x",buf[i]);
803 }
804 printf("\n");
805 #endif
806 }
807
808 // Return num_value values of the given type.
809 // Returns CL_SUCCESS if they compared as equal.
l_compare(const char * test_name,const cl_uchar * expected,const cl_uchar * received,size_t num_values,const TypeInfo & ti)810 static int l_compare(const char* test_name, const cl_uchar* expected,
811 const cl_uchar* received, size_t num_values,
812 const TypeInfo& ti)
813 {
814 // Compare only the valid returned bytes.
815 for (unsigned value_idx = 0; value_idx < num_values; value_idx++)
816 {
817 const cl_uchar* expv = expected + value_idx * ti.get_size();
818 const cl_uchar* gotv = received + value_idx * ti.get_size();
819 if (memcmp(expv, gotv, ti.get_value_size()))
820 {
821 std::string exp_str = ti.as_string(expv);
822 std::string got_str = ti.as_string(gotv);
823 log_error(
824 "Error: %s test for type %s, at index %d: Expected %s got %s\n",
825 test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(),
826 got_str.c_str());
827 return 1;
828 }
829 }
830 return CL_SUCCESS;
831 }
832
833 // Copy a target value from src[idx] to dest[idx]
l_copy(cl_uchar * dest,unsigned dest_idx,const cl_uchar * src,unsigned src_idx,const TypeInfo & ti)834 static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
835 unsigned src_idx, const TypeInfo& ti)
836 {
837 cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
838 const cl_uchar* raw_src = src + src_idx * ti.get_size();
839 memcpy(raw_dest, raw_src, ti.get_value_size());
840
841 return 0;
842 }
843
844
conversion_functions(const TypeInfo & ti)845 static std::string conversion_functions(const TypeInfo& ti)
846 {
847 std::string result;
848 static char buf[MAX_STR];
849 int num_printed = 0;
850 // The atomic types just use the base type.
851 if (ti.is_atomic()
852 || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str()))
853 {
854 // The type is represented in a buffer by itself.
855 num_printed = snprintf(buf, MAX_STR,
856 "%s from_buf(%s a) { return a; }\n"
857 "%s to_buf(%s a) { return a; }\n",
858 ti.get_buf_elem_type(), ti.get_buf_elem_type(),
859 ti.get_buf_elem_type(), ti.get_buf_elem_type());
860 }
861 else
862 {
863 // Just use C-style cast.
864 num_printed = snprintf(buf, MAX_STR,
865 "%s from_buf(%s a) { return (%s)a; }\n"
866 "%s to_buf(%s a) { return (%s)a; }\n",
867 ti.get_name_c_str(), ti.get_buf_elem_type(),
868 ti.get_name_c_str(), ti.get_buf_elem_type(),
869 ti.get_name_c_str(), ti.get_buf_elem_type());
870 }
871 // Add initializations.
872 if (ti.is_atomic())
873 {
874 num_printed += snprintf(buf + num_printed, MAX_STR - num_printed,
875 "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n");
876 }
877 else
878 {
879 // This cast works even if the target type is a vector type.
880 num_printed +=
881 snprintf(buf + num_printed, MAX_STR - num_printed,
882 "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
883 }
884 assert(num_printed < MAX_STR); // or increase MAX_STR
885 result = buf;
886 return result;
887 }
888
global_decls(const TypeInfo & ti,bool with_init)889 static std::string global_decls(const TypeInfo& ti, bool with_init)
890 {
891 const char* tn = ti.get_name_c_str();
892 const char* vol = (ti.is_atomic() ? " volatile " : " ");
893 static char decls[MAX_STR];
894 int num_printed = 0;
895 if (with_init)
896 {
897 const char* decls_template_with_init =
898 "%s %s var = INIT_VAR(0);\n"
899 "global %s %s g_var = INIT_VAR(1);\n"
900 "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
901 "volatile global %s %s* p_var = &a_var[1];\n\n";
902 num_printed = snprintf(decls, sizeof(decls), decls_template_with_init,
903 vol, tn, vol, tn, vol, tn, vol, tn);
904 }
905 else
906 {
907 const char* decls_template_no_init = "%s %s var;\n"
908 "global %s %s g_var;\n"
909 "%s %s a_var[2];\n"
910 "global %s %s* p_var;\n\n";
911 num_printed = snprintf(decls, sizeof(decls), decls_template_no_init,
912 vol, tn, vol, tn, vol, tn, vol, tn);
913 }
914 assert(num_printed < sizeof(decls));
915 (void)num_printed;
916 return std::string(decls);
917 }
918
919 // Return the source code for the "global_check" function for the given type.
920 // This function checks that all program-scope variables have appropriate
921 // initial values when no explicit initializer is used. If all tests pass the
922 // kernel writes a non-zero value to its output argument, otherwise it writes
923 // zero.
global_check_function(const TypeInfo & ti)924 static std::string global_check_function(const TypeInfo& ti)
925 {
926 const std::string type_name = ti.get_buf_elem_type();
927
928 // all() should only be used on vector inputs. For scalar comparison, the
929 // result of the equality operator can be used as a bool value.
930 const bool is_scalar =
931 ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
932 const std::string is_equality_true = is_scalar ? "" : "all";
933
934 std::string code = "kernel void global_check(global int* out) {\n";
935 code += " const " + type_name + " zero = ((" + type_name + ")0);\n";
936 code += " bool status = true;\n";
937 if (ti.is_atomic())
938 {
939 code += " status &= " + is_equality_true
940 + "(atomic_load(&var) == zero);\n";
941 code += " status &= " + is_equality_true
942 + "(atomic_load(&g_var) == zero);\n";
943 code += " status &= " + is_equality_true
944 + "(atomic_load(&a_var[0]) == zero);\n";
945 code += " status &= " + is_equality_true
946 + "(atomic_load(&a_var[1]) == zero);\n";
947 }
948 else
949 {
950 code += " status &= " + is_equality_true + "(var == zero);\n";
951 code += " status &= " + is_equality_true + "(g_var == zero);\n";
952 code += " status &= " + is_equality_true + "(a_var[0] == zero);\n";
953 code += " status &= " + is_equality_true + "(a_var[1] == zero);\n";
954 }
955 code += " status &= (p_var == NULL);\n";
956 code += " *out = status ? 1 : 0;\n";
957 code += "}\n\n";
958
959 return code;
960 }
961
962 // Return the source text for the writer function for the given type.
963 // For types that can't be passed as pointer-to-type as a kernel argument,
964 // use a substitute base type of the same size.
writer_function(const TypeInfo & ti)965 static std::string writer_function(const TypeInfo& ti)
966 {
967 static char writer_src[MAX_STR];
968 int num_printed = 0;
969 if (!ti.is_atomic())
970 {
971 const char* writer_template_normal =
972 "kernel void writer( global %s* src, uint idx ) {\n"
973 " var = from_buf(src[0]);\n"
974 " g_var = from_buf(src[1]);\n"
975 " a_var[0] = from_buf(src[2]);\n"
976 " a_var[1] = from_buf(src[3]);\n"
977 " p_var = a_var + idx;\n"
978 "}\n\n";
979 num_printed = snprintf(writer_src, sizeof(writer_src),
980 writer_template_normal, ti.get_buf_elem_type());
981 }
982 else
983 {
984 const char* writer_template_atomic =
985 "kernel void writer( global %s* src, uint idx ) {\n"
986 " atomic_store( &var, from_buf(src[0]) );\n"
987 " atomic_store( &g_var, from_buf(src[1]) );\n"
988 " atomic_store( &a_var[0], from_buf(src[2]) );\n"
989 " atomic_store( &a_var[1], from_buf(src[3]) );\n"
990 " p_var = a_var + idx;\n"
991 "}\n\n";
992 num_printed = snprintf(writer_src, sizeof(writer_src),
993 writer_template_atomic, ti.get_buf_elem_type());
994 }
995 assert(num_printed < sizeof(writer_src));
996 (void)num_printed;
997 std::string result = writer_src;
998 return result;
999 }
1000
1001
1002 // Return source text for teh reader function for the given type.
1003 // For types that can't be passed as pointer-to-type as a kernel argument,
1004 // use a substitute base type of the same size.
reader_function(const TypeInfo & ti)1005 static std::string reader_function(const TypeInfo& ti)
1006 {
1007 static char reader_src[MAX_STR];
1008 int num_printed = 0;
1009 if (!ti.is_atomic())
1010 {
1011 const char* reader_template_normal =
1012 "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
1013 " *p_var = from_buf(ptr_write_val);\n"
1014 " dest[0] = to_buf(var);\n"
1015 " dest[1] = to_buf(g_var);\n"
1016 " dest[2] = to_buf(a_var[0]);\n"
1017 " dest[3] = to_buf(a_var[1]);\n"
1018 "}\n\n";
1019 num_printed =
1020 snprintf(reader_src, sizeof(reader_src), reader_template_normal,
1021 ti.get_buf_elem_type(), ti.get_buf_elem_type());
1022 }
1023 else
1024 {
1025 const char* reader_template_atomic =
1026 "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
1027 " atomic_store( p_var, from_buf(ptr_write_val) );\n"
1028 " dest[0] = to_buf( atomic_load( &var ) );\n"
1029 " dest[1] = to_buf( atomic_load( &g_var ) );\n"
1030 " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
1031 " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
1032 "}\n\n";
1033 num_printed =
1034 snprintf(reader_src, sizeof(reader_src), reader_template_atomic,
1035 ti.get_buf_elem_type(), ti.get_buf_elem_type());
1036 }
1037 assert(num_printed < sizeof(reader_src));
1038 (void)num_printed;
1039 std::string result = reader_src;
1040 return result;
1041 }
1042
1043 // Check that all globals where appropriately default-initialized.
check_global_initialization(cl_context context,cl_program program,cl_command_queue queue)1044 static int check_global_initialization(cl_context context, cl_program program,
1045 cl_command_queue queue)
1046 {
1047 int status = CL_SUCCESS;
1048
1049 // Create a buffer on device to store a unique integer.
1050 cl_int is_init_valid = 0;
1051 clMemWrapper buffer(
1052 clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
1053 sizeof(is_init_valid), &is_init_valid, &status));
1054 test_error_ret(status, "Failed to allocate buffer", status);
1055
1056 // Create, setup and invoke kernel.
1057 clKernelWrapper global_check(
1058 clCreateKernel(program, "global_check", &status));
1059 test_error_ret(status, "Failed to create global_check kernel", status);
1060 status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
1061 test_error_ret(status,
1062 "Failed to set up argument for the global_check kernel",
1063 status);
1064 const cl_uint work_dim = 1;
1065 const size_t global_work_offset[] = { 0 };
1066 const size_t global_work_size[] = { 1 };
1067 status = clEnqueueNDRangeKernel(queue, global_check, work_dim,
1068 global_work_offset, global_work_size,
1069 nullptr, 0, nullptr, nullptr);
1070 test_error_ret(status, "Failed to run global_check kernel", status);
1071 status = clFinish(queue);
1072 test_error_ret(status, "clFinish() failed", status);
1073
1074 // Read back the memory buffer from the device.
1075 status =
1076 clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid),
1077 &is_init_valid, 0, nullptr, nullptr);
1078 test_error_ret(status, "Failed to read buffer from device", status);
1079 if (is_init_valid == 0)
1080 {
1081 log_error("Unexpected default values were detected");
1082 return 1;
1083 }
1084
1085 return CL_SUCCESS;
1086 }
1087
1088 // Check write-then-read.
l_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1089 static int l_write_read(cl_device_id device, cl_context context,
1090 cl_command_queue queue)
1091 {
1092 int status = CL_SUCCESS;
1093 int itype;
1094
1095 RandomSeed rand_state(gRandomSeed);
1096
1097 for (itype = 0; itype < num_type_info; itype++)
1098 {
1099 status = status
1100 | l_write_read_for_type(device, context, queue, type_info[itype],
1101 rand_state);
1102 FLUSH;
1103 }
1104
1105 return status;
1106 }
1107
l_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1108 static int l_write_read_for_type(cl_device_id device, cl_context context,
1109 cl_command_queue queue, const TypeInfo& ti,
1110 RandomSeed& rand_state)
1111 {
1112 int err = CL_SUCCESS;
1113 std::string type_name(ti.get_name());
1114 const char* tn = type_name.c_str();
1115 log_info(" %s ", tn);
1116
1117 StringTable ksrc;
1118 ksrc.add(l_get_fp64_pragma());
1119 ksrc.add(l_get_cles_int64_pragma());
1120 if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
1121 ksrc.add(conversion_functions(ti));
1122 ksrc.add(global_decls(ti, false));
1123 ksrc.add(global_check_function(ti));
1124 ksrc.add(writer_function(ti));
1125 ksrc.add(reader_function(ti));
1126
1127 int status = CL_SUCCESS;
1128 clProgramWrapper program;
1129 clKernelWrapper writer;
1130
1131 status = create_single_kernel_helper(context, &program, &writer,
1132 ksrc.num_str(), ksrc.strs(), "writer");
1133 test_error_ret(status, "Failed to create program for read-after-write test",
1134 status);
1135
1136 clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1137 test_error_ret(status,
1138 "Failed to create reader kernel for read-after-write test",
1139 status);
1140
1141 // Check size query.
1142 size_t used_bytes = 0;
1143 status = clGetProgramBuildInfo(program, device,
1144 CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1145 sizeof(used_bytes), &used_bytes, 0);
1146 test_error_ret(status, "Failed to query global variable total size",
1147 status);
1148 size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
1149 * ti.get_size() // Two regular variables and an array of 2 elements.
1150 + (l_64bit_device ? 8 : 4); // The pointer
1151 if (used_bytes < expected_used_bytes)
1152 {
1153 log_error("Error program query for global variable total size query "
1154 "failed: Expected at least %llu but got %llu\n",
1155 (unsigned long long)expected_used_bytes,
1156 (unsigned long long)used_bytes);
1157 err |= 1;
1158 }
1159
1160 err |= check_global_initialization(context, program, queue);
1161
1162 // We need to create 5 random values of the given type,
1163 // and read 4 of them back.
1164 const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1165 const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
1166 cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1167 cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1168
1169 clMemWrapper write_mem(clCreateBuffer(
1170 context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
1171 test_error_ret(status, "Failed to allocate write buffer", status);
1172 clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1173 read_data_size, read_data, &status));
1174 test_error_ret(status, "Failed to allocate read buffer", status);
1175
1176 status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
1177 test_error_ret(status, "set arg", status);
1178 status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
1179 test_error_ret(status, "set arg", status);
1180
1181 // Boolean random data needs to be massaged a bit more.
1182 const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
1183 unsigned bool_iter = 0;
1184
1185 for (int iround = 0; iround < num_rounds; iround++)
1186 {
1187 for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
1188 { // Index into array, to write via pointer
1189 // Generate new random data to push through.
1190 // Generate 5 * 128 bytes all the time, even though the test for
1191 // many types use less than all that.
1192
1193 cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
1194 queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
1195 0, 0, 0);
1196
1197 if (ti.is_bool())
1198 {
1199 // For boolean, random data cast to bool isn't very random.
1200 // So use the bottom bit of bool_value_iter to get true
1201 // diversity.
1202 for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
1203 value_idx++)
1204 {
1205 write_data[value_idx] = (1 << value_idx) & bool_iter;
1206 // printf(" %s", (write_data[value_idx] ? "true" : "false"
1207 // ));
1208 }
1209 bool_iter++;
1210 }
1211 else
1212 {
1213 l_set_randomly(write_data, write_data_size, rand_state);
1214 }
1215 status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
1216 test_error_ret(status, "set arg", status);
1217
1218 // The value to write via the pointer should be taken from the
1219 // 5th typed slot of the write_data.
1220 status = clSetKernelArg(
1221 reader, 1, ti.get_size(),
1222 write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
1223 test_error_ret(status, "set arg", status);
1224
1225 // Determine the expected values.
1226 cl_uchar expected[read_data_size];
1227 memset(expected, -1, sizeof(expected));
1228 l_copy(expected, 0, write_data, 0, ti);
1229 l_copy(expected, 1, write_data, 1, ti);
1230 l_copy(expected, 2, write_data, 2, ti);
1231 l_copy(expected, 3, write_data, 3, ti);
1232 // But we need to take into account the value from the pointer
1233 // write. The 2 represents where the "a" array values begin in our
1234 // read-back.
1235 l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
1236
1237 clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1238
1239 if (ti.is_bool())
1240 {
1241 // Collapse down to one bit.
1242 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1243 expected[i] = (bool)expected[i];
1244 }
1245
1246 cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1247 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1248 0, 0);
1249 memset(read_data, -1, read_data_size);
1250 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1251
1252 // Now run the kernel
1253 const size_t one = 1;
1254 status =
1255 clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
1256 test_error_ret(status, "enqueue writer", status);
1257 status =
1258 clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1259 test_error_ret(status, "enqueue reader", status);
1260 status = clFinish(queue);
1261 test_error_ret(status, "finish", status);
1262
1263 read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1264 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1265 0, 0);
1266
1267 if (ti.is_bool())
1268 {
1269 // Collapse down to one bit.
1270 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1271 read_data[i] = (bool)read_data[i];
1272 }
1273
1274 // Compare only the valid returned bytes.
1275 int compare_result =
1276 l_compare("read-after-write", expected, read_data,
1277 NUM_TESTED_VALUES - 1, ti);
1278 // log_info("Compared %d values each of size %llu. Result %d\n",
1279 // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
1280 // compare_result );
1281 err |= compare_result;
1282
1283 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1284
1285 if (err) break;
1286 }
1287 }
1288
1289 if (CL_SUCCESS == err)
1290 {
1291 log_info("OK\n");
1292 FLUSH;
1293 }
1294 align_free(write_data);
1295 align_free(read_data);
1296 return err;
1297 }
1298
1299
1300 // Check initialization, then, read, then write, then read.
l_init_write_read(cl_device_id device,cl_context context,cl_command_queue queue)1301 static int l_init_write_read(cl_device_id device, cl_context context,
1302 cl_command_queue queue)
1303 {
1304 int status = CL_SUCCESS;
1305 int itype;
1306
1307 RandomSeed rand_state(gRandomSeed);
1308
1309 for (itype = 0; itype < num_type_info; itype++)
1310 {
1311 status = status
1312 | l_init_write_read_for_type(device, context, queue,
1313 type_info[itype], rand_state);
1314 }
1315 return status;
1316 }
l_init_write_read_for_type(cl_device_id device,cl_context context,cl_command_queue queue,const TypeInfo & ti,RandomSeed & rand_state)1317 static int l_init_write_read_for_type(cl_device_id device, cl_context context,
1318 cl_command_queue queue,
1319 const TypeInfo& ti,
1320 RandomSeed& rand_state)
1321 {
1322 int err = CL_SUCCESS;
1323 std::string type_name(ti.get_name());
1324 const char* tn = type_name.c_str();
1325 log_info(" %s ", tn);
1326
1327 StringTable ksrc;
1328 ksrc.add(l_get_fp64_pragma());
1329 ksrc.add(l_get_cles_int64_pragma());
1330 if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
1331 ksrc.add(conversion_functions(ti));
1332 ksrc.add(global_decls(ti, true));
1333 ksrc.add(writer_function(ti));
1334 ksrc.add(reader_function(ti));
1335
1336 int status = CL_SUCCESS;
1337 clProgramWrapper program;
1338 clKernelWrapper writer;
1339
1340 status = create_single_kernel_helper(context, &program, &writer,
1341 ksrc.num_str(), ksrc.strs(), "writer");
1342 test_error_ret(status,
1343 "Failed to create program for init-read-after-write test",
1344 status);
1345
1346 clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1347 test_error_ret(
1348 status, "Failed to create reader kernel for init-read-after-write test",
1349 status);
1350
1351 // Check size query.
1352 size_t used_bytes = 0;
1353 status = clGetProgramBuildInfo(program, device,
1354 CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1355 sizeof(used_bytes), &used_bytes, 0);
1356 test_error_ret(status, "Failed to query global variable total size",
1357 status);
1358 size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
1359 * ti.get_size() // Two regular variables and an array of 2 elements.
1360 + (l_64bit_device ? 8 : 4); // The pointer
1361 if (used_bytes < expected_used_bytes)
1362 {
1363 log_error("Error: program query for global variable total size query "
1364 "failed: Expected at least %llu but got %llu\n",
1365 (unsigned long long)expected_used_bytes,
1366 (unsigned long long)used_bytes);
1367 err |= 1;
1368 }
1369
1370 // We need to create 5 random values of the given type,
1371 // and read 4 of them back.
1372 const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
1373 const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
1374
1375 cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
1376 cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
1377 clMemWrapper write_mem(clCreateBuffer(
1378 context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
1379 test_error_ret(status, "Failed to allocate write buffer", status);
1380 clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1381 read_data_size, read_data, &status));
1382 test_error_ret(status, "Failed to allocate read buffer", status);
1383
1384 status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
1385 test_error_ret(status, "set arg", status);
1386 status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
1387 test_error_ret(status, "set arg", status);
1388
1389 // Boolean random data needs to be massaged a bit more.
1390 const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
1391 unsigned bool_iter = 0;
1392
1393 // We need to count iterations. We do something *different on the
1394 // first iteration, to ensure we actually pick up the initialized
1395 // values.
1396 unsigned iteration = 0;
1397
1398 for (int iround = 0; iround < num_rounds; iround++)
1399 {
1400 for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
1401 { // Index into array, to write via pointer
1402 // Generate new random data to push through.
1403 // Generate 5 * 128 bytes all the time, even though the test for
1404 // many types use less than all that.
1405
1406 cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
1407 queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
1408 0, 0, 0);
1409
1410 if (ti.is_bool())
1411 {
1412 // For boolean, random data cast to bool isn't very random.
1413 // So use the bottom bit of bool_value_iter to get true
1414 // diversity.
1415 for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
1416 value_idx++)
1417 {
1418 write_data[value_idx] = (1 << value_idx) & bool_iter;
1419 // printf(" %s", (write_data[value_idx] ? "true" : "false"
1420 // ));
1421 }
1422 bool_iter++;
1423 }
1424 else
1425 {
1426 l_set_randomly(write_data, write_data_size, rand_state);
1427 }
1428 status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
1429 test_error_ret(status, "set arg", status);
1430
1431 if (!iteration)
1432 {
1433 // On first iteration, the value we write via the last arg
1434 // to the "reader" function is 0.
1435 // It's way easier to code the test this way.
1436 ti.init(write_data + (NUM_TESTED_VALUES - 1) * ti.get_size(),
1437 0);
1438 }
1439
1440 // The value to write via the pointer should be taken from the
1441 // 5th typed slot of the write_data.
1442 status = clSetKernelArg(
1443 reader, 1, ti.get_size(),
1444 write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
1445 test_error_ret(status, "set arg", status);
1446
1447 // Determine the expected values.
1448 cl_uchar expected[read_data_size];
1449 memset(expected, -1, sizeof(expected));
1450 if (iteration)
1451 {
1452 l_copy(expected, 0, write_data, 0, ti);
1453 l_copy(expected, 1, write_data, 1, ti);
1454 l_copy(expected, 2, write_data, 2, ti);
1455 l_copy(expected, 3, write_data, 3, ti);
1456 // But we need to take into account the value from the pointer
1457 // write. The 2 represents where the "a" array values begin in
1458 // our read-back. But we need to take into account the value
1459 // from the pointer write.
1460 l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
1461 }
1462 else
1463 {
1464 // On first iteration, expect these initialized values!
1465 // See the decls_template_with_init above.
1466 ti.init(expected, 0);
1467 ti.init(expected + ti.get_size(), 1);
1468 ti.init(expected + 2 * ti.get_size(), 1);
1469 // Emulate the effect of the write via the pointer.
1470 // The value is 0, not 1 (see above).
1471 // The pointer is always initialized to the second element
1472 // of the array. So it goes into slot 3 of the "expected" array.
1473 ti.init(expected + 3 * ti.get_size(), 0);
1474 }
1475
1476 if (ti.is_bool())
1477 {
1478 // Collapse down to one bit.
1479 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1480 expected[i] = (bool)expected[i];
1481 }
1482
1483 clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
1484
1485 cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1486 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1487 0, 0);
1488 memset(read_data, -1, read_data_size);
1489 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1490
1491 // Now run the kernel
1492 const size_t one = 1;
1493 if (iteration)
1494 {
1495 status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0,
1496 0, 0);
1497 test_error_ret(status, "enqueue writer", status);
1498 }
1499 else
1500 {
1501 // On first iteration, we should be picking up the
1502 // initialized value. So don't enqueue the writer.
1503 }
1504 status =
1505 clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1506 test_error_ret(status, "enqueue reader", status);
1507 status = clFinish(queue);
1508 test_error_ret(status, "finish", status);
1509
1510 read_ptr = (cl_uchar*)clEnqueueMapBuffer(
1511 queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
1512 0, 0);
1513
1514 if (ti.is_bool())
1515 {
1516 // Collapse down to one bit.
1517 for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
1518 read_data[i] = (bool)read_data[i];
1519 }
1520
1521 // Compare only the valid returned bytes.
1522 // log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
1523 int compare_result =
1524 l_compare("init-write-read", expected, read_data,
1525 NUM_TESTED_VALUES - 1, ti);
1526 // log_info("Compared %d values each of size %llu. Result %d\n",
1527 // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
1528 // compare_result );
1529 err |= compare_result;
1530
1531 clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
1532
1533 if (err) break;
1534
1535 iteration++;
1536 }
1537 }
1538
1539 if (CL_SUCCESS == err)
1540 {
1541 log_info("OK\n");
1542 FLUSH;
1543 }
1544 align_free(write_data);
1545 align_free(read_data);
1546
1547 return err;
1548 }
1549
1550
1551 // Check that we can make at least one variable with size
1552 // max_size which is returned from the device info property :
1553 // CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
l_capacity(cl_device_id device,cl_context context,cl_command_queue queue,size_t max_size)1554 static int l_capacity(cl_device_id device, cl_context context,
1555 cl_command_queue queue, size_t max_size)
1556 {
1557 int err = CL_SUCCESS;
1558 // Just test one type.
1559 const TypeInfo ti(l_find_type("uchar"));
1560 log_info(" l_capacity...");
1561
1562 const char prog_src_template[] =
1563 #if defined(_WIN32)
1564 "uchar var[%Iu];\n\n"
1565 #else
1566 "uchar var[%zu];\n\n"
1567 #endif
1568 "kernel void get_max_size( global ulong* size_ret ) {\n"
1569 #if defined(_WIN32)
1570 " *size_ret = (ulong)%Iu;\n"
1571 #else
1572 " *size_ret = (ulong)%zu;\n"
1573 #endif
1574 "}\n\n"
1575 "kernel void writer( global uchar* src ) {\n"
1576 " var[get_global_id(0)] = src[get_global_linear_id()];\n"
1577 "}\n\n"
1578 "kernel void reader( global uchar* dest ) {\n"
1579 " dest[get_global_linear_id()] = var[get_global_id(0)];\n"
1580 "}\n\n";
1581 char prog_src[MAX_STR];
1582 int num_printed = snprintf(prog_src, sizeof(prog_src), prog_src_template,
1583 max_size, max_size);
1584 assert(num_printed < MAX_STR); // or increase MAX_STR
1585 (void)num_printed;
1586
1587 StringTable ksrc;
1588 ksrc.add(prog_src);
1589
1590 int status = CL_SUCCESS;
1591 clProgramWrapper program;
1592 clKernelWrapper get_max_size;
1593
1594 status = create_single_kernel_helper(context, &program, &get_max_size,
1595 ksrc.num_str(), ksrc.strs(),
1596 "get_max_size");
1597 test_error_ret(status, "Failed to create program for capacity test",
1598 status);
1599
1600 // Check size query.
1601 size_t used_bytes = 0;
1602 status = clGetProgramBuildInfo(program, device,
1603 CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1604 sizeof(used_bytes), &used_bytes, 0);
1605 test_error_ret(status, "Failed to query global variable total size",
1606 status);
1607 if (used_bytes < max_size)
1608 {
1609 log_error("Error: program query for global variable total size query "
1610 "failed: Expected at least %llu but got %llu\n",
1611 (unsigned long long)max_size, (unsigned long long)used_bytes);
1612 err |= 1;
1613 }
1614
1615 // Prepare to execute
1616 clKernelWrapper writer(clCreateKernel(program, "writer", &status));
1617 test_error_ret(status, "Failed to create writer kernel for capacity test",
1618 status);
1619 clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1620 test_error_ret(status, "Failed to create reader kernel for capacity test",
1621 status);
1622
1623 cl_ulong max_size_ret = 0;
1624 const size_t arr_size = 10 * 1024 * 1024;
1625 cl_uchar* buffer = (cl_uchar*)align_malloc(arr_size, ALIGNMENT);
1626
1627 if (!buffer)
1628 {
1629 log_error("Failed to allocate buffer\n");
1630 return 1;
1631 }
1632
1633 clMemWrapper max_size_ret_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1634 sizeof(max_size_ret),
1635 &max_size_ret, &status));
1636 test_error_ret(status, "Failed to allocate size query buffer", status);
1637 clMemWrapper buffer_mem(
1638 clCreateBuffer(context, CL_MEM_READ_WRITE, arr_size, 0, &status));
1639 test_error_ret(status, "Failed to allocate write buffer", status);
1640
1641 status = clSetKernelArg(get_max_size, 0, sizeof(cl_mem), &max_size_ret_mem);
1642 test_error_ret(status, "set arg", status);
1643 status = clSetKernelArg(writer, 0, sizeof(cl_mem), &buffer_mem);
1644 test_error_ret(status, "set arg", status);
1645 status = clSetKernelArg(reader, 0, sizeof(cl_mem), &buffer_mem);
1646 test_error_ret(status, "set arg", status);
1647
1648 // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
1649 const size_t one = 1;
1650 status =
1651 clEnqueueNDRangeKernel(queue, get_max_size, 1, 0, &one, 0, 0, 0, 0);
1652 test_error_ret(status, "enqueue size query", status);
1653 status = clFinish(queue);
1654 test_error_ret(status, "finish", status);
1655
1656 cl_uchar* max_size_ret_ptr = (cl_uchar*)clEnqueueMapBuffer(
1657 queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret),
1658 0, 0, 0, 0);
1659 if (max_size_ret != max_size)
1660 {
1661 log_error("Error: preprocessor definition for "
1662 "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not "
1663 "match device query value %llu\n",
1664 (unsigned long long)max_size_ret,
1665 (unsigned long long)max_size);
1666 err |= 1;
1667 }
1668 clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
1669
1670 RandomSeed rand_state_write(gRandomSeed);
1671 for (size_t offset = 0; offset < max_size; offset += arr_size)
1672 {
1673 size_t curr_size =
1674 (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1675 l_set_randomly(buffer, curr_size, rand_state_write);
1676 status = clEnqueueWriteBuffer(queue, buffer_mem, CL_TRUE, 0, curr_size,
1677 buffer, 0, 0, 0);
1678 test_error_ret(status, "populate buffer_mem object", status);
1679 status = clEnqueueNDRangeKernel(queue, writer, 1, &offset, &curr_size,
1680 0, 0, 0, 0);
1681 test_error_ret(status, "enqueue writer", status);
1682 status = clFinish(queue);
1683 test_error_ret(status, "finish", status);
1684 }
1685
1686 RandomSeed rand_state_read(gRandomSeed);
1687 for (size_t offset = 0; offset < max_size; offset += arr_size)
1688 {
1689 size_t curr_size =
1690 (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
1691 status = clEnqueueNDRangeKernel(queue, reader, 1, &offset, &curr_size,
1692 0, 0, 0, 0);
1693 test_error_ret(status, "enqueue reader", status);
1694 cl_uchar* read_mem_ptr = (cl_uchar*)clEnqueueMapBuffer(
1695 queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0,
1696 &status);
1697 test_error_ret(status, "map read data", status);
1698 l_set_randomly(buffer, curr_size, rand_state_read);
1699 err |= l_compare("capacity", buffer, read_mem_ptr, curr_size, ti);
1700 clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
1701 }
1702
1703 if (CL_SUCCESS == err)
1704 {
1705 log_info("OK\n");
1706 FLUSH;
1707 }
1708 align_free(buffer);
1709
1710 return err;
1711 }
1712
1713
1714 // Check operation on a user type.
l_user_type(cl_device_id device,cl_context context,cl_command_queue queue,bool separate_compile)1715 static int l_user_type(cl_device_id device, cl_context context,
1716 cl_command_queue queue, bool separate_compile)
1717 {
1718 int err = CL_SUCCESS;
1719 // Just test one type.
1720 const TypeInfo ti(l_find_type("uchar"));
1721 log_info(" l_user_type %s...",
1722 separate_compile ? "separate compilation"
1723 : "single source compilation");
1724
1725 if (separate_compile && !l_linker_available)
1726 {
1727 log_info("Separate compilation is not supported. Skipping test\n");
1728 return err;
1729 }
1730
1731 const char type_src[] =
1732 "typedef struct { uchar c; uint i; } my_struct_t;\n\n";
1733 const char def_src[] = "my_struct_t var = { 'a', 42 };\n\n";
1734 const char decl_src[] = "extern my_struct_t var;\n\n";
1735
1736 // Don't use a host struct. We can't guarantee that the host
1737 // compiler has the same structure layout as the device compiler.
1738 const char writer_src[] = "kernel void writer( uchar c, uint i ) {\n"
1739 " var.c = c;\n"
1740 " var.i = i;\n"
1741 "}\n\n";
1742 const char reader_src[] =
1743 "kernel void reader( global uchar* C, global uint* I ) {\n"
1744 " *C = var.c;\n"
1745 " *I = var.i;\n"
1746 "}\n\n";
1747
1748 clProgramWrapper program;
1749
1750 const std::string options = get_build_options(device);
1751
1752 if (separate_compile)
1753 {
1754 // Separate compilation flow.
1755 StringTable wksrc;
1756 wksrc.add(type_src);
1757 wksrc.add(def_src);
1758 wksrc.add(writer_src);
1759
1760 StringTable rksrc;
1761 rksrc.add(type_src);
1762 rksrc.add(decl_src);
1763 rksrc.add(reader_src);
1764
1765 int status = CL_SUCCESS;
1766 clProgramWrapper writer_program(clCreateProgramWithSource(
1767 context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status));
1768 test_error_ret(status,
1769 "Failed to create writer program for user type test",
1770 status);
1771
1772 status = clCompileProgram(writer_program, 1, &device, options.c_str(),
1773 0, 0, 0, 0, 0);
1774 if (check_error(
1775 status,
1776 "Failed to compile writer program for user type test (%s)",
1777 IGetErrorString(status)))
1778 {
1779 print_build_log(writer_program, 1, &device, wksrc.num_str(),
1780 wksrc.strs(), wksrc.lengths(), options.c_str());
1781 return status;
1782 }
1783
1784 clProgramWrapper reader_program(clCreateProgramWithSource(
1785 context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status));
1786 test_error_ret(status,
1787 "Failed to create reader program for user type test",
1788 status);
1789
1790 status = clCompileProgram(reader_program, 1, &device, options.c_str(),
1791 0, 0, 0, 0, 0);
1792 if (check_error(
1793 status,
1794 "Failed to compile reader program for user type test (%s)",
1795 IGetErrorString(status)))
1796 {
1797 print_build_log(reader_program, 1, &device, rksrc.num_str(),
1798 rksrc.strs(), rksrc.lengths(), options.c_str());
1799 return status;
1800 }
1801
1802 cl_program progs[2];
1803 progs[0] = writer_program;
1804 progs[1] = reader_program;
1805
1806 program =
1807 clLinkProgram(context, 1, &device, "", 2, progs, 0, 0, &status);
1808 if (check_error(status,
1809 "Failed to link program for user type test (%s)",
1810 IGetErrorString(status)))
1811 {
1812 print_build_log(program, 1, &device, 0, NULL, NULL, "");
1813 return status;
1814 }
1815 }
1816 else
1817 {
1818 // Single compilation flow.
1819 StringTable ksrc;
1820 ksrc.add(type_src);
1821 ksrc.add(def_src);
1822 ksrc.add(writer_src);
1823 ksrc.add(reader_src);
1824
1825 int status = CL_SUCCESS;
1826
1827 status = create_single_kernel_helper_create_program(
1828 context, &program, ksrc.num_str(), ksrc.strs(), options.c_str());
1829 if (check_error(status,
1830 "Failed to build program for user type test (%s)",
1831 IGetErrorString(status)))
1832 {
1833 print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
1834 ksrc.lengths(), options.c_str());
1835 return status;
1836 }
1837
1838 status = clBuildProgram(program, 1, &device, options.c_str(), 0, 0);
1839 if (check_error(status,
1840 "Failed to compile program for user type test (%s)",
1841 IGetErrorString(status)))
1842 {
1843 print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
1844 ksrc.lengths(), options.c_str());
1845 return status;
1846 }
1847 }
1848
1849
1850 // Check size query.
1851 size_t used_bytes = 0;
1852 int status = clGetProgramBuildInfo(
1853 program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
1854 sizeof(used_bytes), &used_bytes, 0);
1855 test_error_ret(status, "Failed to query global variable total size",
1856 status);
1857 size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
1858 if (used_bytes < expected_size)
1859 {
1860 log_error("Error: program query for global variable total size query "
1861 "failed: Expected at least %llu but got %llu\n",
1862 (unsigned long long)expected_size,
1863 (unsigned long long)used_bytes);
1864 err |= 1;
1865 }
1866
1867 // Prepare to execute
1868 clKernelWrapper writer(clCreateKernel(program, "writer", &status));
1869 test_error_ret(status, "Failed to create writer kernel for user type test",
1870 status);
1871 clKernelWrapper reader(clCreateKernel(program, "reader", &status));
1872 test_error_ret(status, "Failed to create reader kernel for user type test",
1873 status);
1874
1875 // Set up data.
1876 cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
1877 cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
1878
1879 clMemWrapper uchar_mem(clCreateBuffer(
1880 context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status));
1881 test_error_ret(status, "Failed to allocate uchar buffer", status);
1882 clMemWrapper uint_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
1883 sizeof(cl_uint), uint_data, &status));
1884 test_error_ret(status, "Failed to allocate uint buffer", status);
1885
1886 status = clSetKernelArg(reader, 0, sizeof(cl_mem), &uchar_mem);
1887 test_error_ret(status, "set arg", status);
1888 status = clSetKernelArg(reader, 1, sizeof(cl_mem), &uint_mem);
1889 test_error_ret(status, "set arg", status);
1890
1891 cl_uchar expected_uchar = 'a';
1892 cl_uint expected_uint = 42;
1893 for (unsigned iter = 0; iter < 5; iter++)
1894 { // Must go around at least twice
1895 // Read back data
1896 *uchar_data = -1;
1897 *uint_data = -1;
1898 const size_t one = 1;
1899 status = clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
1900 test_error_ret(status, "enqueue reader", status);
1901 status = clFinish(queue);
1902 test_error_ret(status, "finish", status);
1903
1904 cl_uchar* uint_data_ptr =
1905 (cl_uchar*)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ,
1906 0, sizeof(cl_uint), 0, 0, 0, 0);
1907 cl_uchar* uchar_data_ptr = (cl_uchar*)clEnqueueMapBuffer(
1908 queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0,
1909 0, 0);
1910
1911 if (expected_uchar != *uchar_data || expected_uint != *uint_data)
1912 {
1913 log_error(
1914 "FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
1915 iter, (int)*uchar_data, *uint_data, (int)expected_uchar,
1916 expected_uint);
1917 err |= 1;
1918 }
1919
1920 clEnqueueUnmapMemObject(queue, uint_mem, uint_data_ptr, 0, 0, 0);
1921 clEnqueueUnmapMemObject(queue, uchar_mem, uchar_data_ptr, 0, 0, 0);
1922
1923 // Mutate the data.
1924 expected_uchar++;
1925 expected_uint++;
1926
1927 // Write the new values into persistent store.
1928 *uchar_data = expected_uchar;
1929 *uint_data = expected_uint;
1930 status = clSetKernelArg(writer, 0, sizeof(cl_uchar), uchar_data);
1931 test_error_ret(status, "set arg", status);
1932 status = clSetKernelArg(writer, 1, sizeof(cl_uint), uint_data);
1933 test_error_ret(status, "set arg", status);
1934 status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
1935 test_error_ret(status, "enqueue writer", status);
1936 status = clFinish(queue);
1937 test_error_ret(status, "finish", status);
1938 }
1939
1940 if (CL_SUCCESS == err)
1941 {
1942 log_info("OK\n");
1943 FLUSH;
1944 }
1945 align_free(uchar_data);
1946 align_free(uint_data);
1947 return err;
1948 }
1949
get_build_options(cl_device_id device)1950 static std::string get_build_options(cl_device_id device)
1951 {
1952 std::string options = "-cl-std=CL";
1953 Version latest_cl_c_version = get_device_latest_cl_c_version(device);
1954 options += latest_cl_c_version.to_string();
1955 return options;
1956 }
1957
1958 // Determines whether its valid to skip this test based on the driver version
1959 // and the features it optionally supports.
1960 // Whether the test should be skipped is writen into the out paramter skip.
1961 // The check returns an error code for the clDeviceInfo query.
should_skip(cl_device_id device,cl_bool & skip)1962 static cl_int should_skip(cl_device_id device, cl_bool& skip)
1963 {
1964 // Assume we can't skip to begin with.
1965 skip = CL_FALSE;
1966
1967 // Progvar tests are already skipped for OpenCL < 2.0, so here we only need
1968 // to test for 3.0 since that is when program scope global variables become
1969 // optional.
1970 if (get_device_cl_version(device) >= Version(3, 0))
1971 {
1972 size_t max_global_variable_size{};
1973 test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
1974 sizeof(max_global_variable_size),
1975 &max_global_variable_size, nullptr),
1976 "clGetDeviceInfo failed");
1977 skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE;
1978 }
1979 return CL_SUCCESS;
1980 }
1981
1982 ////////////////////
1983 // Global functions
1984
1985
1986 // Test support for variables at program scope. Miscellaneous
test_progvar_prog_scope_misc(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1987 int test_progvar_prog_scope_misc(cl_device_id device, cl_context context,
1988 cl_command_queue queue, int num_elements)
1989 {
1990 cl_bool skip{ CL_FALSE };
1991 auto error = should_skip(device, skip);
1992 if (CL_SUCCESS != error)
1993 {
1994 return TEST_FAIL;
1995 }
1996 if (skip)
1997 {
1998 log_info("Skipping progvar_prog_scope_misc since it is optionally not "
1999 "supported on this device\n");
2000 return TEST_SKIPPED_ITSELF;
2001 }
2002 size_t max_size = 0;
2003 size_t pref_size = 0;
2004
2005 cl_int err = CL_SUCCESS;
2006
2007 err = l_get_device_info(device, &max_size, &pref_size);
2008 err |= l_build_type_table(device);
2009
2010 err |= l_capacity(device, context, queue, max_size);
2011 err |= l_user_type(device, context, queue, false);
2012 err |= l_user_type(device, context, queue, true);
2013
2014 return err;
2015 }
2016
2017
2018 // Test support for variables at program scope. Unitialized data
test_progvar_prog_scope_uninit(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2019 int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context,
2020 cl_command_queue queue, int num_elements)
2021 {
2022 cl_bool skip{ CL_FALSE };
2023 auto error = should_skip(device, skip);
2024 if (CL_SUCCESS != error)
2025 {
2026 return TEST_FAIL;
2027 }
2028 if (skip)
2029 {
2030 log_info(
2031 "Skipping progvar_prog_scope_uninit since it is optionally not "
2032 "supported on this device\n");
2033 return TEST_SKIPPED_ITSELF;
2034 }
2035 size_t max_size = 0;
2036 size_t pref_size = 0;
2037
2038 cl_int err = CL_SUCCESS;
2039
2040 err = l_get_device_info(device, &max_size, &pref_size);
2041 err |= l_build_type_table(device);
2042
2043 err |= l_write_read(device, context, queue);
2044
2045 return err;
2046 }
2047
2048 // Test support for variables at program scope. Initialized data.
test_progvar_prog_scope_init(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2049 int test_progvar_prog_scope_init(cl_device_id device, cl_context context,
2050 cl_command_queue queue, int num_elements)
2051 {
2052 cl_bool skip{ CL_FALSE };
2053 auto error = should_skip(device, skip);
2054 if (CL_SUCCESS != error)
2055 {
2056 return TEST_FAIL;
2057 }
2058 if (skip)
2059 {
2060 log_info("Skipping progvar_prog_scope_init since it is optionally not "
2061 "supported on this device\n");
2062 return TEST_SKIPPED_ITSELF;
2063 }
2064 size_t max_size = 0;
2065 size_t pref_size = 0;
2066
2067 cl_int err = CL_SUCCESS;
2068
2069 err = l_get_device_info(device, &max_size, &pref_size);
2070 err |= l_build_type_table(device);
2071
2072 err |= l_init_write_read(device, context, queue);
2073
2074 return err;
2075 }
2076
2077
2078 // A simple test for support of static variables inside a kernel.
test_progvar_func_scope(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)2079 int test_progvar_func_scope(cl_device_id device, cl_context context,
2080 cl_command_queue queue, int num_elements)
2081 {
2082 cl_bool skip{ CL_FALSE };
2083 auto error = should_skip(device, skip);
2084 if (CL_SUCCESS != error)
2085 {
2086 return TEST_FAIL;
2087 }
2088 if (skip)
2089 {
2090 log_info("Skipping progvar_func_scope since it is optionally not "
2091 "supported on this device\n");
2092 return TEST_SKIPPED_ITSELF;
2093 }
2094
2095 cl_int err = CL_SUCCESS;
2096
2097 // Deliberately have two variables with the same name but in different
2098 // scopes.
2099 // Also, use a large initialized structure in both cases.
2100 // clang-format off
2101 const char prog_src[] =
2102 "typedef struct { char c; int16 i; } mystruct_t;\n"
2103 "kernel void test_bump(global int* value, int which) {\n"
2104 " if (which) {\n"
2105 // Explicit address space.
2106 // Last element set to 0
2107 " static global mystruct_t persistent = { 'a', (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
2108 " *value = persistent.i.sf++;\n"
2109 " } else {\n"
2110 // Implicitly global
2111 // Last element set to 100
2112 " static mystruct_t persistent = { 'b' , (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
2113 " *value = persistent.i.sf++;\n"
2114 " }\n"
2115 "}\n";
2116 // clang-format on
2117
2118 StringTable ksrc;
2119 ksrc.add(prog_src);
2120
2121 int status = CL_SUCCESS;
2122 clProgramWrapper program;
2123 clKernelWrapper test_bump;
2124
2125 status =
2126 create_single_kernel_helper(context, &program, &test_bump,
2127 ksrc.num_str(), ksrc.strs(), "test_bump");
2128 test_error_ret(status,
2129 "Failed to create program for function static variable test",
2130 status);
2131
2132 // Check size query.
2133 size_t used_bytes = 0;
2134 status = clGetProgramBuildInfo(program, device,
2135 CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
2136 sizeof(used_bytes), &used_bytes, 0);
2137 test_error_ret(status, "Failed to query global variable total size",
2138 status);
2139 size_t expected_size = 2 * sizeof(cl_int); // Two ints.
2140 if (used_bytes < expected_size)
2141 {
2142 log_error("Error: program query for global variable total size query "
2143 "failed: Expected at least %llu but got %llu\n",
2144 (unsigned long long)expected_size,
2145 (unsigned long long)used_bytes);
2146 err |= 1;
2147 }
2148
2149 // Prepare the data.
2150 cl_int counter_value = 0;
2151 clMemWrapper counter_value_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
2152 sizeof(counter_value),
2153 &counter_value, &status));
2154 test_error_ret(status, "Failed to allocate counter query buffer", status);
2155
2156 status = clSetKernelArg(test_bump, 0, sizeof(cl_mem), &counter_value_mem);
2157 test_error_ret(status, "set arg", status);
2158
2159 // Go a few rounds, alternating between the two counters in the kernel.
2160
2161 // Same as initial values in kernel.
2162 // But "true" which increments the 0-based counter, and "false" which
2163 // increments the 100-based counter.
2164 cl_int expected_counter[2] = { 100, 0 };
2165
2166 const size_t one = 1;
2167 for (int iround = 0; iround < 5; iround++)
2168 { // Must go at least twice around
2169 for (int iwhich = 0; iwhich < 2; iwhich++)
2170 { // Cover both counters
2171 status = clSetKernelArg(test_bump, 1, sizeof(iwhich), &iwhich);
2172 test_error_ret(status, "set arg", status);
2173 status = clEnqueueNDRangeKernel(queue, test_bump, 1, 0, &one, 0, 0,
2174 0, 0);
2175 test_error_ret(status, "enqueue test_bump", status);
2176 status = clFinish(queue);
2177 test_error_ret(status, "finish", status);
2178
2179 cl_uchar* counter_value_ptr = (cl_uchar*)clEnqueueMapBuffer(
2180 queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0,
2181 sizeof(counter_value), 0, 0, 0, 0);
2182
2183 if (counter_value != expected_counter[iwhich])
2184 {
2185 log_error(
2186 "Error: Round %d on counter %d: Expected %d but got %d\n",
2187 iround, iwhich, expected_counter[iwhich], counter_value);
2188 err |= 1;
2189 }
2190 expected_counter[iwhich]++; // Emulate behaviour of the kernel.
2191
2192 clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr,
2193 0, 0, 0);
2194 }
2195 }
2196
2197 if (CL_SUCCESS == err)
2198 {
2199 log_info("OK\n");
2200 FLUSH;
2201 }
2202
2203 return err;
2204 }
2205