1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/typeWrappers.h"
18 #include "base.h"
19
20 #include <string>
21 #include <vector>
22 #include <algorithm>
23 #include <sstream>
24
25 typedef enum {
26 ARG_TYPE_NONE,
27
28 ARG_TYPE_HOST_PTR,
29 ARG_TYPE_HOST_LOCAL,
30
31 ARG_TYPE_COARSE_GRAINED_SVM,
32 ARG_TYPE_FINE_GRAINED_BUFFER_SVM,
33 ARG_TYPE_FINE_GRAINED_SYSTEM_SVM,
34 ARG_TYPE_ATOMICS_SVM
35 } ExtraKernelArgMemType;
36
37 class CSVMWrapper {
38 public:
CSVMWrapper()39 CSVMWrapper() : ptr_(NULL), context_(NULL) { }
40
Attach(cl_context context,void * ptr)41 void Attach(cl_context context, void *ptr) {
42 context_ = context;
43 ptr_ = ptr;
44 }
45
~CSVMWrapper()46 ~CSVMWrapper() {
47 if (ptr_)
48 clSVMFree(context_, ptr_);
49 }
50
operator void*()51 operator void *() {
52 return ptr_;
53 }
54
55 private:
56 void *ptr_;
57 cl_context context_;
58 };
59
60 class CAdvancedTest : public CTest {
61 public:
CAdvancedTest(const std::vector<std::string> & kernel)62 CAdvancedTest(const std::vector<std::string>& kernel) : CTest(), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
63
64 }
65
CAdvancedTest(const std::string & library,const std::vector<std::string> & kernel)66 CAdvancedTest(const std::string& library, const std::vector<std::string>& kernel) : CTest(), _libraryCode(library), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
67
68 }
69
CAdvancedTest(const std::string & kernel,ExtraKernelArgMemType argType=ARG_TYPE_NONE)70 CAdvancedTest(const std::string& kernel, ExtraKernelArgMemType argType = ARG_TYPE_NONE) : CTest(), _kernels(1, kernel), _extraKernelArgMemType(argType) {
71
72 }
73
CAdvancedTest(const std::string & library,const std::string & kernel)74 CAdvancedTest(const std::string& library, const std::string& kernel) : CTest(), _libraryCode(library), _kernels(1, kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
75
76 }
77
PrintCompilationLog(cl_program program,cl_device_id device)78 int PrintCompilationLog(cl_program program, cl_device_id device) {
79 cl_int error;
80 size_t buildLogSize = 0;
81
82 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
83 test_error(error, "clGetProgramBuildInfo failed");
84
85 std::string log;
86 log.resize(buildLogSize);
87
88 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, buildLogSize, &log[0], NULL);
89 test_error(error, "clGetProgramBuildInfo failed");
90
91 log_error("Build log for device is:\n------------\n");
92 log_error("%s\n", log.c_str() );
93 log_error( "\n----------\n" );
94
95 return CL_SUCCESS;
96 }
97
ExecuteSubcase(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,const std::string & src)98 int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) {
99 cl_int error;
100
101 clProgramWrapper program, preCompiledLibrary, library, finalProgram;
102 clKernelWrapper kernel;
103
104 const char *srcPtr = src.c_str();
105
106 if (!_libraryCode.empty()) {
107 program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error);
108 test_error(error, "clCreateProgramWithSource failed");
109
110 // Use the latest OpenCL-C version supported by the device. This
111 // allows calling code to force a particular CL C version if it is
112 // required, but also means that callers need not specify a version
113 // if they want to assume the most recent CL C.
114
115 auto version = get_max_OpenCL_C_for_context(context);
116
117 const char* cl_std = nullptr;
118 if (version >= Version(3, 0))
119 {
120 cl_std = "-cl-std=CL3.0";
121 }
122 else if (version >= Version(2, 0) && version < Version(3, 0))
123 {
124 cl_std = "-cl-std=CL2.0";
125 }
126 else
127 {
128 // If the -cl-std build option is not specified, the highest
129 // OpenCL C 1.x language version supported by each device is
130 // used when compiling the program for each device.
131 cl_std = "";
132 }
133
134 error = clCompileProgram(program, 1, &deviceID, cl_std, 0, NULL,
135 NULL, NULL, NULL);
136
137 if (error != CL_SUCCESS)
138 PrintCompilationLog(program, deviceID);
139 test_error(error, "clCompileProgram failed");
140
141 const char *srcPtrLibrary = _libraryCode.c_str();
142
143 preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error);
144 test_error(error, "clCreateProgramWithSource failed");
145
146 error = clCompileProgram(preCompiledLibrary, 1, &deviceID, cl_std,
147 0, NULL, NULL, NULL, NULL);
148
149 if (error != CL_SUCCESS)
150 PrintCompilationLog(preCompiledLibrary, deviceID);
151 test_error(error, "clCompileProgram failed");
152
153 library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &preCompiledLibrary, NULL, NULL, &error);
154 test_error(error, "clLinkProgram failed");
155
156 cl_program objects[] = { program, library };
157 finalProgram = clLinkProgram(context, 1, &deviceID, "", 2, objects, NULL, NULL, &error);
158 test_error(error, "clLinkProgram failed");
159
160 kernel = clCreateKernel(finalProgram, "testKernel", &error);
161 test_error(error, "clCreateKernel failed");
162 }
163
164 else {
165 if (create_single_kernel_helper(context, &program, &kernel, 1,
166 &srcPtr, "testKernel"))
167 {
168 log_error("create_single_kernel_helper failed\n");
169 return -1;
170 }
171 }
172
173 size_t bufferSize = num_elements * sizeof(cl_uint);
174 clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error);
175 test_error(error, "clCreateBuffer failed");
176
177 error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
178 test_error(error, "clSetKernelArg(0) failed");
179
180 // Warning: the order below is very important as SVM buffer cannot be free'd before corresponding mem_object
181 CSVMWrapper svmWrapper;
182 clMemWrapper extraArg;
183 std::vector<cl_uint> extraArgData(num_elements);
184 for (cl_uint i = 0; i < (cl_uint)num_elements; i++)
185 extraArgData[i] = i;
186
187 if (_extraKernelArgMemType != ARG_TYPE_NONE) {
188 if (_extraKernelArgMemType == ARG_TYPE_HOST_PTR) {
189 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, &extraArgData[0], &error);
190 test_error(error, "clCreateBuffer failed");
191 }
192
193 else {
194 void *ptr = NULL;
195
196 switch (_extraKernelArgMemType) {
197 case ARG_TYPE_COARSE_GRAINED_SVM:
198 ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, bufferSize, 0);
199 break;
200 case ARG_TYPE_FINE_GRAINED_BUFFER_SVM:
201 ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE, bufferSize, 0);
202 break;
203 case ARG_TYPE_FINE_GRAINED_SYSTEM_SVM:
204 ptr = &extraArgData[0];
205 break;
206 case ARG_TYPE_ATOMICS_SVM:
207 ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | CL_MEM_READ_WRITE, bufferSize, 0);
208 break;
209 default:
210 break;
211 }
212
213 if(_extraKernelArgMemType != ARG_TYPE_HOST_LOCAL) {
214 if (!ptr) {
215 log_error("Allocation failed\n");
216 return -1;
217 }
218
219 if (_extraKernelArgMemType != ARG_TYPE_FINE_GRAINED_SYSTEM_SVM) {
220 svmWrapper.Attach(context, ptr);
221 }
222
223 if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
224 error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, ptr, bufferSize, 0, NULL, NULL);
225 test_error(error, "clEnqueueSVMMap failed");
226 }
227
228 memcpy(ptr, &extraArgData[0], bufferSize);
229
230 if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
231 error = clEnqueueSVMUnmap(queue, ptr, 0, NULL, NULL);
232 test_error(error, "clEnqueueSVMUnmap failed");
233 clFinish(queue);
234 }
235
236 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, ptr, &error);
237 test_error(error, "clCreateBuffer from SVM buffer failed");
238 }
239 }
240
241 if(_extraKernelArgMemType == ARG_TYPE_HOST_LOCAL)
242 error = clSetKernelArg(kernel, 1, bufferSize, NULL);
243 else
244 error = clSetKernelArg(kernel, 1, sizeof(extraArg), &extraArg);
245
246
247 test_error(error, "clSetKernelArg(1) failed");
248 }
249
250 size_t globalWorkGroupSize = num_elements;
251 size_t localWorkGroupSize = 0;
252 error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize);
253 test_error(error, "Unable to get common work group size");
254
255 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL);
256 test_error(error, "clEnqueueNDRangeKernel failed");
257
258 // verify results
259 std::vector<cl_uint> results(num_elements);
260
261 error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL);
262 test_error(error, "clEnqueueReadBuffer failed");
263
264 size_t passCount = std::count(results.begin(), results.end(), 1);
265 if (passCount != results.size()) {
266 std::vector<cl_uint>::iterator iter = std::find(results.begin(), results.end(), 0);
267 log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter));
268 log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size());
269 return -1;
270 }
271
272 return CL_SUCCESS;
273 }
274
Execute(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)275 int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
276 cl_int result = CL_SUCCESS;
277
278 for (std::vector<std::string>::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) {
279 log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size());
280
281 result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it);
282 }
283
284 return result;
285 }
286
287 private:
288 const std::string _libraryCode;
289 const std::vector<std::string> _kernels;
290 const ExtraKernelArgMemType _extraKernelArgMemType;
291 };
292
test_library_function(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)293 int test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
294 const std::string LIBRARY_FUNCTION = common::CONFORMANCE_VERIFY_FENCE +
295 NL
296 NL "bool helperFunction(float *floatp, float val) {"
297 NL " if (!isFenceValid(get_fence(floatp)))"
298 NL " return false;"
299 NL
300 NL " if (*floatp != val)"
301 NL " return false;"
302 NL
303 NL " return true;"
304 NL "}"
305 NL;
306
307 const std::string KERNEL_FUNCTION = R"OpenCLC(
308 extern bool helperFunction(float *floatp, float val);
309
310 #ifdef __opencl_c_program_scope_global_variables
311 __global float gfloat = 1.0f;
312 #endif
313
314 __kernel void testKernel(__global uint *results) {
315 uint tid = get_global_id(0);
316
317 #ifdef __opencl_c_program_scope_global_variables
318 __global float *gfloatp = &gfloat;
319 #endif
320 __local float lfloat;
321 lfloat = 2.0f;
322 __local float *lfloatp = &lfloat;
323 float pfloat = 3.0f;
324 __private float *pfloatp = &pfloat;
325
326 uint failures = 0;
327
328 #ifdef __opencl_c_program_scope_global_variables
329 failures += helperFunction(gfloatp, gfloat) ? 0 : 1;
330 #endif
331 failures += helperFunction(lfloatp, lfloat) ? 0 : 1;
332 failures += helperFunction(pfloatp, pfloat) ? 0 : 1;
333
334 results[tid] = failures == 0;
335 };
336 )OpenCLC";
337
338 CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION);
339
340 return test.Execute(deviceID, context, queue, num_elements);
341 }
342
test_generic_variable_volatile(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)343 int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
344 std::vector<std::string> KERNEL_FUNCTIONS;
345
346 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
347 NL
348 NL "bool helperFunction(float *floatp, float val) {"
349 NL " if (!isFenceValid(get_fence(floatp)))"
350 NL " return false;"
351 NL
352 NL " if (*floatp != val)"
353 NL " return false;"
354 NL
355 NL " return true;"
356 NL "}"
357 NL
358 NL "__kernel void testKernel(__global uint *results) {"
359 NL " uint tid = get_global_id(0);"
360 NL
361 NL " static __global float val;"
362 NL " val = 0.1f;"
363 NL " float * volatile ptr = &val;"
364 NL
365 NL " results[tid] = helperFunction(ptr, val);"
366 NL "}"
367 NL
368 );
369
370 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
371 NL
372 NL "bool helperFunction(float *floatp, float val) {"
373 NL " if (!isFenceValid(get_fence(floatp)))"
374 NL " return false;"
375 NL
376 NL " if (*floatp != val)"
377 NL " return false;"
378 NL
379 NL " return true;"
380 NL "}"
381 NL
382 NL "__kernel void testKernel(__global uint *results) {"
383 NL " uint tid = get_global_id(0);"
384 NL
385 NL " __local float val;"
386 NL " val = 0.1f;"
387 NL " float * ptr = &val;"
388 NL
389 NL " results[tid] = helperFunction(ptr, val);"
390 NL "}"
391 NL
392 );
393
394 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
395 NL
396 NL "bool helperFunction(float *floatp, float val) {"
397 NL " if (!isFenceValid(get_fence(floatp)))"
398 NL " return false;"
399 NL
400 NL " if (*floatp != val)"
401 NL " return false;"
402 NL
403 NL " return true;"
404 NL "}"
405 NL
406 NL "__kernel void testKernel(__global uint *results) {"
407 NL " uint tid = get_global_id(0);"
408 NL
409 NL " __private float val;"
410 NL " val = 0.1f;"
411 NL " float * volatile ptr = &val;"
412 NL
413 NL " results[tid] = helperFunction(ptr, val);"
414 NL "}"
415 NL
416 );
417
418 CAdvancedTest test(KERNEL_FUNCTIONS);
419
420 return test.Execute(deviceID, context, queue, num_elements);
421 }
422
test_generic_variable_const(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)423 int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
424 std::vector<std::string> KERNEL_FUNCTIONS;
425
426 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
427 NL
428 NL "bool helperFunction(const float *floatp, float val) {"
429 NL " if (!isFenceValid(get_fence(floatp)))"
430 NL " return false;"
431 NL
432 NL " if (*floatp != val)"
433 NL " return false;"
434 NL
435 NL " return true;"
436 NL "}"
437 NL
438 NL "__kernel void testKernel(__global uint *results) {"
439 NL " uint tid = get_global_id(0);"
440 NL
441 NL " const __private float val = 0.1f;"
442 NL " const float * ptr = &val;"
443 NL
444 NL " results[tid] = helperFunction(ptr, val);"
445 NL "}"
446 NL
447 );
448
449 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
450 NL
451 NL "bool helperFunction(const float *floatp, float val) {"
452 NL " if (!isFenceValid(get_fence(floatp)))"
453 NL " return false;"
454 NL
455 NL " if (*floatp != val)"
456 NL " return false;"
457 NL
458 NL " return true;"
459 NL "}"
460 NL
461 NL "__kernel void testKernel(__global uint *results) {"
462 NL " uint tid = get_global_id(0);"
463 NL
464 NL " const static __global float val = 0.1f;"
465 NL " const float * ptr = &val;"
466 NL
467 NL " results[tid] = helperFunction(ptr, val);"
468 NL "}"
469 NL
470 );
471
472 CAdvancedTest test(KERNEL_FUNCTIONS);
473
474 return test.Execute(deviceID, context, queue, num_elements);
475 }
476
test_generic_variable_gentype(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)477 int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
478 const std::string KERNEL_FUNCTION_TEMPLATE = common::CONFORMANCE_VERIFY_FENCE +
479 NL
480 NL "%s"
481 NL
482 NL "bool helperFunction(const %s *%sp, %s val) {"
483 NL " if (!isFenceValid(get_fence(%sp)))"
484 NL " return false;"
485 NL
486 NL " return %s(*%sp == val);"
487 NL "}"
488 NL
489 NL "__kernel void testKernel(__global uint *results) {"
490 NL " uint tid = get_global_id(0);"
491 NL
492 NL " %s %s val = (%s)1;"
493 NL " %s * ptr = &val;"
494 NL
495 NL " results[tid] = helperFunction(ptr, val);"
496 NL "}"
497 NL;
498 /* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test
499 const std::string KERNEL_FUNCTION_TEMPLATE_HALF = common::CONFORMANCE_VERIFY_FENCE */
500 const std::string vector_sizes[] = { "", "2", "3", "4", "8", "16" };
501 const std::string gentype_base[] = { "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
502 const std::string gentype_others[] = { "bool", "size_t", "ptrdiff_t", "intptr_t", "uintptr_t" };
503
504 const std::string address_spaces[] = { "static __global", "__private" };
505
506 const std::string vector_cmp = "all";
507
508 std::vector<std::string> KERNEL_FUNCTIONS;
509
510 // Add base types plus theirs vector variants
511 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
512 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
513 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
514 char temp_kernel[1024];
515 const std::string fulltype = gentype_base[i] + vector_sizes[j];
516 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
517 "",
518 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
519 (j > 0 ? vector_cmp.c_str() : ""),
520 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
521 fulltype.c_str());
522
523 KERNEL_FUNCTIONS.push_back(temp_kernel);
524 }
525 }
526 }
527
528 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
529
530 // Add double floating types if they are supported
531 if (is_extension_available(deviceID, "cl_khr_fp64")) {
532 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
533 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
534 char temp_kernel[1024];
535 const std::string fulltype = std::string("double") + vector_sizes[j];
536 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
537 cl_khr_fp64_pragma.c_str(),
538 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
539 (j > 0 ? vector_cmp.c_str() : ""),
540 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
541 fulltype.c_str());
542
543 KERNEL_FUNCTIONS.push_back(temp_kernel);
544 }
545 }
546 }
547 /* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test */
548 const std::string cl_khr_fp16_pragma = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
549
550 // Add half floating types if they are supported
551 if (is_extension_available(deviceID, "cl_khr_fp16")) {
552 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
553 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
554 char temp_kernel[1024];
555 const std::string fulltype = std::string("half") + vector_sizes[j];
556 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
557 cl_khr_fp16_pragma.c_str(),
558 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
559 (j > 0 ? vector_cmp.c_str() : ""),
560 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
561 fulltype.c_str());
562 /* Qualcomm fix: end */
563 KERNEL_FUNCTIONS.push_back(temp_kernel);
564 }
565 }
566 }
567
568 // Add other types that do not have vector variants
569 for (size_t i = 0; i < sizeof(gentype_others) / sizeof(gentype_others[0]); i++) {
570 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
571 char temp_kernel[1024];
572 const std::string fulltype = gentype_others[i];
573 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
574 "",
575 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
576 "",
577 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
578 fulltype.c_str());
579
580 KERNEL_FUNCTIONS.push_back(temp_kernel);
581 }
582 }
583
584 CAdvancedTest test(KERNEL_FUNCTIONS);
585
586 return test.Execute(deviceID, context, queue, num_elements);
587 }
588
create_math_kernels(std::vector<std::string> & KERNEL_FUNCTIONS)589 void create_math_kernels(std::vector<std::string>& KERNEL_FUNCTIONS) {
590 const std::string KERNEL_FUNCTION_TEMPLATE =
591 NL
592 NL "__kernel void testKernel(__global uint *results) {"
593 NL " uint tid = get_global_id(0);"
594 NL
595 NL " const %s param1 = %s;"
596 NL " %s param2_generic;"
597 NL " %s param2_reference;"
598 NL " %s * ptr = ¶m2_generic;"
599 NL " %s return_value_generic;"
600 NL " %s return_value_reference;"
601 NL
602 NL " return_value_generic = %s(param1, ptr);"
603 NL " return_value_reference = %s(param1, ¶m2_reference);"
604 NL
605 NL " results[tid] = (%s(*ptr == param2_reference) && %s(return_value_generic == return_value_reference));"
606 NL "}"
607 NL;
608
609 typedef struct {
610 std::string bulitin_name;
611 std::string base_gentype;
612 std::string pointer_gentype;
613 std::string first_param_value;
614 std::string compare_fn;
615 } BuiltinDescriptor;
616
617 BuiltinDescriptor builtins[] = {
618 { "fract", "float", "float", "133.55f", "" },
619 { "frexp", "float2", "int2", "(float2)(24.12f, 99999.7f)", "all" },
620 { "frexp", "float", "int", "1234.5f", "" },
621 { "lgamma_r", "float2", "int2", "(float2)(1000.0f, 9999.5f)", "all" },
622 { "lgamma_r", "float", "int", "1000.0f", "" },
623 { "modf", "float", "float", "1234.56789f", "" },
624 { "sincos", "float", "float", "3.141592f", "" }
625 };
626
627 for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
628 char temp_kernel[1024];
629 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), builtins[i].base_gentype.c_str(), builtins[i].first_param_value.c_str(),
630 builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].base_gentype.c_str(),
631 builtins[i].base_gentype.c_str(), builtins[i].bulitin_name.c_str(), builtins[i].bulitin_name.c_str(),
632 builtins[i].compare_fn.c_str(), builtins[i].compare_fn.c_str());
633
634 KERNEL_FUNCTIONS.push_back(temp_kernel);
635 }
636
637 // add special case for remquo (3 params)
638 KERNEL_FUNCTIONS.push_back(
639 NL
640 NL "__kernel void testKernel(__global uint *results) {"
641 NL " uint tid = get_global_id(0);"
642 NL
643 NL " const float param1 = 1234.56789f;"
644 NL " const float param2 = 123.456789f;"
645 NL " int param3_generic;"
646 NL " int param3_reference;"
647 NL " int * ptr = ¶m3_generic;"
648 NL " float return_value_generic;"
649 NL " float return_value_reference;"
650 NL
651 NL " return_value_generic = remquo(param1, param2, ptr);"
652 NL " return_value_reference = remquo(param1, param2, ¶m3_reference);"
653 NL
654 NL " results[tid] = (*ptr == param3_reference && return_value_generic == return_value_reference);"
655 NL "}"
656 NL
657 );
658 }
659
get_default_data_for_type(const std::string & type)660 std::string get_default_data_for_type(const std::string& type) {
661 std::string result;
662
663 if (type == "float") {
664 for (int i = 0; i < 10; i++) {
665 for (int j = 0; j < 10; j++) {
666 char temp[10];
667 sprintf(temp, "%d.%df, ", i, j);
668 result += std::string(temp);
669 }
670 }
671 }
672
673 else if (type == "double") {
674 for (int i = 0; i < 10; i++) {
675 for (int j = 0; j < 10; j++) {
676 char temp[10];
677 sprintf(temp, "%d.%d, ", i, j);
678 result += std::string(temp);
679 }
680 }
681 }
682
683 else {
684 for (int i = 0; i < 100; i++) {
685 char temp[10];
686 sprintf(temp, "%d, ", i);
687 result += std::string(temp);
688 }
689 }
690
691 return result;
692 }
693
create_vload_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)694 void create_vload_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
695 const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
696 NL
697 NL "%s"
698 NL "__global %s data[] = { %s };"
699 NL
700 NL "__kernel void testKernel(__global uint *results) {"
701 NL " uint tid = get_global_id(0);"
702 NL
703 NL " // Testing: %s"
704 NL " const %s * ptr = data;"
705 NL " %s%s result_generic = vload%s(2, ptr);"
706 NL " %s%s result_reference = vload%s(2, data);"
707 NL
708 NL " results[tid] = all(result_generic == result_reference);"
709 NL "}"
710 NL;
711
712 const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
713 NL
714 NL "%s"
715 NL "__constant %s to_copy_from[] = { %s };"
716 NL
717 NL "__kernel void testKernel(__global uint *results) {"
718 NL " uint tid = get_global_id(0);"
719 NL
720 NL " __local %s data[100];"
721 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
722 NL " data[i] = to_copy_from[i];"
723 NL
724 NL " const %s * ptr = data;"
725 NL " %s%s result_generic = vload%s(2, ptr);"
726 NL " %s%s result_reference = vload%s(2, data);"
727 NL
728 NL " results[tid] = all(result_generic == result_reference);"
729 NL "}"
730 NL;
731
732 const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
733 NL
734 NL "%s"
735 NL "__kernel void testKernel(__global uint *results) {"
736 NL " uint tid = get_global_id(0);"
737 NL
738 NL " %s data[] = { %s };"
739 NL " // Testing: %s"
740 NL " const %s * ptr = data;"
741 NL " %s%s result_generic = vload%s(2, ptr);"
742 NL " %s%s result_reference = vload%s(2, data);"
743 NL
744 NL " results[tid] = all(result_generic == result_reference);"
745 NL "}"
746 NL;
747
748 const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
749 const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
750 const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
751
752 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
753
754 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
755 const char *pragma_str = "";
756
757 if (i == 0) {
758 if (!is_extension_available(deviceID, "cl_khr_fp64"))
759 continue;
760 else
761 pragma_str = cl_khr_fp64_pragma.c_str();
762 }
763
764 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
765 for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
766 char temp_kernel[4098];
767 sprintf(temp_kernel, kernel_variants[k].c_str(),
768 pragma_str,
769 gentype_base[i].c_str(),
770 get_default_data_for_type(gentype_base[i]).c_str(),
771 gentype_base[i].c_str(),
772 gentype_base[i].c_str(),
773 gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str(),
774 gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str()
775 );
776
777 KERNEL_FUNCTIONS.push_back(temp_kernel);
778 }
779 }
780 }
781 }
782
create_vstore_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)783 void create_vstore_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
784 const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
785 NL
786 NL "%s"
787 NL "__global %s data_generic[] = { %s };"
788 NL "__global %s data_reference[] = { %s };"
789 NL
790 NL "__kernel void testKernel(__global uint *results) {"
791 NL " uint tid = get_global_id(0);"
792 NL
793 NL " %s%s input = (%s%s)(1);"
794 NL " %s * ptr = data_generic;"
795 NL
796 NL " vstore%s(input, 2, ptr);"
797 NL " vstore%s(input, 2, data_reference);"
798 NL
799 NL " bool result = true;"
800 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
801 NL " if (data_generic[i] != data_reference[i])"
802 NL " result = false;"
803 NL
804 NL " results[tid] = result;"
805 NL "}"
806 NL;
807
808 const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
809 NL
810 NL "%s"
811 NL "__constant %s to_copy_from[] = { %s };"
812 NL
813 NL "__kernel void testKernel(__global uint *results) {"
814 NL " uint tid = get_global_id(0);"
815 NL
816 NL " __local %s data_generic[100];"
817 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
818 NL " data_generic[i] = to_copy_from[i];"
819 NL
820 NL " __local %s data_reference[100];"
821 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
822 NL " data_reference[i] = to_copy_from[i];"
823 NL
824 NL " %s%s input = (%s%s)(1);"
825 NL " %s * ptr = data_generic;"
826 NL
827 NL " vstore%s(input, 2, ptr);"
828 NL " vstore%s(input, 2, data_reference);"
829 NL
830 NL " work_group_barrier(CLK_LOCAL_MEM_FENCE);"
831 NL
832 NL " bool result = true;"
833 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
834 NL " if (data_generic[i] != data_reference[i])"
835 NL " result = false;"
836 NL
837 NL " results[tid] = result;"
838 NL "}"
839 NL;
840
841 const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
842 NL
843 NL "%s"
844 NL "__kernel void testKernel(__global uint *results) {"
845 NL " uint tid = get_global_id(0);"
846 NL
847 NL " __private %s data_generic[] = { %s };"
848 NL " __private %s data_reference[] = { %s };"
849 NL
850 NL " %s%s input = (%s%s)(1);"
851 NL " %s * ptr = data_generic;"
852 NL
853 NL " vstore%s(input, 2, ptr);"
854 NL " vstore%s(input, 2, data_reference);"
855 NL
856 NL " bool result = true;"
857 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
858 NL " if (data_generic[i] != data_reference[i])"
859 NL " result = false;"
860 NL
861 NL " results[tid] = result;"
862 NL "}"
863 NL;
864
865 const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
866 const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
867 const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
868
869 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
870
871 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
872 const char *pragma_str = "";
873 if (i == 0) {
874 if (!is_extension_available(deviceID, "cl_khr_fp64"))
875 continue;
876 else
877 pragma_str = cl_khr_fp64_pragma.c_str();
878 }
879
880
881 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
882 for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
883 char temp_kernel[4098];
884
885 switch (k) {
886 case 0: // global template
887 case 2: // private template
888 sprintf(temp_kernel, kernel_variants[k].c_str(),
889 pragma_str,
890 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
891 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
892 gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
893 gentype_base[i].c_str(),
894 vector_sizes[j].c_str(),
895 vector_sizes[j].c_str()
896 );
897 break;
898
899 case 1: // local template
900 sprintf(temp_kernel, kernel_variants[k].c_str(),
901 pragma_str,
902 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
903 gentype_base[i].c_str(),
904 gentype_base[i].c_str(),
905 gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
906 gentype_base[i].c_str(),
907 vector_sizes[j].c_str(),
908 vector_sizes[j].c_str()
909 );
910 break;
911 }
912
913 KERNEL_FUNCTIONS.push_back(temp_kernel);
914 }
915 }
916 }
917 }
918
test_builtin_functions(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)919 int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
920 std::vector<std::string> KERNEL_FUNCTIONS;
921
922 create_math_kernels(KERNEL_FUNCTIONS);
923 create_vload_kernels(KERNEL_FUNCTIONS, deviceID);
924 create_vstore_kernels(KERNEL_FUNCTIONS, deviceID);
925
926 CAdvancedTest test(KERNEL_FUNCTIONS);
927
928 return test.Execute(deviceID, context, queue, num_elements);
929 }
930
test_generic_advanced_casting(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)931 int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
932 std::vector<std::string> KERNEL_FUNCTIONS;
933
934 KERNEL_FUNCTIONS.push_back(
935 NL
936 NL "__global char arr[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };"
937 NL
938 NL "__kernel void testKernel(__global uint *results) {"
939 NL " uint tid = get_global_id(0);"
940 NL
941 NL " const int * volatile ptr = (const int *)arr;"
942 NL
943 NL " results[tid] = (ptr[0] == 0x00000000) && (ptr[1] == 0x01010101) && (ptr[2] == 0x02020202) && (ptr[3] == 0x03030303);"
944 NL "}"
945 NL
946 );
947
948 KERNEL_FUNCTIONS.push_back(
949 NL
950 NL "__kernel void testKernel(__global uint *results) {"
951 NL " uint tid = get_global_id(0);"
952 NL
953 NL " __local int i;"
954 NL " i = 0x11112222;"
955 NL " short *ptr = (short *)&i;"
956 NL " local int *lptr = (local int *)ptr;"
957 NL
958 NL " results[tid] = (lptr == &i) && (*lptr == i);"
959 NL "}"
960 NL
961 );
962
963 KERNEL_FUNCTIONS.push_back(
964 NL
965 NL "__kernel void testKernel(__global uint *results) {"
966 NL " uint tid = get_global_id(0);"
967 NL
968 NL " int i = 0x11112222;"
969 NL
970 NL " void *ptr = &i;"
971 NL " int copy = *((int *)ptr);"
972 NL
973 NL " results[tid] = (copy == i);"
974 NL "}"
975 NL
976 );
977
978 CAdvancedTest test(KERNEL_FUNCTIONS);
979
980 return test.Execute(deviceID, context, queue, num_elements);
981 }
982
test_generic_ptr_to_host_mem_svm(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)983 int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
984 cl_int result = CL_SUCCESS;
985
986 /* Test SVM capabilities and select matching tests */
987 cl_device_svm_capabilities caps;
988 auto version = get_device_cl_version(deviceID);
989 auto expected_min_version = Version(2, 0);
990
991 cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, NULL);
992 test_error(error, "clGetDeviceInfo(CL_DEVICE_SVM_CAPABILITIES) failed");
993
994 if ((version < expected_min_version)
995 || (version >= Version(3, 0) && caps == 0))
996 return TEST_SKIPPED_ITSELF;
997
998 if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) {
999 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_COARSE_GRAINED_SVM);
1000 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1001 }
1002
1003 if (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
1004 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_BUFFER_SVM);
1005 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1006 }
1007
1008 if (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) {
1009 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_SYSTEM_SVM);
1010 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1011 }
1012
1013 if (caps & CL_DEVICE_SVM_ATOMICS) {
1014 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_ATOMICS_SVM);
1015 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1016 }
1017
1018 return result;
1019 }
1020
test_generic_ptr_to_host_mem(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1021 int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1022 cl_int result = CL_SUCCESS;
1023
1024 CAdvancedTest test_global_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_HOST_PTR);
1025 result |= test_global_ptr.Execute(deviceID, context, queue, num_elements);
1026
1027 CAdvancedTest test_local_ptr(common::LOCAL_KERNEL_FUNCTION, ARG_TYPE_HOST_LOCAL);
1028 result |= test_local_ptr.Execute(deviceID, context, queue, num_elements / 64);
1029
1030 return result;
1031 }
1032