xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/generic_address_space/advanced_tests.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/typeWrappers.h"
18 #include "base.h"
19 
20 #include <string>
21 #include <vector>
22 #include <algorithm>
23 #include <sstream>
24 
25 typedef enum {
26     ARG_TYPE_NONE,
27 
28     ARG_TYPE_HOST_PTR,
29     ARG_TYPE_HOST_LOCAL,
30 
31     ARG_TYPE_COARSE_GRAINED_SVM,
32     ARG_TYPE_FINE_GRAINED_BUFFER_SVM,
33     ARG_TYPE_FINE_GRAINED_SYSTEM_SVM,
34     ARG_TYPE_ATOMICS_SVM
35 } ExtraKernelArgMemType;
36 
37 class CSVMWrapper {
38 public:
CSVMWrapper()39     CSVMWrapper() : ptr_(NULL), context_(NULL) { }
40 
Attach(cl_context context,void * ptr)41     void Attach(cl_context context, void *ptr) {
42         context_ = context;
43         ptr_ = ptr;
44     }
45 
~CSVMWrapper()46     ~CSVMWrapper() {
47         if (ptr_)
48             clSVMFree(context_, ptr_);
49     }
50 
operator void*()51     operator void *() {
52         return ptr_;
53     }
54 
55 private:
56     void *ptr_;
57     cl_context context_;
58 };
59 
60 class CAdvancedTest : public CTest {
61 public:
CAdvancedTest(const std::vector<std::string> & kernel)62     CAdvancedTest(const std::vector<std::string>& kernel) : CTest(), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
63 
64     }
65 
CAdvancedTest(const std::string & library,const std::vector<std::string> & kernel)66     CAdvancedTest(const std::string& library, const std::vector<std::string>& kernel) : CTest(), _libraryCode(library), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
67 
68     }
69 
CAdvancedTest(const std::string & kernel,ExtraKernelArgMemType argType=ARG_TYPE_NONE)70     CAdvancedTest(const std::string& kernel, ExtraKernelArgMemType argType = ARG_TYPE_NONE) : CTest(), _kernels(1, kernel), _extraKernelArgMemType(argType) {
71 
72     }
73 
CAdvancedTest(const std::string & library,const std::string & kernel)74     CAdvancedTest(const std::string& library, const std::string& kernel) : CTest(), _libraryCode(library), _kernels(1, kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
75 
76     }
77 
PrintCompilationLog(cl_program program,cl_device_id device)78     int PrintCompilationLog(cl_program program, cl_device_id device) {
79         cl_int error;
80         size_t buildLogSize = 0;
81 
82         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
83         test_error(error, "clGetProgramBuildInfo failed");
84 
85         std::string log;
86         log.resize(buildLogSize);
87 
88         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, buildLogSize, &log[0], NULL);
89         test_error(error, "clGetProgramBuildInfo failed");
90 
91         log_error("Build log for device is:\n------------\n");
92         log_error("%s\n", log.c_str() );
93         log_error( "\n----------\n" );
94 
95         return CL_SUCCESS;
96     }
97 
ExecuteSubcase(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,const std::string & src)98     int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) {
99         cl_int error;
100 
101         clProgramWrapper program, preCompiledLibrary, library, finalProgram;
102         clKernelWrapper kernel;
103 
104         const char *srcPtr = src.c_str();
105 
106         if (!_libraryCode.empty()) {
107             program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error);
108             test_error(error, "clCreateProgramWithSource failed");
109 
110             // Use the latest OpenCL-C version supported by the device. This
111             // allows calling code to force a particular CL C version if it is
112             // required, but also means that callers need not specify a version
113             // if they want to assume the most recent CL C.
114 
115             auto version = get_max_OpenCL_C_for_context(context);
116 
117             const char* cl_std = nullptr;
118             if (version >= Version(3, 0))
119             {
120                 cl_std = "-cl-std=CL3.0";
121             }
122             else if (version >= Version(2, 0) && version < Version(3, 0))
123             {
124                 cl_std = "-cl-std=CL2.0";
125             }
126             else
127             {
128                 // If the -cl-std build option is not specified, the highest
129                 // OpenCL C 1.x language version supported by each device is
130                 // used when compiling the program for each device.
131                 cl_std = "";
132             }
133 
134             error = clCompileProgram(program, 1, &deviceID, cl_std, 0, NULL,
135                                      NULL, NULL, NULL);
136 
137             if (error != CL_SUCCESS)
138                 PrintCompilationLog(program, deviceID);
139             test_error(error, "clCompileProgram failed");
140 
141             const char *srcPtrLibrary = _libraryCode.c_str();
142 
143             preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error);
144             test_error(error, "clCreateProgramWithSource failed");
145 
146             error = clCompileProgram(preCompiledLibrary, 1, &deviceID, cl_std,
147                                      0, NULL, NULL, NULL, NULL);
148 
149             if (error != CL_SUCCESS)
150                 PrintCompilationLog(preCompiledLibrary, deviceID);
151             test_error(error, "clCompileProgram failed");
152 
153             library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &preCompiledLibrary, NULL, NULL, &error);
154             test_error(error, "clLinkProgram failed");
155 
156             cl_program objects[] = { program, library };
157             finalProgram = clLinkProgram(context, 1, &deviceID, "", 2, objects, NULL, NULL, &error);
158             test_error(error, "clLinkProgram failed");
159 
160             kernel = clCreateKernel(finalProgram, "testKernel", &error);
161             test_error(error, "clCreateKernel failed");
162         }
163 
164         else {
165             if (create_single_kernel_helper(context, &program, &kernel, 1,
166                                             &srcPtr, "testKernel"))
167             {
168                 log_error("create_single_kernel_helper failed\n");
169                 return -1;
170             }
171         }
172 
173         size_t bufferSize = num_elements * sizeof(cl_uint);
174         clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error);
175         test_error(error, "clCreateBuffer failed");
176 
177         error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
178         test_error(error, "clSetKernelArg(0) failed");
179 
180         // Warning: the order below is very important as SVM buffer cannot be free'd before corresponding mem_object
181         CSVMWrapper svmWrapper;
182         clMemWrapper extraArg;
183         std::vector<cl_uint> extraArgData(num_elements);
184         for (cl_uint i = 0; i < (cl_uint)num_elements; i++)
185             extraArgData[i] = i;
186 
187         if (_extraKernelArgMemType != ARG_TYPE_NONE) {
188             if (_extraKernelArgMemType == ARG_TYPE_HOST_PTR) {
189                 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, &extraArgData[0], &error);
190                 test_error(error, "clCreateBuffer failed");
191             }
192 
193             else {
194                 void *ptr = NULL;
195 
196                 switch (_extraKernelArgMemType) {
197                 case ARG_TYPE_COARSE_GRAINED_SVM:
198                     ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, bufferSize, 0);
199                     break;
200                 case ARG_TYPE_FINE_GRAINED_BUFFER_SVM:
201                     ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE, bufferSize, 0);
202                     break;
203                 case ARG_TYPE_FINE_GRAINED_SYSTEM_SVM:
204                     ptr = &extraArgData[0];
205                     break;
206                 case ARG_TYPE_ATOMICS_SVM:
207                     ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | CL_MEM_READ_WRITE, bufferSize, 0);
208                     break;
209                 default:
210                     break;
211                 }
212 
213                 if(_extraKernelArgMemType != ARG_TYPE_HOST_LOCAL) {
214                   if (!ptr) {
215                     log_error("Allocation failed\n");
216                     return -1;
217                   }
218 
219                   if (_extraKernelArgMemType != ARG_TYPE_FINE_GRAINED_SYSTEM_SVM) {
220                   svmWrapper.Attach(context, ptr);
221                   }
222 
223                   if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
224                     error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, ptr, bufferSize, 0, NULL, NULL);
225                     test_error(error, "clEnqueueSVMMap failed");
226                   }
227 
228                   memcpy(ptr, &extraArgData[0], bufferSize);
229 
230                   if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
231                     error = clEnqueueSVMUnmap(queue, ptr, 0, NULL, NULL);
232                     test_error(error, "clEnqueueSVMUnmap failed");
233                     clFinish(queue);
234                   }
235 
236                   extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, ptr, &error);
237                   test_error(error, "clCreateBuffer from SVM buffer failed");
238                 }
239             }
240 
241             if(_extraKernelArgMemType == ARG_TYPE_HOST_LOCAL)
242               error = clSetKernelArg(kernel, 1, bufferSize, NULL);
243             else
244               error = clSetKernelArg(kernel, 1, sizeof(extraArg), &extraArg);
245 
246 
247             test_error(error, "clSetKernelArg(1) failed");
248         }
249 
250         size_t globalWorkGroupSize = num_elements;
251         size_t localWorkGroupSize = 0;
252         error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize);
253         test_error(error, "Unable to get common work group size");
254 
255         error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL);
256         test_error(error, "clEnqueueNDRangeKernel failed");
257 
258         // verify results
259         std::vector<cl_uint> results(num_elements);
260 
261         error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL);
262         test_error(error, "clEnqueueReadBuffer failed");
263 
264         size_t passCount = std::count(results.begin(), results.end(), 1);
265         if (passCount != results.size()) {
266             std::vector<cl_uint>::iterator iter = std::find(results.begin(), results.end(), 0);
267             log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter));
268             log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size());
269             return -1;
270         }
271 
272         return CL_SUCCESS;
273     }
274 
Execute(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)275     int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
276         cl_int result = CL_SUCCESS;
277 
278         for (std::vector<std::string>::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) {
279             log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size());
280 
281             result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it);
282         }
283 
284         return result;
285     }
286 
287 private:
288     const std::string _libraryCode;
289     const std::vector<std::string> _kernels;
290     const ExtraKernelArgMemType _extraKernelArgMemType;
291 };
292 
test_library_function(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)293 int test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
294     const std::string LIBRARY_FUNCTION = common::CONFORMANCE_VERIFY_FENCE +
295         NL
296         NL "bool helperFunction(float *floatp, float val) {"
297         NL "    if (!isFenceValid(get_fence(floatp)))"
298         NL "        return false;"
299         NL
300         NL "    if (*floatp != val)"
301         NL "        return false;"
302         NL
303         NL "    return true;"
304         NL "}"
305         NL;
306 
307     const std::string KERNEL_FUNCTION = R"OpenCLC(
308 extern bool helperFunction(float *floatp, float val);
309 
310 #ifdef __opencl_c_program_scope_global_variables
311 __global float gfloat = 1.0f;
312 #endif
313 
314 __kernel void testKernel(__global uint *results) {
315     uint tid = get_global_id(0);
316 
317 #ifdef __opencl_c_program_scope_global_variables
318     __global float *gfloatp = &gfloat;
319 #endif
320     __local float lfloat;
321     lfloat = 2.0f;
322     __local float *lfloatp = &lfloat;
323     float pfloat = 3.0f;
324     __private float *pfloatp = &pfloat;
325 
326     uint failures = 0;
327 
328 #ifdef __opencl_c_program_scope_global_variables
329     failures += helperFunction(gfloatp, gfloat) ? 0 : 1;
330 #endif
331     failures += helperFunction(lfloatp, lfloat) ? 0 : 1;
332     failures += helperFunction(pfloatp, pfloat) ? 0 : 1;
333 
334     results[tid] = failures == 0;
335 };
336 )OpenCLC";
337 
338     CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION);
339 
340     return test.Execute(deviceID, context, queue, num_elements);
341 }
342 
test_generic_variable_volatile(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)343 int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
344     std::vector<std::string> KERNEL_FUNCTIONS;
345 
346     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
347         NL
348         NL "bool helperFunction(float *floatp, float val) {"
349         NL "    if (!isFenceValid(get_fence(floatp)))"
350         NL "        return false;"
351         NL
352         NL "    if (*floatp != val)"
353         NL "        return false;"
354         NL
355         NL "    return true;"
356         NL "}"
357         NL
358         NL "__kernel void testKernel(__global uint *results) {"
359         NL "    uint tid = get_global_id(0);"
360         NL
361         NL "    static __global float val;"
362         NL "    val = 0.1f;"
363         NL "    float * volatile ptr = &val;"
364         NL
365         NL "    results[tid] = helperFunction(ptr, val);"
366         NL "}"
367         NL
368     );
369 
370     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
371         NL
372         NL "bool helperFunction(float *floatp, float val) {"
373         NL "    if (!isFenceValid(get_fence(floatp)))"
374         NL "        return false;"
375         NL
376         NL "    if (*floatp != val)"
377         NL "        return false;"
378         NL
379         NL "    return true;"
380         NL "}"
381         NL
382         NL "__kernel void testKernel(__global uint *results) {"
383         NL "    uint tid = get_global_id(0);"
384         NL
385         NL "    __local float val;"
386         NL "    val = 0.1f;"
387         NL "    float * ptr = &val;"
388         NL
389         NL "    results[tid] = helperFunction(ptr, val);"
390         NL "}"
391         NL
392     );
393 
394     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
395         NL
396         NL "bool helperFunction(float *floatp, float val) {"
397         NL "    if (!isFenceValid(get_fence(floatp)))"
398         NL "        return false;"
399         NL
400         NL "    if (*floatp != val)"
401         NL "        return false;"
402         NL
403         NL "    return true;"
404         NL "}"
405         NL
406         NL "__kernel void testKernel(__global uint *results) {"
407         NL "    uint tid = get_global_id(0);"
408         NL
409         NL "    __private float val;"
410         NL "    val = 0.1f;"
411         NL "    float * volatile ptr = &val;"
412         NL
413         NL "    results[tid] = helperFunction(ptr, val);"
414         NL "}"
415         NL
416     );
417 
418     CAdvancedTest test(KERNEL_FUNCTIONS);
419 
420     return test.Execute(deviceID, context, queue, num_elements);
421 }
422 
test_generic_variable_const(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)423 int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
424     std::vector<std::string> KERNEL_FUNCTIONS;
425 
426     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
427         NL
428         NL "bool helperFunction(const float *floatp, float val) {"
429         NL "    if (!isFenceValid(get_fence(floatp)))"
430         NL "        return false;"
431         NL
432         NL "    if (*floatp != val)"
433         NL "        return false;"
434         NL
435         NL "    return true;"
436         NL "}"
437         NL
438         NL "__kernel void testKernel(__global uint *results) {"
439         NL "    uint tid = get_global_id(0);"
440         NL
441         NL "    const __private float val = 0.1f;"
442         NL "    const float * ptr = &val;"
443         NL
444         NL "    results[tid] = helperFunction(ptr, val);"
445         NL "}"
446         NL
447     );
448 
449     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
450         NL
451         NL "bool helperFunction(const float *floatp, float val) {"
452         NL "    if (!isFenceValid(get_fence(floatp)))"
453         NL "        return false;"
454         NL
455         NL "    if (*floatp != val)"
456         NL "        return false;"
457         NL
458         NL "    return true;"
459         NL "}"
460         NL
461         NL "__kernel void testKernel(__global uint *results) {"
462         NL "    uint tid = get_global_id(0);"
463         NL
464         NL "    const static __global float val = 0.1f;"
465         NL "    const float * ptr = &val;"
466         NL
467         NL "    results[tid] = helperFunction(ptr, val);"
468         NL "}"
469         NL
470     );
471 
472     CAdvancedTest test(KERNEL_FUNCTIONS);
473 
474     return test.Execute(deviceID, context, queue, num_elements);
475 }
476 
test_generic_variable_gentype(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)477 int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
478     const std::string KERNEL_FUNCTION_TEMPLATE = common::CONFORMANCE_VERIFY_FENCE +
479         NL
480         NL "%s"
481         NL
482         NL "bool helperFunction(const %s *%sp, %s val) {"
483         NL "    if (!isFenceValid(get_fence(%sp)))"
484         NL "        return false;"
485         NL
486         NL "    return %s(*%sp == val);"
487         NL "}"
488         NL
489         NL "__kernel void testKernel(__global uint *results) {"
490         NL "    uint tid = get_global_id(0);"
491         NL
492         NL "    %s %s val = (%s)1;"
493         NL "    %s * ptr = &val;"
494         NL
495         NL "    results[tid] = helperFunction(ptr, val);"
496         NL "}"
497         NL;
498 /* Qualcomm fix: 12502  Gen Addr Space - Fix kernel for generic variable gentype (half) test
499    const std::string KERNEL_FUNCTION_TEMPLATE_HALF = common::CONFORMANCE_VERIFY_FENCE */
500     const std::string vector_sizes[] = { "", "2", "3", "4", "8", "16" };
501     const std::string gentype_base[] = { "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
502     const std::string gentype_others[] = { "bool", "size_t", "ptrdiff_t", "intptr_t", "uintptr_t" };
503 
504     const std::string address_spaces[] = { "static __global", "__private" };
505 
506     const std::string vector_cmp = "all";
507 
508     std::vector<std::string> KERNEL_FUNCTIONS;
509 
510     // Add base types plus theirs vector variants
511     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
512         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
513             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
514                 char temp_kernel[1024];
515                 const std::string fulltype = gentype_base[i] + vector_sizes[j];
516                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
517                     "",
518                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
519                     (j > 0 ? vector_cmp.c_str() : ""),
520                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
521                     fulltype.c_str());
522 
523                 KERNEL_FUNCTIONS.push_back(temp_kernel);
524             }
525         }
526     }
527 
528     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
529 
530     // Add double floating types if they are supported
531     if (is_extension_available(deviceID, "cl_khr_fp64")) {
532         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
533             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
534                 char temp_kernel[1024];
535                 const std::string fulltype = std::string("double") + vector_sizes[j];
536                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
537                     cl_khr_fp64_pragma.c_str(),
538                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
539                     (j > 0 ? vector_cmp.c_str() : ""),
540                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
541                     fulltype.c_str());
542 
543                 KERNEL_FUNCTIONS.push_back(temp_kernel);
544             }
545         }
546     }
547 /* Qualcomm fix: 12502  Gen Addr Space - Fix kernel for generic variable gentype (half) test */
548     const std::string cl_khr_fp16_pragma = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
549 
550     // Add half floating types if they are supported
551     if (is_extension_available(deviceID, "cl_khr_fp16")) {
552         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
553             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
554                 char temp_kernel[1024];
555                 const std::string fulltype = std::string("half") + vector_sizes[j];
556                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
557                     cl_khr_fp16_pragma.c_str(),
558                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
559                     (j > 0 ? vector_cmp.c_str() : ""),
560                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
561                     fulltype.c_str());
562 /* Qualcomm fix: end */
563                 KERNEL_FUNCTIONS.push_back(temp_kernel);
564             }
565         }
566     }
567 
568     // Add other types that do not have vector variants
569     for (size_t i = 0; i < sizeof(gentype_others) / sizeof(gentype_others[0]); i++) {
570         for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
571             char temp_kernel[1024];
572             const std::string fulltype = gentype_others[i];
573             sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
574                 "",
575                 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
576                 "",
577                 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
578                 fulltype.c_str());
579 
580             KERNEL_FUNCTIONS.push_back(temp_kernel);
581         }
582     }
583 
584     CAdvancedTest test(KERNEL_FUNCTIONS);
585 
586     return test.Execute(deviceID, context, queue, num_elements);
587 }
588 
create_math_kernels(std::vector<std::string> & KERNEL_FUNCTIONS)589 void create_math_kernels(std::vector<std::string>& KERNEL_FUNCTIONS) {
590     const std::string KERNEL_FUNCTION_TEMPLATE =
591         NL
592         NL "__kernel void testKernel(__global uint *results) {"
593         NL "    uint tid = get_global_id(0);"
594         NL
595         NL "    const %s param1 = %s;"
596         NL "    %s param2_generic;"
597         NL "    %s param2_reference;"
598         NL "    %s * ptr = &param2_generic;"
599         NL "    %s return_value_generic;"
600         NL "    %s return_value_reference;"
601         NL
602         NL "    return_value_generic = %s(param1, ptr);"
603         NL "    return_value_reference = %s(param1, &param2_reference);"
604         NL
605         NL "    results[tid] = (%s(*ptr == param2_reference) && %s(return_value_generic == return_value_reference));"
606         NL "}"
607         NL;
608 
609     typedef struct {
610         std::string bulitin_name;
611         std::string base_gentype;
612         std::string pointer_gentype;
613         std::string first_param_value;
614         std::string compare_fn;
615     } BuiltinDescriptor;
616 
617     BuiltinDescriptor builtins[] = {
618         { "fract", "float", "float", "133.55f", "" },
619         { "frexp", "float2", "int2", "(float2)(24.12f, 99999.7f)", "all" },
620         { "frexp", "float", "int", "1234.5f", "" },
621         { "lgamma_r", "float2", "int2", "(float2)(1000.0f, 9999.5f)", "all" },
622         { "lgamma_r", "float", "int", "1000.0f", "" },
623         { "modf", "float", "float", "1234.56789f", "" },
624         { "sincos", "float", "float", "3.141592f", "" }
625     };
626 
627     for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
628         char temp_kernel[1024];
629         sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), builtins[i].base_gentype.c_str(), builtins[i].first_param_value.c_str(),
630             builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].base_gentype.c_str(),
631             builtins[i].base_gentype.c_str(), builtins[i].bulitin_name.c_str(), builtins[i].bulitin_name.c_str(),
632             builtins[i].compare_fn.c_str(), builtins[i].compare_fn.c_str());
633 
634         KERNEL_FUNCTIONS.push_back(temp_kernel);
635     }
636 
637     // add special case for remquo (3 params)
638     KERNEL_FUNCTIONS.push_back(
639         NL
640         NL "__kernel void testKernel(__global uint *results) {"
641         NL "    uint tid = get_global_id(0);"
642         NL
643         NL "    const float param1 = 1234.56789f;"
644         NL "    const float param2 = 123.456789f;"
645         NL "    int param3_generic;"
646         NL "    int param3_reference;"
647         NL "    int * ptr = &param3_generic;"
648         NL "    float return_value_generic;"
649         NL "    float return_value_reference;"
650         NL
651         NL "    return_value_generic = remquo(param1, param2, ptr);"
652         NL "    return_value_reference = remquo(param1, param2, &param3_reference);"
653         NL
654         NL "    results[tid] = (*ptr == param3_reference && return_value_generic == return_value_reference);"
655         NL "}"
656         NL
657     );
658 }
659 
get_default_data_for_type(const std::string & type)660 std::string get_default_data_for_type(const std::string& type) {
661     std::string result;
662 
663     if (type == "float") {
664         for (int i = 0; i < 10; i++) {
665             for (int j = 0; j < 10; j++) {
666                 char temp[10];
667                 sprintf(temp, "%d.%df, ", i, j);
668                 result += std::string(temp);
669             }
670         }
671     }
672 
673     else if (type == "double") {
674         for (int i = 0; i < 10; i++) {
675             for (int j = 0; j < 10; j++) {
676                 char temp[10];
677                 sprintf(temp, "%d.%d, ", i, j);
678                 result += std::string(temp);
679             }
680         }
681     }
682 
683     else {
684         for (int i = 0; i < 100; i++) {
685             char temp[10];
686             sprintf(temp, "%d, ", i);
687             result += std::string(temp);
688         }
689     }
690 
691     return result;
692 }
693 
create_vload_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)694 void create_vload_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
695     const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
696         NL
697         NL "%s"
698         NL "__global %s data[] = { %s };"
699         NL
700         NL "__kernel void testKernel(__global uint *results) {"
701         NL "    uint tid = get_global_id(0);"
702         NL
703         NL "    // Testing: %s"
704         NL "    const %s * ptr = data;"
705         NL "    %s%s result_generic = vload%s(2, ptr);"
706         NL "    %s%s result_reference = vload%s(2, data);"
707         NL
708         NL "    results[tid] = all(result_generic == result_reference);"
709         NL "}"
710         NL;
711 
712     const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
713         NL
714         NL "%s"
715         NL "__constant %s to_copy_from[] = { %s };"
716         NL
717         NL "__kernel void testKernel(__global uint *results) {"
718         NL "    uint tid = get_global_id(0);"
719         NL
720         NL "    __local %s data[100];"
721         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
722         NL "        data[i] = to_copy_from[i];"
723         NL
724         NL "    const %s * ptr = data;"
725         NL "    %s%s result_generic = vload%s(2, ptr);"
726         NL "    %s%s result_reference = vload%s(2, data);"
727         NL
728         NL "    results[tid] = all(result_generic == result_reference);"
729         NL "}"
730         NL;
731 
732     const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
733         NL
734         NL "%s"
735         NL "__kernel void testKernel(__global uint *results) {"
736         NL "    uint tid = get_global_id(0);"
737         NL
738         NL "    %s data[] = { %s };"
739         NL "    // Testing: %s"
740         NL "    const %s * ptr = data;"
741         NL "    %s%s result_generic = vload%s(2, ptr);"
742         NL "    %s%s result_reference = vload%s(2, data);"
743         NL
744         NL "    results[tid] = all(result_generic == result_reference);"
745         NL "}"
746         NL;
747 
748     const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
749     const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
750     const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
751 
752     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
753 
754     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
755         const char *pragma_str = "";
756 
757         if (i == 0) {
758             if (!is_extension_available(deviceID, "cl_khr_fp64"))
759                 continue;
760             else
761                 pragma_str = cl_khr_fp64_pragma.c_str();
762         }
763 
764         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
765             for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
766                 char temp_kernel[4098];
767                 sprintf(temp_kernel, kernel_variants[k].c_str(),
768                     pragma_str,
769                     gentype_base[i].c_str(),
770                     get_default_data_for_type(gentype_base[i]).c_str(),
771                     gentype_base[i].c_str(),
772                     gentype_base[i].c_str(),
773                     gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str(),
774                     gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str()
775                 );
776 
777                 KERNEL_FUNCTIONS.push_back(temp_kernel);
778             }
779         }
780     }
781 }
782 
create_vstore_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)783 void create_vstore_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
784     const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
785         NL
786         NL "%s"
787         NL "__global %s data_generic[] = { %s };"
788         NL "__global %s data_reference[] = { %s };"
789         NL
790         NL "__kernel void testKernel(__global uint *results) {"
791         NL "    uint tid = get_global_id(0);"
792         NL
793         NL "    %s%s input = (%s%s)(1);"
794         NL "    %s * ptr = data_generic;"
795         NL
796         NL "    vstore%s(input, 2, ptr);"
797         NL "    vstore%s(input, 2, data_reference);"
798         NL
799         NL "    bool result = true;"
800         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
801         NL "        if (data_generic[i] != data_reference[i])"
802         NL "            result = false;"
803         NL
804         NL "    results[tid] = result;"
805         NL "}"
806         NL;
807 
808     const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
809         NL
810         NL "%s"
811         NL "__constant %s to_copy_from[] = { %s };"
812         NL
813         NL "__kernel void testKernel(__global uint *results) {"
814         NL "    uint tid = get_global_id(0);"
815         NL
816         NL "    __local %s data_generic[100];"
817         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
818         NL "        data_generic[i] = to_copy_from[i];"
819         NL
820         NL "    __local %s data_reference[100];"
821         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
822         NL "        data_reference[i] = to_copy_from[i];"
823         NL
824         NL "    %s%s input = (%s%s)(1);"
825         NL "    %s * ptr = data_generic;"
826         NL
827         NL "    vstore%s(input, 2, ptr);"
828         NL "    vstore%s(input, 2, data_reference);"
829         NL
830         NL "    work_group_barrier(CLK_LOCAL_MEM_FENCE);"
831         NL
832         NL "    bool result = true;"
833         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
834         NL "        if (data_generic[i] != data_reference[i])"
835         NL "            result = false;"
836         NL
837         NL "    results[tid] = result;"
838         NL "}"
839         NL;
840 
841     const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
842         NL
843         NL "%s"
844         NL "__kernel void testKernel(__global uint *results) {"
845         NL "    uint tid = get_global_id(0);"
846         NL
847         NL "    __private %s data_generic[] = { %s };"
848         NL "    __private %s data_reference[] = { %s };"
849         NL
850         NL "    %s%s input = (%s%s)(1);"
851         NL "    %s * ptr = data_generic;"
852         NL
853         NL "    vstore%s(input, 2, ptr);"
854         NL "    vstore%s(input, 2, data_reference);"
855         NL
856         NL "    bool result = true;"
857         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
858         NL "        if (data_generic[i] != data_reference[i])"
859         NL "            result = false;"
860         NL
861         NL "    results[tid] = result;"
862         NL "}"
863         NL;
864 
865     const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
866     const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
867     const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
868 
869     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
870 
871     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
872         const char *pragma_str = "";
873         if (i == 0) {
874             if (!is_extension_available(deviceID, "cl_khr_fp64"))
875                 continue;
876             else
877                 pragma_str = cl_khr_fp64_pragma.c_str();
878         }
879 
880 
881         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
882             for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
883                 char temp_kernel[4098];
884 
885                 switch (k) {
886                     case 0: // global template
887                     case 2: // private template
888                         sprintf(temp_kernel, kernel_variants[k].c_str(),
889                             pragma_str,
890                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
891                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
892                             gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
893                             gentype_base[i].c_str(),
894                             vector_sizes[j].c_str(),
895                             vector_sizes[j].c_str()
896                         );
897                         break;
898 
899                     case 1: // local template
900                         sprintf(temp_kernel, kernel_variants[k].c_str(),
901                             pragma_str,
902                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
903                             gentype_base[i].c_str(),
904                             gentype_base[i].c_str(),
905                             gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
906                             gentype_base[i].c_str(),
907                             vector_sizes[j].c_str(),
908                             vector_sizes[j].c_str()
909                         );
910                         break;
911                 }
912 
913                 KERNEL_FUNCTIONS.push_back(temp_kernel);
914             }
915         }
916     }
917 }
918 
test_builtin_functions(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)919 int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
920     std::vector<std::string> KERNEL_FUNCTIONS;
921 
922     create_math_kernels(KERNEL_FUNCTIONS);
923     create_vload_kernels(KERNEL_FUNCTIONS, deviceID);
924     create_vstore_kernels(KERNEL_FUNCTIONS, deviceID);
925 
926     CAdvancedTest test(KERNEL_FUNCTIONS);
927 
928     return test.Execute(deviceID, context, queue, num_elements);
929 }
930 
test_generic_advanced_casting(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)931 int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
932     std::vector<std::string> KERNEL_FUNCTIONS;
933 
934     KERNEL_FUNCTIONS.push_back(
935         NL
936         NL "__global char arr[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };"
937         NL
938         NL "__kernel void testKernel(__global uint *results) {"
939         NL "    uint tid = get_global_id(0);"
940         NL
941         NL "    const int * volatile ptr = (const int *)arr;"
942         NL
943         NL "    results[tid] = (ptr[0] == 0x00000000) && (ptr[1] == 0x01010101) && (ptr[2] == 0x02020202) && (ptr[3] == 0x03030303);"
944         NL "}"
945         NL
946     );
947 
948     KERNEL_FUNCTIONS.push_back(
949         NL
950         NL "__kernel void testKernel(__global uint *results) {"
951         NL "    uint tid = get_global_id(0);"
952         NL
953         NL "    __local int i;"
954         NL "    i = 0x11112222;"
955         NL "    short *ptr = (short *)&i;"
956         NL "    local int *lptr = (local int *)ptr;"
957         NL
958         NL "    results[tid] = (lptr == &i) && (*lptr == i);"
959         NL "}"
960         NL
961     );
962 
963     KERNEL_FUNCTIONS.push_back(
964         NL
965         NL "__kernel void testKernel(__global uint *results) {"
966         NL "    uint tid = get_global_id(0);"
967         NL
968         NL "    int i = 0x11112222;"
969         NL
970         NL "    void *ptr = &i;"
971         NL "    int copy = *((int *)ptr);"
972         NL
973         NL "    results[tid] = (copy == i);"
974         NL "}"
975         NL
976     );
977 
978     CAdvancedTest test(KERNEL_FUNCTIONS);
979 
980     return test.Execute(deviceID, context, queue, num_elements);
981 }
982 
test_generic_ptr_to_host_mem_svm(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)983 int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
984     cl_int result = CL_SUCCESS;
985 
986     /* Test SVM capabilities and select matching tests */
987     cl_device_svm_capabilities caps;
988     auto version = get_device_cl_version(deviceID);
989     auto expected_min_version = Version(2, 0);
990 
991     cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, NULL);
992     test_error(error, "clGetDeviceInfo(CL_DEVICE_SVM_CAPABILITIES) failed");
993 
994     if ((version < expected_min_version)
995         || (version >= Version(3, 0) && caps == 0))
996         return TEST_SKIPPED_ITSELF;
997 
998     if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) {
999         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_COARSE_GRAINED_SVM);
1000         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1001     }
1002 
1003     if (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
1004         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_BUFFER_SVM);
1005         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1006     }
1007 
1008     if (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) {
1009         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_SYSTEM_SVM);
1010         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1011     }
1012 
1013     if (caps & CL_DEVICE_SVM_ATOMICS) {
1014         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_ATOMICS_SVM);
1015         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
1016     }
1017 
1018     return result;
1019 }
1020 
test_generic_ptr_to_host_mem(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1021 int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1022     cl_int result = CL_SUCCESS;
1023 
1024     CAdvancedTest test_global_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_HOST_PTR);
1025     result |= test_global_ptr.Execute(deviceID, context, queue, num_elements);
1026 
1027     CAdvancedTest test_local_ptr(common::LOCAL_KERNEL_FUNCTION, ARG_TYPE_HOST_LOCAL);
1028     result |= test_local_ptr.Execute(deviceID, context, queue, num_elements / 64);
1029 
1030     return result;
1031 }
1032