xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_kernel_memory_alignment.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef _WIN32
17 #include <unistd.h>
18 #endif
19 
20 #include "procs.h"
21 #include "harness/conversions.h"
22 #include "harness/typeWrappers.h"
23 #include "harness/errorHelpers.h"
24 
25 // For global, local, and constant
26 const char *parameter_kernel_long =
27 "%s\n" // optional pragma
28 "kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
29 "{\n"
30 "   results[0] = (ulong)&mem0[0];\n"
31 "   results[1] = (ulong)&mem2[0];\n"
32 "   results[2] = (ulong)&mem3[0];\n"
33 "   results[3] = (ulong)&mem4[0];\n"
34 "   results[4] = (ulong)&mem8[0];\n"
35 "   results[5] = (ulong)&mem16[0];\n"
36 "}\n";
37 
38 // For private and local
39 const char *local_kernel_long =
40 "%s\n" // optional pragma
41 "kernel void test(global ulong *results)\n"
42 "{\n"
43 "   %s %s mem0[3];\n"
44 "   %s %s2 mem2[3];\n"
45 "   %s %s3 mem3[3];\n"
46 "   %s %s4 mem4[3];\n"
47 "   %s %s8 mem8[3];\n"
48 "   %s %s16 mem16[3];\n"
49 "   results[0] = (ulong)&mem0[0];\n"
50 "   results[1] = (ulong)&mem2[0];\n"
51 "   results[2] = (ulong)&mem3[0];\n"
52 "   results[3] = (ulong)&mem4[0];\n"
53 "   results[4] = (ulong)&mem8[0];\n"
54 "   results[5] = (ulong)&mem16[0];\n"
55 "}\n";
56 
57 // For constant
58 const char *constant_kernel_long =
59 "%s\n" // optional pragma
60 "  constant %s mem0[3]    = {0};\n"
61 "  constant %s2 mem2[3]   = {(%s2)(0)};\n"
62 "  constant %s3 mem3[3]   = {(%s3)(0)};\n"
63 "  constant %s4 mem4[3]   = {(%s4)(0)};\n"
64 "  constant %s8 mem8[3]   = {(%s8)(0)};\n"
65 "  constant %s16 mem16[3] = {(%s16)(0)};\n"
66 "\n"
67 "kernel void test(global ulong *results)\n"
68 "{\n"
69 "   results[0] = (ulong)&mem0;\n"
70 "   results[1] = (ulong)&mem2;\n"
71 "   results[2] = (ulong)&mem3;\n"
72 "   results[3] = (ulong)&mem4;\n"
73 "   results[4] = (ulong)&mem8;\n"
74 "   results[5] = (ulong)&mem16;\n"
75 "}\n";
76 
77 
78 // For global, local, and constant
79 const char *parameter_kernel_no_long =
80 "%s\n" // optional pragma
81 "kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
82 "{\n"
83 "   results[0] = (uint)&mem0[0];\n"
84 "   results[1] = (uint)&mem2[0];\n"
85 "   results[2] = (uint)&mem3[0];\n"
86 "   results[3] = (uint)&mem4[0];\n"
87 "   results[4] = (uint)&mem8[0];\n"
88 "   results[5] = (uint)&mem16[0];\n"
89 "}\n";
90 
91 // For private and local
92 const char *local_kernel_no_long =
93 "%s\n" // optional pragma
94 "kernel void test(global uint *results)\n"
95 "{\n"
96 "   %s %s mem0[3];\n"
97 "   %s %s2 mem2[3];\n"
98 "   %s %s3 mem3[3];\n"
99 "   %s %s4 mem4[3];\n"
100 "   %s %s8 mem8[3];\n"
101 "   %s %s16 mem16[3];\n"
102 "   results[0] = (uint)&mem0[0];\n"
103 "   results[1] = (uint)&mem2[0];\n"
104 "   results[2] = (uint)&mem3[0];\n"
105 "   results[3] = (uint)&mem4[0];\n"
106 "   results[4] = (uint)&mem8[0];\n"
107 "   results[5] = (uint)&mem16[0];\n"
108 "}\n";
109 
110 // For constant
111 const char *constant_kernel_no_long =
112 "%s\n" // optional pragma
113 "  constant %s mem0[3]    = {0};\n"
114 "  constant %s2 mem2[3]   = {(%s2)(0)};\n"
115 "  constant %s3 mem3[3]   = {(%s3)(0)};\n"
116 "  constant %s4 mem4[3]   = {(%s4)(0)};\n"
117 "  constant %s8 mem8[3]   = {(%s8)(0)};\n"
118 "  constant %s16 mem16[3] = {(%s16)(0)};\n"
119 "\n"
120 "kernel void test(global uint *results)\n"
121 "{\n"
122 "   results[0] = (uint)&mem0;\n"
123 "   results[1] = (uint)&mem2;\n"
124 "   results[2] = (uint)&mem3;\n"
125 "   results[3] = (uint)&mem4;\n"
126 "   results[4] = (uint)&mem8;\n"
127 "   results[5] = (uint)&mem16;\n"
128 "}\n";
129 
130 enum AddressSpaces
131 {
132     kGlobal        = 0,
133     kLocal,
134     kConstant,
135     kPrivate
136 };
137 
138 typedef enum AddressSpaces    AddressSpaces;
139 
140 #define DEBUG 0
141 
get_explicit_address_name(AddressSpaces address)142 const char * get_explicit_address_name( AddressSpaces address )
143 {
144     /* Quick method to avoid branching: make sure the following array matches the Enum order */
145     static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private"};
146 
147     return sExplicitAddressNames[ address ];
148 }
149 
150 
test_kernel_memory_alignment(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,AddressSpaces address)151 int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address )
152 {
153     const char *constant_kernel;
154     const char *parameter_kernel;
155     const char *local_kernel;
156 
157     if ( gHasLong )
158     {
159         constant_kernel  = constant_kernel_long;
160         parameter_kernel = parameter_kernel_long;
161         local_kernel     = local_kernel_long;
162     }
163     else
164     {
165         constant_kernel  = constant_kernel_no_long;
166         parameter_kernel = parameter_kernel_no_long;
167         local_kernel     = local_kernel_no_long;
168     }
169 
170     ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
171     char *kernel_code = (char*)malloc(4096);
172     cl_kernel kernel;
173     cl_program program;
174     int error;
175     int total_errors = 0;
176     cl_mem results;
177     cl_ulong *results_data;
178     cl_mem mem0, mem2, mem3, mem4, mem8, mem16;
179 
180     results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6);
181     results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error);
182     test_error(error, "clCreateBuffer failed");
183 
184     mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error);
185     test_error(error, "clCreateBuffer failed");
186     mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error);
187     test_error(error, "clCreateBuffer failed");
188     mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
189     test_error(error, "clCreateBuffer failed");
190     mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
191     test_error(error, "clCreateBuffer failed");
192     mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error);
193     test_error(error, "clCreateBuffer failed");
194     mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error);
195     test_error(error, "clCreateBuffer failed");
196 
197 
198     // For each type
199 
200     // Calculate alignment mask for each size
201 
202     // For global, local, constant, private
203 
204     // If global, local or constant -- do parameter_kernel
205     // If private or local -- do local_kernel
206     // If constant -- do constant kernel
207 
208     int numConstantArgs;
209     clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL);
210 
211     int typeIndex;
212     for (typeIndex = 0; typeIndex < 10; typeIndex++) {
213         // Skip double tests if we don't support doubles
214         if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) {
215             log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
216             continue;
217         }
218 
219         if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
220             continue;
221 
222         log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex]));
223 
224         // Determine the expected alignment masks.
225         // E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011
226         // We can then and the returned address with that and we should have 0.
227         cl_ulong alignments[6];
228         alignments[0] = get_explicit_type_size(vecType[typeIndex])-1;
229         alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1;
230         alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
231         alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
232         alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1;
233         alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1;
234 
235         // Parameter kernel
236         if (address == kGlobal || address == kLocal || address == kConstant) {
237             log_info("\tTesting parameter kernel...\n");
238 
239             if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) {
240                 sprintf(kernel_code, parameter_kernel,
241                     vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
242                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
243                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
244                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
245                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
246                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
247                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
248                 );
249             }
250             else {
251                 sprintf(kernel_code, parameter_kernel,
252                     vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
253                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
254                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
255                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
256                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
257                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
258                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
259                 );
260             }
261             //printf("Kernel is: \n%s\n", kernel_code);
262 
263             // Create the kernel
264             error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
265             test_error(error, "create_single_kernel_helper failed");
266 
267             // Initialize the results
268             memset(results_data, 0, sizeof(cl_long)*5);
269             error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL);
270             test_error(error, "clEnqueueWriteBuffer failed");
271 
272             // Set the arguments
273             error = clSetKernelArg(kernel, 0, sizeof(results), &results);
274             test_error(error, "clSetKernelArg failed");
275             if (address != kLocal) {
276                 error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0);
277                 test_error(error, "clSetKernelArg failed");
278                 error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2);
279                 test_error(error, "clSetKernelArg failed");
280                 error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3);
281                 test_error(error, "clSetKernelArg failed");
282                 error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4);
283                 test_error(error, "clSetKernelArg failed");
284                 error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8);
285                 test_error(error, "clSetKernelArg failed");
286                 error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16);
287                 test_error(error, "clSetKernelArg failed");
288             } else {
289                 error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL);
290                 test_error(error, "clSetKernelArg failed");
291                 error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL);
292                 test_error(error, "clSetKernelArg failed");
293                 error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL);
294                 test_error(error, "clSetKernelArg failed");
295                 error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL);
296                 test_error(error, "clSetKernelArg failed");
297                 error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL);
298                 test_error(error, "clSetKernelArg failed");
299                 error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL);
300                 test_error(error, "clSetKernelArg failed");
301             }
302 
303             // Enqueue the kernel
304             size_t global_size = 1;
305             error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
306             test_error(error, "clEnqueueNDRangeKernel failed");
307 
308             // Read back the results
309             error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL);
310             test_error(error, "clEnqueueReadBuffer failed");
311 
312             // Verify the results
313             if (gHasLong) {
314                 for (int i = 0; i < 6; i++) {
315                     if ((results_data[i] & alignments[i]) != 0) {
316                         total_errors++;
317                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
318                     } else {
319                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
320                     }
321                 }
322             }
323             // Verify the results on devices that do not support longs
324             else {
325                 cl_uint *results_data_no_long = (cl_uint *)results_data;
326 
327                 for (int i = 0; i < 6; i++) {
328                     if ((results_data_no_long[i] & alignments[i]) != 0) {
329                         total_errors++;
330                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
331                     } else {
332                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
333                     }
334                 }
335             }
336 
337             clReleaseKernel(kernel);
338             clReleaseProgram(program);
339         }
340 
341 
342 
343 
344         // Local kernel
345         if (address == kLocal || address == kPrivate) {
346             log_info("\tTesting local kernel...\n");
347             sprintf(kernel_code, local_kernel,
348                     vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
349                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
350                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
351                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
352                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
353                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
354                     get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
355                     );
356             //printf("Kernel is: \n%s\n", kernel_code);
357 
358             // Create the kernel
359             error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
360             test_error(error, "create_single_kernel_helper failed");
361 
362             // Initialize the results
363             memset(results_data, 0, sizeof(cl_long)*5);
364             error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
365             test_error(error, "clEnqueueWriteBuffer failed");
366 
367             // Set the arguments
368             error = clSetKernelArg(kernel, 0, sizeof(results), &results);
369             test_error(error, "clSetKernelArg failed");
370 
371             // Enqueue the kernel
372             size_t global_size = 1;
373             error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
374             test_error(error, "clEnqueueNDRangeKernel failed");
375 
376             // Read back the results
377             error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
378             test_error(error, "clEnqueueReadBuffer failed");
379 
380             // Verify the results
381             if (gHasLong) {
382                 for (int i = 0; i < 5; i++) {
383                     if ((results_data[i] & alignments[i]) != 0) {
384                         total_errors++;
385                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
386                     } else {
387                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
388                     }
389                 }
390             }
391             // Verify the results on devices that do not support longs
392             else {
393                 cl_uint *results_data_no_long = (cl_uint *)results_data;
394 
395                 for (int i = 0; i < 5; i++) {
396                     if ((results_data_no_long[i] & alignments[i]) != 0) {
397                         total_errors++;
398                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
399                     } else {
400                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
401                     }
402                 }
403             }
404             clReleaseKernel(kernel);
405             clReleaseProgram(program);
406         }
407 
408 
409 
410         // Constant kernel
411         if (address == kConstant) {
412             log_info("\tTesting constant kernel...\n");
413             sprintf(kernel_code, constant_kernel,
414                     vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
415                     get_explicit_type_name(vecType[typeIndex]),
416                     get_explicit_type_name(vecType[typeIndex]),
417                     get_explicit_type_name(vecType[typeIndex]),
418                     get_explicit_type_name(vecType[typeIndex]),
419                     get_explicit_type_name(vecType[typeIndex]),
420                     get_explicit_type_name(vecType[typeIndex]),
421                     get_explicit_type_name(vecType[typeIndex]),
422                     get_explicit_type_name(vecType[typeIndex]),
423                     get_explicit_type_name(vecType[typeIndex]),
424                     get_explicit_type_name(vecType[typeIndex]),
425                     get_explicit_type_name(vecType[typeIndex]),
426                     get_explicit_type_name(vecType[typeIndex])
427                     );
428             //printf("Kernel is: \n%s\n", kernel_code);
429 
430             // Create the kernel
431             error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
432             test_error(error, "create_single_kernel_helper failed");
433 
434             // Initialize the results
435             memset(results_data, 0, sizeof(cl_long)*5);
436             error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
437             test_error(error, "clEnqueueWriteBuffer failed");
438 
439             // Set the arguments
440             error = clSetKernelArg(kernel, 0, sizeof(results), &results);
441             test_error(error, "clSetKernelArg failed");
442 
443             // Enqueue the kernel
444             size_t global_size = 1;
445             error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
446             test_error(error, "clEnqueueNDRangeKernel failed");
447 
448             // Read back the results
449             error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
450             test_error(error, "clEnqueueReadBuffer failed");
451 
452             // Verify the results
453             if (gHasLong) {
454                 for (int i = 0; i < 5; i++) {
455                     if ((results_data[i] & alignments[i]) != 0) {
456                         total_errors++;
457                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
458                     } else {
459                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
460                     }
461                 }
462             }
463             // Verify the results on devices that do not support longs
464             else {
465                 cl_uint *results_data_no_long = (cl_uint *)results_data;
466 
467                 for (int i = 0; i < 5; i++) {
468                     if ((results_data_no_long[i] & alignments[i]) != 0) {
469                         total_errors++;
470                         log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
471                     } else {
472                         if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
473                     }
474                 }
475             }
476             clReleaseKernel(kernel);
477             clReleaseProgram(program);
478         }
479     }
480 
481     clReleaseMemObject(results);
482     clReleaseMemObject(mem0);
483     clReleaseMemObject(mem2);
484     clReleaseMemObject(mem3);
485     clReleaseMemObject(mem4);
486     clReleaseMemObject(mem8);
487     clReleaseMemObject(mem16);
488     free( kernel_code );
489     free( results_data );
490 
491     if (total_errors != 0)
492         return -1;
493     return 0;
494 
495 }
496 
497 
test_kernel_memory_alignment_local(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)498 int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
499 {
500     return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kLocal );
501 }
502 
test_kernel_memory_alignment_global(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)503 int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
504 {
505     return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kGlobal );
506 }
507 
test_kernel_memory_alignment_constant(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)508 int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
509 {
510     // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances
511     // are unable to meaningfully take (or more precisely use) the address of constant data by virtue
512     // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared
513     // conformant by Khronos. The failure is however caught here.
514     //
515     // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction
516     // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1
517     // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform.
518     // To add new binding tests now to conformant 1.0 devices would violate the workingroup requirement
519     // of no new tests for 1.0 devices.  So certain allowances have to be made in intractable cases
520     // such as this one.
521     //
522     // There is some precedent. Similar allowances are made for other 1.0 hardware features such as
523     // local memory size.  The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1.
524 
525     // Detect 1.0 devices
526     // Get CL_DEVICE_VERSION size
527     size_t string_size = 0;
528     int err;
529     if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) )
530     {
531         log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" );
532         return -1;
533     }
534 
535     //Allocate storage to hold the version string
536     char *version_string = (char*) malloc(string_size);
537     if( NULL == version_string )
538     {
539         log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" );
540         return -1;
541     }
542 
543     // Get CL_DEVICE_VERSION string
544     if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) )
545     {
546         log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" );
547         return -1;
548     }
549 
550     // easy out for 1.0 devices
551     const char *string_1_0 = "OpenCL 1.0 ";
552     if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) )
553     {
554         log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string );
555         free(version_string);
556         return 0;
557     }
558     log_info( "Device version string: \"%s\"\n", version_string );
559     free(version_string);
560 
561     // Everyone else is to be ground mercilessly under the wheels of progress
562     return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kConstant );
563 }
564 
test_kernel_memory_alignment_private(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)565 int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
566 {
567     return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kPrivate );
568 }
569 
570 
571