xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_vector_creation.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include "harness/errorHelpers.h"
20 #include <vector>
21 
22 #include <CL/cl_half.h>
23 
24 #define DEBUG 0
25 #define DEPTH 16
26 // Limit the maximum code size for any given kernel.
27 #define MAX_CODE_SIZE (1024 * 32)
28 
29 static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 };
30 static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 };
31 static const char *size_names[] = { "",   "2",   "3",   "4",   "8",
32                                     "16", "!!a", "!!b", "!!c", "!!d" };
33 static char extension[128] = { 0 };
34 
35 // Creates a kernel by enumerating all possible ways of building the vector out
36 // of vloads skip_to_results will skip results up to a given number. If the
37 // amount of code generated is greater than MAX_CODE_SIZE, this function will
38 // return the number of results used, which can then be used as the
39 // skip_to_result value to continue where it left off.
create_kernel(ExplicitType type,int output_size,char * program,int * number_of_results,int skip_to_result)40 int create_kernel(ExplicitType type, int output_size, char *program,
41                   int *number_of_results, int skip_to_result)
42 {
43 
44     int number_of_sizes;
45 
46     switch (output_size)
47     {
48         case 1: number_of_sizes = 1; break;
49         case 2: number_of_sizes = 2; break;
50         case 3: number_of_sizes = 3; break;
51         case 4: number_of_sizes = 4; break;
52         case 8: number_of_sizes = 5; break;
53         case 16: number_of_sizes = 6; break;
54         default: log_error("Invalid size: %d\n", output_size); return -1;
55     }
56 
57     int total_results = 0;
58     int current_result = 0;
59     int total_vloads = 0;
60     int total_program_length = 0;
61     int aborted_due_to_size = 0;
62 
63     if (skip_to_result < 0) skip_to_result = 0;
64 
65     // The line of code for the vector creation
66     char line[1024];
67     // Keep track of what size vector we are using in each position so we can
68     // iterate through all fo them
69     int pos[DEPTH];
70     int max_size = output_size;
71     if (DEBUG > 1) log_info("max_size: %d\n", max_size);
72 
73     program[0] = '\0';
74     sprintf(program,
75             "%s\n__kernel void test_vector_creation(__global %s *src, __global "
76             "%s%s *result) {\n",
77             extension, get_explicit_type_name(type),
78             get_explicit_type_name(type),
79             (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]);
80     total_program_length += (int)strlen(program);
81 
82     char storePrefix[128], storeSuffix[128];
83 
84     // Start out trying sizes 1,1,1... by initializing pos array to zeros for
85     // all vector sizes except 16. For 16-sizes initial_no_sizes array holds
86     // factor to omit time consuming, similar creation cases tested earlier.
87     for (int i = 0; i < DEPTH; i++) pos[i] = initial_no_sizes[number_of_sizes];
88 
89     int done = 0;
90     while (!done)
91     {
92         if (DEBUG > 1)
93         {
94             log_info("pos size[] = [");
95             for (int k = 0; k < DEPTH; k++) log_info(" %d ", pos[k]);
96             log_info("]\n");
97         }
98 
99         // Go through the selected vector sizes and see if the first n of them
100         // fit the
101         //  required size exactly.
102         int size_so_far = 0;
103         int vloads;
104         for (vloads = 0; vloads < DEPTH; vloads++)
105         {
106             if (size_so_far + sizes[pos[vloads]] <= max_size)
107             {
108                 size_so_far += sizes[pos[vloads]];
109             }
110             else
111             {
112                 break;
113             }
114         }
115         if (DEBUG > 1)
116             log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
117 
118         // If they did not fit the required size exactly it is too long, so
119         // there is no point in checking any other combinations
120         //  of the sizes to the right. Prune them from the search.
121         if (size_so_far != max_size)
122         {
123             // Zero all the sizes to the right
124             for (int k = vloads + 1; k < DEPTH; k++)
125             {
126                 pos[k] = 0;
127             }
128             // Increment this current size and propagate the values up if needed
129             for (int d = vloads; d >= 0; d--)
130             {
131                 pos[d]++;
132                 if (pos[d] >= number_of_sizes)
133                 {
134                     pos[d] = 0;
135                     if (d == 0)
136                     {
137                         // If we rolled over then we are done
138                         done = 1;
139                         break;
140                     }
141                 }
142                 else
143                 {
144                     break;
145                 }
146             }
147             // Go on to the next size since this one (and all others "under" it)
148             // didn't fit
149             continue;
150         }
151 
152 
153         // Generate the actual load line if we are building this part
154         line[0] = '\0';
155         if (skip_to_result == 0 || total_results >= skip_to_result)
156         {
157             if (number_of_sizes == 3)
158             {
159                 sprintf(storePrefix, "vstore3( ");
160                 sprintf(storeSuffix, ", %d, result )", current_result);
161             }
162             else
163             {
164                 sprintf(storePrefix, "result[%d] = ", current_result);
165                 storeSuffix[0] = 0;
166             }
167 
168             sprintf(line, "\t%s(%s%d)(", storePrefix,
169                     get_explicit_type_name(type), output_size);
170             current_result++;
171 
172             int offset = 0;
173             for (int i = 0; i < vloads; i++)
174             {
175                 if (pos[i] == 0)
176                     sprintf(line + strlen(line), "src[%d]", offset);
177                 else
178                     sprintf(line + strlen(line), "vload%s(0,src+%d)",
179                             size_names[pos[i]], offset);
180                 offset += sizes[pos[i]];
181                 if (i < (vloads - 1)) sprintf(line + strlen(line), ",");
182             }
183             sprintf(line + strlen(line), ")%s;\n", storeSuffix);
184 
185             strcat(program, line);
186             total_vloads += vloads;
187         }
188         total_results++;
189         total_program_length += (int)strlen(line);
190         if (total_program_length > MAX_CODE_SIZE)
191         {
192             aborted_due_to_size = 1;
193             done = 1;
194         }
195 
196 
197         if (DEBUG) log_info("line is: %s", line);
198 
199         // If we did not use all of them, then we ignore any changes further to
200         // the right. We do this by causing those loops to skip on the next
201         // iteration.
202         if (vloads < DEPTH)
203         {
204             if (DEBUG > 1) log_info("done with this depth\n");
205             for (int k = vloads; k < DEPTH; k++) pos[k] = number_of_sizes;
206         }
207 
208         // Increment the far right size by 1, rolling over as needed
209         for (int d = DEPTH - 1; d >= 0; d--)
210         {
211             pos[d]++;
212             if (pos[d] >= number_of_sizes)
213             {
214                 pos[d] = 0;
215                 if (d == 0)
216                 {
217                     // If we rolled over at the far-left then we are done
218                     done = 1;
219                     break;
220                 }
221             }
222             else
223             {
224                 break;
225             }
226         }
227         if (done) break;
228 
229         // Continue until we are done.
230     }
231     strcat(program, "}\n\n"); // log_info("%s\n", program);
232     total_program_length += 3;
233     if (DEBUG)
234         log_info(
235             "\t\t(Program for vector type %s%s contains %d vector creations, "
236             "of total program length %gkB, with a total of %d vloads.)\n",
237             get_explicit_type_name(type), size_names[number_of_sizes - 1],
238             total_results, total_program_length / 1024.0, total_vloads);
239     *number_of_results = current_result;
240     if (aborted_due_to_size) return total_results;
241     return 0;
242 }
243 
244 
test_vector_creation(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)245 int test_vector_creation(cl_device_id deviceID, cl_context context,
246                          cl_command_queue queue, int num_elements)
247 {
248     const std::vector<ExplicitType> vecType = { kChar,  kUChar, kShort, kUShort,
249                                                 kInt,   kUInt,  kLong,  kULong,
250                                                 kFloat, kHalf,  kDouble };
251     // should be in sync with global array size_names
252     const std::vector<unsigned int> vecSizes = { 1, 2, 3, 4, 8, 16 };
253 
254     int error = CL_SUCCESS;
255     int total_errors = 0;
256     int number_of_results = 0;
257 
258     std::vector<char> input_data_converted(sizeof(cl_double) * 16);
259     std::vector<char> program_source(sizeof(char) * 1024 * 1024 * 4);
260     std::vector<char> output_data;
261 
262     // Iterate over all the types
263     for (size_t type_index = 0; type_index < vecType.size(); type_index++)
264     {
265 
266         if (!gHasLong
267             && ((vecType[type_index] == kLong)
268                 || (vecType[type_index] == kULong)))
269         {
270             log_info("Long/ULong data type not supported on this device\n");
271             continue;
272         }
273         else if (vecType[type_index] == kDouble)
274         {
275             if (!is_extension_available(deviceID, "cl_khr_fp64"))
276             {
277                 log_info("Extension cl_khr_fp64 not supported; skipping double "
278                          "tests.\n");
279                 continue;
280             }
281             snprintf(extension, sizeof(extension), "%s",
282                      "#pragma OPENCL EXTENSION cl_khr_fp64 : enable");
283         }
284         else if (vecType[type_index] == kHalf)
285         {
286             if (!is_extension_available(deviceID, "cl_khr_fp16"))
287             {
288                 log_info("Extension cl_khr_fp16 not supported; skipping half "
289                          "tests.\n");
290                 continue;
291             }
292             snprintf(extension, sizeof(extension), "%s",
293                      "#pragma OPENCL EXTENSION cl_khr_fp16 : enable");
294         }
295 
296         log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index]));
297 
298         // Convert the data to the right format for the test.
299         memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16);
300         if (vecType[type_index] == kDouble)
301         {
302             const cl_double input_data_double[16] = { 0,  1,  2,  3, 4,  5,
303                                                       6,  7,  8,  9, 10, 11,
304                                                       12, 13, 14, 15 };
305             memcpy(input_data_converted.data(), &input_data_double,
306                    sizeof(cl_double) * 16);
307         }
308         else if (vecType[type_index] == kHalf)
309         {
310             cl_half *buf =
311                 reinterpret_cast<cl_half *>(input_data_converted.data());
312             for (int j = 0; j < 16; j++)
313                 buf[j] = cl_half_from_float(float(j), CL_HALF_RTE);
314         }
315         else
316         {
317             for (int j = 0; j < 16; j++)
318             {
319                 convert_explicit_value(
320                     &j,
321                     ((char *)input_data_converted.data())
322                         + get_explicit_type_size(vecType[type_index]) * j,
323                     kInt, 0, kRoundToEven, vecType[type_index]);
324             }
325         }
326 
327         clMemWrapper input =
328             clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
329                            get_explicit_type_size(vecType[type_index]) * 16,
330                            input_data_converted.data(), &error);
331         if (error)
332         {
333             print_error(error, "clCreateBuffer failed");
334             total_errors++;
335             continue;
336         }
337 
338         // Iterate over all the vector sizes.
339         for (size_t size_index = 1; size_index < vecSizes.size(); size_index++)
340         {
341             size_t global[] = { 1, 1, 1 };
342             int number_generated = -1;
343             int previous_number_generated = 0;
344 
345             log_info("Testing %s%s...\n",
346                      get_explicit_type_name(vecType[type_index]),
347                      size_names[size_index]);
348             while (number_generated != 0)
349             {
350                 clMemWrapper output;
351                 clKernelWrapper kernel;
352                 clProgramWrapper program;
353 
354                 number_generated =
355                     create_kernel(vecType[type_index], vecSizes[size_index],
356                                   program_source.data(), &number_of_results,
357                                   number_generated);
358                 if (number_generated != 0)
359                 {
360                     if (previous_number_generated == 0)
361                         log_info("Code size greater than %gkB; splitting test "
362                                  "into multiple kernels.\n",
363                                  MAX_CODE_SIZE / 1024.0);
364                     log_info("\tExecuting vector permutations %d to %d...\n",
365                              previous_number_generated, number_generated - 1);
366                 }
367 
368                 char *src = program_source.data();
369                 error = create_single_kernel_helper(context, &program, &kernel,
370                                                     1, (const char **)&src,
371                                                     "test_vector_creation");
372                 if (error)
373                 {
374                     log_error("create_single_kernel_helper failed.\n");
375                     total_errors++;
376                     break;
377                 }
378 
379                 output = clCreateBuffer(
380                     context, CL_MEM_WRITE_ONLY,
381                     number_of_results
382                         * get_explicit_type_size(vecType[type_index])
383                         * vecSizes[size_index],
384                     NULL, &error);
385                 if (error)
386                 {
387                     print_error(error, "clCreateBuffer failed");
388                     total_errors++;
389                     break;
390                 }
391 
392                 error = clSetKernelArg(kernel, 0, sizeof(input), &input);
393                 error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
394                 if (error)
395                 {
396                     print_error(error, "clSetKernelArg failed");
397                     total_errors++;
398                     break;
399                 }
400 
401                 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global,
402                                                NULL, 0, NULL, NULL);
403                 if (error)
404                 {
405                     print_error(error, "clEnqueueNDRangeKernel failed");
406                     total_errors++;
407                     break;
408                 }
409 
410                 error = clFinish(queue);
411                 if (error)
412                 {
413                     print_error(error, "clFinish failed");
414                     total_errors++;
415                     break;
416                 }
417 
418                 output_data.resize(number_of_results
419                                    * get_explicit_type_size(vecType[type_index])
420                                    * vecSizes[size_index]);
421                 memset(output_data.data(), 0xff,
422                        number_of_results
423                            * get_explicit_type_size(vecType[type_index])
424                            * vecSizes[size_index]);
425                 error = clEnqueueReadBuffer(
426                     queue, output, CL_TRUE, 0,
427                     number_of_results
428                         * get_explicit_type_size(vecType[type_index])
429                         * vecSizes[size_index],
430                     output_data.data(), 0, NULL, NULL);
431                 if (error)
432                 {
433                     print_error(error, "clEnqueueReadBuffer failed");
434                     total_errors++;
435                     break;
436                 }
437 
438                 // Check the results
439                 char *res = (char *)output_data.data();
440                 char *exp = (char *)input_data_converted.data();
441                 for (int i = 0; i < number_of_results; i++)
442                 {
443                     // If they do not match, then print out why
444                     if (memcmp(exp,
445                                res
446                                    + i
447                                        * (get_explicit_type_size(
448                                               vecType[type_index])
449                                           * vecSizes[size_index]),
450                                get_explicit_type_size(vecType[type_index])
451                                    * vecSizes[size_index]))
452                     {
453                         log_error("Data failed to validate for result %d\n", i);
454 
455                         // Find the line in the program that failed. This is
456                         // ugly.
457                         char search[32] = { 0 };
458                         char found_line[1024] = { 0 };
459                         sprintf(search, "result[%d] = (", i);
460                         char *start_loc = strstr(program_source.data(), search);
461                         if (start_loc == NULL)
462                             log_error("Failed to find program source for "
463                                       "failure for %s in \n%s",
464                                       search, program_source.data());
465                         else
466                         {
467                             char *end_loc = strstr(start_loc, "\n");
468                             memcpy(&found_line, start_loc,
469                                    (end_loc - start_loc));
470                             found_line[end_loc - start_loc] = '\0';
471                             log_error("Failed vector line: %s\n", found_line);
472                         }
473 
474                         for (int j = 0; j < (int)vecSizes[size_index]; j++)
475                         {
476                             char expected_value[64] = { 0 };
477                             char returned_value[64] = { 0 };
478                             print_type_to_string(
479                                 vecType[type_index],
480                                 (void *)(res
481                                          + get_explicit_type_size(
482                                                vecType[type_index])
483                                              * (i * vecSizes[size_index] + j)),
484                                 returned_value);
485                             print_type_to_string(
486                                 vecType[type_index],
487                                 (void *)(exp
488                                          + get_explicit_type_size(
489                                                vecType[type_index])
490                                              * j),
491                                 expected_value);
492                             log_error("index [%d, component %d]: got: %s "
493                                       "expected: %s\n",
494                                       i, j, returned_value, expected_value);
495                         }
496                         total_errors++;
497                     }
498                 }
499                 previous_number_generated = number_generated;
500             } // number_generated != 0
501         } // vector sizes
502     } // vector types
503 
504     return total_errors;
505 }
506