1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include "harness/errorHelpers.h"
20 #include <vector>
21
22 #include <CL/cl_half.h>
23
24 #define DEBUG 0
25 #define DEPTH 16
26 // Limit the maximum code size for any given kernel.
27 #define MAX_CODE_SIZE (1024 * 32)
28
29 static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 };
30 static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 };
31 static const char *size_names[] = { "", "2", "3", "4", "8",
32 "16", "!!a", "!!b", "!!c", "!!d" };
33 static char extension[128] = { 0 };
34
35 // Creates a kernel by enumerating all possible ways of building the vector out
36 // of vloads skip_to_results will skip results up to a given number. If the
37 // amount of code generated is greater than MAX_CODE_SIZE, this function will
38 // return the number of results used, which can then be used as the
39 // skip_to_result value to continue where it left off.
create_kernel(ExplicitType type,int output_size,char * program,int * number_of_results,int skip_to_result)40 int create_kernel(ExplicitType type, int output_size, char *program,
41 int *number_of_results, int skip_to_result)
42 {
43
44 int number_of_sizes;
45
46 switch (output_size)
47 {
48 case 1: number_of_sizes = 1; break;
49 case 2: number_of_sizes = 2; break;
50 case 3: number_of_sizes = 3; break;
51 case 4: number_of_sizes = 4; break;
52 case 8: number_of_sizes = 5; break;
53 case 16: number_of_sizes = 6; break;
54 default: log_error("Invalid size: %d\n", output_size); return -1;
55 }
56
57 int total_results = 0;
58 int current_result = 0;
59 int total_vloads = 0;
60 int total_program_length = 0;
61 int aborted_due_to_size = 0;
62
63 if (skip_to_result < 0) skip_to_result = 0;
64
65 // The line of code for the vector creation
66 char line[1024];
67 // Keep track of what size vector we are using in each position so we can
68 // iterate through all fo them
69 int pos[DEPTH];
70 int max_size = output_size;
71 if (DEBUG > 1) log_info("max_size: %d\n", max_size);
72
73 program[0] = '\0';
74 sprintf(program,
75 "%s\n__kernel void test_vector_creation(__global %s *src, __global "
76 "%s%s *result) {\n",
77 extension, get_explicit_type_name(type),
78 get_explicit_type_name(type),
79 (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]);
80 total_program_length += (int)strlen(program);
81
82 char storePrefix[128], storeSuffix[128];
83
84 // Start out trying sizes 1,1,1... by initializing pos array to zeros for
85 // all vector sizes except 16. For 16-sizes initial_no_sizes array holds
86 // factor to omit time consuming, similar creation cases tested earlier.
87 for (int i = 0; i < DEPTH; i++) pos[i] = initial_no_sizes[number_of_sizes];
88
89 int done = 0;
90 while (!done)
91 {
92 if (DEBUG > 1)
93 {
94 log_info("pos size[] = [");
95 for (int k = 0; k < DEPTH; k++) log_info(" %d ", pos[k]);
96 log_info("]\n");
97 }
98
99 // Go through the selected vector sizes and see if the first n of them
100 // fit the
101 // required size exactly.
102 int size_so_far = 0;
103 int vloads;
104 for (vloads = 0; vloads < DEPTH; vloads++)
105 {
106 if (size_so_far + sizes[pos[vloads]] <= max_size)
107 {
108 size_so_far += sizes[pos[vloads]];
109 }
110 else
111 {
112 break;
113 }
114 }
115 if (DEBUG > 1)
116 log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
117
118 // If they did not fit the required size exactly it is too long, so
119 // there is no point in checking any other combinations
120 // of the sizes to the right. Prune them from the search.
121 if (size_so_far != max_size)
122 {
123 // Zero all the sizes to the right
124 for (int k = vloads + 1; k < DEPTH; k++)
125 {
126 pos[k] = 0;
127 }
128 // Increment this current size and propagate the values up if needed
129 for (int d = vloads; d >= 0; d--)
130 {
131 pos[d]++;
132 if (pos[d] >= number_of_sizes)
133 {
134 pos[d] = 0;
135 if (d == 0)
136 {
137 // If we rolled over then we are done
138 done = 1;
139 break;
140 }
141 }
142 else
143 {
144 break;
145 }
146 }
147 // Go on to the next size since this one (and all others "under" it)
148 // didn't fit
149 continue;
150 }
151
152
153 // Generate the actual load line if we are building this part
154 line[0] = '\0';
155 if (skip_to_result == 0 || total_results >= skip_to_result)
156 {
157 if (number_of_sizes == 3)
158 {
159 sprintf(storePrefix, "vstore3( ");
160 sprintf(storeSuffix, ", %d, result )", current_result);
161 }
162 else
163 {
164 sprintf(storePrefix, "result[%d] = ", current_result);
165 storeSuffix[0] = 0;
166 }
167
168 sprintf(line, "\t%s(%s%d)(", storePrefix,
169 get_explicit_type_name(type), output_size);
170 current_result++;
171
172 int offset = 0;
173 for (int i = 0; i < vloads; i++)
174 {
175 if (pos[i] == 0)
176 sprintf(line + strlen(line), "src[%d]", offset);
177 else
178 sprintf(line + strlen(line), "vload%s(0,src+%d)",
179 size_names[pos[i]], offset);
180 offset += sizes[pos[i]];
181 if (i < (vloads - 1)) sprintf(line + strlen(line), ",");
182 }
183 sprintf(line + strlen(line), ")%s;\n", storeSuffix);
184
185 strcat(program, line);
186 total_vloads += vloads;
187 }
188 total_results++;
189 total_program_length += (int)strlen(line);
190 if (total_program_length > MAX_CODE_SIZE)
191 {
192 aborted_due_to_size = 1;
193 done = 1;
194 }
195
196
197 if (DEBUG) log_info("line is: %s", line);
198
199 // If we did not use all of them, then we ignore any changes further to
200 // the right. We do this by causing those loops to skip on the next
201 // iteration.
202 if (vloads < DEPTH)
203 {
204 if (DEBUG > 1) log_info("done with this depth\n");
205 for (int k = vloads; k < DEPTH; k++) pos[k] = number_of_sizes;
206 }
207
208 // Increment the far right size by 1, rolling over as needed
209 for (int d = DEPTH - 1; d >= 0; d--)
210 {
211 pos[d]++;
212 if (pos[d] >= number_of_sizes)
213 {
214 pos[d] = 0;
215 if (d == 0)
216 {
217 // If we rolled over at the far-left then we are done
218 done = 1;
219 break;
220 }
221 }
222 else
223 {
224 break;
225 }
226 }
227 if (done) break;
228
229 // Continue until we are done.
230 }
231 strcat(program, "}\n\n"); // log_info("%s\n", program);
232 total_program_length += 3;
233 if (DEBUG)
234 log_info(
235 "\t\t(Program for vector type %s%s contains %d vector creations, "
236 "of total program length %gkB, with a total of %d vloads.)\n",
237 get_explicit_type_name(type), size_names[number_of_sizes - 1],
238 total_results, total_program_length / 1024.0, total_vloads);
239 *number_of_results = current_result;
240 if (aborted_due_to_size) return total_results;
241 return 0;
242 }
243
244
test_vector_creation(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)245 int test_vector_creation(cl_device_id deviceID, cl_context context,
246 cl_command_queue queue, int num_elements)
247 {
248 const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
249 kInt, kUInt, kLong, kULong,
250 kFloat, kHalf, kDouble };
251 // should be in sync with global array size_names
252 const std::vector<unsigned int> vecSizes = { 1, 2, 3, 4, 8, 16 };
253
254 int error = CL_SUCCESS;
255 int total_errors = 0;
256 int number_of_results = 0;
257
258 std::vector<char> input_data_converted(sizeof(cl_double) * 16);
259 std::vector<char> program_source(sizeof(char) * 1024 * 1024 * 4);
260 std::vector<char> output_data;
261
262 // Iterate over all the types
263 for (size_t type_index = 0; type_index < vecType.size(); type_index++)
264 {
265
266 if (!gHasLong
267 && ((vecType[type_index] == kLong)
268 || (vecType[type_index] == kULong)))
269 {
270 log_info("Long/ULong data type not supported on this device\n");
271 continue;
272 }
273 else if (vecType[type_index] == kDouble)
274 {
275 if (!is_extension_available(deviceID, "cl_khr_fp64"))
276 {
277 log_info("Extension cl_khr_fp64 not supported; skipping double "
278 "tests.\n");
279 continue;
280 }
281 snprintf(extension, sizeof(extension), "%s",
282 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable");
283 }
284 else if (vecType[type_index] == kHalf)
285 {
286 if (!is_extension_available(deviceID, "cl_khr_fp16"))
287 {
288 log_info("Extension cl_khr_fp16 not supported; skipping half "
289 "tests.\n");
290 continue;
291 }
292 snprintf(extension, sizeof(extension), "%s",
293 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable");
294 }
295
296 log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index]));
297
298 // Convert the data to the right format for the test.
299 memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16);
300 if (vecType[type_index] == kDouble)
301 {
302 const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5,
303 6, 7, 8, 9, 10, 11,
304 12, 13, 14, 15 };
305 memcpy(input_data_converted.data(), &input_data_double,
306 sizeof(cl_double) * 16);
307 }
308 else if (vecType[type_index] == kHalf)
309 {
310 cl_half *buf =
311 reinterpret_cast<cl_half *>(input_data_converted.data());
312 for (int j = 0; j < 16; j++)
313 buf[j] = cl_half_from_float(float(j), CL_HALF_RTE);
314 }
315 else
316 {
317 for (int j = 0; j < 16; j++)
318 {
319 convert_explicit_value(
320 &j,
321 ((char *)input_data_converted.data())
322 + get_explicit_type_size(vecType[type_index]) * j,
323 kInt, 0, kRoundToEven, vecType[type_index]);
324 }
325 }
326
327 clMemWrapper input =
328 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
329 get_explicit_type_size(vecType[type_index]) * 16,
330 input_data_converted.data(), &error);
331 if (error)
332 {
333 print_error(error, "clCreateBuffer failed");
334 total_errors++;
335 continue;
336 }
337
338 // Iterate over all the vector sizes.
339 for (size_t size_index = 1; size_index < vecSizes.size(); size_index++)
340 {
341 size_t global[] = { 1, 1, 1 };
342 int number_generated = -1;
343 int previous_number_generated = 0;
344
345 log_info("Testing %s%s...\n",
346 get_explicit_type_name(vecType[type_index]),
347 size_names[size_index]);
348 while (number_generated != 0)
349 {
350 clMemWrapper output;
351 clKernelWrapper kernel;
352 clProgramWrapper program;
353
354 number_generated =
355 create_kernel(vecType[type_index], vecSizes[size_index],
356 program_source.data(), &number_of_results,
357 number_generated);
358 if (number_generated != 0)
359 {
360 if (previous_number_generated == 0)
361 log_info("Code size greater than %gkB; splitting test "
362 "into multiple kernels.\n",
363 MAX_CODE_SIZE / 1024.0);
364 log_info("\tExecuting vector permutations %d to %d...\n",
365 previous_number_generated, number_generated - 1);
366 }
367
368 char *src = program_source.data();
369 error = create_single_kernel_helper(context, &program, &kernel,
370 1, (const char **)&src,
371 "test_vector_creation");
372 if (error)
373 {
374 log_error("create_single_kernel_helper failed.\n");
375 total_errors++;
376 break;
377 }
378
379 output = clCreateBuffer(
380 context, CL_MEM_WRITE_ONLY,
381 number_of_results
382 * get_explicit_type_size(vecType[type_index])
383 * vecSizes[size_index],
384 NULL, &error);
385 if (error)
386 {
387 print_error(error, "clCreateBuffer failed");
388 total_errors++;
389 break;
390 }
391
392 error = clSetKernelArg(kernel, 0, sizeof(input), &input);
393 error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
394 if (error)
395 {
396 print_error(error, "clSetKernelArg failed");
397 total_errors++;
398 break;
399 }
400
401 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global,
402 NULL, 0, NULL, NULL);
403 if (error)
404 {
405 print_error(error, "clEnqueueNDRangeKernel failed");
406 total_errors++;
407 break;
408 }
409
410 error = clFinish(queue);
411 if (error)
412 {
413 print_error(error, "clFinish failed");
414 total_errors++;
415 break;
416 }
417
418 output_data.resize(number_of_results
419 * get_explicit_type_size(vecType[type_index])
420 * vecSizes[size_index]);
421 memset(output_data.data(), 0xff,
422 number_of_results
423 * get_explicit_type_size(vecType[type_index])
424 * vecSizes[size_index]);
425 error = clEnqueueReadBuffer(
426 queue, output, CL_TRUE, 0,
427 number_of_results
428 * get_explicit_type_size(vecType[type_index])
429 * vecSizes[size_index],
430 output_data.data(), 0, NULL, NULL);
431 if (error)
432 {
433 print_error(error, "clEnqueueReadBuffer failed");
434 total_errors++;
435 break;
436 }
437
438 // Check the results
439 char *res = (char *)output_data.data();
440 char *exp = (char *)input_data_converted.data();
441 for (int i = 0; i < number_of_results; i++)
442 {
443 // If they do not match, then print out why
444 if (memcmp(exp,
445 res
446 + i
447 * (get_explicit_type_size(
448 vecType[type_index])
449 * vecSizes[size_index]),
450 get_explicit_type_size(vecType[type_index])
451 * vecSizes[size_index]))
452 {
453 log_error("Data failed to validate for result %d\n", i);
454
455 // Find the line in the program that failed. This is
456 // ugly.
457 char search[32] = { 0 };
458 char found_line[1024] = { 0 };
459 sprintf(search, "result[%d] = (", i);
460 char *start_loc = strstr(program_source.data(), search);
461 if (start_loc == NULL)
462 log_error("Failed to find program source for "
463 "failure for %s in \n%s",
464 search, program_source.data());
465 else
466 {
467 char *end_loc = strstr(start_loc, "\n");
468 memcpy(&found_line, start_loc,
469 (end_loc - start_loc));
470 found_line[end_loc - start_loc] = '\0';
471 log_error("Failed vector line: %s\n", found_line);
472 }
473
474 for (int j = 0; j < (int)vecSizes[size_index]; j++)
475 {
476 char expected_value[64] = { 0 };
477 char returned_value[64] = { 0 };
478 print_type_to_string(
479 vecType[type_index],
480 (void *)(res
481 + get_explicit_type_size(
482 vecType[type_index])
483 * (i * vecSizes[size_index] + j)),
484 returned_value);
485 print_type_to_string(
486 vecType[type_index],
487 (void *)(exp
488 + get_explicit_type_size(
489 vecType[type_index])
490 * j),
491 expected_value);
492 log_error("index [%d, component %d]: got: %s "
493 "expected: %s\n",
494 i, j, returned_value, expected_value);
495 }
496 total_errors++;
497 }
498 }
499 previous_number_generated = number_generated;
500 } // number_generated != 0
501 } // vector sizes
502 } // vector types
503
504 return total_errors;
505 }
506