xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/allocations/allocation_execute.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "allocation_execute.h"
17 #include "allocation_functions.h"
18 
19 
20 const char *buffer_kernel_pattern = {
21     "__kernel void sample_test(%s __global uint *result, __global %s *array_sizes, uint per_item)\n"
22     "{\n"
23     "\tint tid = get_global_id(0);\n"
24     "\tuint r = 0;\n"
25     "\t%s i;\n"
26     "\tfor(i=(%s)tid*(%s)per_item; i<(%s)(1+tid)*(%s)per_item; i++) {\n"
27     "%s"
28     "\t}\n"
29     "\tresult[tid] = r;\n"
30     "}\n" };
31 
32 const char *image_kernel_pattern = {
33     "__kernel void sample_test(%s __global uint *result)\n"
34     "{\n"
35     "\tuint4 color;\n"
36     "\tcolor = (uint4)(0);\n"
37     "%s"
38     "\tint x, y;\n"
39     "%s"
40     "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
41     "}\n" };
42 
43 const char *read_pattern = {
44     "\tfor(y=0; y<get_image_height(image%d); y++)\n"
45     "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
46     "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
47     "\t\t\t\tcolor += read_imageui(image%d, sampler, (int2)(x,y));\n"
48     "\t\t\t}\n"
49 };
50 
51 const char *offset_pattern =
52 "\tconst uint4 offset = (uint4)(0,1,2,3);\n";
53 
54 const char *sampler_pattern =
55 "\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";
56 
57 
58 const char *write_pattern = {
59     "\tfor(y=0; y<get_image_height(image%d); y++)\n"
60     "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
61     "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
62     "\t\t\t\tcolor = (uint4)x*(uint4)y+offset;\n"
63     "\t\t\t\twrite_imageui(image%d, (int2)(x,y), color);\n"
64     "\t\t\t}\n"
65     "\tbarrier(CLK_LOCAL_MEM_FENCE);\n"
66 };
67 
68 
check_image(cl_command_queue queue,cl_mem mem)69 int check_image(cl_command_queue queue, cl_mem mem) {
70     int error;
71     cl_mem_object_type type;
72     size_t width, height;
73     size_t origin[3], region[3], x, j;
74     cl_uint *data;
75 
76     error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
77     if (error) {
78         print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
79         return -1;
80     }
81 
82     switch (type)
83     {
84         case CL_MEM_OBJECT_BUFFER:
85             log_error("Expected image object, not buffer.\n");
86             return -1;
87         case CL_MEM_OBJECT_IMAGE2D:
88             error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width,
89                                    NULL);
90             if (error)
91             {
92                 print_error(error,
93                             "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
94                 return -1;
95             }
96             error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height),
97                                    &height, NULL);
98             if (error)
99             {
100                 print_error(error,
101                             "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
102                 return -1;
103             }
104             break;
105         default: log_error("unexpected object type"); return -1;
106     }
107 
108 
109     data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
110     if (data == NULL) {
111         log_error("Failed to malloc host buffer for writing into image.\n");
112         return FAILED_ABORT;
113     }
114     origin[0] = 0;
115     origin[1] = 0;
116     origin[2] = 0;
117     region[0] = width;
118     region[1] = 1;
119     region[2] = 1;
120     for (origin[1] = 0; origin[1] < height; origin[1]++) {
121         error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
122         if (error) {
123             print_error(error, "clEnqueueReadImage failed");
124             free(data);
125             return error;
126         }
127 
128         for (x=0; x<width; x++) {
129             for (j=0; j<4; j++) {
130                 if (data[x*4+j] != (cl_uint)(x*origin[1]+j)) {
131                     log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
132                               (int)x, (int)origin[1], (int)j, (cl_uint)(x*origin[1]+j), data[x*4+j]);
133                     return -1;
134                 }
135             }
136         }
137     }
138     free(data);
139     return 0;
140 }
141 
142 
143 #define NUM_OF_WORK_ITEMS 8192*2
144 
execute_kernel(cl_context context,cl_command_queue * queue,cl_device_id device_id,int test,cl_mem mems[],int number_of_mems_used,int verify_checksum)145 int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
146 
147     char *argument_string;
148     char *access_string;
149     char *kernel_string;
150     int i, error, result;
151     clKernelWrapper kernel;
152     clProgramWrapper program;
153     clMemWrapper result_mem;
154     char *ptr;
155     size_t global_dims[3];
156     cl_uint per_item;
157     cl_uint per_item_uint;
158     cl_uint returned_results[NUM_OF_WORK_ITEMS], final_result;
159     clEventWrapper event;
160     cl_int event_status;
161 
162     // Allocate memory for the kernel source
163     argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
164     access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
165     kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
166     argument_string[0] = '\0';
167     access_string[0] = '\0';
168     kernel_string[0] = '\0';
169 
170     // Zero the results.
171     for (i=0; i<NUM_OF_WORK_ITEMS; i++)
172         returned_results[i] = 0;
173 
174     // detect if device supports ulong/int64
175     //detect whether profile of the device is embedded
176     bool support64 = true;
177     char profile[1024] = "";
178     error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
179     test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
180     if ((NULL != strstr(profile, "EMBEDDED_PROFILE")) &&
181         (!is_extension_available(device_id, "cles_khr_int64"))) {
182             support64 = false;
183     }
184 
185     // Build the kernel source
186     if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
187         for(i=0; i<number_of_mems_used; i++) {
188             sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
189             sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
190         }
191         char type[10];
192         if (support64) {
193             sprintf(type, "ulong");
194         }
195         else {
196             sprintf(type, "uint");
197         }
198         sprintf(kernel_string, buffer_kernel_pattern, argument_string, type, type, type, type, type, type, access_string);
199     }
200     else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
201         for(i=0; i<number_of_mems_used; i++) {
202             sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
203             sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
204         }
205         sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
206     }
207     else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
208         for(i=0; i<number_of_mems_used; i++) {
209             sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
210             sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
211         }
212         sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
213     }
214     ptr = kernel_string;
215 
216     // Create the kernel
217     error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
218 
219     free(argument_string);
220     free(access_string);
221     free(kernel_string);
222 
223     result = check_allocation_error(context, device_id, error, queue);
224     if (result != SUCCEEDED) {
225         if (result == FAILED_TOO_BIG)
226             log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
227         else
228             print_error(error, "Create kernel and program failed");
229         return result;
230     }
231 
232     // Set the arguments
233     for (i=0; i<number_of_mems_used; i++) {
234         error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
235         test_error(error, "clSetKernelArg failed");
236     }
237 
238     // Set the result
239     result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, &error);
240     test_error(error, "clCreateBuffer failed");
241     error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
242     test_error(error, "clSetKernelArg failed");
243 
244     // Thread dimensions for execution
245     global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
246 
247     // We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
248     cl_ulong *ulSizes = NULL;
249     cl_uint  *uiSizes = NULL;
250     if (support64) {
251         ulSizes = (cl_ulong*)malloc(sizeof(cl_ulong)*number_of_mems_used);
252     }
253     else {
254         uiSizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
255     }
256     cl_ulong max_size = 0;
257     clMemWrapper buffer_sizes;
258     if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
259         for (i=0; i<number_of_mems_used; i++) {
260             size_t size;
261             error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
262             test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
263             if (support64) {
264                 ulSizes[i] = size/sizeof(cl_uint);
265             }
266             else {
267                 uiSizes[i] = (cl_uint)size/sizeof(cl_uint);
268             }
269             if (size/sizeof(cl_uint) > max_size)
270                 max_size = size/sizeof(cl_uint);
271         }
272         if (support64) {
273             buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_ulong)*number_of_mems_used, ulSizes, &error);
274         }
275         else {
276             buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, uiSizes, &error);
277         }
278         test_error_abort(error, "clCreateBuffer failed");
279         error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
280         test_error(error, "clSetKernelArg failed");
281         per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
282         if (per_item > CL_UINT_MAX)
283             log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
284         per_item_uint = (cl_uint)per_item;
285         error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
286         test_error(error, "clSetKernelArg failed");
287     }
288     if (ulSizes) {
289         free(ulSizes);
290     }
291     if (uiSizes) {
292         free(uiSizes);
293     }
294 
295     size_t local_dims[3] = {1,1,1};
296     error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
297     test_error(error, "get_max_common_work_group_size failed");
298 
299     // Execute the kernel
300     error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
301     result = check_allocation_error(context, device_id, error, queue);
302     if (result != SUCCEEDED) {
303         if (result == FAILED_TOO_BIG)
304             log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
305         else
306             print_error(error, "clEnqueueNDRangeKernel failed");
307         return result;
308     }
309 
310     // Finish the test
311     error = clFinish(*queue);
312 
313     result = check_allocation_error(context, device_id, error, queue);
314 
315     if (result != SUCCEEDED) {
316         if (result == FAILED_TOO_BIG)
317             log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
318         else
319             print_error(error, "clFinish failed");
320         return result;
321     }
322 
323     // Verify that the event from the execution did not have an error
324     error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
325     test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
326     if (event_status < 0) {
327         result = check_allocation_error(context, device_id, event_status, queue);
328         if (result != SUCCEEDED) {
329             if (result == FAILED_TOO_BIG)
330                 log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
331             else
332                 print_error(event_status, "clEnqueueNDRangeKernel failed");
333             return result;
334         }
335     }
336 
337     // If we are not verifying the checksum return here
338     if (!verify_checksum) {
339         log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
340         return SUCCEEDED;
341     }
342 
343     // Verify the checksum.
344     // Read back the result
345     error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL);
346     test_error_abort(error, "clEnqueueReadBuffer failed");
347     final_result = 0;
348     if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
349         // For buffers or read images we are just looking at the sum of what each thread summed up
350         for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
351             final_result += returned_results[i];
352         }
353         if (final_result != checksum) {
354             log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
355             return FAILED_ABORT;
356         }
357         log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
358     } else {
359         // For write images we need to verify the values
360         for (i=0; i<number_of_mems_used; i++) {
361             if (check_image(*queue, mems[i])) {
362                 log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
363                 return FAILED_ABORT;
364             }
365         }
366         log_info("\t\tImage contents verified.\n");
367     }
368 
369     // Finish the test
370     error = clFinish(*queue);
371     result = check_allocation_error(context, device_id, error, queue);
372     if (result != SUCCEEDED) {
373         if (result == FAILED_TOO_BIG)
374             log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
375         else
376             print_error(error, "clFinish failed");
377         return result;
378     }
379 
380     return SUCCEEDED;
381 }
382 
383 
384