xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_queue_priority.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24 
25 #include "procs.h"
26 
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 "    int  tid = get_global_id(0);\n"
31 "\n"
32 "    dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34 
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 "    int  tid = get_global_id(0);\n"
39 "\n"
40 "    dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42 
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 "    int  tid = get_global_id(0);\n"
47 "\n"
48 "    dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50 
51 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)52 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
53 {
54     int         i;
55 
56     float * reference_ptr = (float *)malloc(n * sizeof(float));
57 
58     for (i=0; i<n; i++)
59     {
60         reference_ptr[i] = inptrA[i] + inptrB[i];
61     }
62 
63     for (i=0; i<n; i++)
64     {
65         if (reference_ptr[i] != outptr[i])
66         {
67             log_error("FP_ADD float test failed\n");
68             return -1;
69         }
70     }
71 
72     free(reference_ptr);
73 
74     log_info("FP_ADD float test passed\n");
75     return 0;
76 }
77 
78 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)79 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
80 {
81     int         i;
82 
83     float * reference_ptr = (float *)malloc(n * sizeof(float));
84 
85     for (i=0; i<n; i++)
86     {
87         reference_ptr[i] = inptrA[i] - inptrB[i];
88     }
89 
90     for (i=0; i<n; i++)
91     {
92         if (reference_ptr[i] != outptr[i])
93         {
94             log_error("FP_SUB float test failed\n");
95             return -1;
96         }
97     }
98 
99     free(reference_ptr);
100 
101     log_info("FP_SUB float test passed\n");
102     return 0;
103 }
104 
105 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)106 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
107 {
108     int         i;
109 
110     float * reference_ptr = (float *)malloc(n * sizeof(float));
111 
112     for (i=0; i<n; i++)
113     {
114         reference_ptr[i] = inptrA[i] * inptrB[i];
115     }
116 
117     for (i=0; i<n; i++)
118     {
119         if (reference_ptr[i] != outptr[i])
120         {
121             log_error("FP_MUL float test failed\n");
122             return -1;
123         }
124     }
125 
126     free(reference_ptr);
127 
128     log_info("FP_MUL float test passed\n");
129     return 0;
130 }
131 
132 #if defined( __APPLE__ )
133 
test_queue_priority(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)134 int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
135 {
136   int err;
137   int command_queue_priority = 0;
138   int command_queue_select_compute_units = 0;
139 
140   cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, 0, 0, 0, 0, 0, 0 };
141   int idx = 2;
142 
143   // Check to see if queue priority is supported
144   if (((command_queue_priority = is_extension_available(device, "cl_APPLE_command_queue_priority"))) == 0)
145   {
146     log_info("cl_APPLE_command_queue_priority extension is not supported - skipping test\n");
147   }
148 
149   // Check to see if selecting the number of compute units is supported
150   if (((command_queue_select_compute_units = is_extension_available(device, "cl_APPLE_command_queue_select_compute_units"))) == 0)
151   {
152     log_info("cl_APPLE_command_queue_select_compute_units extension is not supported - skipping test\n");
153   }
154 
155   // If neither extension is supported, skip the test
156   if (!command_queue_priority && !command_queue_select_compute_units)
157     return 0;
158 
159   // Setup the queue properties
160 #ifdef cl_APPLE_command_queue_priority
161   if (command_queue_priority) {
162     queue_properties[idx++] = CL_QUEUE_PRIORITY_APPLE;
163     queue_properties[idx++] = CL_QUEUE_PRIORITY_BACKGROUND_APPLE;
164   }
165 #endif
166 
167 #ifdef cl_APPLE_command_queue_select_compute_units
168   // Check the number of compute units on the device
169   cl_uint num_compute_units = 0;
170   err = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( num_compute_units ), &num_compute_units, NULL );
171   if (err) {
172     log_error("clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed: %d", err);
173     return -1;
174   }
175 
176   if (command_queue_select_compute_units) {
177     queue_properties[idx++] = CL_QUEUE_NUM_COMPUTE_UNITS_APPLE;
178     queue_properties[idx++] = num_compute_units/2;
179   }
180 #endif
181   queue_properties[idx++] = 0;
182 
183   // Create the command queue
184   cl_command_queue background_queue = clCreateCommandQueueWithProperties(context, device, queue_properties, &err);
185   if (err) {
186     log_error("clCreateCommandQueueWithPropertiesAPPLE failed: %d", err);
187     return -1;
188   }
189 
190   // Test the command queue
191   cl_mem streams[4];
192     cl_program program[3];
193     cl_kernel kernel[3];
194   cl_event marker_event;
195 
196   float *input_ptr[3], *output_ptr, *p;
197     size_t threads[1];
198     int i;
199   MTdata d = init_genrand( gRandomSeed );
200   size_t length = sizeof(cl_float) * num_elements;
201   int isRTZ = 0;
202   RoundingMode oldMode = kDefaultRoundingMode;
203 
204   // check for floating point capabilities
205   cl_device_fp_config single_config = 0;
206   err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
207   if (err) {
208     log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
209     return -1;
210   }
211   //If we only support rtz mode
212   if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
213   {
214     //Check to make sure we are an embedded device
215     char profile[32];
216     err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
217     if( err )
218     {
219       log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
220       return -1;
221     }
222     if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
223     {
224       log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
225       return -1;
226     }
227 
228     isRTZ = 1;
229     oldMode = get_round();
230   }
231 
232   input_ptr[0] = (cl_float *)malloc(length);
233   input_ptr[1] = (cl_float *)malloc(length);
234   input_ptr[2] = (cl_float *)malloc(length);
235   output_ptr = (cl_float *)malloc(length);
236 
237   streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
238   test_error( err, "clCreateBuffer failed.");
239   streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
240   test_error( err, "clCreateBuffer failed.");
241   streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
242   test_error( err, "clCreateBuffer failed.");
243   streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
244   test_error( err, "clCreateBuffer failed.");
245 
246   p = input_ptr[0];
247   for (i=0; i<num_elements; i++)
248     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
249   p = input_ptr[1];
250   for (i=0; i<num_elements; i++)
251     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
252   p = input_ptr[2];
253   for (i=0; i<num_elements; i++)
254     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
255 
256   err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
257   test_error( err, "clEnqueueWriteBuffer failed.");
258 
259   err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
260   test_error( err, "clEnqueueWriteBuffer failed.");
261 
262   err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
263   test_error( err, "clEnqueueWriteBuffer failed.");
264 
265   err = clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker_event);
266   test_error( err, "clEnqueueMarkerWithWaitList failed.");
267   clFlush(queue);
268 
269   err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
270   test_error( err, "create_single_kernel_helper failed");
271 
272   err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
273   test_error( err, "create_single_kernel_helper failed");
274 
275   err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
276   test_error( err, "create_single_kernel_helper failed");
277 
278 
279   err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
280   err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
281   err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
282   test_error( err, "clSetKernelArgs failed.");
283 
284   err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
285   err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
286   err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
287   test_error( err, "clSetKernelArgs failed.");
288 
289   err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
290   err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
291   err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
292   test_error( err, "clSetKernelArgs failed.");
293 
294   threads[0] = (unsigned int)num_elements;
295   for (i=0; i<3; i++)
296   {
297     err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 1, &marker_event, NULL);
298     test_error( err, "clEnqueueNDRangeKernel failed.");
299 
300     err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
301     test_error( err, "clEnqueueReadBuffer failed.");
302 
303     if( isRTZ )
304       set_round( kRoundTowardZero, kfloat );
305 
306     switch (i)
307     {
308       case 0:
309         err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
310         break;
311       case 1:
312         err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
313         break;
314       case 2:
315         err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
316         break;
317     }
318 
319     if( isRTZ )
320       set_round( oldMode, kfloat );
321   }
322 
323   // cleanup
324   clReleaseCommandQueue(background_queue);
325   clReleaseEvent(marker_event);
326   clReleaseMemObject(streams[0]);
327   clReleaseMemObject(streams[1]);
328   clReleaseMemObject(streams[2]);
329   clReleaseMemObject(streams[3]);
330   for (i=0; i<3; i++)
331   {
332     clReleaseKernel(kernel[i]);
333     clReleaseProgram(program[i]);
334   }
335   free(input_ptr[0]);
336   free(input_ptr[1]);
337   free(input_ptr[2]);
338   free(output_ptr);
339   free_mtdata( d );
340 
341   return err;
342 }
343 
344 
345 
346 #endif
347 
348