xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/half/cl_utils.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "cl_utils.h"
17 #include <stdlib.h>
18 
19 #if !defined (_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 #include "test_config.h"
24 #include "string.h"
25 #include "harness/kernelHelpers.h"
26 
27 #include "harness/testHarness.h"
28 
29 #define HALF_MIN 1.0p-14
30 
31 
32 const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" };
33 const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" };
34 const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" };
35 const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
36 
37 
38 void *gIn_half = NULL;
39 void *gOut_half = NULL;
40 void *gOut_half_reference = NULL;
41 void *gOut_half_reference_double = NULL;
42 void *gIn_single = NULL;
43 void *gOut_single = NULL;
44 void *gOut_single_reference = NULL;
45 void *gIn_double = NULL;
46 // void *gOut_double = NULL;
47 // void *gOut_double_reference = NULL;
48 cl_mem gInBuffer_half = NULL;
49 cl_mem gOutBuffer_half = NULL;
50 cl_mem gInBuffer_single = NULL;
51 cl_mem gOutBuffer_single = NULL;
52 cl_mem gInBuffer_double = NULL;
53 // cl_mem gOutBuffer_double = NULL;
54 
55 cl_context gContext = NULL;
56 cl_command_queue gQueue = NULL;
57 uint32_t gDeviceFrequency = 0;
58 uint32_t gComputeDevices = 0;
59 size_t gMaxThreadGroupSize = 0;
60 size_t gWorkGroupSize = 0;
61 bool gWimpyMode = false;
62 int gWimpyReductionFactor = 512;
63 int gTestDouble = 0;
64 bool gHostReset = false;
65 
66 #if defined( __APPLE__ )
67 int gReportTimes = 1;
68 #else
69 int gReportTimes = 0;
70 #endif
71 
72 #pragma mark -
73 
InitCL(cl_device_id device)74 test_status InitCL( cl_device_id device )
75 {
76     size_t configSize = sizeof( gComputeDevices );
77     int error;
78 
79 #if MULTITHREAD
80     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS,  configSize, &gComputeDevices, NULL )) )
81 #endif
82     gComputeDevices = 1;
83 
84     configSize = sizeof( gMaxThreadGroupSize );
85     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize,  NULL )) )
86         gMaxThreadGroupSize = 1;
87 
88     // Use only one-eighth the work group size
89     if (gMaxThreadGroupSize > 8)
90         gWorkGroupSize = gMaxThreadGroupSize / 8;
91     else
92         gWorkGroupSize = gMaxThreadGroupSize;
93 
94     configSize = sizeof( gDeviceFrequency );
95     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency,  NULL )) )
96         gDeviceFrequency = 1;
97 
98     // Check extensions
99     int hasDouble = is_extension_available(device, "cl_khr_fp64");
100     gTestDouble ^= hasDouble;
101 
102     vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
103     vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );
104 
105     gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
106     if( NULL == gContext )
107     {
108         vlog_error( "clCreateDeviceGroup failed. (%d)\n", error );
109         return TEST_FAIL;
110     }
111 
112     gQueue = clCreateCommandQueue(gContext, device, 0, &error);
113     if( NULL == gQueue )
114     {
115         vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
116         return TEST_FAIL;
117     }
118 
119 #if defined( __APPLE__ )
120     // FIXME: use clProtectedArray
121 #endif
122     //Allocate buffers
123     gIn_half   = malloc( getBufferSize(device)/2  );
124     gOut_half = malloc( BUFFER_SIZE/2  );
125     gOut_half_reference = malloc( BUFFER_SIZE/2  );
126     gOut_half_reference_double = malloc( BUFFER_SIZE/2  );
127     gIn_single   = malloc( BUFFER_SIZE );
128     gOut_single = malloc( getBufferSize(device)  );
129     gOut_single_reference = malloc( getBufferSize(device)  );
130     gIn_double   = malloc( 2*BUFFER_SIZE  );
131     // gOut_double = malloc( (2*getBufferSize(device))  );
132     // gOut_double_reference = malloc( (2*getBufferSize(device))  );
133 
134     if ( NULL == gIn_half ||
135      NULL == gOut_half ||
136      NULL == gOut_half_reference ||
137      NULL == gOut_half_reference_double ||
138          NULL == gIn_single ||
139      NULL == gOut_single ||
140      NULL == gOut_single_reference ||
141          NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference
142          )
143         return TEST_FAIL;
144 
145     gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(device) / 2, NULL, &error);
146     if( gInBuffer_half == NULL )
147     {
148         vlog_error( "clCreateArray failed for input (%d)\n", error );
149         return TEST_FAIL;
150     }
151 
152     gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error );
153     if( gInBuffer_single == NULL )
154     {
155         vlog_error( "clCreateArray failed for input (%d)\n", error );
156         return TEST_FAIL;
157     }
158 
159     gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error );
160     if( gInBuffer_double == NULL )
161     {
162         vlog_error( "clCreateArray failed for input (%d)\n", error );
163         return TEST_FAIL;
164     }
165 
166     gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error );
167     if( gOutBuffer_half == NULL )
168     {
169         vlog_error( "clCreateArray failed for output (%d)\n", error );
170         return TEST_FAIL;
171     }
172 
173     gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(device), NULL, &error );
174     if( gOutBuffer_single == NULL )
175     {
176         vlog_error( "clCreateArray failed for output (%d)\n", error );
177         return TEST_FAIL;
178     }
179 
180 #if 0
181     gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(device)), NULL, &error );
182     if( gOutBuffer_double == NULL )
183     {
184         vlog_error( "clCreateArray failed for output (%d)\n", error );
185         return TEST_FAIL;
186     }
187 #endif
188 
189     char string[16384];
190     vlog( "\nCompute Device info:\n" );
191     error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(string), string, NULL);
192     vlog( "\tDevice Name: %s\n", string );
193     error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(string), string, NULL);
194     vlog( "\tVendor: %s\n", string );
195     error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(string), string, NULL);
196     vlog( "\tDevice Version: %s\n", string );
197     error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL);
198     vlog( "\tOpenCL C Version: %s\n", string );
199     error = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(string), string, NULL);
200     vlog( "\tDriver Version: %s\n", string );
201     vlog( "\tProcessing with %d devices\n", gComputeDevices );
202     vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
203     vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" );
204     vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" );
205 
206     return TEST_PASS;
207 }
208 
MakeProgram(cl_device_id device,const char * source[],int count)209 cl_program MakeProgram( cl_device_id device, const char *source[], int count )
210 {
211     int error;
212     int i;
213 
214     //create the program
215     cl_program program;
216     error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source);
217     if( NULL == program )
218     {
219         vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
220         return NULL;
221     }
222 
223     // build it
224     if( (error = clBuildProgram( program, 1, &device, NULL, NULL, NULL )) )
225     {
226         size_t  len;
227         char    buffer[16384];
228 
229         vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n");
230         clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
231         vlog_error("Log: %s\n", buffer);
232         vlog_error("Source :\n");
233         for(i = 0; i < count; ++i) {
234             vlog_error("%s", source[i]);
235         }
236         vlog_error("\n");
237 
238         clReleaseProgram( program );
239         return NULL;
240     }
241 
242     return program;
243 }
244 
ReleaseCL(void)245 void ReleaseCL(void)
246 {
247     clReleaseMemObject(gInBuffer_half);
248     clReleaseMemObject(gOutBuffer_half);
249     clReleaseMemObject(gInBuffer_single);
250     clReleaseMemObject(gOutBuffer_single);
251     clReleaseMemObject(gInBuffer_double);
252     // clReleaseMemObject(gOutBuffer_double);
253     clReleaseCommandQueue(gQueue);
254     clReleaseContext(gContext);
255 
256     free(gIn_half);
257     free(gOut_half);
258     free(gOut_half_reference);
259     free(gOut_half_reference_double);
260     free(gIn_single);
261     free(gOut_single);
262     free(gOut_single_reference);
263     free(gIn_double);
264 }
265 
numVecs(cl_uint count,int vectorSizeIdx,bool aligned)266 cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
267     if(aligned && g_arrVecSizes[vectorSizeIdx] == 3) {
268         return count/4;
269     }
270     return  (count + g_arrVecSizes[vectorSizeIdx] - 1)/
271     ( (g_arrVecSizes[vectorSizeIdx]) );
272 }
273 
runsOverBy(cl_uint count,int vectorSizeIdx,bool aligned)274 cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned) {
275     if(aligned || g_arrVecSizes[vectorSizeIdx] != 3) { return -1; }
276     return count% (g_arrVecSizes[vectorSizeIdx]);
277 }
278 
printSource(const char * src[],int len)279 void printSource(const char * src[], int len) {
280     int i;
281     for(i = 0; i < len; ++i) {
282         vlog("%s", src[i]);
283     }
284 }
285 
RunKernel(cl_device_id device,cl_kernel kernel,void * inBuf,void * outBuf,uint32_t blockCount,int extraArg)286 int RunKernel( cl_device_id device, cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg)
287 {
288     size_t localCount = blockCount;
289     size_t wg_size;
290     int error;
291 
292     error = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf);
293     error |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf);
294 
295     if(extraArg >= 0) {
296         error |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &extraArg);
297     }
298 
299     if( error )
300     {
301         vlog_error( "FAILED -- could not set kernel args\n" );
302         return -3;
303     }
304 
305     error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( wg_size ), &wg_size, NULL);
306     if (error)
307     {
308         vlog_error( "FAILED -- could not get kernel work group info\n" );
309         return -4;
310     }
311 
312     wg_size = (wg_size > gWorkGroupSize) ? gWorkGroupSize : wg_size;
313     while( localCount % wg_size )
314         wg_size--;
315 
316     if( (error = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &localCount, &wg_size, 0, NULL, NULL )) )
317     {
318         vlog_error( "FAILED -- could not execute kernel\n" );
319         return -5;
320     }
321 
322     return 0;
323 }
324 
325 #if defined (__APPLE__ )
326 
327 #include <mach/mach_time.h>
328 
ReadTime(void)329 uint64_t ReadTime( void )
330 {
331     return mach_absolute_time();        // returns time since boot.  Ticks have better than microsecond precsion.
332 }
333 
SubtractTime(uint64_t endTime,uint64_t startTime)334 double SubtractTime( uint64_t endTime, uint64_t startTime )
335 {
336     static double conversion = 0.0;
337 
338     if(  0.0 == conversion )
339     {
340         mach_timebase_info_data_t   info;
341         kern_return_t err = mach_timebase_info( &info );
342         if( 0 == err )
343             conversion = 1e-9 * (double) info.numer / (double) info.denom;
344     }
345 
346     return (double) (endTime - startTime) * conversion;
347 }
348 
349 #elif defined( _WIN32 ) && defined (_MSC_VER)
350 
351 // functions are defined in compat.h
352 
353 #else
354 
355 //
356 //  Please feel free to substitute your own timing facility here.
357 //
358 
359 #warning  Times are meaningless. No timing facility in place for this platform.
ReadTime(void)360 uint64_t ReadTime( void )
361 {
362     return 0ULL;
363 }
364 
365 // return the difference between two times obtained from ReadTime in seconds
SubtractTime(uint64_t endTime,uint64_t startTime)366 double SubtractTime( uint64_t endTime, uint64_t startTime )
367 {
368     return INFINITY;
369 }
370 
371 #endif
372 
getBufferSize(cl_device_id device_id)373 size_t getBufferSize(cl_device_id device_id)
374 {
375     static int s_initialized = 0;
376     static cl_device_id s_device_id;
377     static cl_ulong s_result = 64*1024;
378 
379     if(s_initialized == 0 || s_device_id != device_id)
380     {
381         cl_ulong result, maxGlobalSize;
382         cl_int err = clGetDeviceInfo (device_id,
383                                       CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
384                                       sizeof(result), (void *)&result,
385                                       NULL);
386         if(err)
387         {
388             vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n");
389             s_result = 64*1024;
390             goto exit;
391         }
392         if (result > BUFFER_SIZE)
393             result = BUFFER_SIZE;
394         log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result);
395         err = clGetDeviceInfo (device_id,
396                                CL_DEVICE_GLOBAL_MEM_SIZE,
397                                sizeof(maxGlobalSize), (void *)&maxGlobalSize,
398                                NULL);
399         if(err)
400         {
401             vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n");
402             goto exit;
403         }
404         result = result / 2;
405         if(maxGlobalSize < result * 10)
406             result = result / 10;
407         s_initialized = 1;
408         s_device_id = device_id;
409         s_result = result;
410     }
411 
412 exit:
413     if( s_result > SIZE_MAX )
414     {
415         vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" );
416         fflush(stdout);
417         abort();
418     }
419 
420     return (size_t) s_result;
421 }
422 
getBufferCount(cl_device_id device_id,size_t vecSize,size_t typeSize)423 cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize)
424 {
425     cl_ulong tmp = getBufferSize(device_id);
426     if(vecSize == 3)
427     {
428         return tmp/(cl_ulong)(4*typeSize);
429     }
430     return tmp/(cl_ulong)(vecSize*typeSize);
431 }
432