xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/integer_ops/test_add_sat.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <algorithm>
25 
26 #include "procs.h"
27 
verify_addsat_char(const cl_char * inA,const cl_char * inB,const cl_char * outptr,int n,const char * sizeName,int vecSize)28 static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
29 {
30     int i;
31     for( i = 0; i < n; i++ )
32     {
33         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
34         r = std::max(r, CL_CHAR_MIN);
35         r = std::min(r, CL_CHAR_MAX);
36 
37         if( r != outptr[i] )
38         { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
39     }
40     return 0;
41 }
42 
verify_addsat_uchar(const cl_uchar * inA,const cl_uchar * inB,const cl_uchar * outptr,int n,const char * sizeName,int vecSize)43 static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
44 {
45     int i;
46     for( i = 0; i < n; i++ )
47     {
48         cl_int r = (int) inA[i] + (int) inB[i];
49         r = std::max(r, 0);
50         r = std::min(r, CL_UCHAR_MAX);
51         if (r != outptr[i])
52         { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
53     }
54     return 0;
55 }
56 
verify_addsat_short(const cl_short * inA,const cl_short * inB,const cl_short * outptr,int n,const char * sizeName,int vecSize)57 static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName , int vecSize)
58 {
59     int i;
60     for( i = 0; i < n; i++ )
61     {
62         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
63         r = std::max(r, CL_SHRT_MIN);
64         r = std::min(r, CL_SHRT_MAX);
65 
66         if( r != outptr[i] )
67         { log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
68     }
69     return 0;
70 }
71 
verify_addsat_ushort(const cl_ushort * inA,const cl_ushort * inB,const cl_ushort * outptr,int n,const char * sizeName,int vecSize)72 static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
73 {
74     int i;
75     for( i = 0; i < n; i++ )
76     {
77         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
78         r = std::max(r, 0);
79         r = std::min(r, CL_USHRT_MAX);
80 
81         if( r != outptr[i] )
82         { log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
83     }
84     return 0;
85 }
86 
verify_addsat_int(const cl_int * inA,const cl_int * inB,const cl_int * outptr,int n,const char * sizeName,int vecSize)87 static int verify_addsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
88 {
89     int i;
90     for( i = 0; i < n; i++ )
91     {
92         cl_int r = (cl_int) ((cl_uint) inA[i] + (cl_uint)inB[i]);
93         if( inB[i] > 0 )
94         {
95             if( r < inA[i] )
96                 r = CL_INT_MAX;
97         }
98         else
99         {
100             if( r > inA[i] )
101                 r = CL_INT_MIN;
102         }
103 
104 
105         if( r != outptr[i] )
106         { log_info( "\n%d) Failure for add_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
107     }
108     return 0;
109 }
110 
verify_addsat_uint(const cl_uint * inA,const cl_uint * inB,const cl_uint * outptr,int n,const char * sizeName,int vecSize)111 static int verify_addsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
112 {
113     int i;
114     for( i = 0; i < n; i++ )
115     {
116         cl_uint r = inA[i] + inB[i];
117         if( r < inA[i] )
118             r = CL_UINT_MAX;
119 
120         if( r != outptr[i] )
121         { log_info( "\n%d) Failure for add_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
122     }
123     return 0;
124 }
125 
verify_addsat_long(const cl_long * inA,const cl_long * inB,const cl_long * outptr,int n,const char * sizeName,int vecSize)126 static int verify_addsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
127 {
128     int i;
129     for( i = 0; i < n; i++ )
130     {
131         cl_long r = (cl_long)((cl_ulong)inA[i] + (cl_ulong)inB[i]);
132         if( inB[i] > 0 )
133         {
134             if( r < inA[i] )
135                 r = CL_LONG_MAX;
136         }
137         else
138         {
139             if( r > inA[i] )
140                 r = CL_LONG_MIN;
141         }
142         if( r != outptr[i] )
143         { log_info( "%d) Failure for add_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
144     }
145     return 0;
146 }
147 
verify_addsat_ulong(const cl_ulong * inA,const cl_ulong * inB,const cl_ulong * outptr,int n,const char * sizeName,int vecSize)148 static int verify_addsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
149 {
150     int i;
151     for( i = 0; i < n; i++ )
152     {
153         cl_ulong r = inA[i] + inB[i];
154         if( r < inA[i] )
155             r = CL_ULONG_MAX;
156         if( r != outptr[i] )
157         { log_info( "%d) Failure for add_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
158     }
159     return 0;
160 }
161 
162 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
163 static const verifyFunc verify[] = {   (verifyFunc) verify_addsat_char, (verifyFunc) verify_addsat_uchar,
164     (verifyFunc) verify_addsat_short, (verifyFunc) verify_addsat_ushort,
165     (verifyFunc) verify_addsat_int, (verifyFunc) verify_addsat_uint,
166     (verifyFunc) verify_addsat_long, (verifyFunc) verify_addsat_ulong };
167 //FIXME:  enable long and ulong when GPU path is working
168 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
169 
170 //FIXME:  enable "16" when support for > 64 byte vectors go into LLVM
171 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
172 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
173 static const size_t  kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
174 
test_integer_add_sat(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)175 int test_integer_add_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
176 {
177     cl_int *input_ptr[2], *output_ptr, *p;
178     int err;
179     int i;
180     cl_uint vectorSize;
181     cl_uint type;
182     MTdata d;
183     int fail_count = 0;
184 
185     size_t length = sizeof(cl_int) * 4 * n_elems;
186 
187     input_ptr[0] = (cl_int*)malloc(length);
188     input_ptr[1] = (cl_int*)malloc(length);
189     output_ptr   = (cl_int*)malloc(length);
190 
191     d = init_genrand( gRandomSeed );
192     p = input_ptr[0];
193     for (i=0; i<4 * n_elems; i++)
194         p[i] = genrand_int32(d);
195     p = input_ptr[1];
196     for (i=0; i<4 * n_elems; i++)
197         p[i] = genrand_int32(d);
198     free_mtdata(d); d = NULL;
199 
200     for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
201     {
202 
203         //embedded devices don't support long/ulong so skip over
204         if (! gHasLong && strstr(test_str_names[type],"long"))
205         {
206             log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
207             continue;
208         }
209 
210         verifyFunc f = verify[ type ];
211         // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
212         size_t elementCount = length / kSizes[type];
213         cl_mem streams[3];
214 
215         log_info( "%s", test_str_names[type] );
216         fflush( stdout );
217 
218         // Set up data streams for the type
219         streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
220         if (!streams[0])
221         {
222             log_error("clCreateBuffer failed\n");
223             return -1;
224         }
225         streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
226         if (!streams[1])
227         {
228             log_error("clCreateBuffer failed\n");
229             return -1;
230         }
231         streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
232         if (!streams[2])
233         {
234             log_error("clCreateBuffer failed\n");
235             return -1;
236         }
237 
238         err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
239         if (err != CL_SUCCESS)
240         {
241             log_error("clEnqueueWriteBuffer failed\n");
242             return -1;
243         }
244         err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
245         if (err != CL_SUCCESS)
246         {
247             log_error("clEnqueueWriteBuffer failed\n");
248             return -1;
249         }
250 
251         for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
252         {
253             cl_program program = NULL;
254             cl_kernel kernel = NULL;
255 
256             const char *source[] = {
257                 "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
258                 "(__global ", test_str_names[type], vector_size_names[vectorSize],
259                 " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
260                 " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
261                 " *dst)\n"
262                 "{\n"
263                 "    int  tid = get_global_id(0);\n"
264                 "\n"
265                 "    ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(srcA[tid], srcB[tid]);\n"
266                 "    dst[tid] = tmp;\n"
267                 "}\n" };
268 
269 
270             const char *sourceV3[] = {
271                 "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
272                 "(__global ", test_str_names[type],
273                 " *srcA, __global ", test_str_names[type],
274                 " *srcB, __global ", test_str_names[type],
275                 " *dst)\n"
276                 "{\n"
277                 "    int  tid = get_global_id(0);\n"
278                 "\n"
279                 "    ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
280                 "    vstore3(tmp, tid, dst);\n"
281                 "}\n" };
282 
283             char kernelName[128];
284             snprintf( kernelName, sizeof( kernelName ), "test_add_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
285             if(vector_sizes[vectorSize] != 3)
286             {
287                 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
288             }
289             else
290             {
291                 err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
292             }
293             if (err)
294                 return -1;
295 
296             err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
297             err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
298             err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
299             if (err != CL_SUCCESS)
300             {
301                 log_error("clSetKernelArgs failed\n");
302                 return -1;
303             }
304 
305             //Wipe the output buffer clean
306             uint32_t pattern = 0xdeadbeef;
307             memset_pattern4( output_ptr, &pattern, length );
308             err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
309             if (err != CL_SUCCESS)
310             {
311                 log_error("clWriteArray failed\n");
312                 return -1;
313             }
314 
315             size_t size = elementCount / (vector_sizes[vectorSize]);
316             err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
317             if (err != CL_SUCCESS)
318             {
319                 log_error("clExecuteKernel failed\n");
320                 return -1;
321             }
322 
323             err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
324             if (err != CL_SUCCESS)
325             {
326                 log_error("clReadArray failed\n");
327                 return -1;
328             }
329 
330             char *inP = (char *)input_ptr[0];
331             char *inP2 = (char *)input_ptr[1];
332             char *outP = (char *)output_ptr;
333 
334             for( size_t e = 0; e < size; e++ )
335             {
336                 if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
337                     ++fail_count; break; // return -1;
338                 }
339                 inP += kSizes[type] * vector_sizes[vectorSize];
340                 inP2 += kSizes[type] * vector_sizes[vectorSize];
341                 outP += kSizes[type] * vector_sizes[vectorSize];
342             }
343 
344             clReleaseKernel( kernel );
345             clReleaseProgram( program );
346             log_info( "." );
347             fflush( stdout );
348         }
349 
350         clReleaseMemObject( streams[0] );
351         clReleaseMemObject( streams[1] );
352         clReleaseMemObject( streams[2] );
353         log_info( "done\n" );
354     }
355     if(fail_count) {
356         log_info("Failed on %d types\n", fail_count);
357         return -1;
358     }
359 
360     free(input_ptr[0]);
361     free(input_ptr[1]);
362     free(output_ptr);
363 
364     return err;
365 }
366 
367 
368