1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include <algorithm>
25
26 #include "procs.h"
27
verify_subsat_char(const cl_char * inA,const cl_char * inB,const cl_char * outptr,int n,const char * sizeName,int vecSize)28 static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
29 {
30 int i;
31 for( i = 0; i < n; i++ )
32 {
33 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
34 r = std::max(r, CL_CHAR_MIN);
35 r = std::min(r, CL_CHAR_MAX);
36
37 if( r != outptr[i] )
38 { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
39 }
40 return 0;
41 }
42
verify_subsat_uchar(const cl_uchar * inA,const cl_uchar * inB,const cl_uchar * outptr,int n,const char * sizeName,int vecSize)43 static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
44 {
45 int i;
46 for( i = 0; i < n; i++ )
47 {
48 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
49 r = std::max(r, 0);
50 r = std::min(r, CL_UCHAR_MAX);
51 if (r != outptr[i])
52 { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
53 }
54 return 0;
55 }
56
verify_subsat_short(const cl_short * inA,const cl_short * inB,const cl_short * outptr,int n,const char * sizeName,int vecSize)57 static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName, int vecSize )
58 {
59 int i;
60 for( i = 0; i < n; i++ )
61 {
62 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
63 r = std::max(r, CL_SHRT_MIN);
64 r = std::min(r, CL_SHRT_MAX);
65
66 if( r != outptr[i] )
67 { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
68 }
69 return 0;
70 }
71
verify_subsat_ushort(const cl_ushort * inA,const cl_ushort * inB,const cl_ushort * outptr,int n,const char * sizeName,int vecSize)72 static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
73 {
74 int i;
75 for( i = 0; i < n; i++ )
76 {
77 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
78 r = std::max(r, 0);
79 r = std::min(r, CL_USHRT_MAX);
80
81 if( r != outptr[i] )
82 { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
83 }
84 return 0;
85 }
86
verify_subsat_int(const cl_int * inA,const cl_int * inB,const cl_int * outptr,int n,const char * sizeName,int vecSize)87 static int verify_subsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
88 {
89 int i;
90 for( i = 0; i < n; i++ )
91 {
92 cl_int r = (cl_int) ((cl_uint)inA[i] - (cl_uint)inB[i]);
93 if( inB[i] < 0 )
94 {
95 if( r < inA[i] )
96 r = CL_INT_MAX;
97 }
98 else
99 {
100 if( r > inA[i] )
101 r = CL_INT_MIN;
102 }
103
104
105 if( r != outptr[i] )
106 { log_info( "\n%d) Failure for sub_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
107 }
108 return 0;
109 }
110
verify_subsat_uint(const cl_uint * inA,const cl_uint * inB,const cl_uint * outptr,int n,const char * sizeName,int vecSize)111 static int verify_subsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
112 {
113 int i;
114 for( i = 0; i < n; i++ )
115 {
116 cl_uint r = inA[i] - inB[i];
117 if( inA[i] < inB[i] )
118 r = 0;
119
120 if( r != outptr[i] )
121 { log_info( "\n%d) Failure for sub_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
122 }
123 return 0;
124 }
125
verify_subsat_long(const cl_long * inA,const cl_long * inB,const cl_long * outptr,int n,const char * sizeName,int vecSize)126 static int verify_subsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
127 {
128 int i;
129 for( i = 0; i < n; i++ )
130 {
131 cl_long r = (cl_long)((cl_ulong)inA[i] - (cl_ulong)inB[i]);
132 if( inB[i] < 0 )
133 {
134 if( r < inA[i] )
135 r = CL_LONG_MAX;
136 }
137 else
138 {
139 if( r > inA[i] )
140 r = CL_LONG_MIN;
141 }
142 if( r != outptr[i] )
143 { log_info( "%d) Failure for sub_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
144 }
145 return 0;
146 }
147
verify_subsat_ulong(const cl_ulong * inA,const cl_ulong * inB,const cl_ulong * outptr,int n,const char * sizeName,int vecSize)148 static int verify_subsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
149 {
150 int i;
151 for( i = 0; i < n; i++ )
152 {
153 cl_ulong r = inA[i] - inB[i];
154 if( inA[i] < inB[i] )
155 r = 0;
156 if( r != outptr[i] )
157 { log_info( "%d) Failure for sub_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
158 }
159 return 0;
160 }
161
162 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
163 static const verifyFunc verify[] = { (verifyFunc) verify_subsat_char, (verifyFunc) verify_subsat_uchar,
164 (verifyFunc) verify_subsat_short, (verifyFunc) verify_subsat_ushort,
165 (verifyFunc) verify_subsat_int, (verifyFunc) verify_subsat_uint,
166 (verifyFunc) verify_subsat_long, (verifyFunc) verify_subsat_ulong };
167
168 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
169 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
170 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
171
172 static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
173
test_integer_sub_sat(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)174 int test_integer_sub_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
175 {
176 int *input_ptr[2], *output_ptr, *p;
177 int err;
178 cl_uint i;
179 cl_uint vectorSize;
180 cl_uint type;
181 MTdata d;
182 int fail_count = 0;
183
184 size_t length = sizeof(int) * 4 * n_elems;
185
186 input_ptr[0] = (int*)malloc(length);
187 input_ptr[1] = (int*)malloc(length);
188 output_ptr = (int*)malloc(length);
189
190 d = init_genrand( gRandomSeed );
191 p = input_ptr[0];
192 for (i=0; i<4 * (cl_uint) n_elems; i++)
193 p[i] = genrand_int32(d);
194 p = input_ptr[1];
195 for (i=0; i<4 * (cl_uint) n_elems; i++)
196 p[i] = genrand_int32(d);
197 free_mtdata(d); d = NULL;
198
199 for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
200 {
201
202 //embedded devices don't support long/ulong so skip over
203 if (! gHasLong && strstr(test_str_names[type],"long"))
204 {
205 log_info( "WARNING: device does not support 64-bit integers. Skipping %s\n", test_str_names[type] );
206 continue;
207 }
208
209 verifyFunc f = verify[ type ];
210 // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
211 size_t elementCount = length / kSizes[type];
212 cl_mem streams[3];
213
214 log_info( "%s", test_str_names[type] );
215 fflush( stdout );
216
217 // Set up data streams for the type
218 streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
219 if (!streams[0])
220 {
221 log_error("clCreateBuffer failed\n");
222 return -1;
223 }
224 streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
225 if (!streams[1])
226 {
227 log_error("clCreateBuffer failed\n");
228 return -1;
229 }
230 streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
231 if (!streams[2])
232 {
233 log_error("clCreateBuffer failed\n");
234 return -1;
235 }
236
237 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
238 if (err != CL_SUCCESS)
239 {
240 log_error("clEnqueueWriteBuffer failed\n");
241 return -1;
242 }
243 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
244 if (err != CL_SUCCESS)
245 {
246 log_error("clEnqueueWriteBuffer failed\n");
247 return -1;
248 }
249
250 for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
251 {
252 cl_program program = NULL;
253 cl_kernel kernel = NULL;
254
255 const char *source[] = {
256 "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
257 "(__global ", test_str_names[type], vector_size_names[vectorSize],
258 " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
259 " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
260 " *dst)\n"
261 "{\n"
262 " int tid = get_global_id(0);\n"
263 "\n"
264 " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(srcA[tid], srcB[tid]);\n"
265 " dst[tid] = tmp;\n"
266 "}\n"
267 };
268
269 const char *sourceV3[] = {
270 "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
271 "(__global ", test_str_names[type],
272 " *srcA, __global ", test_str_names[type],
273 " *srcB, __global ", test_str_names[type],
274 " *dst)\n"
275 "{\n"
276 " int tid = get_global_id(0);\n"
277 "\n"
278 " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
279 " vstore3(tmp, tid, dst);\n"
280 "}\n"
281 };
282
283 char kernelName[128];
284 snprintf( kernelName, sizeof( kernelName ), "test_sub_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
285 if(vector_sizes[vectorSize] != 3)
286 {
287 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
288 } else {
289 err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
290 }
291 if (err)
292 return -1;
293
294 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
295 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
296 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
297 if (err != CL_SUCCESS)
298 {
299 log_error("clSetKernelArgs failed\n");
300 return -1;
301 }
302
303 //Wipe the output buffer clean
304 uint32_t pattern = 0xdeadbeef;
305 memset_pattern4( output_ptr, &pattern, length );
306 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
307 if (err != CL_SUCCESS)
308 {
309 log_error("clEnqueueWriteBuffer failed\n");
310 return -1;
311 }
312
313 size_t size = elementCount / vector_sizes[vectorSize];
314 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
315 if (err != CL_SUCCESS)
316 {
317 log_error("clEnqueueNDRangeKernel failed\n");
318 return -1;
319 }
320
321 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
322 if (err != CL_SUCCESS)
323 {
324 log_error("clEnqueueReadBuffer failed\n");
325 return -1;
326 }
327
328 char *inP = (char *)input_ptr[0];
329 char *inP2 = (char *)input_ptr[1];
330 char *outP = (char *)output_ptr;
331
332 for( size_t e = 0; e < size; e++ )
333 {
334 if( f( inP, inP2, outP, vector_sizes[vectorSize], vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
335 ++fail_count; break; // return -1;
336 }
337 inP += kSizes[type] * vector_sizes[vectorSize];
338 inP2 += kSizes[type] * vector_sizes[vectorSize];
339 outP += kSizes[type] * vector_sizes[vectorSize];
340 }
341
342 clReleaseKernel( kernel );
343 clReleaseProgram( program );
344 log_info( "." );
345 fflush( stdout );
346 }
347
348 clReleaseMemObject( streams[0] );
349 clReleaseMemObject( streams[1] );
350 clReleaseMemObject( streams[2] );
351 log_info( "done\n" );
352 }
353 if(fail_count) {
354 log_info("Failed on %d types\n", fail_count);
355 return -1;
356 }
357
358 free(input_ptr[0]);
359 free(input_ptr[1]);
360 free(output_ptr);
361
362 return err;
363 }
364
365
366