1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 template <class Integer>
abs_diff(Integer a,Integer b)26 static typename std::make_unsigned<Integer>::type abs_diff(Integer a, Integer b)
27 {
28 using Unsigned = typename std::make_unsigned<Integer>::type;
29 Unsigned ua = a;
30 Unsigned ub = b;
31 Unsigned diff = ua - ub;
32 if (a < b) diff = -diff;
33 return diff;
34 }
35
verify_absdiff_char(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)36 static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
37 {
38 const cl_char *inA = (const cl_char *)p;
39 const cl_char *inB = (const cl_char *)q;
40 const cl_uchar *outptr = (const cl_uchar *)r;
41 size_t i;
42 for( i = 0; i < n; i++ )
43 {
44 cl_uchar r = abs_diff(inA[i], inB[i]);
45 if( r != outptr[i] )
46 { log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
47 }
48 return 0;
49 }
50
verify_absdiff_uchar(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)51 static int verify_absdiff_uchar( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
52 {
53 const cl_uchar *inA = (const cl_uchar *)p;
54 const cl_uchar *inB = (const cl_uchar *)q;
55 const cl_uchar *outptr = (const cl_uchar *)r;
56 size_t i;
57 for( i = 0; i < n; i++ )
58 {
59 cl_uchar r = abs_diff(inA[i], inB[i]);
60 if( r != outptr[i] )
61 { log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
62 }
63 return 0;
64 }
65
verify_absdiff_short(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)66 static int verify_absdiff_short( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
67 {
68 const cl_short *inA = (const cl_short *)p;
69 const cl_short *inB = (const cl_short *)q;
70 const cl_ushort *outptr = (const cl_ushort *)r;
71 size_t i;
72 for( i = 0; i < n; i++ )
73 {
74 cl_ushort r = abs_diff(inA[i], inB[i]);
75 if( r != outptr[i] )
76 { log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
77 }
78 return 0;
79 }
80
verify_absdiff_ushort(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)81 static int verify_absdiff_ushort( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
82 {
83 const cl_ushort *inA = (const cl_ushort *)p;
84 const cl_ushort *inB = (const cl_ushort *)q;
85 const cl_ushort *outptr = (const cl_ushort *)r;
86 size_t i;
87 for( i = 0; i < n; i++ )
88 {
89 cl_ushort r = abs_diff(inA[i], inB[i]);
90 if( r != outptr[i] )
91 { log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
92 }
93 return 0;
94 }
95
verify_absdiff_int(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)96 static int verify_absdiff_int( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
97 {
98 const cl_int *inA = (const cl_int *)p;
99 const cl_int *inB = (const cl_int *)q;
100 const cl_uint *outptr = (const cl_uint *)r;
101 size_t i;
102 for( i = 0; i < n; i++ )
103 {
104 cl_uint r = abs_diff(inA[i], inB[i]);
105 if( r != outptr[i] )
106 {
107 log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] );
108 return -1;
109 }
110 }
111 return 0;
112 }
113
verify_absdiff_uint(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)114 static int verify_absdiff_uint( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
115 {
116 const cl_uint *inA = (const cl_uint *)p;
117 const cl_uint *inB = (const cl_uint *)q;
118 const cl_uint *outptr = (const cl_uint *)r;
119 size_t i;
120 for( i = 0; i < n; i++ )
121 {
122 cl_uint r = abs_diff(inA[i], inB[i]);
123 if( r != outptr[i] )
124 { log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
125 }
126 return 0;
127 }
128
verify_absdiff_long(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)129 static int verify_absdiff_long( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
130 {
131 const cl_long *inA = (const cl_long *)p;
132 const cl_long *inB = (const cl_long *)q;
133 const cl_ulong *outptr = (const cl_ulong *)r;
134 size_t i;
135 for( i = 0; i < n; i++ )
136 {
137 cl_ulong r = abs_diff(inA[i], inB[i]);
138 if( r != outptr[i] )
139 { log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
140 }
141 return 0;
142 }
143
verify_absdiff_ulong(const void * p,const void * q,const void * r,size_t n,const char * sizeName,size_t vecSize)144 static int verify_absdiff_ulong( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
145 {
146 const cl_ulong *inA = (const cl_ulong *)p;
147 const cl_ulong *inB = (const cl_ulong *)q;
148 const cl_ulong *outptr = (const cl_ulong *)r;
149 size_t i;
150 for( i = 0; i < n; i++ )
151 {
152 cl_ulong r = abs_diff(inA[i], inB[i]);
153 if( r != outptr[i] )
154 { log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
155 }
156 return 0;
157 }
158
159 typedef int (*verifyFunc)( const void *, const void *, const void *, size_t n, const char *sizeName, size_t vecSize);
160 static const verifyFunc verify[] = { verify_absdiff_char, verify_absdiff_uchar,
161 verify_absdiff_short, verify_absdiff_ushort,
162 verify_absdiff_int, verify_absdiff_uint,
163 verify_absdiff_long, verify_absdiff_ulong };
164
165 //FIXME: enable long and ulong when GPU path is working
166 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
167
168 //FIXME: enable "16" when support for > 64 byte vectors go into LLVM
169 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
170 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
171 static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" };
172 static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
173
printSrc(const char * src[],int nSrcStrings)174 static void printSrc(const char *src[], int nSrcStrings) {
175 int i;
176 for(i = 0; i < nSrcStrings; ++i) {
177 log_info("%s", src[i]);
178 }
179 }
180
test_integer_abs_diff(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)181 int test_integer_abs_diff(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
182 {
183 cl_int *input_ptr[2], *output_ptr, *p;
184 int err;
185 int i;
186 cl_uint vectorSize;
187 cl_uint type;
188 MTdata d;
189 int fail_count = 0;
190
191 size_t length = sizeof(cl_int) * 4 * n_elems;
192
193 input_ptr[0] = (cl_int*)malloc(length);
194 input_ptr[1] = (cl_int*)malloc(length);
195 output_ptr = (cl_int*)malloc(length);
196
197 d = init_genrand( gRandomSeed );
198 p = input_ptr[0];
199 for (i=0; i<4 * n_elems; i++)
200 p[i] = genrand_int32(d);
201 p = input_ptr[1];
202 for (i=0; i<4 * n_elems; i++)
203 p[i] = genrand_int32(d);
204 free_mtdata(d); d = NULL;
205
206 for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
207 {
208 //embedded devices don't support long/ulong so skip over
209 if (! gHasLong && strstr(test_str_names[type],"long"))
210 {
211 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
212 continue;
213 }
214
215 verifyFunc f = verify[ type ];
216 // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
217 size_t elementCount = length / kSizes[type];
218 cl_mem streams[3];
219
220 log_info( "%s", test_str_names[type] );
221 fflush( stdout );
222
223 // Set up data streams for the type
224 streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
225 if (!streams[0])
226 {
227 log_error("clCreateBuffer failed\n");
228 return -1;
229 }
230 streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
231 if (!streams[1])
232 {
233 log_error("clCreateBuffer failed\n");
234 return -1;
235 }
236 streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
237 if (!streams[2])
238 {
239 log_error("clCreateBuffer failed\n");
240 return -1;
241 }
242
243 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
244 if (err != CL_SUCCESS)
245 {
246 log_error("clEnqueueWriteBuffer failed\n");
247 return -1;
248 }
249 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
250 if (err != CL_SUCCESS)
251 {
252 log_error("clEnqueueWriteBuffer failed\n");
253 return -1;
254 }
255
256 for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
257 {
258 cl_program program = NULL;
259 cl_kernel kernel = NULL;
260
261 const char *source[] = {
262 "__kernel void test_absdiff_", test_str_names[type], vector_size_names[vectorSize],
263 "(__global ", test_str_names[type], vector_param_size_names[vectorSize],
264 " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize],
265 " *srcB, __global u", test_str_names[type & -2], vector_param_size_names[vectorSize],
266 " *dst)\n"
267 "{\n"
268 " int tid = get_global_id(0);\n"
269 "\n"
270 " ", test_str_names[type], vector_size_names[vectorSize], " sA, sB;\n",
271 " sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n",
272 " sB = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcB )" : "srcB[tid]", ";\n",
273 " u", test_str_names[type & -2], vector_size_names[vectorSize], " dstVal = abs_diff(sA, sB);\n"
274 " ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n",
275 "}\n" };
276
277
278 char kernelName[128];
279 snprintf( kernelName, sizeof( kernelName ), "test_absdiff_%s%s", test_str_names[type], vector_size_names[vectorSize] );
280
281 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
282
283 if (err) {
284 return -1;
285 }
286
287 #if 0
288 log_info("About to run\n");
289 log_info("=====\n");
290 printSrc(source, sizeof(source)/sizeof(source[0]));
291 log_info("=====\n");
292 #endif
293
294 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
295 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
296 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
297 if (err != CL_SUCCESS)
298 {
299 log_error("clSetKernelArgs failed\n");
300 return -1;
301 }
302
303 //Wipe the output buffer clean
304 uint32_t pattern = 0xdeadbeef;
305 memset_pattern4( output_ptr, &pattern, length );
306 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
307 if (err != CL_SUCCESS)
308 {
309 log_error("clEnqueueWriteBuffer failed\n");
310 return -1;
311 }
312
313 size_t size = elementCount / (vector_sizes[vectorSize]);
314 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
315 if (err != CL_SUCCESS)
316 {
317 log_error("clEnqueueNDRangeKernel failed\n");
318 return -1;
319 }
320
321 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
322 if (err != CL_SUCCESS)
323 {
324 log_error("clEnqueueReadBuffer failed\n");
325 return -1;
326 }
327
328 char *inP = (char *)input_ptr[0];
329 char *inP2 = (char *)input_ptr[1];
330 char *outP = (char *)output_ptr;
331
332 for( size_t e = 0; e < size; e++ )
333 {
334 if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
335 printSrc(source, sizeof(source)/sizeof(source[0]));
336 ++fail_count; break; // return -1;
337 }
338 inP += kSizes[type] * ( (vector_sizes[vectorSize]) );
339 inP2 += kSizes[type] * ( (vector_sizes[vectorSize]) );
340 outP += kSizes[type] * ( (vector_sizes[vectorSize]) );
341 }
342
343 clReleaseKernel( kernel );
344 clReleaseProgram( program );
345 log_info( "." );
346 fflush( stdout );
347 }
348
349 clReleaseMemObject( streams[0] );
350 clReleaseMemObject( streams[1] );
351 clReleaseMemObject( streams[2] );
352 log_info( "done\n" );
353 }
354
355
356 if(fail_count) {
357 log_info("Failed on %d types\n", fail_count);
358 return -1;
359 }
360
361 free(input_ptr[0]);
362 free(input_ptr[1]);
363 free(output_ptr);
364
365 return err;
366 }
367
368
369