1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18
19 #include <algorithm>
20
21 #define TEST_SIZE 512
22
23 const char *singleParamIntegerKernelSourcePattern =
24 "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
25 "{\n"
26 " int tid = get_global_id(0);\n"
27 " %s%s tmp = vload%s( tid, destValues );\n"
28 " tmp %s= %s( vload%s( tid, sourceA ) );\n"
29 " vstore%s( tmp, tid, destValues );\n"
30 "\n"
31 "}\n";
32
33 const char *singleParamSingleSizeIntegerKernelSourcePattern =
34 "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
35 "{\n"
36 " int tid = get_global_id(0);\n"
37 " destValues[tid] %s= %s( sourceA[tid] );\n"
38 "}\n";
39
40 typedef bool (*singleParamIntegerVerifyFn)( void *source, void *destination, ExplicitType vecType );
41 static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType );
42 bool verify_integer_divideAssign( void *source, void *destination, ExplicitType vecType );
43 bool verify_integer_moduloAssign( void *source, void *destination, ExplicitType vecType );
44
test_single_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecType,size_t vecSize,singleParamIntegerVerifyFn verifyFn,MTdata d,bool useOpKernel=false)45 int test_single_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
46 ExplicitType vecType, size_t vecSize, singleParamIntegerVerifyFn verifyFn,
47 MTdata d, bool useOpKernel = false )
48 {
49 clProgramWrapper program;
50 clKernelWrapper kernel;
51 clMemWrapper streams[2];
52 cl_long inDataA[TEST_SIZE * 16], outData[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], expected;
53 int error, i;
54 size_t threads[1], localThreads[1];
55 char kernelSource[10240];
56 char *programPtr;
57 char sizeName[4];
58
59 if (! gHasLong && strstr(get_explicit_type_name(vecType),"long"))
60 {
61 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecType) );
62 return CL_SUCCESS;
63 }
64
65 /* Create the source */
66 if( vecSize == 1 )
67 sizeName[ 0 ] = 0;
68 else
69 sprintf( sizeName, "%d", (int)vecSize );
70
71 if( vecSize == 1 )
72 sprintf( kernelSource, singleParamSingleSizeIntegerKernelSourcePattern,
73 get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
74 useOpKernel ? fnName : "", useOpKernel ? "" : fnName );
75 else
76 sprintf( kernelSource, singleParamIntegerKernelSourcePattern,
77 get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
78 get_explicit_type_name( vecType ), sizeName, sizeName,
79 useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName,
80 sizeName );
81
82 /* Create kernels */
83 programPtr = kernelSource;
84 if (create_single_kernel_helper(context, &program, &kernel, 1,
85 (const char **)&programPtr, "sample_test"))
86 {
87 log_error("The program we attempted to compile was: \n%s\n", kernelSource);
88 return -1;
89 }
90
91 /* Generate some streams */
92 generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA );
93
94 streams[0] = clCreateBuffer(
95 context, CL_MEM_COPY_HOST_PTR,
96 get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inDataA, NULL);
97 if( streams[0] == NULL )
98 {
99 log_error("ERROR: Creating input array A failed!\n");
100 return -1;
101 }
102
103 if( useOpKernel )
104 {
105 // Op kernels use an r/w buffer for the second param, so we need to init it with data
106 generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB );
107 }
108 streams[1] = clCreateBuffer(
109 context, (CL_MEM_READ_WRITE | (useOpKernel ? CL_MEM_COPY_HOST_PTR : 0)),
110 get_explicit_type_size(vecType) * vecSize * TEST_SIZE,
111 (useOpKernel) ? &inDataB : NULL, NULL);
112 if( streams[1] == NULL )
113 {
114 log_error("ERROR: Creating output array failed!\n");
115 return -1;
116 }
117
118 /* Assign streams and execute */
119 error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
120 test_error( error, "Unable to set indexed kernel arguments" );
121 error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
122 test_error( error, "Unable to set indexed kernel arguments" );
123
124 /* Run the kernel */
125 threads[0] = TEST_SIZE;
126
127 error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
128 test_error( error, "Unable to get work group size to use" );
129
130 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
131 test_error( error, "Unable to execute test kernel" );
132
133 memset(outData, 0xFF, get_explicit_type_size( vecType ) * TEST_SIZE * vecSize );
134
135 /* Now get the results */
136 error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0,
137 get_explicit_type_size( vecType ) * TEST_SIZE * vecSize,
138 outData, 0, NULL, NULL );
139 test_error( error, "Unable to read output array!" );
140
141 // deal with division by 0 -- any answer is allowed here
142 if( verifyFn == verify_integer_divideAssign || verifyFn == verify_integer_moduloAssign )
143 patchup_divide_results( outData, inDataA, inDataB, TEST_SIZE * vecSize, vecType );
144
145 /* And verify! */
146 char *p = (char *)outData;
147 char *in = (char *)inDataA;
148 char *in2 = (char *)inDataB;
149 for( i = 0; i < (int)TEST_SIZE; i++ )
150 {
151 for( size_t j = 0; j < vecSize; j++ )
152 {
153 if( useOpKernel )
154 memcpy( &expected, in2, get_explicit_type_size( vecType ) );
155
156 verifyFn( in, &expected, vecType );
157 if( memcmp( &expected, p, get_explicit_type_size( vecType ) ) != 0 )
158 {
159 switch( get_explicit_type_size( vecType ))
160 {
161 case 1:
162 if( useOpKernel )
163 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x)\n",
164 (int)i, (int)j,
165 ((cl_uchar*)&expected)[0],
166 *( (cl_uchar *)p ),
167 *( (cl_uchar *)in ),
168 *( (cl_uchar *)in2 ) );
169 else
170 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x)\n",
171 (int)i, (int)j,
172 ((cl_uchar*)&expected)[0],
173 *( (cl_uchar *)p ),
174 *( (cl_uchar *)in ) );
175 break;
176
177 case 2:
178 if( useOpKernel )
179 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x)\n",
180 (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
181 *( (cl_ushort *)in ), *( (cl_ushort *)in2 ) );
182 else
183 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x)\n",
184 (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
185 *( (cl_ushort *)in ) );
186 break;
187
188 case 4:
189 if( useOpKernel )
190 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
191 (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
192 *( (cl_uint *)in ), *( (cl_uint *)in2 ) );
193 else
194 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x)\n",
195 (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
196 *( (cl_uint *)in ) );
197 break;
198
199 case 8:
200 if( useOpKernel )
201 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
202 (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
203 *( (cl_ulong *)in ), *( (cl_ulong *)in2 ) );
204 else
205 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx)\n",
206 (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
207 *( (cl_ulong *)in ) );
208 break;
209 }
210 return -1;
211 }
212 p += get_explicit_type_size( vecType );
213 in += get_explicit_type_size( vecType );
214 in2 += get_explicit_type_size( vecType );
215 }
216 }
217
218 return 0;
219 }
220
test_single_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,singleParamIntegerVerifyFn verifyFn,bool useOpKernel=false)221 int test_single_param_integer_fn( cl_command_queue queue, cl_context context, const char *fnName, singleParamIntegerVerifyFn verifyFn, bool useOpKernel = false )
222 {
223 ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
224 unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO 3 not tested
225 unsigned int index, typeIndex;
226 int retVal = 0;
227 RandomSeed seed(gRandomSeed );
228
229 for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
230 {
231 if ((types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong)
232 continue;
233
234 for( index = 0; vecSizes[ index ] != 0; index++ )
235 {
236 if( test_single_param_integer_kernel(queue, context, fnName, types[ typeIndex ], vecSizes[ index ], verifyFn, seed, useOpKernel ) != 0 )
237 {
238 log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
239 retVal = -1;
240 }
241 }
242 }
243
244 return retVal;
245 }
246
verify_integer_clz(void * source,void * destination,ExplicitType vecType)247 bool verify_integer_clz( void *source, void *destination, ExplicitType vecType )
248 {
249 cl_long testValue;
250 int count;
251 int typeBits;
252
253 switch( vecType )
254 {
255 case kChar:
256 testValue = *( (cl_char *)source );
257 typeBits = 8 * sizeof( cl_char );
258 break;
259 case kUChar:
260 testValue = *( (cl_uchar *)source );
261 typeBits = 8 * sizeof( cl_uchar );
262 break;
263 case kShort:
264 testValue = *( (cl_short *)source );
265 typeBits = 8 * sizeof( cl_short );
266 break;
267 case kUShort:
268 testValue = *( (cl_ushort *)source );
269 typeBits = 8 * sizeof( cl_ushort );
270 break;
271 case kInt:
272 testValue = *( (cl_int *)source );
273 typeBits = 8 * sizeof( cl_int );
274 break;
275 case kUInt:
276 testValue = *( (cl_uint *)source );
277 typeBits = 8 * sizeof( cl_uint );
278 break;
279 case kLong:
280 testValue = *( (cl_long *)source );
281 typeBits = 8 * sizeof( cl_long );
282 break;
283 case kULong:
284 // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting
285 testValue = *( (cl_ulong *)source );
286 typeBits = 8 * sizeof( cl_ulong );
287 break;
288 default:
289 // Should never happen
290 return false;
291 }
292
293 count = typeBits;
294 if( testValue )
295 {
296 testValue <<= 8 * sizeof( testValue ) - typeBits;
297 for( count = 0; 0 == (testValue & CL_LONG_MIN); count++ )
298 testValue <<= 1;
299 }
300
301 switch( vecType )
302 {
303 case kChar:
304 *( (cl_char *)destination ) = count;
305 break;
306 case kUChar:
307 *( (cl_uchar *)destination ) = count;
308 break;
309 case kShort:
310 *( (cl_short *)destination ) = count;
311 break;
312 case kUShort:
313 *( (cl_ushort *)destination ) = count;
314 break;
315 case kInt:
316 *( (cl_int *)destination ) = count;
317 break;
318 case kUInt:
319 *( (cl_uint *)destination ) = count;
320 break;
321 case kLong:
322 *( (cl_long *)destination ) = count;
323 break;
324 case kULong:
325 *( (cl_ulong *)destination ) = count;
326 break;
327 default:
328 // Should never happen
329 return false;
330 }
331 return true;
332 }
333
test_integer_clz(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)334 int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
335 {
336 return test_single_param_integer_fn( queue, context, "clz", verify_integer_clz );
337 }
338
339
verify_integer_ctz(void * source,void * destination,ExplicitType vecType)340 bool verify_integer_ctz( void *source, void *destination, ExplicitType vecType )
341 {
342 cl_long testValue;
343 int count;
344 int typeBits;
345
346 switch( vecType )
347 {
348 case kChar:
349 testValue = *( (cl_char *)source );
350 typeBits = 8 * sizeof( cl_char );
351 break;
352 case kUChar:
353 testValue = *( (cl_uchar *)source );
354 typeBits = 8 * sizeof( cl_uchar );
355 break;
356 case kShort:
357 testValue = *( (cl_short *)source );
358 typeBits = 8 * sizeof( cl_short );
359 break;
360 case kUShort:
361 testValue = *( (cl_ushort *)source );
362 typeBits = 8 * sizeof( cl_ushort );
363 break;
364 case kInt:
365 testValue = *( (cl_int *)source );
366 typeBits = 8 * sizeof( cl_int );
367 break;
368 case kUInt:
369 testValue = *( (cl_uint *)source );
370 typeBits = 8 * sizeof( cl_uint );
371 break;
372 case kLong:
373 testValue = *( (cl_long *)source );
374 typeBits = 8 * sizeof( cl_long );
375 break;
376 case kULong:
377 // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting
378 testValue = *( (cl_ulong *)source );
379 typeBits = 8 * sizeof( cl_ulong );
380 break;
381 default:
382 // Should never happen
383 return false;
384 }
385
386 if ( testValue == 0 )
387 count = typeBits;
388 else
389 {
390 for( count = 0; (0 == (testValue & 0x1)); count++ )
391 testValue >>= 1;
392 }
393
394 switch( vecType )
395 {
396 case kChar:
397 *( (cl_char *)destination ) = count;
398 break;
399 case kUChar:
400 *( (cl_uchar *)destination ) = count;
401 break;
402 case kShort:
403 *( (cl_short *)destination ) = count;
404 break;
405 case kUShort:
406 *( (cl_ushort *)destination ) = count;
407 break;
408 case kInt:
409 *( (cl_int *)destination ) = count;
410 break;
411 case kUInt:
412 *( (cl_uint *)destination ) = count;
413 break;
414 case kLong:
415 *( (cl_long *)destination ) = count;
416 break;
417 case kULong:
418 *( (cl_ulong *)destination ) = count;
419 break;
420 default:
421 // Should never happen
422 return false;
423 }
424 return true;
425 }
426
427
test_integer_ctz(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)428 int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
429 {
430 return test_single_param_integer_fn( queue, context, "ctz", verify_integer_ctz );
431 }
432
433 #define OP_CASE( op, sizeName, size ) \
434 case sizeName: \
435 { \
436 cl_##size *d = (cl_##size *)destination; \
437 *d op##= *( (cl_##size *)source ); \
438 break; \
439 }
440
441 #define OP_CASES( op ) \
442 switch( vecType ) \
443 { \
444 OP_CASE( op, kChar, char ) \
445 OP_CASE( op, kUChar, uchar ) \
446 OP_CASE( op, kShort, short ) \
447 OP_CASE( op, kUShort, ushort ) \
448 OP_CASE( op, kInt, int ) \
449 OP_CASE( op, kUInt, uint ) \
450 OP_CASE( op, kLong, long ) \
451 OP_CASE( op, kULong, ulong ) \
452 default: \
453 break; \
454 }
455
456 #define OP_TEST( op, opName ) \
457 bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \
458 { \
459 OP_CASES( op ) \
460 return true; \
461 } \
462 int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \
463 { \
464 return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
465 }
466
467 OP_TEST( +, add )
468 OP_TEST( -, subtract )
469 OP_TEST( *, multiply )
470 OP_TEST( ^, exclusiveOr )
471 OP_TEST( |, or )
472 OP_TEST( &, and )
473
474 #define OP_CASE_GUARD( op, sizeName, size ) \
475 case sizeName: \
476 { \
477 cl_##size *d = (cl_##size *)destination; \
478 cl_##size *s = (cl_##size *)source; \
479 if( *s == 0 ) \
480 *d = -1; \
481 else \
482 *d op##= *s; \
483 break; \
484 }
485
486 #define OP_CASE_GUARD_SIGNED( op, sizeName, size, MIN_VAL ) \
487 case sizeName: \
488 { \
489 cl_##size *d = (cl_##size *)destination; \
490 cl_##size *s = (cl_##size *)source; \
491 if( *s == 0 || (*d == MIN_VAL && *s == -1)) \
492 *d = -1 - MIN_VAL; \
493 else \
494 *d op##= *s; \
495 break; \
496 }
497
498 #define OP_CASES_GUARD( op ) \
499 switch( vecType ) \
500 { \
501 OP_CASE_GUARD_SIGNED( op, kChar, char, CL_CHAR_MIN ) \
502 OP_CASE_GUARD( op, kUChar, uchar ) \
503 OP_CASE_GUARD_SIGNED( op, kShort, short, CL_SHRT_MIN ) \
504 OP_CASE_GUARD( op, kUShort, ushort ) \
505 OP_CASE_GUARD_SIGNED( op, kInt, int, CL_INT_MIN ) \
506 OP_CASE_GUARD( op, kUInt, uint ) \
507 OP_CASE_GUARD_SIGNED( op, kLong, long, CL_LONG_MIN ) \
508 OP_CASE_GUARD( op, kULong, ulong ) \
509 default: \
510 break; \
511 }
512
513 #define OP_TEST_GUARD( op, opName ) \
514 bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \
515 { \
516 OP_CASES_GUARD( op ) \
517 return true; \
518 } \
519 int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \
520 { \
521 return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
522 }
523
524 OP_TEST_GUARD( /, divide )
525 OP_TEST_GUARD( %, modulo )
526
527 #define PATCH_CASE( _out, _src, _dest, _count, _cl_type ) \
528 { \
529 const _cl_type *denom = (const _cl_type* ) _src; \
530 _cl_type *result = (_cl_type* ) _out; \
531 for( size_t i = 0; i < _count; i++ ) \
532 if( denom[i] == 0 ) \
533 result[i] = (_cl_type) -1; \
534 }
535
536 #define PATCH_CASE_SIGNED( _out, _src, _dest, _count, _cl_type, _MIN_VAL ) \
537 { \
538 const _cl_type *num = (const _cl_type* ) _dest; \
539 const _cl_type *denom = (const _cl_type* ) _src; \
540 _cl_type *result = (_cl_type* ) _out; \
541 for( size_t i = 0; i < _count; i++ ) \
542 if( denom[i] == 0 || ( num[i] == _MIN_VAL && denom[i] == -1)) \
543 result[i] = -1 - _MIN_VAL; \
544 }
545
patchup_divide_results(void * outData,const void * inDataA,const void * inDataB,size_t count,ExplicitType vecType)546 static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType )
547 {
548 switch( vecType )
549 {
550 case kChar:
551 PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_char, CL_CHAR_MIN )
552 break;
553 case kUChar:
554 PATCH_CASE( outData, inDataA, inDataB, count, cl_uchar )
555 break;
556 case kShort:
557 PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_short, CL_SHRT_MIN )
558 break;
559 case kUShort:
560 PATCH_CASE( outData, inDataA, inDataB, count, cl_ushort )
561 break;
562 case kInt:
563 PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_int, CL_INT_MIN )
564 break;
565 case kUInt:
566 PATCH_CASE( outData, inDataA, inDataB, count, cl_uint )
567 break;
568 case kLong:
569 PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_long, CL_LONG_MIN )
570 break;
571 case kULong:
572 PATCH_CASE( outData, inDataA, inDataB, count, cl_ulong )
573 break;
574 default:
575 log_error( "ERROR: internal test error -- unknown data type %d\n", vecType );
576 break;
577 }
578 }
579
580 const char *twoParamIntegerKernelSourcePattern =
581 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *destValues)\n"
582 "{\n"
583 " int tid = get_global_id(0);\n"
584 " %s%s sA = %s;\n"
585 " %s%s sB = %s;\n"
586 " %s%s dst = %s( sA, sB );\n"
587 " %s;\n"
588 "\n"
589 "}\n";
590
591 typedef bool (*twoParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *destination, ExplicitType vecType );
592
build_load_statement(char * outString,size_t vecSize,const char * name)593 static char * build_load_statement( char *outString, size_t vecSize, const char *name )
594 {
595 if( vecSize != 3 )
596 sprintf( outString, "%s[ tid ]", name );
597 else
598 sprintf( outString, "vload3( tid, %s )", name );
599 return outString;
600 }
601
build_store_statement(char * outString,size_t vecSize,const char * name,const char * srcName)602 static char * build_store_statement( char *outString, size_t vecSize, const char *name, const char *srcName )
603 {
604 if( vecSize != 3 )
605 sprintf( outString, "%s[ tid ] = %s", name, srcName );
606 else
607 sprintf( outString, "vstore3( %s, tid, %s )", srcName, name );
608 return outString;
609 }
610
test_two_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecAType,ExplicitType vecBType,unsigned int vecSize,twoParamIntegerVerifyFn verifyFn,MTdata d)611 int test_two_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
612 ExplicitType vecAType, ExplicitType vecBType, unsigned int vecSize, twoParamIntegerVerifyFn verifyFn, MTdata d )
613 {
614 clProgramWrapper program;
615 clKernelWrapper kernel;
616 clMemWrapper streams[3];
617 cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
618 int error, i;
619 size_t threads[1], localThreads[1];
620 char kernelSource[10240];
621 char *programPtr;
622 char sizeName[4], paramSizeName[4];
623
624 // embedded profiles don't support long/ulong datatypes
625 if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
626 {
627 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
628 return CL_SUCCESS;
629 }
630
631 /* Create the source */
632 if( vecSize == 1 )
633 sizeName[ 0 ] = 0;
634 else
635 sprintf( sizeName, "%d", vecSize );
636 if( ( vecSize == 1 ) || ( vecSize == 3 ) )
637 paramSizeName[ 0 ] = 0;
638 else
639 sprintf( paramSizeName, "%d", vecSize );
640
641 char sourceALoad[ 128 ], sourceBLoad[ 128 ], destStore[ 128 ];
642
643 sprintf( kernelSource, twoParamIntegerKernelSourcePattern,
644 get_explicit_type_name( vecAType ), paramSizeName,
645 get_explicit_type_name( vecBType ), paramSizeName,
646 get_explicit_type_name( vecAType ), paramSizeName,
647 get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
648 get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
649 get_explicit_type_name( vecAType ), sizeName,
650 fnName,
651 build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
652 );
653
654 /* Create kernels */
655 programPtr = kernelSource;
656 if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
657 {
658 log_error("The program we attempted to compile was: \n%s\n", kernelSource);
659 return -1;
660 }
661
662 /* Generate some streams */
663 generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
664 generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
665
666 streams[0] = clCreateBuffer(
667 context, CL_MEM_COPY_HOST_PTR,
668 get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
669 if( streams[0] == NULL )
670 {
671 log_error("ERROR: Creating input array A failed!\n");
672 return -1;
673 }
674 streams[1] = clCreateBuffer(
675 context, CL_MEM_COPY_HOST_PTR,
676 get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
677 if( streams[1] == NULL )
678 {
679 log_error("ERROR: Creating input array B failed!\n");
680 return -1;
681 }
682 streams[2] = clCreateBuffer(
683 context, CL_MEM_READ_WRITE,
684 get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, NULL, NULL);
685 if( streams[2] == NULL )
686 {
687 log_error("ERROR: Creating output array failed!\n");
688 return -1;
689 }
690
691 /* Assign streams and execute */
692 error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
693 test_error( error, "Unable to set indexed kernel arguments" );
694 error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
695 test_error( error, "Unable to set indexed kernel arguments" );
696 error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
697 test_error( error, "Unable to set indexed kernel arguments" );
698
699 /* Run the kernel */
700 threads[0] = TEST_SIZE;
701
702 error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
703 test_error( error, "Unable to get work group size to use" );
704
705 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
706 test_error( error, "Unable to execute test kernel" );
707
708 memset(outData, 0xFF, get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize);
709
710 /* Now get the results */
711 error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0,
712 get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize, outData, 0,
713 NULL, NULL );
714 test_error( error, "Unable to read output array!" );
715
716 /* And verify! */
717 char *inA = (char *)inDataA;
718 char *inB = (char *)inDataB;
719 char *out = (char *)outData;
720 for( i = 0; i < (int)TEST_SIZE; i++ )
721 {
722 for( size_t j = 0; j < vecSize; j++ )
723 {
724 bool test = verifyFn( inA, inB, &expected, vecAType );
725 if( test && ( memcmp( &expected, out, get_explicit_type_size( vecAType ) ) != 0 ) )
726 {
727 switch( get_explicit_type_size( vecAType ))
728 {
729 case 1:
730 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x), TEST_SIZE %d\n",
731 (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
732 *( (cl_uchar *)inA ),
733 *( (cl_uchar *)inB ) ,
734 TEST_SIZE);
735 break;
736
737 case 2:
738 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x), TEST_SIZE %d\n",
739 (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
740 *( (cl_ushort *)inA ),
741 *( (cl_ushort *)inB ),
742 TEST_SIZE);
743 break;
744
745 case 4:
746 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
747 (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
748 *( (cl_uint *)inA ),
749 *( (cl_uint *)inB ) );
750 break;
751
752 case 8:
753 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
754 (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
755 *( (cl_ulong *)inA ),
756 *( (cl_ulong *)inB ) );
757 break;
758 }
759 return -1;
760 }
761 inA += get_explicit_type_size( vecAType );
762 inB += get_explicit_type_size( vecBType );
763 out += get_explicit_type_size( vecAType );
764 }
765 }
766
767 return 0;
768 }
769
test_two_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,twoParamIntegerVerifyFn verifyFn)770 int test_two_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
771 {
772 ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
773 unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO : 3 not tested
774 unsigned int index, typeIndex;
775 int retVal = 0;
776 RandomSeed seed(gRandomSeed );
777
778 for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
779 {
780 if (( types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong)
781 continue;
782
783 for( index = 0; vecSizes[ index ] != 0; index++ )
784 {
785 if( test_two_param_integer_kernel(queue, context, fnName, types[ typeIndex ], types[ typeIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
786 {
787 log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
788 retVal = -1;
789 }
790 }
791 }
792
793 return retVal;
794 }
795
test_two_param_unmatched_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,twoParamIntegerVerifyFn verifyFn)796 int test_two_param_unmatched_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
797 {
798 ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
799 unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
800 unsigned int index, typeAIndex, typeBIndex;
801 int retVal = 0;
802 RandomSeed seed( gRandomSeed );
803
804 for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
805 {
806 if (( types[ typeAIndex ] == kLong || types[ typeAIndex ] == kULong) && !gHasLong)
807 continue;
808
809 for( typeBIndex = 0; types[ typeBIndex ] != kNumExplicitTypes; typeBIndex++ )
810 {
811 if (( types[ typeBIndex ] == kLong || types[ typeBIndex ] == kULong) && !gHasLong)
812 continue;
813
814 for( index = 0; vecSizes[ index ] != 0; index++ )
815 {
816 if( test_two_param_integer_kernel( queue, context, fnName, types[ typeAIndex ], types[ typeBIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
817 {
818 log_error( " Vector %s%d / %s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], get_explicit_type_name( types[ typeBIndex ] ), vecSizes[ index ] );
819 retVal = -1;
820 }
821 }
822 }
823 }
824
825 return retVal;
826 }
827
verify_integer_hadd(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)828 bool verify_integer_hadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
829 {
830 cl_long testValueA, testValueB, overflow;
831 cl_ulong uValueA, uValueB, uOverflow;
832
833 switch( vecType )
834 {
835 case kChar:
836 testValueA = *( (cl_char *)sourceA );
837 testValueB = *( (cl_char *)sourceB );
838 *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB ) >> 1 );
839 break;
840 case kUChar:
841 testValueA = *( (cl_uchar *)sourceA );
842 testValueB = *( (cl_uchar *)sourceB );
843 *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB ) >> 1 );
844 break;
845 case kShort:
846 testValueA = *( (cl_short *)sourceA );
847 testValueB = *( (cl_short *)sourceB );
848 *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB ) >> 1 );
849 break;
850 case kUShort:
851 testValueA = *( (cl_ushort *)sourceA );
852 testValueB = *( (cl_ushort *)sourceB );
853 *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB ) >> 1 );
854 break;
855 case kInt:
856 testValueA = *( (cl_int *)sourceA );
857 testValueB = *( (cl_int *)sourceB );
858 *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB ) >> 1 );
859 break;
860 case kUInt:
861 testValueA = *( (cl_uint *)sourceA );
862 testValueB = *( (cl_uint *)sourceB );
863 *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB ) >> 1 );
864 break;
865 case kLong:
866 // The long way to avoid dropping bits
867 testValueA = *( (cl_long *)sourceA );
868 testValueB = *( (cl_long *)sourceB );
869 overflow = ( testValueA & 0x1 ) + ( testValueB & 0x1 );
870 *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + ( overflow >> 1 );
871 break;
872 case kULong:
873 // The long way to avoid dropping bits
874 uValueA = *( (cl_ulong *)sourceA );
875 uValueB = *( (cl_ulong *)sourceB );
876 uOverflow = ( uValueA & 0x1 ) + ( uValueB & 0x1 );
877 *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + ( uOverflow >> 1 );
878 break;
879 default:
880 // Should never happen
881 return false;
882 }
883 return true;
884 }
885
test_integer_hadd(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)886 int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
887 {
888 return test_two_param_integer_fn( queue, context, "hadd", verify_integer_hadd );
889 }
890
verify_integer_rhadd(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)891 bool verify_integer_rhadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
892 {
893 cl_long testValueA, testValueB, overflow;
894 cl_ulong uValueA, uValueB, uOverflow;
895
896 switch( vecType )
897 {
898 case kChar:
899 testValueA = *( (cl_char *)sourceA );
900 testValueB = *( (cl_char *)sourceB );
901 *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB + 1 ) >> 1 );
902 break;
903 case kUChar:
904 testValueA = *( (cl_uchar *)sourceA );
905 testValueB = *( (cl_uchar *)sourceB );
906 *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB + 1 ) >> 1 );
907 break;
908 case kShort:
909 testValueA = *( (cl_short *)sourceA );
910 testValueB = *( (cl_short *)sourceB );
911 *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB + 1 ) >> 1 );
912 break;
913 case kUShort:
914 testValueA = *( (cl_ushort *)sourceA );
915 testValueB = *( (cl_ushort *)sourceB );
916 *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB + 1 ) >> 1 );
917 break;
918 case kInt:
919 testValueA = *( (cl_int *)sourceA );
920 testValueB = *( (cl_int *)sourceB );
921 *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB + 1 ) >> 1 );
922 break;
923 case kUInt:
924 testValueA = *( (cl_uint *)sourceA );
925 testValueB = *( (cl_uint *)sourceB );
926 *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB + 1 ) >> 1 );
927 break;
928 case kLong:
929 // The long way to avoid dropping bits
930 testValueA = *( (cl_long *)sourceA );
931 testValueB = *( (cl_long *)sourceB );
932 overflow = ( testValueA | testValueB ) & 0x1;
933 *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + overflow;
934 break;
935 case kULong:
936 // The long way to avoid dropping bits
937 uValueA = *( (cl_ulong *)sourceA );
938 uValueB = *( (cl_ulong *)sourceB );
939 uOverflow = ( uValueA | uValueB ) & 0x1;
940 *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + uOverflow;
941 break;
942 default:
943 // Should never happen
944 return false;
945 }
946 return true;
947 }
948
test_integer_rhadd(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)949 int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
950 {
951 return test_two_param_integer_fn( queue, context, "rhadd", verify_integer_rhadd );
952 }
953
954 #define MIN_CASE( type, const ) \
955 case const : \
956 { \
957 cl_##type valueA = *( (cl_##type *)sourceA ); \
958 cl_##type valueB = *( (cl_##type *)sourceB ); \
959 *( (cl_##type *)destination ) = (cl_##type)( valueB < valueA ? valueB : valueA ); \
960 break; \
961 }
962
verify_integer_min(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)963 bool verify_integer_min( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
964 {
965 switch( vecType )
966 {
967 MIN_CASE( char, kChar )
968 MIN_CASE( uchar, kUChar )
969 MIN_CASE( short, kShort )
970 MIN_CASE( ushort, kUShort )
971 MIN_CASE( int, kInt )
972 MIN_CASE( uint, kUInt )
973 MIN_CASE( long, kLong )
974 MIN_CASE( ulong, kULong )
975 default:
976 // Should never happen
977 return false;
978 }
979 return true;
980 }
981
test_integer_min(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)982 int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
983 {
984 return test_two_param_integer_fn( queue, context, "min", verify_integer_min);
985 }
986
987 #define MAX_CASE( type, const ) \
988 case const : \
989 { \
990 cl_##type valueA = *( (cl_##type *)sourceA ); \
991 cl_##type valueB = *( (cl_##type *)sourceB ); \
992 *( (cl_##type *)destination ) = (cl_##type)( valueA < valueB ? valueB : valueA ); \
993 break; \
994 }
995
verify_integer_max(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)996 bool verify_integer_max( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
997 {
998 switch( vecType )
999 {
1000 MAX_CASE( char, kChar )
1001 MAX_CASE( uchar, kUChar )
1002 MAX_CASE( short, kShort )
1003 MAX_CASE( ushort, kUShort )
1004 MAX_CASE( int, kInt )
1005 MAX_CASE( uint, kUInt )
1006 MAX_CASE( long, kLong )
1007 MAX_CASE( ulong, kULong )
1008 default:
1009 // Should never happen
1010 return false;
1011 }
1012 return true;
1013 }
1014
test_integer_max(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1015 int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1016 {
1017 return test_two_param_integer_fn( queue, context, "max", verify_integer_max );
1018 }
1019
1020
multiply_unsigned_64_by_64(cl_ulong sourceA,cl_ulong sourceB,cl_ulong & destLow,cl_ulong & destHi)1021 void multiply_unsigned_64_by_64( cl_ulong sourceA, cl_ulong sourceB, cl_ulong &destLow, cl_ulong &destHi )
1022 {
1023 cl_ulong lowA, lowB;
1024 cl_ulong highA, highB;
1025
1026 // Split up the values
1027 lowA = sourceA & 0xffffffff;
1028 highA = sourceA >> 32;
1029 lowB = sourceB & 0xffffffff;
1030 highB = sourceB >> 32;
1031
1032 // Note that, with this split, our multiplication becomes:
1033 // ( a * b )
1034 // = ( ( aHI << 32 + aLO ) * ( bHI << 32 + bLO ) ) >> 64
1035 // = ( ( aHI << 32 * bHI << 32 ) + ( aHI << 32 * bLO ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
1036 // = ( ( aHI * bHI << 64 ) + ( aHI * bLO << 32 ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
1037 // = ( aHI * bHI ) + ( aHI * bLO >> 32 ) + ( aLO * bHI >> 32 ) + ( aLO * bLO >> 64 )
1038
1039 // Now, since each value is 32 bits, the max size of any multiplication is:
1040 // ( 2 ^ 32 - 1 ) * ( 2 ^ 32 - 1 ) = 2^64 - 4^32 + 1 = 2^64 - 2^33 + 1, which fits within 64 bits
1041 // Which means we can do each component within a 64-bit integer as necessary (each component above marked as AB1 - AB4)
1042 cl_ulong aHibHi = highA * highB;
1043 cl_ulong aHibLo = highA * lowB;
1044 cl_ulong aLobHi = lowA * highB;
1045 cl_ulong aLobLo = lowA * lowB;
1046
1047 // Assemble terms.
1048 // We note that in certain cases, sums of products cannot overflow:
1049 //
1050 // The maximum product of two N-bit unsigned numbers is
1051 //
1052 // (2**N-1)^2 = 2**2N - 2**(N+1) + 1
1053 //
1054 // We note that we can add the maximum N-bit number to the 2N-bit product twice without overflow:
1055 //
1056 // (2**N-1)^2 + 2*(2**N-1) = 2**2N - 2**(N+1) + 1 + 2**(N+1) - 2 = 2**2N - 1
1057 //
1058 // If we breakdown the product of two numbers a,b into high and low halves of partial products as follows:
1059 //
1060 // a.hi a.lo
1061 // x b.hi b.lo
1062 //===============================================================================
1063 // (b.hi*a.hi).hi (b.hi*a.hi).lo
1064 // (b.lo*a.hi).hi (b.lo*a.hi).lo
1065 // (b.hi*a.lo).hi (b.hi*a.lo).lo
1066 // + (b.lo*a.lo).hi (b.lo*a.lo).lo
1067 //===============================================================================
1068 //
1069 // The (b.lo*a.lo).lo term cannot cause a carry, so we can ignore them for now. We also know from above, that we can add (b.lo*a.lo).hi
1070 // and (b.hi*a.lo).lo to the 2N bit term [(b.lo*a.hi).hi + (b.lo*a.hi).lo] without overflow. That takes care of all of the terms
1071 // on the right half that might carry. Do that now.
1072 //
1073 cl_ulong aLobLoHi = aLobLo >> 32;
1074 cl_ulong aLobHiLo = aLobHi & 0xFFFFFFFFULL;
1075 aHibLo += aLobLoHi + aLobHiLo;
1076
1077 // That leaves us with these terms:
1078 //
1079 // a.hi a.lo
1080 // x b.hi b.lo
1081 //===============================================================================
1082 // (b.hi*a.hi).hi (b.hi*a.hi).lo
1083 // (b.hi*a.lo).hi
1084 // [ (b.lo*a.hi).hi + (b.lo*a.hi).lo + other ]
1085 // + (b.lo*a.lo).lo
1086 //===============================================================================
1087
1088 // All of the overflow potential from the right half has now been accumulated into the [ (b.lo*a.hi).hi + (b.lo*a.hi).lo ] 2N bit term.
1089 // We can safely separate into high and low parts. Per our rule above, we know we can accumulate the high part of that and (b.hi*a.lo).hi
1090 // into the 2N bit term (b.lo*a.hi) without carry. The low part can be pieced together with (b.lo*a.lo).lo, to give the final low result
1091
1092 destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32); // Cant overflow
1093 destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL );
1094 }
1095
multiply_signed_64_by_64(cl_long sourceA,cl_long sourceB,cl_ulong & destLow,cl_long & destHi)1096 void multiply_signed_64_by_64( cl_long sourceA, cl_long sourceB, cl_ulong &destLow, cl_long &destHi )
1097 {
1098 // Find sign of result
1099 cl_long aSign = sourceA >> 63;
1100 cl_long bSign = sourceB >> 63;
1101 cl_long resultSign = aSign ^ bSign;
1102
1103 // take absolute values of the argument
1104 sourceA = (sourceA ^ aSign) - aSign;
1105 sourceB = (sourceB ^ bSign) - bSign;
1106
1107 cl_ulong hi;
1108 multiply_unsigned_64_by_64( (cl_ulong) sourceA, (cl_ulong) sourceB, destLow, hi );
1109
1110 // Fix the sign
1111 if( resultSign )
1112 {
1113 destLow ^= resultSign;
1114 hi ^= resultSign;
1115 destLow -= resultSign;
1116
1117 //carry if necessary
1118 if( 0 == destLow )
1119 hi -= resultSign;
1120 }
1121
1122 destHi = (cl_long) hi;
1123 }
1124
verify_integer_mul_hi(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)1125 bool verify_integer_mul_hi( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
1126 {
1127 cl_long testValueA, testValueB, highSigned;
1128 cl_ulong highUnsigned, lowHalf;
1129
1130 switch( vecType )
1131 {
1132 case kChar:
1133 testValueA = *( (cl_char *)sourceA );
1134 testValueB = *( (cl_char *)sourceB );
1135 *( (cl_char *)destination ) = (cl_char)( ( testValueA * testValueB ) >> 8 );
1136 break;
1137 case kUChar:
1138 testValueA = *( (cl_uchar *)sourceA );
1139 testValueB = *( (cl_uchar *)sourceB );
1140 *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA * testValueB ) >> 8 );
1141 break;
1142 case kShort:
1143 testValueA = *( (cl_short *)sourceA );
1144 testValueB = *( (cl_short *)sourceB );
1145 *( (cl_short *)destination ) = (cl_short)( ( testValueA * testValueB ) >> 16 );
1146 break;
1147 case kUShort:
1148 testValueA = *( (cl_ushort *)sourceA );
1149 testValueB = *( (cl_ushort *)sourceB );
1150 *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA * testValueB ) >> 16 );
1151 break;
1152 case kInt:
1153 testValueA = *( (cl_int *)sourceA );
1154 testValueB = *( (cl_int *)sourceB );
1155 *( (cl_int *)destination ) = (cl_int)( ( testValueA * testValueB ) >> 32 );
1156 break;
1157 case kUInt:
1158 testValueA = *( (cl_uint *)sourceA );
1159 testValueB = *( (cl_uint *)sourceB );
1160 *( (cl_uint *)destination ) = (cl_uint)( ( testValueA * testValueB ) >> 32 );
1161 break;
1162 case kLong:
1163 testValueA = *( (cl_long *)sourceA );
1164 testValueB = *( (cl_long *)sourceB );
1165
1166 multiply_signed_64_by_64( testValueA, testValueB, lowHalf, highSigned );
1167 *( (cl_long *)destination ) = highSigned;
1168 break;
1169 case kULong:
1170 testValueA = *( (cl_ulong *)sourceA );
1171 testValueB = *( (cl_ulong *)sourceB );
1172
1173 multiply_unsigned_64_by_64( testValueA, testValueB, lowHalf, highUnsigned );
1174 *( (cl_ulong *)destination ) = highUnsigned;
1175 break;
1176 default:
1177 // Should never happen
1178 return false;
1179 }
1180 return true;
1181 }
1182
test_integer_mul_hi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1183 int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1184 {
1185 return test_two_param_integer_fn( queue, context, "mul_hi", verify_integer_mul_hi );
1186 }
1187
verify_integer_rotate(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)1188 bool verify_integer_rotate( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
1189 {
1190 cl_ulong testValueA;
1191 char numBits;
1192
1193 switch( vecType )
1194 {
1195 case kChar:
1196 case kUChar:
1197 testValueA = *( (cl_uchar *)sourceA );
1198 numBits = *( (cl_uchar *)sourceB );
1199 numBits &= 7;
1200 if ( numBits == 0 )
1201 *( (cl_uchar *)destination ) = (cl_uchar)testValueA;
1202 else
1203 *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA << numBits ) | ( testValueA >> ( 8 - numBits ) ) );
1204 break;
1205 case kShort:
1206 case kUShort:
1207 testValueA = *( (cl_ushort *)sourceA );
1208 numBits = *( (cl_ushort *)sourceB );
1209 numBits &= 15;
1210 if ( numBits == 0 )
1211 *( (cl_ushort *)destination ) = (cl_ushort)testValueA;
1212 else
1213 *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA << numBits ) | ( testValueA >> ( 16 - numBits ) ) );
1214 break;
1215 case kInt:
1216 case kUInt:
1217 testValueA = *( (cl_uint *)sourceA );
1218 numBits = *( (cl_uint *)sourceB );
1219 numBits &= 31;
1220 if ( numBits == 0 )
1221 *( (cl_uint *)destination ) = (cl_uint) testValueA;
1222 else
1223 *( (cl_uint *)destination ) = (cl_uint)( ( testValueA << numBits ) | ( testValueA >> ( 32 - numBits ) ) );
1224 break;
1225 case kLong:
1226 case kULong:
1227 testValueA = *( (cl_ulong *)sourceA );
1228 numBits = *( (cl_ulong *)sourceB );
1229 numBits &= 63;
1230 if ( numBits == 0 )
1231 *( (cl_ulong *)destination ) = (cl_ulong)testValueA;
1232 else
1233 *( (cl_ulong *)destination ) = (cl_ulong)( ( testValueA << numBits ) | ( testValueA >> ( 64 - numBits ) ) );
1234 break;
1235 default:
1236 // Should never happen
1237 log_error( "Unknown type encountered in verify_integer_rotate. Test failed. Aborting...\n" );
1238 abort();
1239 return false;
1240 }
1241 return true;
1242 }
1243
test_integer_rotate(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1244 int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1245 {
1246 return test_two_param_integer_fn( queue, context, "rotate", verify_integer_rotate );
1247 }
1248
1249 const char *threeParamIntegerKernelSourcePattern =
1250 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
1251 "{\n"
1252 " int tid = get_global_id(0);\n"
1253 " %s%s sA = %s;\n"
1254 " %s%s sB = %s;\n"
1255 " %s%s sC = %s;\n"
1256 " %s%s dst = %s( sA, sB, sC );\n"
1257 " %s;\n"
1258 "\n"
1259 "}\n";
1260
1261 typedef bool (*threeParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *sourceC, void *destination,
1262 ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType );
1263
test_three_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType,unsigned int vecSize,threeParamIntegerVerifyFn verifyFn,MTdata d)1264 int test_three_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
1265 ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType,
1266 unsigned int vecSize, threeParamIntegerVerifyFn verifyFn, MTdata d )
1267 {
1268 clProgramWrapper program;
1269 clKernelWrapper kernel;
1270 clMemWrapper streams[4];
1271 cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], inDataC[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
1272 int error, i;
1273 size_t threads[1], localThreads[1];
1274 char kernelSource[10240];
1275 char *programPtr;
1276 char sizeName[4], paramSizeName[4];
1277
1278 if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
1279 {
1280 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
1281 return CL_SUCCESS;
1282 }
1283
1284
1285 /* Create the source */
1286 if( vecSize == 1 )
1287 sizeName[ 0 ] = 0;
1288 else
1289 sprintf( sizeName, "%d", vecSize );
1290 if( ( vecSize == 1 ) || ( vecSize == 3 ) )
1291 paramSizeName[ 0 ] = 0;
1292 else
1293 sprintf( paramSizeName, "%d", vecSize );
1294
1295 char sourceALoad[ 128 ], sourceBLoad[ 128 ], sourceCLoad[ 128 ], destStore[ 128 ];
1296
1297 sprintf( kernelSource, threeParamIntegerKernelSourcePattern,
1298 get_explicit_type_name( vecAType ), paramSizeName,
1299 get_explicit_type_name( vecBType ), paramSizeName,
1300 get_explicit_type_name( vecCType ), paramSizeName,
1301 get_explicit_type_name( destType ), paramSizeName,
1302 get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
1303 get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
1304 get_explicit_type_name( vecCType ), sizeName, build_load_statement( sourceCLoad, (size_t)vecSize, "sourceC" ),
1305 get_explicit_type_name( destType ), sizeName,
1306 fnName,
1307 build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
1308 );
1309
1310 /* Create kernels */
1311 programPtr = kernelSource;
1312 if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
1313 {
1314 log_error("The program we attempted to compile was: \n%s\n", kernelSource);
1315 return -1;
1316 }
1317
1318 /* Generate some streams */
1319 generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
1320 generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
1321 generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC );
1322
1323 streams[0] = clCreateBuffer(
1324 context, CL_MEM_COPY_HOST_PTR,
1325 get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
1326 if( streams[0] == NULL )
1327 {
1328 log_error("ERROR: Creating input array A failed!\n");
1329 return -1;
1330 }
1331 streams[1] = clCreateBuffer(
1332 context, CL_MEM_COPY_HOST_PTR,
1333 get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
1334 if( streams[1] == NULL )
1335 {
1336 log_error("ERROR: Creating input array B failed!\n");
1337 return -1;
1338 }
1339 streams[2] = clCreateBuffer(
1340 context, CL_MEM_COPY_HOST_PTR,
1341 get_explicit_type_size(vecCType) * vecSize * TEST_SIZE, &inDataC, NULL);
1342 if( streams[2] == NULL )
1343 {
1344 log_error("ERROR: Creating input array C failed!\n");
1345 return -1;
1346 }
1347 streams[3] = clCreateBuffer(
1348 context, CL_MEM_READ_WRITE,
1349 get_explicit_type_size(destType) * vecSize * TEST_SIZE, NULL, NULL);
1350 if( streams[3] == NULL )
1351 {
1352 log_error("ERROR: Creating output array failed!\n");
1353 return -1;
1354 }
1355
1356 /* Assign streams and execute */
1357 error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
1358 test_error( error, "Unable to set indexed kernel arguments" );
1359 error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
1360 test_error( error, "Unable to set indexed kernel arguments" );
1361 error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
1362 test_error( error, "Unable to set indexed kernel arguments" );
1363 error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
1364 test_error( error, "Unable to set indexed kernel arguments" );
1365
1366 /* Run the kernel */
1367 threads[0] = TEST_SIZE;
1368
1369 error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
1370 test_error( error, "Unable to get work group size to use" );
1371
1372 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
1373 test_error( error, "Unable to execute test kernel" );
1374
1375 memset(outData, 0xFF, get_explicit_type_size( destType ) * TEST_SIZE * vecSize);
1376
1377 /* Now get the results */
1378 error = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, get_explicit_type_size( destType ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
1379 test_error( error, "Unable to read output array!" );
1380
1381 /* And verify! */
1382 char *inA = (char *)inDataA;
1383 char *inB = (char *)inDataB;
1384 char *inC = (char *)inDataC;
1385 char *out = (char *)outData;
1386 for( i = 0; i < (int)TEST_SIZE; i++ )
1387 {
1388 for( size_t j = 0; j < vecSize; j++ )
1389 {
1390 bool test = verifyFn( inA, inB, inC, &expected, vecAType, vecBType, vecCType, destType );
1391 if( test && ( memcmp( &expected, out, get_explicit_type_size( destType ) ) != 0 ) )
1392 {
1393 switch( get_explicit_type_size( vecAType ))
1394 {
1395 case 1:
1396 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x, 0x%2.2x)\n",
1397 (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
1398 *( (cl_uchar *)inA ),
1399 *( (cl_uchar *)inB ),
1400 *( (cl_uchar *)inC ) );
1401 break;
1402
1403 case 2:
1404 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x, 0x%4.4x)\n",
1405 (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
1406 *( (cl_ushort *)inA ),
1407 *( (cl_ushort *)inB ),
1408 *( (cl_ushort *)inC ) );
1409 break;
1410
1411 case 4:
1412 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x, 0x%8.8x)\n",
1413 (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
1414 *( (cl_uint *)inA ),
1415 *( (cl_uint *)inB ),
1416 *( (cl_uint *)inC ) );
1417 break;
1418
1419 case 8:
1420 log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx, 0x%16.16llx)\n",
1421 (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
1422 *( (cl_ulong *)inA ),
1423 *( (cl_ulong *)inB ),
1424 *( (cl_ulong *)inC ) );
1425 break;
1426 }
1427 return -1;
1428 }
1429 inA += get_explicit_type_size( vecAType );
1430 inB += get_explicit_type_size( vecBType );
1431 inC += get_explicit_type_size( vecCType );
1432 out += get_explicit_type_size( destType );
1433 }
1434 }
1435
1436 return 0;
1437 }
1438
test_three_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,threeParamIntegerVerifyFn verifyFn)1439 int test_three_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, threeParamIntegerVerifyFn verifyFn)
1440 {
1441 ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
1442 unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
1443 unsigned int index, typeAIndex;
1444 int retVal = 0;
1445 RandomSeed seed(gRandomSeed);
1446
1447 for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
1448 {
1449 if ((types[ typeAIndex ] == kLong || types[ typeAIndex] == kULong) && !gHasLong)
1450 continue;
1451
1452 for( index = 0; vecSizes[ index ] != 0; index++ )
1453 {
1454 if( test_three_param_integer_kernel(queue, context, fnName, types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
1455 {
1456 log_error( " Vector %s%d,%s%d,%s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ],
1457 get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] ,
1458 get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] );
1459 retVal = -1;
1460 }
1461 }
1462 }
1463
1464 return retVal;
1465 }
1466
verify_integer_clamp(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1467 bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *destination,
1468 ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1469 {
1470 if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1471 {
1472 cl_ulong valueA, valueB, valueC;
1473
1474 switch( vecAType )
1475 {
1476 case kULong:
1477 valueA = ((cl_ulong*) sourceA)[0];
1478 valueB = ((cl_ulong*) sourceB)[0];
1479 valueC = ((cl_ulong*) sourceC)[0];
1480 break;
1481 case kUInt:
1482 valueA = ((cl_uint*) sourceA)[0];
1483 valueB = ((cl_uint*) sourceB)[0];
1484 valueC = ((cl_uint*) sourceC)[0];
1485 break;
1486 case kUShort:
1487 valueA = ((cl_ushort*) sourceA)[0];
1488 valueB = ((cl_ushort*) sourceB)[0];
1489 valueC = ((cl_ushort*) sourceC)[0];
1490 break;
1491 case kUChar:
1492 valueA = ((cl_uchar*) sourceA)[0];
1493 valueB = ((cl_uchar*) sourceB)[0];
1494 valueC = ((cl_uchar*) sourceC)[0];
1495 break;
1496 default:
1497 //error -- should never get here
1498 abort();
1499 break;
1500 }
1501
1502
1503 if(valueB > valueC) {
1504 return false; // results are undefined : let expected alone.
1505 }
1506
1507 switch( vecAType )
1508 {
1509 case kULong:
1510 ((cl_ulong *)destination)[0] =
1511 std::max(std::min(valueA, valueC), valueB);
1512 break;
1513 case kUInt:
1514 ((cl_uint *)destination)[0] =
1515 (cl_uint)(std::max(std::min(valueA, valueC), valueB));
1516 break;
1517 case kUShort:
1518 ((cl_ushort *)destination)[0] =
1519 (cl_ushort)(std::max(std::min(valueA, valueC), valueB));
1520 break;
1521 case kUChar:
1522 ((cl_uchar *)destination)[0] =
1523 (cl_uchar)(std::max(std::min(valueA, valueC), valueB));
1524 break;
1525 default:
1526 //error -- should never get here
1527 abort();
1528 break;
1529 }
1530
1531
1532
1533
1534 }
1535 else
1536 {
1537 cl_long valueA, valueB, valueC;
1538
1539
1540 switch( vecAType )
1541 {
1542 case kLong:
1543 valueA = ((cl_long*) sourceA)[0];
1544 valueB = ((cl_long*) sourceB)[0];
1545 valueC = ((cl_long*) sourceC)[0];
1546 break;
1547 case kInt:
1548 valueA = ((cl_int*) sourceA)[0];
1549 valueB = ((cl_int*) sourceB)[0];
1550 valueC = ((cl_int*) sourceC)[0];
1551 break;
1552 case kShort:
1553 valueA = ((cl_short*) sourceA)[0];
1554 valueB = ((cl_short*) sourceB)[0];
1555 valueC = ((cl_short*) sourceC)[0];
1556 break;
1557 case kChar:
1558 valueA = ((cl_char*) sourceA)[0];
1559 valueB = ((cl_char*) sourceB)[0];
1560 valueC = ((cl_char*) sourceC)[0];
1561 break;
1562 default:
1563 //error -- should never get here
1564 abort();
1565 break;
1566 }
1567
1568 if(valueB > valueC) {
1569 return false; // undefined behavior : leave "expected" alone
1570 }
1571
1572 switch( vecAType )
1573 {
1574 case kLong:
1575 ((cl_long *)destination)[0] =
1576 std::max(std::min(valueA, valueC), valueB);
1577 break;
1578 case kInt:
1579 ((cl_int *)destination)[0] =
1580 (cl_int)(std::max(std::min(valueA, valueC), valueB));
1581 break;
1582 case kShort:
1583 ((cl_short *)destination)[0] =
1584 (cl_short)(std::max(std::min(valueA, valueC), valueB));
1585 break;
1586 case kChar:
1587 ((cl_char *)destination)[0] =
1588 (cl_char)(std::max(std::min(valueA, valueC), valueB));
1589 break;
1590 default:
1591 //error -- should never get here
1592 abort();
1593 break;
1594 }
1595
1596 }
1597 return true;
1598 }
1599
test_integer_clamp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1600 int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1601 {
1602 return test_three_param_integer_fn( queue, context, "clamp", verify_integer_clamp );
1603 }
1604
verify_integer_mad_sat(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1605 bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *destination,
1606 ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1607 {
1608 if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1609 {
1610 cl_ulong valueA, valueB, valueC;
1611
1612 switch( vecAType )
1613 {
1614 case kULong:
1615 valueA = ((cl_ulong*) sourceA)[0];
1616 valueB = ((cl_ulong*) sourceB)[0];
1617 valueC = ((cl_ulong*) sourceC)[0];
1618 break;
1619 case kUInt:
1620 valueA = ((cl_uint*) sourceA)[0];
1621 valueB = ((cl_uint*) sourceB)[0];
1622 valueC = ((cl_uint*) sourceC)[0];
1623 break;
1624 case kUShort:
1625 valueA = ((cl_ushort*) sourceA)[0];
1626 valueB = ((cl_ushort*) sourceB)[0];
1627 valueC = ((cl_ushort*) sourceC)[0];
1628 break;
1629 case kUChar:
1630 valueA = ((cl_uchar*) sourceA)[0];
1631 valueB = ((cl_uchar*) sourceB)[0];
1632 valueC = ((cl_uchar*) sourceC)[0];
1633 break;
1634 default:
1635 //error -- should never get here
1636 abort();
1637 break;
1638 }
1639
1640 cl_ulong multHi, multLo;
1641 multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
1642
1643 multLo += valueC;
1644 multHi += multLo < valueC; // carry if overflow
1645 if( multHi )
1646 multLo = 0xFFFFFFFFFFFFFFFFULL;
1647
1648 switch( vecAType )
1649 {
1650 case kULong:
1651 ((cl_ulong*) destination)[0] = multLo;
1652 break;
1653 case kUInt:
1654 ((cl_uint *)destination)[0] =
1655 (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX);
1656 break;
1657 case kUShort:
1658 ((cl_ushort *)destination)[0] =
1659 (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX);
1660 break;
1661 case kUChar:
1662 ((cl_uchar *)destination)[0] =
1663 (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX);
1664 break;
1665 default:
1666 //error -- should never get here
1667 abort();
1668 break;
1669 }
1670 }
1671 else
1672 {
1673 cl_long valueA, valueB, valueC;
1674
1675 switch( vecAType )
1676 {
1677 case kLong:
1678 valueA = ((cl_long*) sourceA)[0];
1679 valueB = ((cl_long*) sourceB)[0];
1680 valueC = ((cl_long*) sourceC)[0];
1681 break;
1682 case kInt:
1683 valueA = ((cl_int*) sourceA)[0];
1684 valueB = ((cl_int*) sourceB)[0];
1685 valueC = ((cl_int*) sourceC)[0];
1686 break;
1687 case kShort:
1688 valueA = ((cl_short*) sourceA)[0];
1689 valueB = ((cl_short*) sourceB)[0];
1690 valueC = ((cl_short*) sourceC)[0];
1691 break;
1692 case kChar:
1693 valueA = ((cl_char*) sourceA)[0];
1694 valueB = ((cl_char*) sourceB)[0];
1695 valueC = ((cl_char*) sourceC)[0];
1696 break;
1697 default:
1698 //error -- should never get here
1699 abort();
1700 break;
1701 }
1702
1703 cl_long multHi;
1704 cl_ulong multLo;
1705 multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
1706
1707 cl_ulong sum = multLo + valueC;
1708 // carry if overflow
1709 if( valueC >= 0 )
1710 {
1711 if( multLo > sum )
1712 {
1713 multHi++;
1714 if( CL_LONG_MIN == multHi )
1715 {
1716 multHi = CL_LONG_MAX;
1717 sum = CL_ULONG_MAX;
1718 }
1719 }
1720 }
1721 else
1722 {
1723 if( multLo < sum )
1724 {
1725 multHi--;
1726 if( CL_LONG_MAX == multHi )
1727 {
1728 multHi = CL_LONG_MIN;
1729 sum = 0;
1730 }
1731 }
1732 }
1733
1734 // saturate
1735 if( multHi > 0 )
1736 sum = CL_LONG_MAX;
1737 else if( multHi < -1 )
1738 sum = CL_LONG_MIN;
1739 cl_long result = (cl_long) sum;
1740
1741 switch( vecAType )
1742 {
1743 case kLong:
1744 ((cl_long*) destination)[0] = result;
1745 break;
1746 case kInt:
1747 result = std::min(result, (cl_long)CL_INT_MAX);
1748 result = std::max(result, (cl_long)CL_INT_MIN);
1749 ((cl_int*) destination)[0] = (cl_int) result;
1750 break;
1751 case kShort:
1752 result = std::min(result, (cl_long)CL_SHRT_MAX);
1753 result = std::max(result, (cl_long)CL_SHRT_MIN);
1754 ((cl_short*) destination)[0] = (cl_short) result;
1755 break;
1756 case kChar:
1757 result = std::min(result, (cl_long)CL_CHAR_MAX);
1758 result = std::max(result, (cl_long)CL_CHAR_MIN);
1759 ((cl_char*) destination)[0] = (cl_char) result;
1760 break;
1761 default:
1762 //error -- should never get here
1763 abort();
1764 break;
1765 }
1766 }
1767 return true;
1768 }
1769
test_integer_mad_sat(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1770 int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1771 {
1772 return test_three_param_integer_fn( queue, context, "mad_sat", verify_integer_mad_sat );
1773 }
1774
verify_integer_mad_hi(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1775 bool verify_integer_mad_hi( void *sourceA, void *sourceB, void *sourceC, void *destination,
1776 ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1777 {
1778 if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1779 {
1780 cl_ulong valueA, valueB, valueC;
1781
1782 switch( vecAType )
1783 {
1784 case kULong:
1785 valueA = ((cl_ulong*) sourceA)[0];
1786 valueB = ((cl_ulong*) sourceB)[0];
1787 valueC = ((cl_ulong*) sourceC)[0];
1788 break;
1789 case kUInt:
1790 valueA = ((cl_uint*) sourceA)[0];
1791 valueB = ((cl_uint*) sourceB)[0];
1792 valueC = ((cl_uint*) sourceC)[0];
1793 break;
1794 case kUShort:
1795 valueA = ((cl_ushort*) sourceA)[0];
1796 valueB = ((cl_ushort*) sourceB)[0];
1797 valueC = ((cl_ushort*) sourceC)[0];
1798 break;
1799 case kUChar:
1800 valueA = ((cl_uchar*) sourceA)[0];
1801 valueB = ((cl_uchar*) sourceB)[0];
1802 valueC = ((cl_uchar*) sourceC)[0];
1803 break;
1804 default:
1805 //error -- should never get here
1806 abort();
1807 break;
1808 }
1809
1810 cl_ulong multHi, multLo;
1811 multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
1812
1813 switch( vecAType )
1814 {
1815 case kULong:
1816 ((cl_ulong*) destination)[0] = multHi + valueC;
1817 break;
1818 case kUInt:
1819 ((cl_uint*) destination)[0] = (cl_uint) (( multLo >> 32) + valueC );
1820 break;
1821 case kUShort:
1822 ((cl_ushort*) destination)[0] = (cl_ushort) (( multLo >> 16) + valueC );
1823 break;
1824 case kUChar:
1825 ((cl_uchar*) destination)[0] = (cl_uchar) (( multLo >> 8) + valueC );
1826 break;
1827 default:
1828 //error -- should never get here
1829 abort();
1830 break;
1831 }
1832 }
1833 else
1834 {
1835 cl_long valueA, valueB, valueC;
1836
1837 switch( vecAType )
1838 {
1839 case kLong:
1840 valueA = ((cl_long*) sourceA)[0];
1841 valueB = ((cl_long*) sourceB)[0];
1842 valueC = ((cl_long*) sourceC)[0];
1843 break;
1844 case kInt:
1845 valueA = ((cl_int*) sourceA)[0];
1846 valueB = ((cl_int*) sourceB)[0];
1847 valueC = ((cl_int*) sourceC)[0];
1848 break;
1849 case kShort:
1850 valueA = ((cl_short*) sourceA)[0];
1851 valueB = ((cl_short*) sourceB)[0];
1852 valueC = ((cl_short*) sourceC)[0];
1853 break;
1854 case kChar:
1855 valueA = ((cl_char*) sourceA)[0];
1856 valueB = ((cl_char*) sourceB)[0];
1857 valueC = ((cl_char*) sourceC)[0];
1858 break;
1859 default:
1860 //error -- should never get here
1861 abort();
1862 break;
1863 }
1864
1865 cl_long multHi;
1866 cl_ulong multLo;
1867 multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
1868
1869 switch( vecAType )
1870 {
1871 case kLong:
1872 ((cl_long*) destination)[0] = multHi + valueC;
1873 break;
1874 case kInt:
1875 ((cl_int*) destination)[0] = (cl_int) ((multLo >> 32) + valueC);
1876 break;
1877 case kShort:
1878 ((cl_short*) destination)[0] = (cl_int) ((multLo >> 16) + valueC);
1879 break;
1880 case kChar:
1881 ((cl_char*) destination)[0] = (cl_char) (cl_int) ((multLo >> 8) + valueC);
1882 break;
1883 default:
1884 //error -- should never get here
1885 abort();
1886 break;
1887 }
1888 }
1889 return true;
1890 }
1891
test_integer_mad_hi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1892 int test_integer_mad_hi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1893 {
1894 return test_three_param_integer_fn( queue, context, "mad_hi", verify_integer_mad_hi );
1895 }
1896
1897
1898