xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/relationals/test_shuffles.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <iomanip>
18 
19 #include "testBase.h"
20 #include "harness/conversions.h"
21 #include "harness/typeWrappers.h"
22 #include "harness/testHarness.h"
23 
24 // #define USE_NEW_SYNTAX    1
25 // The number of shuffles to test per test
26 #define NUM_TESTS 32
27 // The number of times to run each combination of shuffles
28 #define NUM_ITERATIONS_PER_TEST 2
29 #define MAX_PROGRAM_SIZE NUM_TESTS*1024
30 #define PRINT_SHUFFLE_KERNEL_SOURCE 0
31 #define SPEW_ORDER_DETAILS 0
32 
33 enum ShuffleMode
34 {
35     kNormalMode = 0,
36     kFunctionCallMode,
37     kArrayAccessMode,
38     kBuiltInFnMode,
39     kBuiltInDualInputFnMode
40 };
41 
42 static const char *shuffleKernelPattern[3] =  {
43     "__kernel void sample_test( __global %s%s *source, __global %s%s *dest )\n"
44     "{\n"
45     "    if (get_global_id(0) != 0) return;\n"
46     "     //%s%s src1 %s, src2%s;\n",// Here's a comma...
47                                     // Above code is commented out for now, but keeping around for testing local storage options
48     "}\n" };
49 
50 static const char *shuffleTempPattern = "  %s%s tmp;\n";
51 
52 static const char *clearTempPattern = "        tmp = (%s%s)((%s)0);\n";
53 
54 static const char *shuffleSinglePattern =
55 "        tmp%s%s = source[%d]%s%s;\n"
56 "        dest[%d] = tmp;\n"
57 ;
58 
59 static const char * shuffleSinglePatternV3src =
60 "           tmp%s%s = vload3(%d, source)%s%s;\n"
61 "        dest[%d] = tmp;\n";
62 
63 static const char * shuffleSinglePatternV3dst =
64 "        tmp%s%s = source[%d]%s%s;\n"
65 "           vstore3(tmp, %d, dest);\n";
66 
67 
68 static const char * shuffleSinglePatternV3srcV3dst =
69 "tmp%s%s = vload3(%d, source)%s%s;\n"
70 "vstore3(tmp, %d, dest);\n";
71 
72 static const char *shuffleFnLinePattern = "%s%s shuffle_fn( %s%s source );\n%s%s shuffle_fn( %s%s source ) { return source; }\n\n";
73 
74 static const char *shuffleFnPattern =
75 "        tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
76 "        dest[%d] = tmp;\n"
77 ;
78 
79 
80 static const char *shuffleFnPatternV3src =
81 "        tmp%s%s = shuffle_fn( vload3(%d, source) )%s%s;\n"
82 "        dest[%d] = tmp;\n"
83 ;
84 
85 
86 static const char *shuffleFnPatternV3dst =
87 "        tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
88 "               vstore3(tmp, %d, dest);\n"
89 ;
90 
91 
92 static const char *shuffleFnPatternV3srcV3dst =
93 "        tmp%s%s = shuffle_fn(vload3(%d, source) )%s%s;\n"
94 "               vstore3(tmp, %d, dest);\n"
95 ;
96 
97 // shuffle() built-in function patterns
98 static const char *shuffleBuiltInPattern =
99 "        {\n"
100 "            %s%s src1 = %s;\n"
101 "            %s%s%s mask = (%s%s%s)( %s );\n"
102 "            tmp = shuffle( src1, mask );\n"
103 "            %s;\n"
104 "        }\n"
105 ;
106 
107 // shuffle() built-in dual-input function patterns
108 static const char *shuffleBuiltInDualPattern =
109 "        {\n"
110 "            %s%s src1 = %s;\n"
111 "            %s%s src2 = %s;\n"
112 "            %s%s%s mask = (%s%s%s)( %s );\n"
113 "            tmp = shuffle2( src1, src2, mask );\n"
114 "            %s;\n"
115 "        }\n"
116 ;
117 
118 
119 typedef unsigned char ShuffleOrder[ 16 ];
120 
incrementShuffleOrder(ShuffleOrder & order,size_t orderSize,size_t orderRange)121 void incrementShuffleOrder( ShuffleOrder &order, size_t orderSize, size_t orderRange )
122 {
123     for( size_t i = 0; i < orderSize; i++ )
124     {
125         order[ i ]++;
126         if( order[ i ] < orderRange )
127             return;
128         order[ i ] = 0;
129     }
130 }
131 
shuffleOrderContainsDuplicates(ShuffleOrder & order,size_t orderSize)132 bool shuffleOrderContainsDuplicates( ShuffleOrder &order, size_t orderSize )
133 {
134     bool flags[ 16 ] = { false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false };
135     for( size_t i = 0; i < orderSize; i++ )
136     {
137         if( flags[ order[ i ] ] )
138             return true;
139         flags[ order[ i ] ] = true;
140     }
141     return false;
142 }
143 
shuffleVector(unsigned char * inVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)144 static void shuffleVector( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
145 {
146     for(size_t i = 0; i < lengthToUse; i++ )
147     {
148         unsigned char *inPtr = inVector + typeSize *order[ i ];
149         memcpy( outVector, inPtr, typeSize );
150         outVector += typeSize;
151     }
152 }
153 
shuffleVector2(unsigned char * inVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)154 static void shuffleVector2( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
155 {
156     for(size_t i = 0; i < lengthToUse; i++ )
157     {
158         unsigned char *outPtr = outVector + typeSize *order[ i ];
159         memcpy( outPtr, inVector, typeSize );
160         inVector += typeSize;
161     }
162 }
163 
shuffleVectorDual(unsigned char * inVector,unsigned char * inSecondVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)164 static void shuffleVectorDual( unsigned char *inVector, unsigned char *inSecondVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
165 {
166     // This is tricky: the indices of each shuffle are in a range (0-srcVecSize * 2-1),
167     // where (srcVecSize-srcVecSize*2-1) refers to the second input.
168     size_t uphalfMask = (size_t)vecSize;
169     size_t lowerBits = (size_t)( vecSize - 1 );
170 
171     for(size_t i = 0; i < lengthToUse; i++ )
172     {
173         unsigned char *inPtr;
174 #if SPEW_ORDER_DETAILS
175         log_info("order[%d] is %d, or %d of %s\n", (int)i,
176                  (int)(order[i]),
177                  (int)(order[i] & lowerBits),
178                  ((order[i]&uphalfMask) == 0)?"lower num":"upper num");
179 #endif
180         if( order[ i ] & uphalfMask )
181             inPtr = inSecondVector + typeSize * ( order[ i ] & lowerBits );
182         else
183             inPtr = inVector + typeSize * ( order[ i ] & lowerBits );
184         memcpy( outVector, inPtr, typeSize );
185         outVector += typeSize;
186     }
187 }
188 
189 
190 static ShuffleOrder sNaturalOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
191 
192 static int useNumbersFlip = 0;
get_order_string(ShuffleOrder & order,size_t vecSize,cl_uint lengthToUse,bool byNumber,MTdata d)193 const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengthToUse, bool byNumber, MTdata d )
194 {
195     // NOTE: names are only valid for hex characters (up to F) but for debugging, we use
196     // this to print out orders for dual inputs, which actually can be valid up to position 31 (two 16-element vectors)
197     // so we go ahead and fake the rest of the alphabet for those other 16 positions, so we have
198     // some (indirectly) meaningful output
199     char names[] = "0123456789abcdefghijklmnopqrstuv";
200     char namesUpperCase[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
201     char names2[] = "xyzw!!!!!!!!!!!!";
202 
203     static char orderString[ 18 ];
204 
205     size_t j, idx;
206 
207     // Assume we don't have to use numbered indices (.s0123...).
208     byNumber = false;
209     // Check if any index is beyond xyzw, which requires to use numbers.
210     for( j = 0; j < lengthToUse; j++ )
211     {
212         if (order[j] > 3) {
213             byNumber = true;
214             break;
215         }
216     }
217     // If we can use numbers, do so half the time.
218     if (!byNumber) {
219         byNumber = (useNumbersFlip++)%2;
220     }
221 
222     if (byNumber)
223     {
224         idx = 0;
225         // Randomly chose upper and lower case S.
226         orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S';
227         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
228             // Randomly choose upper and lower case.
229             orderString[ idx++ ] = random_in_range(0, 1, d) ? names[ (int)order[ j ] ] : namesUpperCase[ (int)order[ j ] ];
230         }
231         orderString[ idx++ ] = 0;
232     }
233     else
234     {
235         // Use xyzw.
236         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
237             orderString[ j ] = names2[ (int)order[ j ] ];
238         }
239         orderString[ j ] = 0;
240     }
241 
242     return orderString;
243 }
244 
get_order_name(ExplicitType vecType,size_t inVecSize,size_t outVecSize,ShuffleOrder & inOrder,ShuffleOrder & outOrder,cl_uint lengthToUse,MTdata d,bool inUseNumerics,bool outUseNumerics)245 char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize, ShuffleOrder &inOrder, ShuffleOrder &outOrder, cl_uint lengthToUse, MTdata d, bool inUseNumerics, bool outUseNumerics )
246 {
247     static char orderName[ 512 ] = "";
248     char inOrderStr[ 512 ], outOrderStr[ 512 ];
249 
250     if( inVecSize == 1 )
251         inOrderStr[ 0 ] = 0;
252     else
253         sprintf(inOrderStr, "%d.%s", (int)inVecSize,
254                 get_order_string(inOrder, inVecSize, lengthToUse, inUseNumerics,
255                                  d));
256     if( outVecSize == 1 )
257         outOrderStr[ 0 ] = 0;
258     else
259         sprintf( outOrderStr, "%d.%s", (int)outVecSize, get_order_string( outOrder, outVecSize, lengthToUse, outUseNumerics, d ) );
260 
261     sprintf( orderName, "order %s%s -> %s%s",
262             get_explicit_type_name( vecType ), inOrderStr, get_explicit_type_name( vecType ), outOrderStr );
263     return orderName;
264 }
265 
print_hex_mem_dump(const unsigned char * inDataPtr,const unsigned char * inDataPtr2,const unsigned char * expected,const unsigned char * outDataPtr,size_t inVecSize,size_t outVecSize,size_t typeSize)266 void print_hex_mem_dump(const unsigned char *inDataPtr,
267                         const unsigned char *inDataPtr2,
268                         const unsigned char *expected,
269                         const unsigned char *outDataPtr, size_t inVecSize,
270                         size_t outVecSize, size_t typeSize)
271 {
272     auto byte_to_hex_str = [](unsigned char v) {
273         // Use a new stream to avoid manipulating state of outer stream.
274         std::ostringstream ss;
275         ss << std::setfill('0') << std::setw(2) << std::right << std::hex << +v;
276         return ss.str();
277     };
278 
279     std::ostringstream error;
280     error << "      Source: ";
281     for (size_t j = 0; j < inVecSize * typeSize; j++)
282     {
283         error << (j % typeSize ? "" : " ") << byte_to_hex_str(inDataPtr[j])
284               << " ";
285     }
286     if (inDataPtr2 != NULL)
287     {
288         error << "\n    Source 2: ";
289         for (size_t j = 0; j < inVecSize * typeSize; j++)
290         {
291             error << (j % typeSize ? "" : " ") << byte_to_hex_str(inDataPtr2[j])
292                   << " ";
293         }
294     }
295     error << "\n    Expected: ";
296     for (size_t j = 0; j < outVecSize * typeSize; j++)
297     {
298         error << (j % typeSize ? "" : " ") << byte_to_hex_str(expected[j])
299               << " ";
300     }
301     error << "\n      Actual: ";
302     for (size_t j = 0; j < outVecSize * typeSize; j++)
303     {
304         error << (j % typeSize ? "" : " ") << byte_to_hex_str(outDataPtr[j])
305               << " ";
306     }
307     log_info("%s\n", error.str().c_str());
308 }
309 
generate_shuffle_mask(char * outMaskString,size_t maskSize,const ShuffleOrder * order)310 void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order )
311 {
312     outMaskString[ 0 ] = 0;
313     if( order != NULL )
314     {
315         for( size_t jj = 0; jj < maskSize; jj++ )
316         {
317             char thisMask[ 16 ];
318             sprintf( thisMask, "%s%d", ( jj == 0 ) ? "" : ", ", (*order)[ jj ] );
319             strcat( outMaskString, thisMask );
320         }
321     }
322     else
323     {
324         for( size_t jj = 0; jj < maskSize; jj++ )
325         {
326             char thisMask[ 16 ];
327             sprintf( thisMask, "%s%ld", ( jj == 0 ) ? "" : ", ", jj );
328             strcat( outMaskString, thisMask );
329         }
330     }
331 }
332 
create_shuffle_kernel(cl_context context,cl_program * outProgram,cl_kernel * outKernel,size_t * outRealVecSize,ExplicitType vecType,size_t inVecSize,size_t outVecSize,cl_uint * lengthToUse,bool inUseNumerics,bool outUseNumerics,size_t numOrders,ShuffleOrder * inOrders,ShuffleOrder * outOrders,MTdata d,ShuffleMode shuffleMode=kNormalMode)333 static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl_kernel *outKernel,
334                                  size_t *outRealVecSize,
335                                  ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, bool inUseNumerics, bool outUseNumerics,
336                                  size_t numOrders, ShuffleOrder *inOrders, ShuffleOrder *outOrders,
337                                  MTdata d, ShuffleMode shuffleMode = kNormalMode )
338 {
339     char inOrder[18], shuffledOrder[18];
340     char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ];
341     char *programPtr;
342     char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4];
343     char outSizeNameTmpVar[4];
344 
345 
346     /* Create the source; note vec size is the vector length we are testing */
347     if( inVecSize == 1 ) //|| (inVecSize == 3)) // just have arrays if we go with size 3
348         inSizeName[ 0 ] = 0;
349     else
350         sprintf( inSizeName, "%ld", inVecSize );
351     if( inVecSize == 3 )
352         inSizeArgName[ 0 ] = 0;
353     else
354         strcpy( inSizeArgName, inSizeName );
355 
356     *outRealVecSize = outVecSize;
357 
358     if( outVecSize == 1 ||  (outVecSize == 3))
359         outSizeName[ 0 ] = 0;
360     else
361         sprintf( outSizeName, "%d", (int)outVecSize );
362 
363     if(outVecSize == 1) {
364         outSizeNameTmpVar[0] = 0;
365     } else {
366         sprintf(outSizeNameTmpVar, "%d", (int)outVecSize);
367     }
368 
369     if( *outRealVecSize == 1 || ( *outRealVecSize == 3))
370         outRealSizeName[ 0 ] = 0;
371     else
372         sprintf( outRealSizeName, "%d", (int)*outRealVecSize );
373 
374 
375     // Loop through and create the source for all order strings
376     kernelSource[ 0 ] = 0;
377     if (vecType == kDouble) {
378         strcat(kernelSource, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
379     }
380 
381     if( shuffleMode == kFunctionCallMode )
382     {
383         sprintf( progLine, shuffleFnLinePattern, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName,
384                 get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName );
385         strcat(kernelSource, progLine);
386     }
387 
388     // We're going to play a REALLY NASTY trick here. We're going to use the inSize insert point
389     // to put in an entire third parameter if we need it
390     char inParamSizeString[ 1024 ];
391     if( shuffleMode == kBuiltInDualInputFnMode )
392         sprintf( inParamSizeString, "%s *secondSource, __global %s%s", inSizeArgName, get_explicit_type_name( vecType ), inSizeArgName );
393     else
394         strcpy( inParamSizeString, inSizeArgName );
395 
396     // These two take care of unused variable warnings
397     const char * src2EnableA = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "/*";
398     const char * src2EnableB = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "*/";
399 
400     sprintf( progLine, shuffleKernelPattern[ 0 ], get_explicit_type_name( vecType ), inParamSizeString,
401             get_explicit_type_name( vecType ), outRealSizeName, get_explicit_type_name( vecType ), inSizeName,
402             src2EnableA, src2EnableB );
403     strcat(kernelSource, progLine);
404     if( inOrders == NULL )
405         strcpy( inOrder, get_order_string( sNaturalOrder, outVecSize, (cl_uint)outVecSize, inUseNumerics, d ) );
406 
407     sprintf( progLine, shuffleTempPattern, get_explicit_type_name( vecType ), outSizeNameTmpVar);
408     strcat(kernelSource, progLine);
409 
410     for( unsigned int i = 0; i < numOrders; i++ )
411     {
412         if( inOrders != NULL )
413             strcpy(inOrder,
414                    get_order_string(inOrders[i], inVecSize, lengthToUse[i],
415                                     inUseNumerics, d));
416         strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) );
417 
418 
419         sprintf( progLine, clearTempPattern, get_explicit_type_name( vecType ), outSizeName,get_explicit_type_name( vecType ));
420         strcat(kernelSource, progLine);
421 
422 
423         if( shuffleMode == kNormalMode )
424         {
425             if(outVecSize == 3 && inVecSize == 3) {
426                 // shuffleSinglePatternV3srcV3dst
427                 sprintf( progLine, shuffleSinglePatternV3srcV3dst,
428                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
429                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
430             } else if(inVecSize == 3) {
431                 // shuffleSinglePatternV3src
432                 sprintf( progLine, shuffleSinglePatternV3src,
433                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
434                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
435             } else if(outVecSize == 3) {
436                 // shuffleSinglePatternV3dst
437                 sprintf( progLine, shuffleSinglePatternV3dst,
438                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
439                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
440                         (int)i );
441             } else {
442                 sprintf( progLine, shuffleSinglePattern,
443                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
444                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
445             }
446         }
447         else if( shuffleMode == kFunctionCallMode )
448         {
449             // log_info("About to make a shuffle line\n");
450             // fflush(stdout);
451             if(inVecSize == 3 && outVecSize == 3) { // swap last two
452                 sprintf( progLine, shuffleFnPatternV3srcV3dst,
453                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
454                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
455                         (int)i );
456             } else if(outVecSize == 3)  { // swap last two
457                                           // log_info("Here\n\n");
458                                           // fflush(stdout);
459                 sprintf( progLine, shuffleFnPatternV3dst,
460                         outVecSize > 1 ? "." : "",
461                         outVecSize > 1 ? shuffledOrder : "",
462                         (int)i,
463                         inVecSize > 1 ? "." : "",
464                         inVecSize > 1 ? inOrder : "",
465                         (int)i );
466                 // log_info("\n%s\n", progLine);
467                 // fflush(stdout);
468             } else if(inVecSize == 3) {
469                 sprintf( progLine, shuffleFnPatternV3src,
470                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
471                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
472             } else  {
473                 sprintf( progLine, shuffleFnPattern,
474                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
475                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
476             }
477         }
478         else if( shuffleMode == kArrayAccessMode )
479         { // now we want to replace inSizeName with inSizeNameShuffleFn
480             int vectorSizeToCastTo = 16;
481             cl_uint item;
482             for (item =0; item<lengthToUse[i]; item++) {
483                 int absoluteIndex = i*(int)inVecSize+(int)inOrders[i][item];
484                 int castVectorIndex = absoluteIndex/vectorSizeToCastTo;
485                 size_t castElementIndex = absoluteIndex % vectorSizeToCastTo;
486                 ShuffleOrder myOutOrders, myInOrders;
487                 myOutOrders[0]  = outOrders[i][item];
488                 myInOrders[0] = castElementIndex;
489 
490                 strcpy( inOrder, get_order_string( myInOrders, 1, 1, 0, d ) );
491                 strcpy( shuffledOrder, get_order_string( myOutOrders, 1, 1, 0, d ) );
492 
493                 sprintf(progLine, "     tmp%s%s = ((__global %s%d *)source)[%d]%s%s;\n",
494                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "",
495                         get_explicit_type_name( vecType ), vectorSizeToCastTo,
496                         castVectorIndex,
497                         vectorSizeToCastTo > 1 ? "." : "", vectorSizeToCastTo > 1 ? inOrder : "");
498                 strcat(kernelSource, progLine);
499             }
500             if(outVecSize == 3) {
501                 sprintf(progLine,"     vstore3(tmp, %d, (__global %s *)dest);\n",
502                         i, get_explicit_type_name( vecType ));
503                 // probably don't need that last
504                 // cast to (__global %s *) where %s is get_explicit_type_name( vecType)
505             } else {
506                 sprintf(progLine,"     dest[%d] = tmp;\n", i );
507             }
508         }
509         else // shuffleMode == kBuiltInFnMode or kBuiltInDualInputFnMode
510         {
511             if(inVecSize == 3 || outVecSize == 3 ||
512                inVecSize == 1 || outVecSize == 1) {
513                 // log_info("Skipping test for size 3\n");
514                 continue;
515             }
516             ExplicitType maskType = vecType;
517             if( maskType == kFloat )
518                 maskType = kUInt;
519             if( maskType == kDouble) {
520                 maskType = kULong;
521             }
522 
523             char maskString[ 1024 ] = "";
524             size_t maskSize = outVecSize;// ( shuffleMode == kBuiltInDualInputFnMode ) ? ( outVecSize << 1 ) : outVecSize;
525             generate_shuffle_mask( maskString, maskSize, ( outOrders != NULL ) ? &outOrders[ i ] : NULL );
526 
527             // Set up a quick prefix, so mask gets unsigned type regardless of the input/output type
528             char maskPrefix[ 2 ] = "u";
529             if( get_explicit_type_name( maskType )[ 0 ] == 'u' )
530                 maskPrefix[ 0 ] = 0;
531 
532             char progLine2[ 10240 ];
533             if( shuffleMode == kBuiltInDualInputFnMode )
534             {
535                 sprintf( progLine2, shuffleBuiltInDualPattern, get_explicit_type_name( vecType ), inSizeName,
536                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
537                         get_explicit_type_name( vecType ), inSizeName,
538                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)secondSource )" : "secondSource[ %ld ]",
539                         maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
540                         maskString,
541                         ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
542 
543                 if( outVecSize == 3 )
544                 {
545                     if( inVecSize == 3 )
546                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
547                     else
548                         sprintf( progLine, progLine2, i, i, i, get_explicit_type_name( vecType ) );
549                 }
550                 else
551                 {
552                     if( inVecSize == 3 )
553                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i );
554                     else
555                         sprintf( progLine, progLine2, i, i, i );
556                 }
557             }
558             else
559             {
560                 sprintf( progLine2, shuffleBuiltInPattern, get_explicit_type_name( vecType ), inSizeName,
561                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
562                         maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
563                         maskString,
564                         ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
565 
566                 if( outVecSize == 3 )
567                 {
568                     if( inVecSize == 3 )
569                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
570                     else
571                         sprintf( progLine, progLine2, i, i, get_explicit_type_name( vecType ) );
572                 }
573                 else
574                 {
575                     if( inVecSize == 3 )
576                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i );
577                     else
578                         sprintf( progLine, progLine2, i, i );
579                 }
580             }
581         }
582 
583         strcat( kernelSource, progLine );
584         if (strlen(kernelSource) > 0.9*MAX_PROGRAM_SIZE)
585             log_info("WARNING: Program has grown to 90%% (%d) of the defined max program size of %d\n", (int)strlen(kernelSource), (int)MAX_PROGRAM_SIZE);
586     }
587     strcat( kernelSource, shuffleKernelPattern[ 1 ] );
588 
589     // Print the kernel source
590     if (PRINT_SHUFFLE_KERNEL_SOURCE)
591         log_info( "Kernel:%s\n", kernelSource );
592 
593     /* Create kernel */
594     programPtr = kernelSource;
595     if( create_single_kernel_helper( context, outProgram, outKernel, 1, (const char **)&programPtr, "sample_test" ) )
596     {
597         return -1;
598     }
599     return 0;
600 }
601 
test_shuffle_dual_kernel(cl_context context,cl_command_queue queue,ExplicitType vecType,size_t inVecSize,size_t outVecSize,cl_uint * lengthToUse,size_t numOrders,ShuffleOrder * inOrderIdx,ShuffleOrder * outOrderIdx,bool inUseNumerics,bool outUseNumerics,MTdata d,ShuffleMode shuffleMode=kNormalMode)602 int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
603                              ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, size_t numOrders,
604                              ShuffleOrder *inOrderIdx, ShuffleOrder *outOrderIdx, bool inUseNumerics, bool outUseNumerics, MTdata d,
605                              ShuffleMode shuffleMode = kNormalMode )
606 {
607     clProgramWrapper program;
608     clKernelWrapper kernel;
609     int error;
610     size_t threads[1], localThreads[1];
611     size_t typeSize, outRealVecSize;
612     clMemWrapper streams[ 3 ];
613 
614     /* Create the source */
615     error = create_shuffle_kernel( context, &program, &kernel, &outRealVecSize, vecType,
616                                   inVecSize, outVecSize, lengthToUse, inUseNumerics, outUseNumerics, numOrders, inOrderIdx, outOrderIdx,
617                                   d, shuffleMode );
618     if( error != 0 )
619         return error;
620 
621     typeSize = get_explicit_type_size( vecType );
622 
623 #if !(defined(_WIN32) && defined (_MSC_VER))
624     cl_long inData[ inVecSize * numOrders ];
625     cl_long inSecondData[ inVecSize * numOrders ];
626     cl_long outData[ outRealVecSize * numOrders ];
627 #else
628     cl_long* inData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
629     cl_long* inSecondData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
630     cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long));
631 #endif
632     memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) );
633 
634     generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData );
635     if( shuffleMode == kBuiltInDualInputFnMode )
636         generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
637 
638     streams[0] =
639         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
640                        typeSize * inVecSize * numOrders, inData, &error);
641     test_error( error, "Unable to create input stream" );
642 
643     streams[1] =
644         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
645                        typeSize * outRealVecSize * numOrders, outData, &error);
646     test_error( error, "Unable to create output stream" );
647 
648     int argIndex = 0;
649     if( shuffleMode == kBuiltInDualInputFnMode )
650     {
651         streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
652                                     typeSize * inVecSize * numOrders,
653                                     inSecondData, &error);
654         test_error( error, "Unable to create second input stream" );
655 
656         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
657         test_error( error, "Unable to set kernel argument" );
658     }
659 
660     // Set kernel arguments
661     error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 0 ] ), &streams[ 0 ] );
662     test_error( error, "Unable to set kernel argument" );
663     error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 1 ] ), &streams[ 1 ] );
664     test_error( error, "Unable to set kernel argument" );
665 
666 
667     /* Run the kernel */
668     threads[0] = numOrders;
669 
670     error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
671     test_error( error, "Unable to get work group size to use" );
672 
673     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
674     test_error( error, "Unable to execute test kernel" );
675 
676 
677     // Read the results back
678     error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL );
679     test_error( error, "Unable to read results" );
680 
681     unsigned char *inDataPtr = (unsigned char *)inData;
682     unsigned char *inSecondDataPtr = (unsigned char *)inSecondData;
683     unsigned char *outDataPtr = (unsigned char *)outData;
684     int ret = 0;
685     int errors_printed = 0;
686     for( size_t i = 0; i < numOrders; i++ )
687     {
688         unsigned char expected[ 1024 ];
689         unsigned char temp[ 1024 ];
690         memset(expected, 0, sizeof(expected));
691         memset(temp, 0, sizeof(temp));
692         if( shuffleMode == kBuiltInFnMode )
693             shuffleVector( inDataPtr, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
694         else if( shuffleMode == kBuiltInDualInputFnMode )
695             shuffleVectorDual( inDataPtr, inSecondDataPtr, expected, outOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
696         else
697         {
698             shuffleVector( inDataPtr, temp, inOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
699             shuffleVector2( temp, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
700         }
701 
702         if( memcmp( expected, outDataPtr, outVecSize * typeSize ) != 0 )
703         {
704             log_error( " ERROR: Shuffle test %d FAILED for %s (memory hex dump follows)\n", (int)i,
705                       get_order_name( vecType, inVecSize, outVecSize, inOrderIdx[ i ], outOrderIdx[ i ], lengthToUse[i], d, inUseNumerics, outUseNumerics ) );
706 
707             print_hex_mem_dump( inDataPtr, ( shuffleMode == kBuiltInDualInputFnMode ) ? inSecondDataPtr : NULL, expected, outDataPtr, inVecSize, outVecSize, typeSize );
708 
709             if( ( shuffleMode == kBuiltInFnMode ) || ( shuffleMode == kBuiltInDualInputFnMode ) )
710             {
711                 // Mask would've been different for every shuffle done, so we have to regen it to print it
712                 char maskString[ 1024 ];
713                 generate_shuffle_mask( maskString, outVecSize, ( outOrderIdx != NULL ) ? &outOrderIdx[ i ] : NULL );
714                 log_error( "        Mask:  %s\n", maskString );
715             }
716 
717             ret++;
718             errors_printed++;
719             if (errors_printed > MAX_ERRORS_TO_PRINT)
720             {
721                 log_info("Further errors suppressed.\n");
722                 return ret;
723             }
724         }
725         inDataPtr += inVecSize * typeSize;
726         inSecondDataPtr += inVecSize * typeSize;
727         outDataPtr += outRealVecSize * typeSize;
728     }
729 
730     return ret;
731 }
732 
build_random_shuffle_order(ShuffleOrder & outIndices,unsigned int length,unsigned int selectLength,bool allowRepeats,MTdata d)733 void    build_random_shuffle_order( ShuffleOrder &outIndices, unsigned int length, unsigned int selectLength, bool allowRepeats, MTdata d )
734 {
735     char flags[ 16 ];
736 
737     memset( flags, 0, sizeof( flags ) );
738 
739     for( unsigned int i = 0; i < length; i++ )
740     {
741         char selector = (char)random_in_range( 0, selectLength - 1, d );
742         if( !allowRepeats )
743         {
744             while( flags[ (int)selector ] )
745                 selector = (char)random_in_range( 0, selectLength - 1, d );
746             flags[ (int)selector ] = true;
747         }
748         outIndices[ i ] = selector;
749     }
750 }
751 
752 class shuffleBuffer
753 {
754 public:
755 
shuffleBuffer(cl_context ctx,cl_command_queue queue,ExplicitType type,size_t inSize,size_t outSize,ShuffleMode mode)756     shuffleBuffer( cl_context ctx, cl_command_queue queue, ExplicitType type, size_t inSize, size_t outSize, ShuffleMode mode )
757     {
758         mContext = ctx;
759         mQueue = queue;
760         mVecType = type;
761         mInVecSize = inSize;
762         mOutVecSize = outSize;
763         mShuffleMode = mode;
764 
765         mCount = 0;
766 
767         // Here's the deal with mLengthToUse[i].
768         // if you have, for instance
769         // uchar4 dst;
770         // uchar8 src;
771         // you can do
772         // src.s0213 = dst.s1045;
773         // but you can also do
774         // src.s02 = dst.s10;
775         // which has a different effect
776         // The intent with these "sub lengths" is to test all such
777         // possibilities
778         // Calculate a range of sub-lengths within the vector to copy.
779         int i;
780         size_t maxSize = (mInVecSize < mOutVecSize) ? mInVecSize : mOutVecSize;
781         for(i=0; i<NUM_TESTS; i++)
782         {
783             // Built-in fns can't select sub-lengths (the mask must be the length of the dest vector).
784             // Well, at least for these tests...
785             if( ( mode == kBuiltInFnMode ) || ( mode == kBuiltInDualInputFnMode ) )
786                 mLengthToUse[i]    = (cl_int)mOutVecSize;
787             else
788             {
789                 mLengthToUse[i] = (cl_uint)(((double)i/NUM_TESTS)*maxSize) + 1;
790                 // Force the length to be a valid vector length.
791                 if( ( mLengthToUse[i] == 1 ) && ( mode != kBuiltInFnMode ) )
792                     mLengthToUse[i] = 1;
793                 else if (mLengthToUse[i] < 4)
794                     mLengthToUse[i] = 2;
795                 else if (mLengthToUse[i] < 8)
796                     mLengthToUse[i] = 4;
797                 else if (mLengthToUse[i] < 16)
798                     mLengthToUse[i] = 8;
799                 else
800                     mLengthToUse[i] = 16;
801             }
802         }
803     }
804 
AddRun(ShuffleOrder & inOrder,ShuffleOrder & outOrder,MTdata d)805     int    AddRun( ShuffleOrder &inOrder, ShuffleOrder &outOrder, MTdata d )
806     {
807         memcpy( &mInOrders[ mCount ], &inOrder, sizeof( inOrder ) );
808         memcpy( &mOutOrders[ mCount ], &outOrder, sizeof( outOrder ) );
809         mCount++;
810 
811         if( mCount == NUM_TESTS )
812             return Flush(d);
813 
814         return CL_SUCCESS;
815     }
816 
Flush(MTdata d)817     int Flush( MTdata d )
818     {
819         int err = CL_SUCCESS;
820         if( mCount > 0 )
821         {
822             err = test_shuffle_dual_kernel( mContext, mQueue, mVecType, mInVecSize, mOutVecSize, mLengthToUse,
823                                            mCount, mInOrders, mOutOrders, true, true, d, mShuffleMode );
824             mCount = 0;
825         }
826         return err;
827     }
828 
829 protected:
830     cl_context            mContext;
831     cl_command_queue    mQueue;
832     ExplicitType        mVecType;
833     size_t                mInVecSize, mOutVecSize, mCount;
834     ShuffleMode            mShuffleMode;
835     cl_uint             mLengthToUse[ NUM_TESTS ];
836 
837     ShuffleOrder        mInOrders[ NUM_TESTS ], mOutOrders[ NUM_TESTS ];
838 };
839 
840 
test_shuffle_random(cl_device_id device,cl_context context,cl_command_queue queue,ShuffleMode shuffleMode,MTdata d)841 int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queue queue, ShuffleMode shuffleMode, MTdata d )
842 {
843     ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
844     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
845     unsigned int srcIdx, dstIdx, typeIndex;
846     int error = 0, totalError = 0, prevTotalError = 0;
847     RandomSeed seed(gRandomSeed);
848 
849     for( typeIndex = 0; typeIndex < 10; typeIndex++ )
850     {
851         //log_info( "\n\t%s... ", get_explicit_type_name( vecType[ typeIndex ] ) );
852         //fflush( stdout );
853         if (vecType[typeIndex] == kDouble) {
854             if (!is_extension_available(device, "cl_khr_fp64")) {
855                 log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
856                 continue;
857             }
858             log_info("Testing doubles.\n");
859         }
860 
861         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong )
862         {
863             log_info("Long types are unsupported, skipping.");
864             continue;
865         }
866 
867         error = 0;
868         for( srcIdx = 0; vecSizes[ srcIdx ] != 0 /*&& error == 0*/; srcIdx++ )
869         {
870             for( dstIdx = 0; vecSizes[ dstIdx ] != 0 /*&& error == 0*/; dstIdx++ )
871             {
872                 if( ( ( shuffleMode == kBuiltInDualInputFnMode ) || ( shuffleMode == kBuiltInFnMode ) ) &&
873                    ( ( vecSizes[ dstIdx ] & 1 ) || ( vecSizes[ srcIdx ] & 1 ) ) )
874                 {
875                     // Built-in shuffle functions don't work on size 1 (scalars) or size 3 (vec3s)
876                     continue;
877                 }
878 
879                 log_info("Testing [%s%d to %s%d]... ", get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[srcIdx], get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[dstIdx]);
880                 shuffleBuffer buffer( context, queue, vecType[ typeIndex ], vecSizes[ srcIdx ], vecSizes[ dstIdx ], shuffleMode );
881 
882                 int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST;
883                 for( int i = 0; i < numTests /*&& error == 0*/; i++ )
884                 {
885                     ShuffleOrder src, dst;
886                     if( shuffleMode == kBuiltInFnMode )
887                     {
888                         build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
889                     }
890                     else if(shuffleMode == kBuiltInDualInputFnMode)
891                     {
892                         build_random_shuffle_order(dst, vecSizes[dstIdx], 2*vecSizes[srcIdx], true, d);
893                     }
894                     else
895                     {
896                         build_random_shuffle_order( src, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
897                         build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ dstIdx ], false, d );
898                     }
899 
900                     error = buffer.AddRun( src, dst, seed );
901                     if (error)
902                         totalError++;
903                 }
904                 int test_error = buffer.Flush(seed);
905                 if (test_error)
906                     totalError++;
907 
908                 if (totalError == prevTotalError)
909                     log_info("\tPassed.\n");
910                 else
911                 {
912                     log_error("\tFAILED.\n");
913                     prevTotalError = totalError;
914                 }
915             }
916         }
917     }
918     return totalError;
919 }
920 
test_shuffle_copy(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)921 int test_shuffle_copy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
922 {
923     RandomSeed seed(gRandomSeed);
924     return test_shuffle_random( device, context, queue, kNormalMode, seed );
925 }
926 
test_shuffle_function_call(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)927 int test_shuffle_function_call(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
928 {
929     RandomSeed seed(gRandomSeed);
930     return test_shuffle_random( device, context, queue, kFunctionCallMode, seed );
931 }
932 
test_shuffle_array_cast(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)933 int test_shuffle_array_cast(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
934 {
935     RandomSeed seed(gRandomSeed);
936     return test_shuffle_random( device, context, queue, kArrayAccessMode, seed );
937 }
938 
test_shuffle_built_in(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)939 int test_shuffle_built_in(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
940 {
941     RandomSeed seed(gRandomSeed);
942     return test_shuffle_random( device, context, queue, kBuiltInFnMode, seed );
943 }
944 
test_shuffle_built_in_dual_input(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)945 int test_shuffle_built_in_dual_input(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
946 {
947     RandomSeed seed(gRandomSeed);
948     return test_shuffle_random( device, context, queue, kBuiltInDualInputFnMode, seed );
949 }
950 
951