xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_global_work_offsets.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include <ctype.h>
18 
19 
// OpenCL C source for the global-work-offset kernel.
//
// Each work-item derives a linear slot index from its local ID, group ID and
// local size (none of which are built from get_global_id() here), then stores
// get_global_id( 0 ), get_global_id( 1 ) and get_global_id( 2 ) into that slot
// of outputID_A, outputID_B and outputID_C respectively. The host reads the
// three buffers back and verifies the set of reported IDs.
const char *work_offset_test[] = {
    "__kernel void test( __global int * outputID_A, \n"
    "                        __global int * outputID_B, __global int * outputID_C )\n"
    "{\n"
    "    size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
    "    size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
    "    size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
    "    size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
    "\n"
    "    outputID_A[ id ] = get_global_id( 0 );\n"
    "    outputID_B[ id ] = get_global_id( 1 );\n"
    "    outputID_C[ id ] = get_global_id( 2 );\n"
    "}\n"
    };
34 
// Upper bound on the number of work-items launched per test pass. The
// expansion is parenthesized so it behaves as a single value in any
// expression (e.g. as the right-hand operand of '/' or '%').
#define MAX_TEST_ITEMS ( 16 * 16 * 16 )
// Number of randomized passes each test performs.
#define NUM_TESTS 16
// Largest global work offset (per dimension) that a pass may use.
#define MAX_OFFSET 256

// Fails the enclosing function (returns -1) when the reported ID `v` is
// negative or not below the bound `m`; `c` is the letter of the output buffer
// used in the message. Relies on a size_t loop index named `i` being in scope
// at the expansion site.
// The macro deliberately stays a plain `if { }` statement rather than a
// do/while(0) wrapper, because call sites invoke it without a trailing
// semicolon.
#define CHECK_RANGE( v, m, c ) \
    if( ( ( v ) >= (cl_int)( m ) ) || ( ( v ) < 0 ) ) \
    {    \
        log_error( "ERROR: outputID_%c[%zu]: %d is < 0 or >= %zu\n", ( c ), (size_t)( i ), ( v ), (size_t)( m ) ); \
        return -1;    \
    }
45 
check_results(size_t threads[],size_t offsets[],cl_int outputA[],cl_int outputB[],cl_int outputC[])46 int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
47 {
48     size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
49     size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
50 
51     static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
52     memset( counts, 0, sizeof( counts ) );
53 
54     for( size_t i = 0; i < limit; i++ )
55     {
56         // Check ranges first
57         CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
58         CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
59         CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
60 
61         // Now set the value in the map
62         counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
63     }
64 
65     // Now check the map
66     int missed = 0, multiple = 0, errored = 0, corrected = 0;
67     for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
68     {
69         for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
70         {
71             for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
72             {
73                 const char * limitMsg = " (further errors of this type suppressed)";
74                 if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
75                 {
76                     if( counts[ x ][ y ][ z ] < 1 )
77                     {
78                         if( missed < 3 )
79                             log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
80                         missed++;
81                     }
82                     else if( counts[ x ][ y ][ z ] > 1 )
83                     {
84                         if( multiple < 3 )
85                             log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
86                         multiple++;
87                     }
88                 }
89                 else
90                 {
91                     if( counts[ x ][ y ][ z ] > 0 )
92                     {
93                         if( errored < 3 )
94                             log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
95                         errored++;
96                     }
97                 }
98                     }
99                 }
100                     }
101 
102     if( missed || multiple || errored )
103     {
104         size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
105                         ( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
106                         ( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
107             int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
108 
109         if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
110             log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
111         else
112             log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
113                             missed, multiple, errored );
114     }
115     return ( missed | multiple | errored | corrected );
116 }
117 
test_global_work_offsets(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)118 int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
119 {
120     clProgramWrapper program;
121     clKernelWrapper kernel;
122     clMemWrapper streams[ 7 ];
123 
124     int error;
125     size_t    threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
126     cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
127 
128 
129     // Create the kernel
130     if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
131     {
132         return -1;
133     }
134 
135     //// Create some output streams
136 
137     // Use just one output array to init them all (no need to init every single stack storage here)
138     memset( outputA, 0xff, sizeof( outputA ) );
139     for( int i = 0; i < 3; i++ )
140     {
141         streams[i] =
142             clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
143                            sizeof(outputA), outputA, &error);
144         test_error( error, "Unable to create output array" );
145     }
146 
147     // Run a few different times
148     MTdata seed = init_genrand( gRandomSeed );
149     for( int test = 0; test < NUM_TESTS; test++ )
150     {
151         // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
152         threads[ 0 ] = random_in_range( 1, 32, seed );
153         threads[ 1 ] = random_in_range( 1, 16, seed );
154         threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
155 
156         // Make sure we get the local thread count right
157         error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
158         test_error( error, "Unable to determine local work group sizes" );
159 
160         // Randomize some offsets
161         for( int j = 0; j < 3; j++ )
162             offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
163 
164         log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
165                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
166                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
167 
168         // Now set up and run
169         for( int i = 0; i < 3; i++ )
170         {
171             error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
172             test_error( error, "Unable to set indexed kernel arguments" );
173         }
174 
175         error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
176         test_error( error, "Kernel execution failed" );
177 
178         // Read our results back now
179         cl_int * resultBuffers[] = { outputA, outputB, outputC };
180         for( int i = 0; i < 3; i++ )
181         {
182             error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
183             test_error( error, "Unable to get result data" );
184         }
185 
186         // Now we need to check the results. The outputs should have one entry for each possible ID,
187         // but they won't be in order, so we need to construct a count map to determine what we got
188         if( check_results( threads, offsets, outputA, outputB, outputC ) )
189         {
190             log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
191                       threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
192                       offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
193             return -1;
194         }
195     }
196 
197     free_mtdata(seed);
198 
199     // All done!
200     return 0;
201 }
202 
// OpenCL C source for the get_global_offset() kernel.
//
// Every work-item stores get_global_offset( 0 ), get_global_offset( 1 ) and
// get_global_offset( 2 ), cast to int, into outOffsets[ 0..2 ]. All work-items
// write to the same three slots; the kernel's own comment notes the values are
// expected to be identical across work-items. (That comment also mentions
// local IDs, although the kernel body does not read any ID.)
const char *get_offset_test[] = {
    "__kernel void test( __global int * outOffsets )\n"
    "{\n"
    "    // We use local ID here so we don't have to worry about offsets\n"
    "   // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
    "    outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
    "    outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
    "    outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
    "}\n"
};
213 
test_get_global_offset(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)214 int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
215 {
216     clProgramWrapper program;
217     clKernelWrapper kernel;
218     clMemWrapper streams[ 1 ];
219 
220     int error;
221     size_t    threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
222     cl_int outOffsets[ 3 ];
223 
224 
225     // Create the kernel
226     if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
227     {
228         return -1;
229     }
230 
231     // Create some output streams, and storage for a single control ID
232     memset( outOffsets, 0xff, sizeof( outOffsets ) );
233     streams[0] =
234         clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
235                        sizeof(outOffsets), outOffsets, &error);
236     test_error( error, "Unable to create control ID buffer" );
237 
238     // Run a few different times
239     MTdata seed = init_genrand( gRandomSeed );
240     for( int test = 0; test < NUM_TESTS; test++ )
241     {
242         // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
243         threads[ 0 ] = random_in_range( 1, 32, seed );
244         threads[ 1 ] = random_in_range( 1, 16, seed );
245         threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
246 
247         // Make sure we get the local thread count right
248         error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
249         test_error( error, "Unable to determine local work group sizes" );
250 
251         // Randomize some offsets
252         for( int j = 0; j < 3; j++ )
253             offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
254 
255         log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
256                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
257                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
258 
259         // Now set up and run
260         error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
261         test_error( error, "Unable to set indexed kernel arguments" );
262 
263         error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
264         test_error( error, "Kernel execution failed" );
265 
266         // Read our results back now
267         error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
268         test_error( error, "Unable to get result data" );
269 
270         // And check!
271         int errors = 0;
272         for( int j = 0; j < 3; j++ )
273         {
274             if( outOffsets[ j ] != (cl_int)offsets[ j ] )
275             {
276                 log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
277                 errors++;
278             }
279         }
280         if( errors > 0 )
281             return errors;
282     }
283     free_mtdata(seed);
284 
285     // All done!
286     return 0;
287 }
288 
289