xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/basic/test_local_kernel_scope.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 
25 #include "procs.h"
26 
27 #define MAX_LOCAL_STORAGE_SIZE  256
28 #define MAX_LOCAL_STORAGE_SIZE_STRING "256"
29 
30 const char *kernelSource[] = {
31     "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n"
32     "{\n"
33     "   __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n"
34     "   unsigned int theValue = input[ get_global_id( 0 ) ];\n"
35     "\n"
36     "   // If we just write linearly, there's no verification that the items in a group share local data\n"
37     "   // So we write reverse-linearly, which requires items to read the local data written by at least one\n"
38     "   // different item\n"
39     "   localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n"
40     "\n"
41     "   // The barrier ensures that all local items have written to the local storage\n"
42     "   barrier( CLK_LOCAL_MEM_FENCE );\n"
43     "\n"
44     "   // Now we loop back through the local storage and look for the max value. We only do this if\n"
45     "   // we're the first item in a group\n"
46     "   unsigned int max = 0;\n"
47     "   if( get_local_id( 0 ) == 0 )\n"
48     "   {\n"
49     "       for( size_t i = 0; i < get_local_size( 0 ); i++ )\n"
50     "       {\n"
51     "           if( localStorage[ i ] > max )\n"
52     "               max = localStorage[ i ];\n"
53     "       }\n"
54     "       outMaxes[ get_group_id( 0 ) ] = max;\n"
55     "   }\n"
56     "}\n"
57 };
58 
test_local_kernel_scope(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)59 int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
60 {
61     cl_int error;
62     clProgramWrapper program;
63     clKernelWrapper kernel;
64     clMemWrapper streams[ 2 ];
65     MTdata randSeed = init_genrand( gRandomSeed );
66 
67     // Create a test kernel
68     error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" );
69     test_error( error, "Unable to create test kernel" );
70 
71 
72     // Determine an appropriate test size
73     size_t workGroupSize;
74     error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL );
75     test_error( error, "Unable to obtain kernel work group size" );
76 
77     // Make sure the work group size doesn't overrun our local storage size in the kernel
78     while( workGroupSize > MAX_LOCAL_STORAGE_SIZE )
79         workGroupSize >>= 1;
80 
81     size_t testSize = workGroupSize;
82     while( testSize < 1024 )
83         testSize += workGroupSize;
84     size_t numGroups = testSize / workGroupSize;
85     log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize );
86 
87     // Create two buffers for operation
88     cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) );
89     generate_random_data( kUInt, testSize, randSeed, inputData );
90     free_mtdata( randSeed );
91     streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error );
92     test_error( error, "Unable to create input buffer" );
93 
94     cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) );
95     streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error );
96     test_error( error, "Unable to create output buffer" );
97 
98 
99     // Set up the kernel args and run
100     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
101     test_error( error, "Unable to set kernel arg" );
102     error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
103     test_error( error, "Unable to set kernel arg" );
104 
105     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL );
106     test_error( error, "Unable to enqueue kernel" );
107 
108 
109     // Read results and verify
110     error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL );
111     test_error( error, "Unable to read output data" );
112 
113     // MingW compiler seems to have a bug that otimizes the code below incorrectly.
114     // adding the volatile keyword to size_t decleration to avoid aggressive optimization by the compiler.
115     for( volatile size_t i = 0; i < numGroups; i++ )
116     {
117         // Determine the max in our case
118         cl_uint localMax = 0;
119         for( volatile size_t j = 0; j < workGroupSize; j++ )
120         {
121             if( inputData[ i * workGroupSize + j ] > localMax )
122                 localMax = inputData[ i * workGroupSize + j ];
123         }
124 
125         if( outputData[ i ] != localMax )
126         {
127             log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i );
128             free(inputData);
129             free(outputData);
130             return -1;
131         }
132     }
133 
134     free(inputData);
135     free(outputData);
136     return 0;
137 }
138 
139 
140