xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/device_partition/test_device_partition.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/testHarness.h"
19 #include "harness/conversions.h"
20 
21 #include <vector>
22 
23 typedef long long int lld;
24 typedef long long unsigned llu;
25 
26 const char *test_kernels[] = {
27 "__kernel void kernelA(__global int *dst)\n"
28 "{\n"
29 "\n"
30 " dst[get_global_id(0)]*=3;\n"
31 "\n"
32 "}\n"
33 "__kernel void kernelB(__global int *dst)\n"
34 "{\n"
35 "\n"
36 " dst[get_global_id(0)]++;\n"
37 "\n"
38 "}\n"
39 };
40 
41 #define TEST_SIZE 512
42 #define MAX_QUEUES 1000
43 
printPartition(cl_device_partition_property partition)44 const char *printPartition(cl_device_partition_property partition)
45 {
46   switch (partition) {
47     case (0):                                      return "<NONE>";
48     case (CL_DEVICE_PARTITION_EQUALLY):            return "CL_DEVICE_PARTITION_EQUALLY";
49     case (CL_DEVICE_PARTITION_BY_COUNTS):          return "CL_DEVICE_PARTITION_BY_COUNTS";
50     case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN";
51     default:                                       return "<unknown>";
52   } // switch
53 }
54 
printAffinity(cl_device_affinity_domain affinity)55 const char *printAffinity(cl_device_affinity_domain affinity)
56 {
57   switch (affinity) {
58     case (0):                                            return "<NONE>";
59     case (CL_DEVICE_AFFINITY_DOMAIN_NUMA):               return "CL_DEVICE_AFFINITY_DOMAIN_NUMA";
60     case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE";
61     case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE";
62     case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE";
63     case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE";
64     case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE";
65     default:                                             return "<unknown>";
66   } // switch
67 }
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const cl_device_id * parentDevice)68 int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice )
69 {
70     int error = CL_SUCCESS;
71 
72     /* Create the program object from source */
73     error = create_single_kernel_helper_create_program(context, outProgram, numKernelLines, kernelProgram);
74     if( *outProgram == NULL || error != CL_SUCCESS)
75     {
76         print_error( error, "clCreateProgramWithSource failed" );
77         return error;
78     }
79 
80     /* Compile the program */
81     int buildProgramFailed = 0;
82     int printedSource = 0;
83     error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL );
84     if (error != CL_SUCCESS)
85     {
86         unsigned int i;
87         print_error(error, "clBuildProgram failed");
88         buildProgramFailed = 1;
89         printedSource = 1;
90         log_error( "Original source is: ------------\n" );
91         for( i = 0; i < numKernelLines; i++ )
92             log_error( "%s", kernelProgram[ i ] );
93     }
94 
95     // Verify the build status on all devices
96     cl_uint deviceCount = 0;
97     error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
98     if (error != CL_SUCCESS) {
99         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
100         return error;
101     }
102 
103     if (deviceCount == 0) {
104         log_error("No devices found for program.\n");
105         return -1;
106     }
107 
108     cl_device_id    *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
109     if( NULL == devices )
110         return -1;
111     memset( devices, 0, deviceCount * sizeof( cl_device_id ));
112     error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
113     if (error != CL_SUCCESS) {
114         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
115         free( devices );
116         return error;
117     }
118 
119     cl_uint z;
120     for( z = 0; z < deviceCount; z++ )
121     {
122         char deviceName[4096] = "";
123         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
124         if (error != CL_SUCCESS || deviceName[0] == '\0') {
125             log_error("Device \"%d\" failed to return a name\n", z);
126             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
127         }
128 
129         cl_build_status buildStatus;
130         error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
131         if (error != CL_SUCCESS) {
132             print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
133             free( devices );
134             return error;
135         }
136 
137         if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
138             char log[10240] = "";
139             if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
140 
141             char statusString[64] = "";
142             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
143                 sprintf(statusString, "CL_BUILD_SUCCESS");
144             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
145                 sprintf(statusString, "CL_BUILD_NONE");
146             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
147                 sprintf(statusString, "CL_BUILD_ERROR");
148             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
149                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
150             else
151                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
152 
153             if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
154             error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
155             if (error != CL_SUCCESS || log[0]=='\0'){
156                 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
157                 if (error) {
158                     print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
159                     free( devices );
160                     return error;
161                 } else {
162                     log_error("clGetProgramBuildInfo returned an empty log.\n");
163                     free( devices );
164                     return -1;
165                 }
166             }
167             // In this case we've already printed out the code above.
168             if (!printedSource)
169             {
170                 unsigned int i;
171                 log_error( "Original source is: ------------\n" );
172                 for( i = 0; i < numKernelLines; i++ )
173                     log_error( "%s", kernelProgram[ i ] );
174                 printedSource = 1;
175             }
176             log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
177             log_error( "%s\n", log );
178             log_error( "\n----------\n" );
179             free( devices );
180             return -1;
181         }
182     }
183 
184     /* And create a kernel from it */
185     *outKernel = clCreateKernel( *outProgram, kernelName, &error );
186     if( *outKernel == NULL || error != CL_SUCCESS)
187     {
188         print_error( error, "Unable to create kernel" );
189         free( devices );
190         return error;
191     }
192 
193     free( devices );
194     return 0;
195 }
196 
197 template<class T>
198 class AutoDestructArray
199 {
200 public:
AutoDestructArray(T * arr)201     AutoDestructArray(T* arr) : m_arr(arr) {}
~AutoDestructArray()202     ~AutoDestructArray() { if (m_arr) delete [] m_arr; }
203 
204 private:
205     T* m_arr;
206 };
207 
test_device_set(size_t deviceCount,size_t queueCount,cl_device_id * devices,int num_elements,cl_device_id * parentDevice=NULL)208 int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL)
209 {
210     int error;
211     clContextWrapper context;
212     clProgramWrapper program;
213     clKernelWrapper kernels[2];
214     clMemWrapper  stream;
215     clCommandQueueWrapper queues[MAX_QUEUES] = {};
216     size_t threads[1], localThreads[1];
217     int data[TEST_SIZE];
218     int outputData[TEST_SIZE];
219     int expectedResults[TEST_SIZE];
220     int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE];
221     int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount);
222     size_t i;
223     AutoDestructArray<int> autoDestruct(expectedResultsOneDeviceArray);
224 
225     for (i=0; i<deviceCount; i++) {
226         expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
227     }
228 
229     RandomSeed seed( gRandomSeed );
230 
231     if (queueCount > MAX_QUEUES) {
232         log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES);
233         return -1;
234     }
235 
236     log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE);
237 
238     for (i=0; i<deviceCount; i++) {
239         size_t deviceNameSize;
240         error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &deviceNameSize);
241         test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
242         char *deviceName = (char *)alloca(deviceNameSize * (sizeof(char)));
243         error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, deviceNameSize, deviceName, NULL);
244         test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
245         log_info("Device %ld is \"%s\".\n", i, deviceName);
246     }
247 
248     /* Create a context */
249     context = clCreateContext( NULL, (cl_uint)deviceCount, devices, notify_callback, NULL, &error );
250     test_error( error, "Unable to create testing context" );
251 
252     /* Create our kernels (they all have the same arguments so we don't need multiple ones for each device) */
253     if( create_single_kernel_helper( context, &program, &kernels[0], 1, test_kernels, "kernelA", parentDevice ) != 0 )
254     {
255         return -1;
256     }
257 
258     kernels[1] = clCreateKernel(program, "kernelB", &error);
259     test_error(error, "clCreateKernel failed");
260 
261 
262     /* Now create I/O streams */
263     for( i = 0; i < TEST_SIZE; i++ )
264         data[i] = genrand_int32(seed);
265 
266     stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
267                             sizeof(cl_int) * TEST_SIZE, data, &error);
268     test_error( error, "Unable to create test array" );
269 
270     // Update the expected results
271     for( i = 0; i < TEST_SIZE; i++ ) {
272         expectedResults[i] = data[i];
273         for (size_t j=0; j<deviceCount; j++)
274             expectedResultsOneDevice[j][i] = data[i];
275     }
276 
277 
278     // Set the arguments
279     error = clSetKernelArg( kernels[0], 0, sizeof( stream ), &stream);
280     test_error( error, "Unable to set kernel arguments" );
281     error = clSetKernelArg( kernels[1], 0, sizeof( stream ), &stream);
282     test_error( error, "Unable to set kernel arguments" );
283 
284     /* Run the test */
285     threads[0] = (size_t)TEST_SIZE;
286 
287     error = get_max_common_work_group_size( context, kernels[0], threads[0], &localThreads[ 0 ] );
288     test_error( error, "Unable to calc work group size" );
289 
290     /* Create work queues */
291     for( i = 0; i < queueCount; i++ )
292     {
293         queues[i] = clCreateCommandQueueWithProperties( context, devices[ i % deviceCount ], 0, &error );
294         if (error != CL_SUCCESS || queues[i] == NULL) {
295             log_info("Could not create queue[%d].\n", (int)i);
296             queueCount = i;
297             break;
298         }
299     }
300     log_info("Testing with %d queues.\n", (int)queueCount);
301 
302     /* Enqueue executions */
303     for( int z = 0; z<num_elements/TEST_SIZE; z++) {
304         for( i = 0; i < queueCount; i++ )
305         {
306             // Randomly choose a kernel to execute.
307             int kernel_selection = (int)get_random_float(0, 2, seed);
308             error = clEnqueueNDRangeKernel( queues[ i ], kernels[ kernel_selection ], 1, NULL, threads, localThreads, 0, NULL, NULL );
309             test_error( error, "Kernel execution failed" );
310 
311             // Update the expected results
312             for( int j = 0; j < TEST_SIZE; j++ ) {
313                 expectedResults[j] = (kernel_selection) ? expectedResults[j]+1 : expectedResults[j]*3;
314                 expectedResultsOneDevice[i % deviceCount][j] = (kernel_selection) ? expectedResultsOneDevice[i % deviceCount][j]+1 : expectedResultsOneDevice[i % deviceCount][j]*3;
315             }
316 
317             // Force the queue to finish so the next one will be in sync
318             error = clFinish(queues[i]);
319             test_error( error, "clFinish failed");
320         }
321     }
322 
323     /* Read results */
324     int errors = 0;
325     for (int q = 0; q<(int)queueCount; q++) {
326         error = clEnqueueReadBuffer( queues[ q ], stream, CL_TRUE, 0, sizeof(cl_int)*TEST_SIZE, (char *)outputData, 0, NULL, NULL );
327         test_error( error, "Unable to get result data set" );
328 
329         int errorsThisTime = 0;
330         /* Verify all of the data now */
331         for( i = 0; i < TEST_SIZE; i++ )
332         {
333             if( expectedResults[ i ] != outputData[ i ] )
334             {
335                 log_error( "ERROR: Sample data did not verify for queue %d on device %ld (sample %d, expected %d, got %d)\n",
336                     q, q % deviceCount, (int)i, expectedResults[ i ], outputData[ i ] );
337                 for (size_t j=0; j<deviceCount; j++) {
338                     if (expectedResultsOneDevice[j][i] == outputData[i])
339                         log_info("Sample consistent with only device %ld having modified the data.\n", j);
340                 }
341                 errorsThisTime++;
342                 break;
343             }
344         }
345         if (errorsThisTime)
346             errors++;
347     }
348 
349     /* All done now! */
350     if (errors)
351         return -1;
352     return 0;
353 }
354 
355 
init_device_partition_test(cl_device_id parentDevice,cl_uint & maxComputeUnits,cl_uint & maxSubDevices)356 int init_device_partition_test(cl_device_id parentDevice, cl_uint &maxComputeUnits, cl_uint &maxSubDevices)
357 {
358     int err = clGetDeviceInfo(parentDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
359     test_error( err, "Unable to get maximal number of compute units" );
360     err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
361     test_error( err, "Unable to get maximal number of sub-devices" );
362 
363     log_info("Maximal number of sub-devices on device %p is %d.\n", parentDevice, maxSubDevices );
364     return 0;
365 }
366 
test_device_partition_type_support(cl_device_id parentDevice,const cl_device_partition_property partitionType,const cl_device_affinity_domain affinityDomain)367 int test_device_partition_type_support(cl_device_id parentDevice, const cl_device_partition_property partitionType, const cl_device_affinity_domain affinityDomain)
368 {
369     typedef std::vector< cl_device_partition_property > properties_t;
370     properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more)
371     size_t const propSize = sizeof( cl_device_partition_property ); // Size of one property in bytes.
372     size_t size;    // size of all properties in bytes.
373     cl_int err;
374     size = 0;
375     err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size );
376     if ( err == CL_SUCCESS ) {
377         if ( size % propSize != 0 ) {
378             log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) );
379             return -1;
380         }
381         supportedProps.resize( size / propSize );
382         size = 0;
383         err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size );
384         test_error_ret( err, "Unable to get device partition properties (2)", -1 );
385     } else if ( err == CL_INVALID_VALUE ) {
386         log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" );
387         return -1;
388     } else {
389         test_error_ret( err, "Unable to get device partition properties (1)", -1 );
390     };
391     for (size_t i = 0; i < supportedProps.size(); i++)
392     {
393         if (supportedProps[i] == partitionType)
394         {
395            if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
396            {
397               cl_device_affinity_domain supportedAffinityDomain;
398               err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL);
399               test_error( err, "Unable to get supported affinity domains" );
400               if (supportedAffinityDomain & affinityDomain)
401                 return 0;
402            }
403            else
404             return 0;
405         }
406     }
407 
408     return -1;
409 }
410 
test_partition_of_device(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_device_partition_property * partition_type,cl_uint starting_property,cl_uint ending_property)411 int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type,
412                              cl_uint starting_property, cl_uint ending_property)
413 {
414     cl_uint maxComputeUnits;
415     cl_uint maxSubDevices;    // maximal number of sub-devices that can be created in one call to clCreateSubDevices
416     int err = 0;
417 
418     if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 0)
419         return -1;
420 
421     if (maxComputeUnits <= 1)
422         return 0;
423     // confirm that this devices reports how it was partitioned
424     if (partition_type != NULL)
425     { // if we're not the root device
426       size_t psize;
427       err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0,  NULL, &psize);
428       test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
429       cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize);
430       err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL);
431       test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
432 
433       // test returned type
434       for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) {
435         if (properties_returned[i] != partition_type[i]) {
436           if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
437               i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE &&
438               (properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA     ||
439                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
440                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
441                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
442                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) {
443             log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i);
444             return -1;
445               }
446         }
447       } // for
448     }
449 
450 #define PROPERTY_TYPES 8
451     cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = {
452         { CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } ,
453         { CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } ,
454         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } ,
455         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } ,
456         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } ,
457         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } ,
458         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } ,
459         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 }
460     };
461 
462     // loop thru each type, creating sub-devices for each type
463     for (cl_uint i = starting_property;i < ending_property;i++) {
464 
465       if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0)
466       {
467         if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
468         {
469           log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n",
470                       printPartition(partitionProp[i][0]),
471                       printAffinity(partitionProp[i][1]), deviceID);
472         }
473         else
474         {
475           log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n",
476                       printPartition(partitionProp[i][0]), deviceID);
477         }
478         continue;
479       }
480 
481       if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
482       {
483         log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]),
484                   printAffinity(partitionProp[i][1]));
485       }
486       else
487       {
488         log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]),
489                   partitionProp[i][1], partitionProp[i][2]);
490       }
491 
492       cl_uint deviceCount;
493 
494       // how many sub-devices can we create?
495       err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount);
496       if ( err == CL_DEVICE_PARTITION_FAILED ) {
497           log_info( "The device %p could not be further partitioned.\n", deviceID );
498           continue;
499       }
500       test_error( err, "Failed to get number of sub-devices" );
501 
502       // get the list of subDevices
503       //  create room for 1 more device_id, so that we can put the parent device in there.
504       cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1));
505       err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount);
506       test_error( err, "Actual creation of sub-devices failed" );
507 
508       log_info("Testing on all devices in context\n");
509       err = test_device_set(deviceCount, deviceCount, subDevices, num_elements);
510       if (err == 0)
511       {
512           log_info("Testing on a parent device for context\n");
513 
514           // add the parent device
515           subDevices[deviceCount] = deviceID;
516           err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID);
517       }
518       if (err != 0)
519       {
520           printf("error! returning %d\n",err);
521           return err;
522       }
523 
524       // now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned
525       err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property);
526       if (err != 0)
527       {
528           printf("error! returning %d\n",err);
529           return err;
530       }
531 
532       for (cl_uint j=0;j < deviceCount;j++)
533       {
534         err = clReleaseDevice(subDevices[j]);
535         test_error( err, "\n Releasing sub-device failed \n" );
536       }
537 
538     } // for
539 
540     log_info("Testing on all device %p finished\n", deviceID);
541     return 0;
542 }
543 
544 
test_partition_equally(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)545 int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
546 {
547   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1);
548 }
549 
test_partition_by_counts(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)550 int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
551 {
552   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2);
553 }
554 
test_partition_by_affinity_domain_numa(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)555 int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
556 {
557   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3);
558 }
559 
test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)560 int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
561 {
562   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4);
563 }
564 
test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)565 int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
566 {
567   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5);
568 }
569 
test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)570 int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
571 {
572   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6);
573 }
574 
test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)575 int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
576 {
577   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7);
578 }
579 
test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)580 int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
581 {
582   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8);
583 }
584 
test_partition_all(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)585 int test_partition_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
586 {
587   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8);
588 }
589