1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/testHarness.h"
19 #include "harness/conversions.h"
20
21 #include <vector>
22
23 typedef long long int lld;
24 typedef long long unsigned llu;
25
26 const char *test_kernels[] = {
27 "__kernel void kernelA(__global int *dst)\n"
28 "{\n"
29 "\n"
30 " dst[get_global_id(0)]*=3;\n"
31 "\n"
32 "}\n"
33 "__kernel void kernelB(__global int *dst)\n"
34 "{\n"
35 "\n"
36 " dst[get_global_id(0)]++;\n"
37 "\n"
38 "}\n"
39 };
40
41 #define TEST_SIZE 512
42 #define MAX_QUEUES 1000
43
printPartition(cl_device_partition_property partition)44 const char *printPartition(cl_device_partition_property partition)
45 {
46 switch (partition) {
47 case (0): return "<NONE>";
48 case (CL_DEVICE_PARTITION_EQUALLY): return "CL_DEVICE_PARTITION_EQUALLY";
49 case (CL_DEVICE_PARTITION_BY_COUNTS): return "CL_DEVICE_PARTITION_BY_COUNTS";
50 case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN";
51 default: return "<unknown>";
52 } // switch
53 }
54
printAffinity(cl_device_affinity_domain affinity)55 const char *printAffinity(cl_device_affinity_domain affinity)
56 {
57 switch (affinity) {
58 case (0): return "<NONE>";
59 case (CL_DEVICE_AFFINITY_DOMAIN_NUMA): return "CL_DEVICE_AFFINITY_DOMAIN_NUMA";
60 case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE";
61 case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE";
62 case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE";
63 case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE";
64 case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE";
65 default: return "<unknown>";
66 } // switch
67 }
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const cl_device_id * parentDevice)68 int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice )
69 {
70 int error = CL_SUCCESS;
71
72 /* Create the program object from source */
73 error = create_single_kernel_helper_create_program(context, outProgram, numKernelLines, kernelProgram);
74 if( *outProgram == NULL || error != CL_SUCCESS)
75 {
76 print_error( error, "clCreateProgramWithSource failed" );
77 return error;
78 }
79
80 /* Compile the program */
81 int buildProgramFailed = 0;
82 int printedSource = 0;
83 error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL );
84 if (error != CL_SUCCESS)
85 {
86 unsigned int i;
87 print_error(error, "clBuildProgram failed");
88 buildProgramFailed = 1;
89 printedSource = 1;
90 log_error( "Original source is: ------------\n" );
91 for( i = 0; i < numKernelLines; i++ )
92 log_error( "%s", kernelProgram[ i ] );
93 }
94
95 // Verify the build status on all devices
96 cl_uint deviceCount = 0;
97 error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
98 if (error != CL_SUCCESS) {
99 print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
100 return error;
101 }
102
103 if (deviceCount == 0) {
104 log_error("No devices found for program.\n");
105 return -1;
106 }
107
108 cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
109 if( NULL == devices )
110 return -1;
111 memset( devices, 0, deviceCount * sizeof( cl_device_id ));
112 error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
113 if (error != CL_SUCCESS) {
114 print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
115 free( devices );
116 return error;
117 }
118
119 cl_uint z;
120 for( z = 0; z < deviceCount; z++ )
121 {
122 char deviceName[4096] = "";
123 error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
124 if (error != CL_SUCCESS || deviceName[0] == '\0') {
125 log_error("Device \"%d\" failed to return a name\n", z);
126 print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
127 }
128
129 cl_build_status buildStatus;
130 error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
131 if (error != CL_SUCCESS) {
132 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
133 free( devices );
134 return error;
135 }
136
137 if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
138 char log[10240] = "";
139 if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
140
141 char statusString[64] = "";
142 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
143 sprintf(statusString, "CL_BUILD_SUCCESS");
144 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
145 sprintf(statusString, "CL_BUILD_NONE");
146 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
147 sprintf(statusString, "CL_BUILD_ERROR");
148 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
149 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
150 else
151 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
152
153 if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
154 error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
155 if (error != CL_SUCCESS || log[0]=='\0'){
156 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
157 if (error) {
158 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
159 free( devices );
160 return error;
161 } else {
162 log_error("clGetProgramBuildInfo returned an empty log.\n");
163 free( devices );
164 return -1;
165 }
166 }
167 // In this case we've already printed out the code above.
168 if (!printedSource)
169 {
170 unsigned int i;
171 log_error( "Original source is: ------------\n" );
172 for( i = 0; i < numKernelLines; i++ )
173 log_error( "%s", kernelProgram[ i ] );
174 printedSource = 1;
175 }
176 log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
177 log_error( "%s\n", log );
178 log_error( "\n----------\n" );
179 free( devices );
180 return -1;
181 }
182 }
183
184 /* And create a kernel from it */
185 *outKernel = clCreateKernel( *outProgram, kernelName, &error );
186 if( *outKernel == NULL || error != CL_SUCCESS)
187 {
188 print_error( error, "Unable to create kernel" );
189 free( devices );
190 return error;
191 }
192
193 free( devices );
194 return 0;
195 }
196
197 template<class T>
198 class AutoDestructArray
199 {
200 public:
AutoDestructArray(T * arr)201 AutoDestructArray(T* arr) : m_arr(arr) {}
~AutoDestructArray()202 ~AutoDestructArray() { if (m_arr) delete [] m_arr; }
203
204 private:
205 T* m_arr;
206 };
207
test_device_set(size_t deviceCount,size_t queueCount,cl_device_id * devices,int num_elements,cl_device_id * parentDevice=NULL)208 int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL)
209 {
210 int error;
211 clContextWrapper context;
212 clProgramWrapper program;
213 clKernelWrapper kernels[2];
214 clMemWrapper stream;
215 clCommandQueueWrapper queues[MAX_QUEUES] = {};
216 size_t threads[1], localThreads[1];
217 int data[TEST_SIZE];
218 int outputData[TEST_SIZE];
219 int expectedResults[TEST_SIZE];
220 int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE];
221 int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount);
222 size_t i;
223 AutoDestructArray<int> autoDestruct(expectedResultsOneDeviceArray);
224
225 for (i=0; i<deviceCount; i++) {
226 expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
227 }
228
229 RandomSeed seed( gRandomSeed );
230
231 if (queueCount > MAX_QUEUES) {
232 log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES);
233 return -1;
234 }
235
236 log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE);
237
238 for (i=0; i<deviceCount; i++) {
239 size_t deviceNameSize;
240 error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &deviceNameSize);
241 test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
242 char *deviceName = (char *)alloca(deviceNameSize * (sizeof(char)));
243 error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, deviceNameSize, deviceName, NULL);
244 test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
245 log_info("Device %ld is \"%s\".\n", i, deviceName);
246 }
247
248 /* Create a context */
249 context = clCreateContext( NULL, (cl_uint)deviceCount, devices, notify_callback, NULL, &error );
250 test_error( error, "Unable to create testing context" );
251
252 /* Create our kernels (they all have the same arguments so we don't need multiple ones for each device) */
253 if( create_single_kernel_helper( context, &program, &kernels[0], 1, test_kernels, "kernelA", parentDevice ) != 0 )
254 {
255 return -1;
256 }
257
258 kernels[1] = clCreateKernel(program, "kernelB", &error);
259 test_error(error, "clCreateKernel failed");
260
261
262 /* Now create I/O streams */
263 for( i = 0; i < TEST_SIZE; i++ )
264 data[i] = genrand_int32(seed);
265
266 stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
267 sizeof(cl_int) * TEST_SIZE, data, &error);
268 test_error( error, "Unable to create test array" );
269
270 // Update the expected results
271 for( i = 0; i < TEST_SIZE; i++ ) {
272 expectedResults[i] = data[i];
273 for (size_t j=0; j<deviceCount; j++)
274 expectedResultsOneDevice[j][i] = data[i];
275 }
276
277
278 // Set the arguments
279 error = clSetKernelArg( kernels[0], 0, sizeof( stream ), &stream);
280 test_error( error, "Unable to set kernel arguments" );
281 error = clSetKernelArg( kernels[1], 0, sizeof( stream ), &stream);
282 test_error( error, "Unable to set kernel arguments" );
283
284 /* Run the test */
285 threads[0] = (size_t)TEST_SIZE;
286
287 error = get_max_common_work_group_size( context, kernels[0], threads[0], &localThreads[ 0 ] );
288 test_error( error, "Unable to calc work group size" );
289
290 /* Create work queues */
291 for( i = 0; i < queueCount; i++ )
292 {
293 queues[i] = clCreateCommandQueueWithProperties( context, devices[ i % deviceCount ], 0, &error );
294 if (error != CL_SUCCESS || queues[i] == NULL) {
295 log_info("Could not create queue[%d].\n", (int)i);
296 queueCount = i;
297 break;
298 }
299 }
300 log_info("Testing with %d queues.\n", (int)queueCount);
301
302 /* Enqueue executions */
303 for( int z = 0; z<num_elements/TEST_SIZE; z++) {
304 for( i = 0; i < queueCount; i++ )
305 {
306 // Randomly choose a kernel to execute.
307 int kernel_selection = (int)get_random_float(0, 2, seed);
308 error = clEnqueueNDRangeKernel( queues[ i ], kernels[ kernel_selection ], 1, NULL, threads, localThreads, 0, NULL, NULL );
309 test_error( error, "Kernel execution failed" );
310
311 // Update the expected results
312 for( int j = 0; j < TEST_SIZE; j++ ) {
313 expectedResults[j] = (kernel_selection) ? expectedResults[j]+1 : expectedResults[j]*3;
314 expectedResultsOneDevice[i % deviceCount][j] = (kernel_selection) ? expectedResultsOneDevice[i % deviceCount][j]+1 : expectedResultsOneDevice[i % deviceCount][j]*3;
315 }
316
317 // Force the queue to finish so the next one will be in sync
318 error = clFinish(queues[i]);
319 test_error( error, "clFinish failed");
320 }
321 }
322
323 /* Read results */
324 int errors = 0;
325 for (int q = 0; q<(int)queueCount; q++) {
326 error = clEnqueueReadBuffer( queues[ q ], stream, CL_TRUE, 0, sizeof(cl_int)*TEST_SIZE, (char *)outputData, 0, NULL, NULL );
327 test_error( error, "Unable to get result data set" );
328
329 int errorsThisTime = 0;
330 /* Verify all of the data now */
331 for( i = 0; i < TEST_SIZE; i++ )
332 {
333 if( expectedResults[ i ] != outputData[ i ] )
334 {
335 log_error( "ERROR: Sample data did not verify for queue %d on device %ld (sample %d, expected %d, got %d)\n",
336 q, q % deviceCount, (int)i, expectedResults[ i ], outputData[ i ] );
337 for (size_t j=0; j<deviceCount; j++) {
338 if (expectedResultsOneDevice[j][i] == outputData[i])
339 log_info("Sample consistent with only device %ld having modified the data.\n", j);
340 }
341 errorsThisTime++;
342 break;
343 }
344 }
345 if (errorsThisTime)
346 errors++;
347 }
348
349 /* All done now! */
350 if (errors)
351 return -1;
352 return 0;
353 }
354
355
init_device_partition_test(cl_device_id parentDevice,cl_uint & maxComputeUnits,cl_uint & maxSubDevices)356 int init_device_partition_test(cl_device_id parentDevice, cl_uint &maxComputeUnits, cl_uint &maxSubDevices)
357 {
358 int err = clGetDeviceInfo(parentDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
359 test_error( err, "Unable to get maximal number of compute units" );
360 err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
361 test_error( err, "Unable to get maximal number of sub-devices" );
362
363 log_info("Maximal number of sub-devices on device %p is %d.\n", parentDevice, maxSubDevices );
364 return 0;
365 }
366
test_device_partition_type_support(cl_device_id parentDevice,const cl_device_partition_property partitionType,const cl_device_affinity_domain affinityDomain)367 int test_device_partition_type_support(cl_device_id parentDevice, const cl_device_partition_property partitionType, const cl_device_affinity_domain affinityDomain)
368 {
369 typedef std::vector< cl_device_partition_property > properties_t;
370 properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more)
371 size_t const propSize = sizeof( cl_device_partition_property ); // Size of one property in bytes.
372 size_t size; // size of all properties in bytes.
373 cl_int err;
374 size = 0;
375 err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size );
376 if ( err == CL_SUCCESS ) {
377 if ( size % propSize != 0 ) {
378 log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) );
379 return -1;
380 }
381 supportedProps.resize( size / propSize );
382 size = 0;
383 err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size );
384 test_error_ret( err, "Unable to get device partition properties (2)", -1 );
385 } else if ( err == CL_INVALID_VALUE ) {
386 log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" );
387 return -1;
388 } else {
389 test_error_ret( err, "Unable to get device partition properties (1)", -1 );
390 };
391 for (size_t i = 0; i < supportedProps.size(); i++)
392 {
393 if (supportedProps[i] == partitionType)
394 {
395 if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
396 {
397 cl_device_affinity_domain supportedAffinityDomain;
398 err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL);
399 test_error( err, "Unable to get supported affinity domains" );
400 if (supportedAffinityDomain & affinityDomain)
401 return 0;
402 }
403 else
404 return 0;
405 }
406 }
407
408 return -1;
409 }
410
test_partition_of_device(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_device_partition_property * partition_type,cl_uint starting_property,cl_uint ending_property)411 int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type,
412 cl_uint starting_property, cl_uint ending_property)
413 {
414 cl_uint maxComputeUnits;
415 cl_uint maxSubDevices; // maximal number of sub-devices that can be created in one call to clCreateSubDevices
416 int err = 0;
417
418 if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 0)
419 return -1;
420
421 if (maxComputeUnits <= 1)
422 return 0;
423 // confirm that this devices reports how it was partitioned
424 if (partition_type != NULL)
425 { // if we're not the root device
426 size_t psize;
427 err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0, NULL, &psize);
428 test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
429 cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize);
430 err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL);
431 test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
432
433 // test returned type
434 for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) {
435 if (properties_returned[i] != partition_type[i]) {
436 if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
437 i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE &&
438 (properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA ||
439 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
440 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
441 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
442 properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) {
443 log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i);
444 return -1;
445 }
446 }
447 } // for
448 }
449
450 #define PROPERTY_TYPES 8
451 cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = {
452 { CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } ,
453 { CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } ,
454 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } ,
455 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } ,
456 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } ,
457 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } ,
458 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } ,
459 { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 }
460 };
461
462 // loop thru each type, creating sub-devices for each type
463 for (cl_uint i = starting_property;i < ending_property;i++) {
464
465 if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0)
466 {
467 if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
468 {
469 log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n",
470 printPartition(partitionProp[i][0]),
471 printAffinity(partitionProp[i][1]), deviceID);
472 }
473 else
474 {
475 log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n",
476 printPartition(partitionProp[i][0]), deviceID);
477 }
478 continue;
479 }
480
481 if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
482 {
483 log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]),
484 printAffinity(partitionProp[i][1]));
485 }
486 else
487 {
488 log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]),
489 partitionProp[i][1], partitionProp[i][2]);
490 }
491
492 cl_uint deviceCount;
493
494 // how many sub-devices can we create?
495 err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount);
496 if ( err == CL_DEVICE_PARTITION_FAILED ) {
497 log_info( "The device %p could not be further partitioned.\n", deviceID );
498 continue;
499 }
500 test_error( err, "Failed to get number of sub-devices" );
501
502 // get the list of subDevices
503 // create room for 1 more device_id, so that we can put the parent device in there.
504 cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1));
505 err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount);
506 test_error( err, "Actual creation of sub-devices failed" );
507
508 log_info("Testing on all devices in context\n");
509 err = test_device_set(deviceCount, deviceCount, subDevices, num_elements);
510 if (err == 0)
511 {
512 log_info("Testing on a parent device for context\n");
513
514 // add the parent device
515 subDevices[deviceCount] = deviceID;
516 err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID);
517 }
518 if (err != 0)
519 {
520 printf("error! returning %d\n",err);
521 return err;
522 }
523
524 // now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned
525 err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property);
526 if (err != 0)
527 {
528 printf("error! returning %d\n",err);
529 return err;
530 }
531
532 for (cl_uint j=0;j < deviceCount;j++)
533 {
534 err = clReleaseDevice(subDevices[j]);
535 test_error( err, "\n Releasing sub-device failed \n" );
536 }
537
538 } // for
539
540 log_info("Testing on all device %p finished\n", deviceID);
541 return 0;
542 }
543
544
test_partition_equally(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)545 int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
546 {
547 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1);
548 }
549
test_partition_by_counts(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)550 int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
551 {
552 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2);
553 }
554
test_partition_by_affinity_domain_numa(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)555 int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
556 {
557 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3);
558 }
559
test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)560 int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
561 {
562 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4);
563 }
564
test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)565 int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
566 {
567 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5);
568 }
569
test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)570 int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
571 {
572 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6);
573 }
574
test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)575 int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
576 {
577 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7);
578 }
579
test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)580 int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
581 {
582 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8);
583 }
584
test_partition_all(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)585 int test_partition_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
586 {
587 return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8);
588 }
589