xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/buffers/test_sub_buffers.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 
18 #include <algorithm>
19 
20 // Design:
21 // To test sub buffers, we first create one main buffer. We then create several sub-buffers and
22 // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
23 // what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
24 // sub-buffers.
25 
26 class SubBufferWrapper : public clMemWrapper
27 {
28 public:
29     cl_mem mParentBuffer;
30     size_t mOrigin;
31     size_t mSize;
32 
Allocate(cl_mem parent,cl_mem_flags flags,size_t origin,size_t size)33     cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
34     {
35         mParentBuffer = parent;
36         mOrigin = origin;
37         mSize = size;
38 
39         cl_buffer_region region;
40         region.origin = mOrigin;
41         region.size = mSize;
42 
43         cl_int error;
44         reset(clCreateSubBuffer(mParentBuffer, flags,
45                                 CL_BUFFER_CREATE_TYPE_REGION, &region, &error));
46         return error;
47     }
48 };
49 
50 class Action
51 {
52 public:
~Action()53     virtual ~Action() {}
54     virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
55     virtual const char * GetName( void ) const = 0;
56 
57     static MTdata d;
GetRandSeed(void)58     static MTdata GetRandSeed( void )
59     {
60         if ( d == 0 )
61             d = init_genrand( gRandomSeed );
62         return d;
63     }
FreeRandSeed()64     static void FreeRandSeed() {
65         if ( d != 0 ) {
66             free_mtdata(d);
67             d = 0;
68         }
69     }
70 };
71 
72 MTdata Action::d = 0;
73 
74 class ReadWriteAction : public Action
75 {
76 public:
~ReadWriteAction()77     virtual ~ReadWriteAction() {}
GetName(void) const78     virtual const char * GetName( void ) const { return "ReadWrite";}
79 
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)80     virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
81     {
82         cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
83         if (!tempBuffer) {
84             log_error("Out of memory\n");
85             return -1;
86         }
87         cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
88         test_error( error, "Unable to enqueue buffer read" );
89 
90         size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
91         size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );
92 
93         for ( size_t i = start; i < end; i++ )
94         {
95             tempBuffer[ i ] |= tag;
96             parentBufferState[ i + buffer1.mOrigin ] |= tag;
97         }
98 
99         error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
100         test_error( error, "Unable to enqueue buffer write" );
101         free(tempBuffer);
102         return CL_SUCCESS;
103     }
104 };
105 
106 class CopyAction : public Action
107 {
108 public:
~CopyAction()109     virtual ~CopyAction() {}
GetName(void) const110     virtual const char * GetName( void ) const { return "Copy";}
111 
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)112     virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
113     {
114         // Copy from sub-buffer 1 to sub-buffer 2
115         size_t size = get_random_size_t(
116             0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed());
117 
118         size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
119         size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );
120 
121         cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
122         test_error( error, "Unable to enqueue buffer copy" );
123 
124         memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );
125 
126         return CL_SUCCESS;
127     }
128 };
129 
130 class MapAction : public Action
131 {
132 public:
~MapAction()133     virtual ~MapAction() {}
GetName(void) const134     virtual const char * GetName( void ) const { return "Map";}
135 
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)136     virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
137     {
138         size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
139         size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
140 
141         cl_int error;
142         void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
143                                                start, size, 0, NULL, NULL, &error );
144         test_error( error, "Unable to map buffer" );
145 
146         cl_char *cPtr = (cl_char *)mappedPtr;
147         for ( size_t i = 0; i < size; i++ )
148         {
149             cPtr[ i ] |= tag;
150             parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
151         }
152 
153         error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
154         test_error( error, "Unable to unmap buffer" );
155 
156         return CL_SUCCESS;
157     }
158 };
159 
160 class KernelReadWriteAction : public Action
161 {
162 public:
~KernelReadWriteAction()163     virtual ~KernelReadWriteAction() {}
GetName(void) const164     virtual const char * GetName( void ) const { return "KernelReadWrite";}
165 
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)166     virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
167     {
168         const char *kernelCode[] = {
169             "__kernel void readTest( __global char *inBuffer, char tag )\n"
170             "{\n"
171             "    int tid = get_global_id(0);\n"
172             "    inBuffer[ tid ] |= tag;\n"
173             "}\n" };
174 
175         clProgramWrapper program;
176         clKernelWrapper kernel;
177         cl_int error;
178 
179         if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
180         {
181             return -1;
182         }
183 
184         size_t threads[1] = { buffer1.mSize };
185 
186         error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
187         test_error( error, "Unable to set kernel argument" );
188         error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
189         test_error( error, "Unable to set kernel argument" );
190 
191         error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
192         test_error( error, "Unable to queue kernel" );
193 
194         for ( size_t i = 0; i < buffer1.mSize; i++ )
195             parentBufferState[ i + buffer1.mOrigin ] |= tag;
196 
197         return CL_SUCCESS;
198     }
199 };
200 
get_reasonable_buffer_size(cl_device_id device,size_t & outSize)201 cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
202 {
203     cl_ulong maxAllocSize;
204     cl_int error;
205 
206     // Get the largest possible buffer we could allocate
207     error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
208     test_error( error, "Unable to get max alloc size" );
209 
210     // Don't create a buffer quite that big, just so we have some space left over for other work
211     outSize = (size_t)( maxAllocSize / 5 );
212 
213     // Cap at 32M so tests complete in a reasonable amount of time.
214     if ( outSize > 32 << 20 )
215         outSize = 32 << 20;
216 
217     return CL_SUCCESS;
218 }
219 
find_subbuffer_by_index(SubBufferWrapper * subBuffers,size_t numSubBuffers,size_t index)220 size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
221 {
222     for ( size_t i = 0; i < numSubBuffers; i++ )
223     {
224         if ( subBuffers[ i ].mOrigin > index )
225             return numSubBuffers;
226         if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
227             return i;
228     }
229     return numSubBuffers;
230 }
231 
232 // This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
233 // can't overlap)
test_sub_buffers_read_write_core(cl_context context,cl_command_queue queueA,cl_command_queue queueB,size_t mainSize,size_t addressAlign)234 int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
235 {
236     clMemWrapper mainBuffer;
237     SubBufferWrapper subBuffers[ 8 ];
238     size_t numSubBuffers;
239     cl_int error;
240     size_t i;
241     MTdata m = init_genrand( 22 );
242 
243 
244     cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
245     cl_char * actualResults      = (cl_char*)calloc(1,mainSize);
246 
247     for ( i = 0; i < mainSize / 4; i++ )
248         ((cl_uint*) mainBufferContents)[i] = genrand_int32(m);
249 
250     free_mtdata( m );
251 
252     // Create the main buffer to test against
253     mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
254     test_error( error, "Unable to create test main buffer" );
255 
256     // Create some sub-buffers to use
257     size_t toStartFrom = 0;
258     for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
259     {
260         size_t endRange = toStartFrom + ( mainSize / 4 );
261         if ( endRange > mainSize )
262             endRange = mainSize;
263 
264         size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
265         size_t size =
266             get_random_size_t(
267                 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign,
268                 Action::GetRandSeed())
269             * addressAlign;
270         error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
271         test_error( error, "Unable to allocate sub buffer" );
272 
273         toStartFrom = offset + size;
274         if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
275             break;
276     }
277 
278     ReadWriteAction rwAction;
279     MapAction mapAction;
280     CopyAction copyAction;
281     KernelReadWriteAction kernelAction;
282 
283     Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
284     int numErrors = 0;
285 
286     // Do the following steps twice, to make sure the parent gets updated *and* we can
287     // still work on the sub-buffers
288     cl_command_queue prev_queue = queueA;
289     for ( int time = 0; time < 2; time++ )
290     {
291         // Randomly apply actions to the set of sub buffers
292         size_t i;
293         for (  i = 0; i < 64; i++ )
294         {
295             int which = random_in_range( 0, 3, Action::GetRandSeed() );
296             int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
297             int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
298             int whichBufferB;
299             do
300             {
301                 whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
302             } while ( whichBufferB == whichBufferA );
303 
304             cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
305             if (queue != prev_queue) {
306                 error = clFinish( prev_queue );
307                 test_error( error, "Error finishing other queue." );
308 
309                 prev_queue = queue;
310             }
311 
312             error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
313             test_error( error, "Unable to execute action against sub buffers" );
314         }
315 
316         error = clFinish( queueA );
317         test_error( error, "Error finishing queueA." );
318 
319         error = clFinish( queueB );
320         test_error( error, "Error finishing queueB." );
321 
322         // Validate by reading the final contents of the main buffer and
323         // validating against our ref copy we generated
324         error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
325         test_error( error, "Unable to enqueue buffer read" );
326 
327         for ( i = 0; i < mainSize; i += 65536 )
328         {
329             size_t left = 65536;
330             if ( ( i + left ) > mainSize )
331                 left = mainSize - i;
332 
333             if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
334                 continue;
335 
336             // The fast compare failed, so we need to determine where exactly the failure is
337 
338             for ( size_t j = 0; j < left; j++ )
339             {
340                 if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
341                 {
342                     // Hit a failure; report the subbuffer at this address as having failed
343                     size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
344                     if ( sbThatFailed == numSubBuffers )
345                     {
346                         log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
347                         // Since this is a nonsensical, don't bother continuing to check
348                         // (we will, however, print our map of sub-buffers for comparison)
349                         for ( size_t k = 0; k < numSubBuffers; k++ )
350                         {
351                             log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
352                         }
353                         return -1;
354                     }
355                     log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
356                     size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
357                     i = newPos & ~65535;
358                     j = newPos - i;
359                     numErrors++;
360                 }
361             }
362         }
363     }
364 
365     free(mainBufferContents);
366     free(actualResults);
367     Action::FreeRandSeed();
368 
369     return numErrors;
370 }
371 
test_sub_buffers_read_write(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)372 int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
373 {
374     cl_int error;
375     size_t mainSize;
376     cl_uint addressAlignBits;
377 
378     // Get the size of the main buffer to use
379     error = get_reasonable_buffer_size( deviceID, mainSize );
380     test_error( error, "Unable to get reasonable buffer size" );
381 
382     // Determine the alignment of the device so we can make sure sub buffers are valid
383     error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
384     test_error( error, "Unable to get device's address alignment" );
385 
386     size_t addressAlign = addressAlignBits/8;
387 
388     return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
389 }
390 
391 // This test performs the same basic operations as sub_buffers_read_write, but instead of a single
392 // device, it creates a context and buffer shared between two devices, then executes commands
393 // on queues for each device to ensure that everything still operates as expected.
test_sub_buffers_read_write_dual_devices(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)394 int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
395 {
396     cl_int error;
397 
398 
399     // First obtain the second device
400     cl_device_id otherDevice = GetOpposingDevice( deviceID );
401     if ( otherDevice == NULL )
402     {
403         log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
404         return -1;
405     }
406     if ( otherDevice == deviceID )
407     {
408         log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
409         return 0;
410     }
411 
412     // Determine the device id.
413     size_t param_size;
414     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
415     test_error( error, "Error obtaining device name" );
416 
417 #if !(defined(_WIN32) && defined(_MSC_VER))
418     char device_name[param_size];
419 #else
420     char* device_name = (char*)_malloca(param_size);
421 #endif
422     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
423     test_error( error, "Error obtaining device name" );
424 
425     log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
426 
427     // Create a shared context for these two devices
428     cl_device_id devices[ 2 ] = { deviceID, otherDevice };
429     clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
430     test_error( error, "Unable to create shared context" );
431 
432     // Create two queues (can't use the existing one, because it's on the wrong context)
433     clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
434     test_error( error, "Unable to create command queue on main device" );
435 
436     clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
437     test_error( error, "Unable to create command queue on secondary device" );
438 
439     // Determine the reasonable buffer size and address alignment that applies to BOTH devices
440     size_t maxBuffer1, maxBuffer2;
441     error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
442     test_error( error, "Unable to get buffer size for main device" );
443 
444     error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
445     test_error( error, "Unable to get buffer size for secondary device" );
446     maxBuffer1 = std::min(maxBuffer1, maxBuffer2);
447 
448     cl_uint addressAlign1Bits, addressAlign2Bits;
449     error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
450     test_error( error, "Unable to get main device's address alignment" );
451 
452     error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
453     test_error( error, "Unable to get secondary device's address alignment" );
454 
455     cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8;
456 
457     // Finally time to run!
458     return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
459 }
460 
read_buffer_via_kernel(cl_context context,cl_command_queue queue,cl_mem buffer,size_t length,cl_char * outResults)461 cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
462 {
463     const char *kernelCode[] = {
464         "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
465         "{\n"
466         "    int tid = get_global_id(0);\n"
467         "    outBuffer[ tid ] = inBuffer[ tid ];\n"
468         "}\n" };
469 
470     clProgramWrapper program;
471     clKernelWrapper kernel;
472     cl_int error;
473 
474     if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
475     {
476         return -1;
477     }
478 
479     size_t threads[1] = { length };
480 
481     clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
482     test_error( error, "Unable to create output stream" );
483 
484     error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
485     test_error( error, "Unable to set kernel argument" );
486     error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
487     test_error( error, "Unable to set kernel argument" );
488 
489     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
490     test_error( error, "Unable to queue kernel" );
491 
492     error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
493     test_error( error, "Unable to read results from kernel" );
494 
495     return CL_SUCCESS;
496 }
497 
498 
test_sub_buffers_overlapping(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)499 int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
500 {
501     cl_int error;
502     size_t mainSize;
503     cl_uint addressAlign;
504 
505     clMemWrapper mainBuffer;
506     SubBufferWrapper subBuffers[ 16 ];
507 
508 
509     // Create the main buffer to test against
510     error = get_reasonable_buffer_size( deviceID, mainSize );
511     test_error( error, "Unable to get reasonable buffer size" );
512 
513     mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
514     test_error( error, "Unable to create test main buffer" );
515 
516     // Determine the alignment of the device so we can make sure sub buffers are valid
517     error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
518     test_error( error, "Unable to get device's address alignment" );
519 
520     // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
521     for ( size_t i = 0; i < 16; i++ )
522     {
523         size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
524         size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
525 
526         error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
527         test_error( error, "Unable to allocate sub buffer" );
528     }
529 
530     /// For logging, we determine the amount of overlap we just generated
531     // Build a fast in-out map to help with generating the stats
532     int sbMap[ 32 ], mapSize = 0;
533     for ( int i = 0; i < 16; i++ )
534     {
535         int j;
536         for ( j = 0; j < mapSize; j++ )
537         {
538             size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
539                         : subBuffers[ sbMap[ j ] ].mOrigin;
540             if ( subBuffers[ i ].mOrigin < pt )
541             {
542                 // Origin is before this part of the map, so move map forward so we can insert
543                 memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
544                 sbMap[ j ] = i;
545                 mapSize++;
546                 break;
547             }
548         }
549         if ( j == mapSize )
550         {
551             sbMap[ j ] = i;
552             mapSize++;
553         }
554 
555         size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
556         for ( j = 0; j < mapSize; j++ )
557         {
558             size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
559                         : subBuffers[ sbMap[ j ] ].mOrigin;
560             if ( endPt < pt )
561             {
562                 // Origin is before this part of the map, so move map forward so we can insert
563                 memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
564                 sbMap[ j ] = -( i + 1 );
565                 mapSize++;
566                 break;
567             }
568         }
569         if ( j == mapSize )
570         {
571             sbMap[ j ] = -( i + 1 );
572             mapSize++;
573         }
574     }
575     long long delta = 0;
576     size_t maxOverlap = 1, overlap = 0;
577     for ( int i = 0; i < 32; i++ )
578     {
579         if ( sbMap[ i ] >= 0 )
580         {
581             overlap++;
582             if ( overlap > 1 )
583                 delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
584             if ( overlap > maxOverlap )
585                 maxOverlap = overlap;
586         }
587         else
588         {
589             if ( overlap > 1 )
590                 delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
591             overlap--;
592         }
593     }
594 
595     log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
596               16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );
597 
598     // Write some random contents to the main buffer
599     cl_char * contents = new cl_char[ mainSize ];
600     generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );
601 
602     error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
603     test_error( error, "Unable to write to main buffer" );
604 
605     // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
606     cl_char * tempBuffer = new cl_char[ mainSize ];
607 
608     int numErrors = 0;
609     for ( size_t i = 0; i < 16; i++ )
610     {
611         // Read from this buffer
612         int which = random_in_range( 0, 1, Action::GetRandSeed() );
613         if ( which )
614             error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
615         else
616             error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
617         test_error( error, "Unable to read sub buffer contents" );
618 
619         if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
620         {
621             log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
622             numErrors++;
623         }
624     }
625 
626     delete [] contents;
627     delete [] tempBuffer;
628     Action::FreeRandSeed();
629 
630     return numErrors;
631 }
632 
633