1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17
18 #include <algorithm>
19
20 // Design:
21 // To test sub buffers, we first create one main buffer. We then create several sub-buffers and
22 // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
23 // what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
24 // sub-buffers.
25
26 class SubBufferWrapper : public clMemWrapper
27 {
28 public:
29 cl_mem mParentBuffer;
30 size_t mOrigin;
31 size_t mSize;
32
Allocate(cl_mem parent,cl_mem_flags flags,size_t origin,size_t size)33 cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
34 {
35 mParentBuffer = parent;
36 mOrigin = origin;
37 mSize = size;
38
39 cl_buffer_region region;
40 region.origin = mOrigin;
41 region.size = mSize;
42
43 cl_int error;
44 reset(clCreateSubBuffer(mParentBuffer, flags,
45 CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error));
46 return error;
47 }
48 };
49
50 class Action
51 {
52 public:
~Action()53 virtual ~Action() {}
54 virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
55 virtual const char * GetName( void ) const = 0;
56
57 static MTdata d;
GetRandSeed(void)58 static MTdata GetRandSeed( void )
59 {
60 if ( d == 0 )
61 d = init_genrand( gRandomSeed );
62 return d;
63 }
FreeRandSeed()64 static void FreeRandSeed() {
65 if ( d != 0 ) {
66 free_mtdata(d);
67 d = 0;
68 }
69 }
70 };
71
72 MTdata Action::d = 0;
73
74 class ReadWriteAction : public Action
75 {
76 public:
~ReadWriteAction()77 virtual ~ReadWriteAction() {}
GetName(void) const78 virtual const char * GetName( void ) const { return "ReadWrite";}
79
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)80 virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
81 {
82 cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
83 if (!tempBuffer) {
84 log_error("Out of memory\n");
85 return -1;
86 }
87 cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
88 test_error( error, "Unable to enqueue buffer read" );
89
90 size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
91 size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );
92
93 for ( size_t i = start; i < end; i++ )
94 {
95 tempBuffer[ i ] |= tag;
96 parentBufferState[ i + buffer1.mOrigin ] |= tag;
97 }
98
99 error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
100 test_error( error, "Unable to enqueue buffer write" );
101 free(tempBuffer);
102 return CL_SUCCESS;
103 }
104 };
105
106 class CopyAction : public Action
107 {
108 public:
~CopyAction()109 virtual ~CopyAction() {}
GetName(void) const110 virtual const char * GetName( void ) const { return "Copy";}
111
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)112 virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
113 {
114 // Copy from sub-buffer 1 to sub-buffer 2
115 size_t size = get_random_size_t(
116 0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed());
117
118 size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
119 size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );
120
121 cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
122 test_error( error, "Unable to enqueue buffer copy" );
123
124 memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );
125
126 return CL_SUCCESS;
127 }
128 };
129
130 class MapAction : public Action
131 {
132 public:
~MapAction()133 virtual ~MapAction() {}
GetName(void) const134 virtual const char * GetName( void ) const { return "Map";}
135
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)136 virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
137 {
138 size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
139 size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
140
141 cl_int error;
142 void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
143 start, size, 0, NULL, NULL, &error );
144 test_error( error, "Unable to map buffer" );
145
146 cl_char *cPtr = (cl_char *)mappedPtr;
147 for ( size_t i = 0; i < size; i++ )
148 {
149 cPtr[ i ] |= tag;
150 parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
151 }
152
153 error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
154 test_error( error, "Unable to unmap buffer" );
155
156 return CL_SUCCESS;
157 }
158 };
159
160 class KernelReadWriteAction : public Action
161 {
162 public:
~KernelReadWriteAction()163 virtual ~KernelReadWriteAction() {}
GetName(void) const164 virtual const char * GetName( void ) const { return "KernelReadWrite";}
165
Execute(cl_context context,cl_command_queue queue,cl_char tag,SubBufferWrapper & buffer1,SubBufferWrapper & buffer2,cl_char * parentBufferState)166 virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
167 {
168 const char *kernelCode[] = {
169 "__kernel void readTest( __global char *inBuffer, char tag )\n"
170 "{\n"
171 " int tid = get_global_id(0);\n"
172 " inBuffer[ tid ] |= tag;\n"
173 "}\n" };
174
175 clProgramWrapper program;
176 clKernelWrapper kernel;
177 cl_int error;
178
179 if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
180 {
181 return -1;
182 }
183
184 size_t threads[1] = { buffer1.mSize };
185
186 error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
187 test_error( error, "Unable to set kernel argument" );
188 error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
189 test_error( error, "Unable to set kernel argument" );
190
191 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
192 test_error( error, "Unable to queue kernel" );
193
194 for ( size_t i = 0; i < buffer1.mSize; i++ )
195 parentBufferState[ i + buffer1.mOrigin ] |= tag;
196
197 return CL_SUCCESS;
198 }
199 };
200
get_reasonable_buffer_size(cl_device_id device,size_t & outSize)201 cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
202 {
203 cl_ulong maxAllocSize;
204 cl_int error;
205
206 // Get the largest possible buffer we could allocate
207 error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
208 test_error( error, "Unable to get max alloc size" );
209
210 // Don't create a buffer quite that big, just so we have some space left over for other work
211 outSize = (size_t)( maxAllocSize / 5 );
212
213 // Cap at 32M so tests complete in a reasonable amount of time.
214 if ( outSize > 32 << 20 )
215 outSize = 32 << 20;
216
217 return CL_SUCCESS;
218 }
219
find_subbuffer_by_index(SubBufferWrapper * subBuffers,size_t numSubBuffers,size_t index)220 size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
221 {
222 for ( size_t i = 0; i < numSubBuffers; i++ )
223 {
224 if ( subBuffers[ i ].mOrigin > index )
225 return numSubBuffers;
226 if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
227 return i;
228 }
229 return numSubBuffers;
230 }
231
232 // This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
233 // can't overlap)
test_sub_buffers_read_write_core(cl_context context,cl_command_queue queueA,cl_command_queue queueB,size_t mainSize,size_t addressAlign)234 int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
235 {
236 clMemWrapper mainBuffer;
237 SubBufferWrapper subBuffers[ 8 ];
238 size_t numSubBuffers;
239 cl_int error;
240 size_t i;
241 MTdata m = init_genrand( 22 );
242
243
244 cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
245 cl_char * actualResults = (cl_char*)calloc(1,mainSize);
246
247 for ( i = 0; i < mainSize / 4; i++ )
248 ((cl_uint*) mainBufferContents)[i] = genrand_int32(m);
249
250 free_mtdata( m );
251
252 // Create the main buffer to test against
253 mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
254 test_error( error, "Unable to create test main buffer" );
255
256 // Create some sub-buffers to use
257 size_t toStartFrom = 0;
258 for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
259 {
260 size_t endRange = toStartFrom + ( mainSize / 4 );
261 if ( endRange > mainSize )
262 endRange = mainSize;
263
264 size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
265 size_t size =
266 get_random_size_t(
267 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign,
268 Action::GetRandSeed())
269 * addressAlign;
270 error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
271 test_error( error, "Unable to allocate sub buffer" );
272
273 toStartFrom = offset + size;
274 if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
275 break;
276 }
277
278 ReadWriteAction rwAction;
279 MapAction mapAction;
280 CopyAction copyAction;
281 KernelReadWriteAction kernelAction;
282
283 Action * actions[] = { &rwAction, &mapAction, ©Action, &kernelAction };
284 int numErrors = 0;
285
286 // Do the following steps twice, to make sure the parent gets updated *and* we can
287 // still work on the sub-buffers
288 cl_command_queue prev_queue = queueA;
289 for ( int time = 0; time < 2; time++ )
290 {
291 // Randomly apply actions to the set of sub buffers
292 size_t i;
293 for ( i = 0; i < 64; i++ )
294 {
295 int which = random_in_range( 0, 3, Action::GetRandSeed() );
296 int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
297 int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
298 int whichBufferB;
299 do
300 {
301 whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
302 } while ( whichBufferB == whichBufferA );
303
304 cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
305 if (queue != prev_queue) {
306 error = clFinish( prev_queue );
307 test_error( error, "Error finishing other queue." );
308
309 prev_queue = queue;
310 }
311
312 error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
313 test_error( error, "Unable to execute action against sub buffers" );
314 }
315
316 error = clFinish( queueA );
317 test_error( error, "Error finishing queueA." );
318
319 error = clFinish( queueB );
320 test_error( error, "Error finishing queueB." );
321
322 // Validate by reading the final contents of the main buffer and
323 // validating against our ref copy we generated
324 error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
325 test_error( error, "Unable to enqueue buffer read" );
326
327 for ( i = 0; i < mainSize; i += 65536 )
328 {
329 size_t left = 65536;
330 if ( ( i + left ) > mainSize )
331 left = mainSize - i;
332
333 if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
334 continue;
335
336 // The fast compare failed, so we need to determine where exactly the failure is
337
338 for ( size_t j = 0; j < left; j++ )
339 {
340 if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
341 {
342 // Hit a failure; report the subbuffer at this address as having failed
343 size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
344 if ( sbThatFailed == numSubBuffers )
345 {
346 log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
347 // Since this is a nonsensical, don't bother continuing to check
348 // (we will, however, print our map of sub-buffers for comparison)
349 for ( size_t k = 0; k < numSubBuffers; k++ )
350 {
351 log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
352 }
353 return -1;
354 }
355 log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
356 size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
357 i = newPos & ~65535;
358 j = newPos - i;
359 numErrors++;
360 }
361 }
362 }
363 }
364
365 free(mainBufferContents);
366 free(actualResults);
367 Action::FreeRandSeed();
368
369 return numErrors;
370 }
371
test_sub_buffers_read_write(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)372 int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
373 {
374 cl_int error;
375 size_t mainSize;
376 cl_uint addressAlignBits;
377
378 // Get the size of the main buffer to use
379 error = get_reasonable_buffer_size( deviceID, mainSize );
380 test_error( error, "Unable to get reasonable buffer size" );
381
382 // Determine the alignment of the device so we can make sure sub buffers are valid
383 error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
384 test_error( error, "Unable to get device's address alignment" );
385
386 size_t addressAlign = addressAlignBits/8;
387
388 return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
389 }
390
391 // This test performs the same basic operations as sub_buffers_read_write, but instead of a single
392 // device, it creates a context and buffer shared between two devices, then executes commands
393 // on queues for each device to ensure that everything still operates as expected.
test_sub_buffers_read_write_dual_devices(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)394 int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
395 {
396 cl_int error;
397
398
399 // First obtain the second device
400 cl_device_id otherDevice = GetOpposingDevice( deviceID );
401 if ( otherDevice == NULL )
402 {
403 log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
404 return -1;
405 }
406 if ( otherDevice == deviceID )
407 {
408 log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
409 return 0;
410 }
411
412 // Determine the device id.
413 size_t param_size;
414 error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, ¶m_size );
415 test_error( error, "Error obtaining device name" );
416
417 #if !(defined(_WIN32) && defined(_MSC_VER))
418 char device_name[param_size];
419 #else
420 char* device_name = (char*)_malloca(param_size);
421 #endif
422 error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
423 test_error( error, "Error obtaining device name" );
424
425 log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
426
427 // Create a shared context for these two devices
428 cl_device_id devices[ 2 ] = { deviceID, otherDevice };
429 clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
430 test_error( error, "Unable to create shared context" );
431
432 // Create two queues (can't use the existing one, because it's on the wrong context)
433 clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
434 test_error( error, "Unable to create command queue on main device" );
435
436 clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
437 test_error( error, "Unable to create command queue on secondary device" );
438
439 // Determine the reasonable buffer size and address alignment that applies to BOTH devices
440 size_t maxBuffer1, maxBuffer2;
441 error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
442 test_error( error, "Unable to get buffer size for main device" );
443
444 error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
445 test_error( error, "Unable to get buffer size for secondary device" );
446 maxBuffer1 = std::min(maxBuffer1, maxBuffer2);
447
448 cl_uint addressAlign1Bits, addressAlign2Bits;
449 error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
450 test_error( error, "Unable to get main device's address alignment" );
451
452 error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
453 test_error( error, "Unable to get secondary device's address alignment" );
454
455 cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8;
456
457 // Finally time to run!
458 return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
459 }
460
read_buffer_via_kernel(cl_context context,cl_command_queue queue,cl_mem buffer,size_t length,cl_char * outResults)461 cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
462 {
463 const char *kernelCode[] = {
464 "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
465 "{\n"
466 " int tid = get_global_id(0);\n"
467 " outBuffer[ tid ] = inBuffer[ tid ];\n"
468 "}\n" };
469
470 clProgramWrapper program;
471 clKernelWrapper kernel;
472 cl_int error;
473
474 if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
475 {
476 return -1;
477 }
478
479 size_t threads[1] = { length };
480
481 clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
482 test_error( error, "Unable to create output stream" );
483
484 error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
485 test_error( error, "Unable to set kernel argument" );
486 error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
487 test_error( error, "Unable to set kernel argument" );
488
489 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
490 test_error( error, "Unable to queue kernel" );
491
492 error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
493 test_error( error, "Unable to read results from kernel" );
494
495 return CL_SUCCESS;
496 }
497
498
test_sub_buffers_overlapping(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)499 int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
500 {
501 cl_int error;
502 size_t mainSize;
503 cl_uint addressAlign;
504
505 clMemWrapper mainBuffer;
506 SubBufferWrapper subBuffers[ 16 ];
507
508
509 // Create the main buffer to test against
510 error = get_reasonable_buffer_size( deviceID, mainSize );
511 test_error( error, "Unable to get reasonable buffer size" );
512
513 mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
514 test_error( error, "Unable to create test main buffer" );
515
516 // Determine the alignment of the device so we can make sure sub buffers are valid
517 error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
518 test_error( error, "Unable to get device's address alignment" );
519
520 // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
521 for ( size_t i = 0; i < 16; i++ )
522 {
523 size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
524 size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
525
526 error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
527 test_error( error, "Unable to allocate sub buffer" );
528 }
529
530 /// For logging, we determine the amount of overlap we just generated
531 // Build a fast in-out map to help with generating the stats
532 int sbMap[ 32 ], mapSize = 0;
533 for ( int i = 0; i < 16; i++ )
534 {
535 int j;
536 for ( j = 0; j < mapSize; j++ )
537 {
538 size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
539 : subBuffers[ sbMap[ j ] ].mOrigin;
540 if ( subBuffers[ i ].mOrigin < pt )
541 {
542 // Origin is before this part of the map, so move map forward so we can insert
543 memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
544 sbMap[ j ] = i;
545 mapSize++;
546 break;
547 }
548 }
549 if ( j == mapSize )
550 {
551 sbMap[ j ] = i;
552 mapSize++;
553 }
554
555 size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
556 for ( j = 0; j < mapSize; j++ )
557 {
558 size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
559 : subBuffers[ sbMap[ j ] ].mOrigin;
560 if ( endPt < pt )
561 {
562 // Origin is before this part of the map, so move map forward so we can insert
563 memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
564 sbMap[ j ] = -( i + 1 );
565 mapSize++;
566 break;
567 }
568 }
569 if ( j == mapSize )
570 {
571 sbMap[ j ] = -( i + 1 );
572 mapSize++;
573 }
574 }
575 long long delta = 0;
576 size_t maxOverlap = 1, overlap = 0;
577 for ( int i = 0; i < 32; i++ )
578 {
579 if ( sbMap[ i ] >= 0 )
580 {
581 overlap++;
582 if ( overlap > 1 )
583 delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
584 if ( overlap > maxOverlap )
585 maxOverlap = overlap;
586 }
587 else
588 {
589 if ( overlap > 1 )
590 delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
591 overlap--;
592 }
593 }
594
595 log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
596 16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );
597
598 // Write some random contents to the main buffer
599 cl_char * contents = new cl_char[ mainSize ];
600 generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );
601
602 error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
603 test_error( error, "Unable to write to main buffer" );
604
605 // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
606 cl_char * tempBuffer = new cl_char[ mainSize ];
607
608 int numErrors = 0;
609 for ( size_t i = 0; i < 16; i++ )
610 {
611 // Read from this buffer
612 int which = random_in_range( 0, 1, Action::GetRandSeed() );
613 if ( which )
614 error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
615 else
616 error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
617 test_error( error, "Unable to read sub buffer contents" );
618
619 if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
620 {
621 log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
622 numErrors++;
623 }
624 }
625
626 delete [] contents;
627 delete [] tempBuffer;
628 Action::FreeRandSeed();
629
630 return numErrors;
631 }
632
633