1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <CL/cl_half.h>
25
26 #include "procs.h"
27
28 //#define HK_DO_NOT_RUN_SHORT_ASYNC 1
29 //#define HK_DO_NOT_RUN_USHORT_ASYNC 1
30 //#define HK_DO_NOT_RUN_CHAR_ASYNC 1
31 //#define HK_DO_NOT_RUN_UCHAR_ASYNC 1
32
33 #define TEST_PRIME_INT ((1<<16)+1)
34 #define TEST_PRIME_UINT ((1U<<16)+1U)
35 #define TEST_PRIME_LONG ((1LL<<32)+1LL)
36 #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
37 #define TEST_PRIME_SHORT ((1S<<8)+1S)
38 #define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
39 #define TEST_PRIME_HALF 119.f
40 #define TEST_BOOL true
41 #define TEST_PRIME_CHAR 0x77
42
43 #ifndef ulong
44 typedef unsigned long ulong;
45 #endif
46
47 #ifndef uchar
48 typedef unsigned char uchar;
49 #endif
50
51 #ifndef TestStruct
52 typedef struct{
53 int a;
54 float b;
55 } TestStruct;
56 #endif
57
58 //--- the code for the kernel executables
59 static const char *buffer_read_int_kernel_code[] = {
60 "__kernel void test_buffer_read_int(__global int *dst)\n"
61 "{\n"
62 " int tid = get_global_id(0);\n"
63 "\n"
64 " dst[tid] = ((1<<16)+1);\n"
65 "}\n",
66
67 "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
68 "{\n"
69 " int tid = get_global_id(0);\n"
70 "\n"
71 " dst[tid] = ((1<<16)+1);\n"
72 "}\n",
73
74 "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
75 "{\n"
76 " int tid = get_global_id(0);\n"
77 "\n"
78 " dst[tid] = ((1<<16)+1);\n"
79 "}\n",
80
81 "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
82 "{\n"
83 " int tid = get_global_id(0);\n"
84 "\n"
85 " dst[tid] = ((1<<16)+1);\n"
86 "}\n",
87
88 "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
89 "{\n"
90 " int tid = get_global_id(0);\n"
91 "\n"
92 " dst[tid] = ((1<<16)+1);\n"
93 "}\n" };
94
95 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
96
97 static const char *buffer_read_uint_kernel_code[] = {
98 "__kernel void test_buffer_read_uint(__global uint *dst)\n"
99 "{\n"
100 " int tid = get_global_id(0);\n"
101 "\n"
102 " dst[tid] = ((1U<<16)+1U);\n"
103 "}\n",
104
105 "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
106 "{\n"
107 " int tid = get_global_id(0);\n"
108 "\n"
109 " dst[tid] = ((1U<<16)+1U);\n"
110 "}\n",
111
112 "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
113 "{\n"
114 " int tid = get_global_id(0);\n"
115 "\n"
116 " dst[tid] = ((1U<<16)+1U);\n"
117 "}\n",
118
119 "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
120 "{\n"
121 " int tid = get_global_id(0);\n"
122 "\n"
123 " dst[tid] = ((1U<<16)+1U);\n"
124 "}\n",
125
126 "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
127 "{\n"
128 " int tid = get_global_id(0);\n"
129 "\n"
130 " dst[tid] = ((1U<<16)+1U);\n"
131 "}\n" };
132
133 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
134
135 static const char *buffer_read_long_kernel_code[] = {
136 "__kernel void test_buffer_read_long(__global long *dst)\n"
137 "{\n"
138 " int tid = get_global_id(0);\n"
139 "\n"
140 " dst[tid] = ((1L<<32)+1L);\n"
141 "}\n",
142
143 "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
144 "{\n"
145 " int tid = get_global_id(0);\n"
146 "\n"
147 " dst[tid] = ((1L<<32)+1L);\n"
148 "}\n",
149
150 "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
151 "{\n"
152 " int tid = get_global_id(0);\n"
153 "\n"
154 " dst[tid] = ((1L<<32)+1L);\n"
155 "}\n",
156
157 "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
158 "{\n"
159 " int tid = get_global_id(0);\n"
160 "\n"
161 " dst[tid] = ((1L<<32)+1L);\n"
162 "}\n",
163
164 "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
165 "{\n"
166 " int tid = get_global_id(0);\n"
167 "\n"
168 " dst[tid] = ((1L<<32)+1L);\n"
169 "}\n" };
170
171 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
172
173 static const char *buffer_read_ulong_kernel_code[] = {
174 "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
175 "{\n"
176 " int tid = get_global_id(0);\n"
177 "\n"
178 " dst[tid] = ((1UL<<32)+1UL);\n"
179 "}\n",
180
181 "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
182 "{\n"
183 " int tid = get_global_id(0);\n"
184 "\n"
185 " dst[tid] = ((1UL<<32)+1UL);\n"
186 "}\n",
187
188 "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
189 "{\n"
190 " int tid = get_global_id(0);\n"
191 "\n"
192 " dst[tid] = ((1UL<<32)+1UL);\n"
193 "}\n",
194
195 "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
196 "{\n"
197 " int tid = get_global_id(0);\n"
198 "\n"
199 " dst[tid] = ((1UL<<32)+1UL);\n"
200 "}\n",
201
202 "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
203 "{\n"
204 " int tid = get_global_id(0);\n"
205 "\n"
206 " dst[tid] = ((1UL<<32)+1UL);\n"
207 "}\n" };
208
209 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
210
211 static const char *buffer_read_short_kernel_code[] = {
212 "__kernel void test_buffer_read_short(__global short *dst)\n"
213 "{\n"
214 " int tid = get_global_id(0);\n"
215 "\n"
216 " dst[tid] = (short)((1<<8)+1);\n"
217 "}\n",
218
219 "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
220 "{\n"
221 " int tid = get_global_id(0);\n"
222 "\n"
223 " dst[tid] = (short)((1<<8)+1);\n"
224 "}\n",
225
226 "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
227 "{\n"
228 " int tid = get_global_id(0);\n"
229 "\n"
230 " dst[tid] = (short)((1<<8)+1);\n"
231 "}\n",
232
233 "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
234 "{\n"
235 " int tid = get_global_id(0);\n"
236 "\n"
237 " dst[tid] = (short)((1<<8)+1);\n"
238 "}\n",
239
240 "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
241 "{\n"
242 " int tid = get_global_id(0);\n"
243 "\n"
244 " dst[tid] = (short)((1<<8)+1);\n"
245 "}\n" };
246
247 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
248
249
250 static const char *buffer_read_ushort_kernel_code[] = {
251 "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
252 "{\n"
253 " int tid = get_global_id(0);\n"
254 "\n"
255 " dst[tid] = (ushort)((1<<8)+1);\n"
256 "}\n",
257
258 "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
259 "{\n"
260 " int tid = get_global_id(0);\n"
261 "\n"
262 " dst[tid] = (ushort)((1<<8)+1);\n"
263 "}\n",
264
265 "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
266 "{\n"
267 " int tid = get_global_id(0);\n"
268 "\n"
269 " dst[tid] = (ushort)((1<<8)+1);\n"
270 "}\n",
271
272 "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
273 "{\n"
274 " int tid = get_global_id(0);\n"
275 "\n"
276 " dst[tid] = (ushort)((1<<8)+1);\n"
277 "}\n",
278
279 "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
280 "{\n"
281 " int tid = get_global_id(0);\n"
282 "\n"
283 " dst[tid] = (ushort)((1<<8)+1);\n"
284 "}\n" };
285
286 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
287
288
289 static const char *buffer_read_float_kernel_code[] = {
290 "__kernel void test_buffer_read_float(__global float *dst)\n"
291 "{\n"
292 " int tid = get_global_id(0);\n"
293 "\n"
294 " dst[tid] = (float)3.40282346638528860e+38;\n"
295 "}\n",
296
297 "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
298 "{\n"
299 " int tid = get_global_id(0);\n"
300 "\n"
301 " dst[tid] = (float)3.40282346638528860e+38;\n"
302 "}\n",
303
304 "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
305 "{\n"
306 " int tid = get_global_id(0);\n"
307 "\n"
308 " dst[tid] = (float)3.40282346638528860e+38;\n"
309 "}\n",
310
311 "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
312 "{\n"
313 " int tid = get_global_id(0);\n"
314 "\n"
315 " dst[tid] = (float)3.40282346638528860e+38;\n"
316 "}\n",
317
318 "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
319 "{\n"
320 " int tid = get_global_id(0);\n"
321 "\n"
322 " dst[tid] = (float)3.40282346638528860e+38;\n"
323 "}\n" };
324
325 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
326
327
328 static const char *buffer_read_half_kernel_code[] = {
329 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
330 "__kernel void test_buffer_read_half(__global half *dst)\n"
331 "{\n"
332 " int tid = get_global_id(0);\n"
333 "\n"
334 " dst[tid] = (half)119;\n"
335 "}\n",
336
337 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
338 "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
339 "{\n"
340 " int tid = get_global_id(0);\n"
341 "\n"
342 " dst[tid] = (half)119;\n"
343 "}\n",
344
345 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
346 "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
347 "{\n"
348 " int tid = get_global_id(0);\n"
349 "\n"
350 " dst[tid] = (half)119;\n"
351 "}\n",
352
353 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
354 "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
355 "{\n"
356 " int tid = get_global_id(0);\n"
357 "\n"
358 " dst[tid] = (half)119;\n"
359 "}\n",
360
361 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
362 "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
363 "{\n"
364 " int tid = get_global_id(0);\n"
365 "\n"
366 " dst[tid] = (half)119;\n"
367 "}\n"
368 };
369
370 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
371
372
373 static const char *buffer_read_char_kernel_code[] = {
374 "__kernel void test_buffer_read_char(__global char *dst)\n"
375 "{\n"
376 " int tid = get_global_id(0);\n"
377 "\n"
378 " dst[tid] = (char)'w';\n"
379 "}\n",
380
381 "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
382 "{\n"
383 " int tid = get_global_id(0);\n"
384 "\n"
385 " dst[tid] = (char)'w';\n"
386 "}\n",
387
388 "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
389 "{\n"
390 " int tid = get_global_id(0);\n"
391 "\n"
392 " dst[tid] = (char)'w';\n"
393 "}\n",
394
395 "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
396 "{\n"
397 " int tid = get_global_id(0);\n"
398 "\n"
399 " dst[tid] = (char)'w';\n"
400 "}\n",
401
402 "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
403 "{\n"
404 " int tid = get_global_id(0);\n"
405 "\n"
406 " dst[tid] = (char)'w';\n"
407 "}\n" };
408
409 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
410
411
412 static const char *buffer_read_uchar_kernel_code[] = {
413 "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
414 "{\n"
415 " int tid = get_global_id(0);\n"
416 "\n"
417 " dst[tid] = 'w';\n"
418 "}\n",
419
420 "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
421 "{\n"
422 " int tid = get_global_id(0);\n"
423 "\n"
424 " dst[tid] = (uchar)'w';\n"
425 "}\n",
426
427 "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
428 "{\n"
429 " int tid = get_global_id(0);\n"
430 "\n"
431 " dst[tid] = (uchar)'w';\n"
432 "}\n",
433
434 "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
435 "{\n"
436 " int tid = get_global_id(0);\n"
437 "\n"
438 " dst[tid] = (uchar)'w';\n"
439 "}\n",
440
441 "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
442 "{\n"
443 " int tid = get_global_id(0);\n"
444 "\n"
445 " dst[tid] = (uchar)'w';\n"
446 "}\n" };
447
448 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
449
450
451 static const char *buffer_read_struct_kernel_code =
452 "typedef struct{\n"
453 "int a;\n"
454 "float b;\n"
455 "} TestStruct;\n"
456 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
457 "{\n"
458 " int tid = get_global_id(0);\n"
459 "\n"
460 " dst[tid].a = ((1<<16)+1);\n"
461 " dst[tid].b = (float)3.40282346638528860e+38;\n"
462 "}\n";
463
464
465 //--- the verify functions
verify_read_int(void * ptr,int n)466 static int verify_read_int(void *ptr, int n)
467 {
468 int i;
469 cl_int *outptr = (cl_int *)ptr;
470
471 for (i=0; i<n; i++){
472 if ( outptr[i] != TEST_PRIME_INT )
473 return -1;
474 }
475
476 return 0;
477 }
478
479
verify_read_uint(void * ptr,int n)480 static int verify_read_uint(void *ptr, int n)
481 {
482 int i;
483 cl_uint *outptr = (cl_uint *)ptr;
484
485 for (i=0; i<n; i++){
486 if ( outptr[i] != TEST_PRIME_UINT )
487 return -1;
488 }
489
490 return 0;
491 }
492
493
verify_read_long(void * ptr,int n)494 static int verify_read_long(void *ptr, int n)
495 {
496 int i;
497 cl_long *outptr = (cl_long *)ptr;
498
499 for (i=0; i<n; i++){
500 if ( outptr[i] != TEST_PRIME_LONG )
501 return -1;
502 }
503
504 return 0;
505 }
506
507
verify_read_ulong(void * ptr,int n)508 static int verify_read_ulong(void *ptr, int n)
509 {
510 int i;
511 cl_ulong *outptr = (cl_ulong *)ptr;
512
513 for (i=0; i<n; i++){
514 if ( outptr[i] != TEST_PRIME_ULONG )
515 return -1;
516 }
517
518 return 0;
519 }
520
521
verify_read_short(void * ptr,int n)522 static int verify_read_short(void *ptr, int n)
523 {
524 int i;
525 cl_short *outptr = (cl_short *)ptr;
526
527 for (i=0; i<n; i++){
528 if ( outptr[i] != (cl_short)((1<<8)+1) )
529 return -1;
530 }
531
532 return 0;
533 }
534
535
verify_read_ushort(void * ptr,int n)536 static int verify_read_ushort(void *ptr, int n)
537 {
538 int i;
539 cl_ushort *outptr = (cl_ushort *)ptr;
540
541 for (i=0; i<n; i++){
542 if ( outptr[i] != (cl_ushort)((1<<8)+1) )
543 return -1;
544 }
545
546 return 0;
547 }
548
549
verify_read_float(void * ptr,int n)550 static int verify_read_float( void *ptr, int n )
551 {
552 int i;
553 cl_float *outptr = (cl_float *)ptr;
554
555 for (i=0; i<n; i++){
556 if ( outptr[i] != TEST_PRIME_FLOAT )
557 return -1;
558 }
559
560 return 0;
561 }
562
563
verify_read_half(void * ptr,int n)564 static int verify_read_half( void *ptr, int n )
565 {
566 int i;
567 cl_half *outptr = (cl_half *)ptr;
568
569 for (i = 0; i < n; i++)
570 {
571 if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
572 }
573
574 return 0;
575 }
576
577
verify_read_char(void * ptr,int n)578 static int verify_read_char(void *ptr, int n)
579 {
580 int i;
581 cl_char *outptr = (cl_char *)ptr;
582
583 for (i=0; i<n; i++){
584 if ( outptr[i] != TEST_PRIME_CHAR )
585 return -1;
586 }
587
588 return 0;
589 }
590
591
verify_read_uchar(void * ptr,int n)592 static int verify_read_uchar(void *ptr, int n)
593 {
594 int i;
595 cl_uchar *outptr = (cl_uchar *)ptr;
596
597 for (i=0; i<n; i++){
598 if ( outptr[i] != TEST_PRIME_CHAR )
599 return -1;
600 }
601
602 return 0;
603 }
604
605
verify_read_struct(TestStruct * outptr,int n)606 static int verify_read_struct(TestStruct *outptr, int n)
607 {
608 int i;
609
610 for (i=0; i<n; i++)
611 {
612 if ( ( outptr[i].a != TEST_PRIME_INT ) ||
613 ( outptr[i].b != TEST_PRIME_FLOAT ) )
614 return -1;
615 }
616
617 return 0;
618 }
619
620 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))621 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
622 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
623 {
624 void *outptr[5];
625 void *inptr[5];
626 clProgramWrapper program[5];
627 clKernelWrapper kernel[5];
628 size_t global_work_size[3];
629 cl_int err;
630 int i;
631 size_t ptrSizes[5];
632 int src_flag_id;
633 int total_errors = 0;
634
635 size_t min_alignment = get_min_alignment(context);
636
637 global_work_size[0] = (cl_uint)num_elements;
638
639 ptrSizes[0] = size;
640 ptrSizes[1] = ptrSizes[0] << 1;
641 ptrSizes[2] = ptrSizes[1] << 1;
642 ptrSizes[3] = ptrSizes[2] << 1;
643 ptrSizes[4] = ptrSizes[3] << 1;
644
645 //skip devices that don't support long
646 if (! gHasLong && strstr(type,"long") )
647 {
648 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
649 return CL_SUCCESS;
650 }
651
652 for (i = 0; i < loops; i++)
653 {
654
655 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
656 &kernelCode[i], kernelName[i]);
657 if (err)
658 {
659 log_error("Creating program for %s\n", type);
660 print_error(err, " Error creating program ");
661 return -1;
662 }
663
664 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
665 {
666 clMemWrapper buffer;
667 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
668 if ( ! outptr[i] ){
669 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
670 return -1;
671 }
672 inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
673 if ( ! inptr[i] ){
674 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
675 return -1;
676 }
677
678
679 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
680 buffer =
681 clCreateBuffer(context, flag_set[src_flag_id],
682 ptrSizes[i] * num_elements, inptr[i], &err);
683 else
684 buffer = clCreateBuffer(context, flag_set[src_flag_id],
685 ptrSizes[i] * num_elements, NULL, &err);
686 if (err != CL_SUCCESS)
687 {
688 print_error(err, " clCreateBuffer failed\n" );
689 align_free( outptr[i] );
690 align_free( inptr[i] );
691 return -1;
692 }
693
694 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
695 if ( err != CL_SUCCESS ){
696 print_error( err, "clSetKernelArg failed" );
697 align_free( outptr[i] );
698 align_free( inptr[i] );
699 return -1;
700 }
701
702 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
703 global_work_size, NULL, 0, NULL, NULL);
704 if ( err != CL_SUCCESS ){
705 print_error( err, "clEnqueueNDRangeKernel failed" );
706 align_free( outptr[i] );
707 align_free( inptr[i] );
708 return -1;
709 }
710
711 err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
712 ptrSizes[i] * num_elements, outptr[i], 0,
713 NULL, NULL);
714 if ( err != CL_SUCCESS ){
715 print_error( err, "clEnqueueReadBuffer failed" );
716 align_free( outptr[i] );
717 align_free( inptr[i] );
718 return -1;
719 }
720
721 if (fn(outptr[i], num_elements*(1<<i))){
722 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
723 1 << i, flag_set_names[src_flag_id]);
724 total_errors++;
725 }
726 else{
727 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
728 1 << i, flag_set_names[src_flag_id]);
729 }
730
731 err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
732 ptrSizes[i] * num_elements, inptr[i], 0,
733 NULL, NULL);
734 if (err != CL_SUCCESS)
735 {
736 print_error( err, "clEnqueueReadBuffer failed" );
737 align_free( outptr[i] );
738 align_free( inptr[i] );
739 return -1;
740 }
741
742 if (fn(inptr[i], num_elements*(1<<i))){
743 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
744 total_errors++;
745 }
746 else{
747 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
748 }
749
750
751 // cleanup
752 align_free( outptr[i] );
753 align_free( inptr[i] );
754 }
755 } // mem flag
756
757 return total_errors;
758
759 } // end test_buffer_read()
760
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))761 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
762 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
763 {
764 clProgramWrapper program[5];
765 clKernelWrapper kernel[5];
766 void *outptr[5];
767 void *inptr[5];
768 size_t global_work_size[3];
769 cl_int err;
770 int i;
771 size_t ptrSizes[5];
772 int src_flag_id;
773 int total_errors = 0;
774
775 size_t min_alignment = get_min_alignment(context);
776
777 global_work_size[0] = (cl_uint)num_elements;
778
779 ptrSizes[0] = size;
780 ptrSizes[1] = ptrSizes[0] << 1;
781 ptrSizes[2] = ptrSizes[1] << 1;
782 ptrSizes[3] = ptrSizes[2] << 1;
783 ptrSizes[4] = ptrSizes[3] << 1;
784
785 //skip devices that don't support long
786 if (! gHasLong && strstr(type,"long") )
787 {
788 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
789 return CL_SUCCESS;
790 }
791
792 for (i = 0; i < loops; i++)
793 {
794
795 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
796 &kernelCode[i], kernelName[i]);
797 if (err)
798 {
799 log_error(" Error creating program for %s\n", type);
800 return -1;
801 }
802
803 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
804 {
805 clMemWrapper buffer;
806 clEventWrapper event;
807 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
808 if ( ! outptr[i] ){
809 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
810 return -1;
811 }
812 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
813 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
814 if ( ! inptr[i] ){
815 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
816 return -1;
817 }
818 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
819
820
821 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
822 buffer =
823 clCreateBuffer(context, flag_set[src_flag_id],
824 ptrSizes[i] * num_elements, inptr[i], &err);
825 else
826 buffer = clCreateBuffer(context, flag_set[src_flag_id],
827 ptrSizes[i] * num_elements, NULL, &err);
828 if ( err != CL_SUCCESS ){
829 print_error(err, " clCreateBuffer failed\n" );
830 align_free( outptr[i] );
831 align_free( inptr[i] );
832 return -1;
833 }
834
835 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
836 if ( err != CL_SUCCESS ){
837 print_error( err, "clSetKernelArg failed" );
838 align_free( outptr[i] );
839 align_free( inptr[i] );
840 return -1;
841 }
842
843 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
844 if ( err != CL_SUCCESS ){
845 print_error( err, "clEnqueueNDRangeKernel failed" );
846 align_free( outptr[i] );
847 align_free( inptr[i] );
848 return -1;
849 }
850
851 err = clEnqueueReadBuffer(queue, buffer, false, 0,
852 ptrSizes[i] * num_elements, outptr[i], 0,
853 NULL, &event);
854 #ifdef CHECK_FOR_NON_WAIT
855 size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
856 if ( ((uchar *)outptr[i])[lastIndex] ){
857 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
858 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
859 }
860 #endif
861 if ( err != CL_SUCCESS ){
862 print_error( err, "clEnqueueReadBuffer failed" );
863 align_free( outptr[i] );
864 align_free( inptr[i] );
865 return -1;
866 }
867 err = clWaitForEvents(1, &event );
868 if ( err != CL_SUCCESS ){
869 print_error( err, "clWaitForEvents() failed" );
870 align_free( outptr[i] );
871 align_free( inptr[i] );
872 return -1;
873 }
874
875 if ( fn(outptr[i], num_elements*(1<<i)) ){
876 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
877 1 << i, flag_set_names[src_flag_id]);
878 total_errors++;
879 }
880 else{
881 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
882 1 << i, flag_set_names[src_flag_id]);
883 }
884
885 // cleanup
886 align_free( outptr[i] );
887 align_free( inptr[i] );
888 }
889 } // mem flags
890
891
892 return total_errors;
893
894 } // end test_buffer_read_array_async()
895
896
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))897 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
898 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
899 {
900 clProgramWrapper program[5];
901 clKernelWrapper kernel[5];
902 void *outptr[5], *inptr[5];
903 size_t global_work_size[3];
904 cl_int err;
905 int i;
906 size_t ptrSizes[5];
907 int src_flag_id;
908 int total_errors = 0;
909
910 size_t min_alignment = get_min_alignment(context);
911
912 global_work_size[0] = (cl_uint)num_elements;
913
914 ptrSizes[0] = size;
915 ptrSizes[1] = ptrSizes[0] << 1;
916 ptrSizes[2] = ptrSizes[1] << 1;
917 ptrSizes[3] = ptrSizes[2] << 1;
918 ptrSizes[4] = ptrSizes[3] << 1;
919
920 //skip devices that don't support long
921 if (! gHasLong && strstr(type,"long") )
922 {
923 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
924 return CL_SUCCESS;
925 }
926
927 for (i = 0; i < loops; i++)
928 {
929
930 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
931 &kernelCode[i], kernelName[i]);
932 if (err)
933 {
934 log_error(" Error creating program for %s\n", type);
935 return -1;
936 }
937
938 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
939 {
940 clMemWrapper buffer;
941 clEventWrapper event;
942 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
943 if ( ! outptr[i] ){
944 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
945 return -1;
946 }
947 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
948 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
949 if ( ! inptr[i] ){
950 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
951 return -1;
952 }
953 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
954
955 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
956 buffer =
957 clCreateBuffer(context, flag_set[src_flag_id],
958 ptrSizes[i] * num_elements, inptr[i], &err);
959 else
960 buffer = clCreateBuffer(context, flag_set[src_flag_id],
961 ptrSizes[i] * num_elements, NULL, &err);
962 if ( err != CL_SUCCESS ){
963 print_error(err, " clCreateBuffer failed\n" );
964 align_free( outptr[i] );
965 align_free( inptr[i] );
966 return -1;
967 }
968
969 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
970 if ( err != CL_SUCCESS ){
971 print_error( err, "clSetKernelArgs failed" );
972 align_free( outptr[i] );
973 align_free( inptr[i] );
974 return -1;
975 }
976
977 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
978 if ( err != CL_SUCCESS ){
979 print_error( err, "clEnqueueNDRangeKernel failed" );
980 align_free( outptr[i] );
981 align_free( inptr[i] );
982 return -1;
983 }
984
985 err = clEnqueueReadBuffer(queue, buffer, false, 0,
986 ptrSizes[i] * num_elements,
987 (void *)(outptr[i]), 0, NULL, &event);
988 #ifdef CHECK_FOR_NON_WAIT
989 size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
990 if ( ((uchar *)outptr[i])[lastIndex] ){
991 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
992 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
993 }
994 #endif
995 if ( err != CL_SUCCESS ){
996 print_error( err, "clEnqueueReadBuffer failed" );
997 align_free( outptr[i] );
998 align_free( inptr[i] );
999 return -1;
1000 }
1001 err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1002 if ( err != CL_SUCCESS ){
1003 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1004 align_free( outptr[i] );
1005 return -1;
1006 }
1007
1008 err = clWaitForEvents(1, &event);
1009 if ( err != CL_SUCCESS ){
1010 print_error( err, "clWaitForEvents() failed" );
1011 align_free( outptr[i] );
1012 align_free( inptr[i] );
1013 return -1;
1014 }
1015
1016 if ( fn(outptr[i], num_elements*(1<<i)) ){
1017 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1018 1 << i, flag_set_names[src_flag_id]);
1019 total_errors++;
1020 }
1021 else{
1022 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1023 1 << i, flag_set_names[src_flag_id]);
1024 }
1025
1026 // cleanup
1027 align_free( outptr[i] );
1028 align_free( inptr[i] );
1029 }
1030 } // cl_mem flags
1031 return total_errors;
1032
1033 } // end test_buffer_read_array_barrier()
1034
1035
1036 #define DECLARE_READ_TEST(type, realType) \
1037 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1038 { \
1039 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1040 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1041 }
1042
DECLARE_READ_TEST(int,cl_int)1043 DECLARE_READ_TEST(int, cl_int)
1044 DECLARE_READ_TEST(uint, cl_uint)
1045 DECLARE_READ_TEST(long, cl_long)
1046 DECLARE_READ_TEST(ulong, cl_ulong)
1047 DECLARE_READ_TEST(short, cl_short)
1048 DECLARE_READ_TEST(ushort, cl_ushort)
1049 DECLARE_READ_TEST(float, cl_float)
1050 DECLARE_READ_TEST(char, cl_char)
1051 DECLARE_READ_TEST(uchar, cl_uchar)
1052
1053 int test_buffer_read_half(cl_device_id deviceID, cl_context context,
1054 cl_command_queue queue, int num_elements)
1055 {
1056 PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
1057 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1058 buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1059 }
1060
1061
1062 #define DECLARE_ASYNC_TEST(type, realType) \
1063 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1064 { \
1065 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1066 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1067 }
1068
DECLARE_ASYNC_TEST(char,cl_char)1069 DECLARE_ASYNC_TEST(char, cl_char)
1070 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1071 DECLARE_ASYNC_TEST(short, cl_short)
1072 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1073 DECLARE_ASYNC_TEST(int, cl_int)
1074 DECLARE_ASYNC_TEST(uint, cl_uint)
1075 DECLARE_ASYNC_TEST(long, cl_long)
1076 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1077 DECLARE_ASYNC_TEST(float, cl_float)
1078
1079
1080 #define DECLARE_BARRIER_TEST(type, realType) \
1081 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1082 { \
1083 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1084 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1085 }
1086
1087 DECLARE_BARRIER_TEST(int, cl_int)
1088 DECLARE_BARRIER_TEST(uint, cl_uint)
1089 DECLARE_BARRIER_TEST(long, cl_long)
1090 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1091 DECLARE_BARRIER_TEST(short, cl_short)
1092 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1093 DECLARE_BARRIER_TEST(char, cl_char)
1094 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1095 DECLARE_BARRIER_TEST(float, cl_float)
1096
1097 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1098 {
1099 cl_mem buffers[1];
1100 TestStruct *output_ptr;
1101 cl_program program[1];
1102 cl_kernel kernel[1];
1103 size_t global_work_size[3];
1104 cl_int err;
1105 size_t objSize = sizeof(TestStruct);
1106
1107 size_t min_alignment = get_min_alignment(context);
1108
1109 global_work_size[0] = (cl_uint)num_elements;
1110
1111 output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1112 if ( ! output_ptr ){
1113 log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1114 return -1;
1115 }
1116 buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1117 objSize * num_elements, NULL, &err);
1118 if ( err != CL_SUCCESS ){
1119 print_error( err, " clCreateBuffer failed\n" );
1120 align_free( output_ptr );
1121 return -1;
1122 }
1123
1124 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1125 if ( err ){
1126 clReleaseProgram( program[0] );
1127 align_free( output_ptr );
1128 return -1;
1129 }
1130
1131 err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1132 if ( err != CL_SUCCESS){
1133 print_error( err, "clSetKernelArg failed" );
1134 clReleaseMemObject( buffers[0] );
1135 clReleaseKernel( kernel[0] );
1136 clReleaseProgram( program[0] );
1137 align_free( output_ptr );
1138 return -1;
1139 }
1140
1141 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1142 if ( err != CL_SUCCESS ){
1143 print_error( err, "clEnqueueNDRangeKernel failed" );
1144 clReleaseMemObject( buffers[0] );
1145 clReleaseKernel( kernel[0] );
1146 clReleaseProgram( program[0] );
1147 align_free( output_ptr );
1148 return -1;
1149 }
1150
1151 err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1152 if ( err != CL_SUCCESS){
1153 print_error( err, "clEnqueueReadBuffer failed" );
1154 clReleaseMemObject( buffers[0] );
1155 clReleaseKernel( kernel[0] );
1156 clReleaseProgram( program[0] );
1157 align_free( output_ptr );
1158 return -1;
1159 }
1160
1161 if (verify_read_struct(output_ptr, num_elements)){
1162 log_error(" struct test failed\n");
1163 err = -1;
1164 }
1165 else{
1166 log_info(" struct test passed\n");
1167 err = 0;
1168 }
1169
1170 // cleanup
1171 clReleaseMemObject( buffers[0] );
1172 clReleaseKernel( kernel[0] );
1173 clReleaseProgram( program[0] );
1174 align_free( output_ptr );
1175
1176 return err;
1177 }
1178
1179
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1180 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1181 {
1182 cl_mem buffers[3];
1183 int *outptr[3];
1184 cl_program program[3];
1185 cl_kernel kernel[3];
1186 size_t global_work_size[3];
1187 cl_int err;
1188 int i, j;
1189 size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4)
1190 int total_errors = 0;
1191 size_t min_alignment = get_min_alignment(context);
1192
1193 global_work_size[0] = (cl_uint)num_elements;
1194
1195 ptrSizes[0] = sizeof(cl_int);
1196 ptrSizes[1] = ptrSizes[0] << 1;
1197 ptrSizes[2] = ptrSizes[1] << 1;
1198 for ( i = 0; i < 3; i++ ){
1199 outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1200 if ( ! outptr[i] ){
1201 log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1202 for ( j = 0; j < i; j++ ){
1203 clReleaseMemObject( buffers[j] );
1204 align_free( outptr[j] );
1205 }
1206 return -1;
1207 }
1208 buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1209 ptrSizes[i] * num_elements, NULL, &err);
1210 if ( err != CL_SUCCESS ){
1211 print_error(err, " clCreateBuffer failed\n" );
1212 for ( j = 0; j < i; j++ ){
1213 clReleaseMemObject( buffers[j] );
1214 align_free( outptr[j] );
1215 }
1216 align_free( outptr[i] );
1217 return -1;
1218 }
1219 }
1220
1221 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1222 if ( err ){
1223 log_error( " Error creating program for int\n" );
1224 for ( i = 0; i < 3; i++ ){
1225 clReleaseMemObject( buffers[i] );
1226 align_free( outptr[i] );
1227 }
1228 return -1;
1229 }
1230
1231 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1232 if ( err ){
1233 log_error( " Error creating program for int2\n" );
1234 clReleaseKernel( kernel[0] );
1235 clReleaseProgram( program[0] );
1236 for ( i = 0; i < 3; i++ ){
1237 clReleaseMemObject( buffers[i] );
1238 align_free( outptr[i] );
1239 }
1240 return -1;
1241 }
1242
1243 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1244 if ( err ){
1245 log_error( " Error creating program for int4\n" );
1246 clReleaseKernel( kernel[0] );
1247 clReleaseProgram( program[0] );
1248 clReleaseKernel( kernel[1] );
1249 clReleaseProgram( program[1] );
1250 for ( i = 0; i < 3; i++ ){
1251 clReleaseMemObject( buffers[i] );
1252 align_free( outptr[i] );
1253 }
1254 return -1;
1255 }
1256
1257 for (i=0; i<3; i++){
1258 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1259 if ( err != CL_SUCCESS ){
1260 print_error( err, "clSetKernelArgs failed" );
1261 clReleaseMemObject( buffers[i] );
1262 clReleaseKernel( kernel[i] );
1263 clReleaseProgram( program[i] );
1264 align_free( outptr[i] );
1265 return -1;
1266 }
1267
1268 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1269 if ( err != CL_SUCCESS ){
1270 print_error( err, "clEnqueueNDRangeKernel failed" );
1271 clReleaseMemObject( buffers[i] );
1272 clReleaseKernel( kernel[i] );
1273 clReleaseProgram( program[i] );
1274 align_free( outptr[i] );
1275 return -1;
1276 }
1277
1278 err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1279 if ( err != CL_SUCCESS ){
1280 print_error( err, "clEnqueueReadBuffer failed" );
1281 clReleaseMemObject( buffers[i] );
1282 clReleaseKernel( kernel[i] );
1283 clReleaseProgram( program[i] );
1284 align_free( outptr[i] );
1285 return -1;
1286 }
1287
1288 if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1289 log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1290 total_errors++;
1291 }
1292 else{
1293 log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1294 }
1295
1296 // cleanup
1297 clReleaseMemObject( buffers[i] );
1298 clReleaseKernel( kernel[i] );
1299 clReleaseProgram( program[i] );
1300 align_free( outptr[i] );
1301 }
1302
1303 return total_errors;
1304
1305 } // end testRandomReadSize()
1306
1307
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1308 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1309 {
1310 int err = 0;
1311 int i;
1312 cl_uint start;
1313 size_t size;
1314 MTdata d = init_genrand( gRandomSeed );
1315
1316 // now test for random sizes of array being read
1317 for ( i = 0; i < 8; i++ ){
1318 start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1319 size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1320 if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1321 err++;
1322 }
1323
1324 free_mtdata(d);
1325
1326 return err;
1327 }
1328
1329