1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24 #include "harness/testHarness.h"
25 #include "harness/errorHelpers.h"
26 #include "harness/conversions.h"
27
28 #ifndef uchar
29 typedef unsigned char uchar;
30 #endif
31
32 #ifndef TestStruct
33 typedef struct{
34 int a;
35 float b;
36 } TestStruct;
37 #endif
38
39 const char *stream_write_int_kernel_code[] = {
40 "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n"
41 "{\n"
42 " int tid = get_global_id(0);\n"
43 "\n"
44 " dst[tid] = src[tid];\n"
45 "}\n",
46
47 "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n"
48 "{\n"
49 " int tid = get_global_id(0);\n"
50 "\n"
51 " dst[tid] = src[tid];\n"
52 "}\n",
53
54 "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n"
55 "{\n"
56 " int tid = get_global_id(0);\n"
57 "\n"
58 " dst[tid] = src[tid];\n"
59 "}\n",
60
61 "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n"
62 "{\n"
63 " int tid = get_global_id(0);\n"
64 "\n"
65 " dst[tid] = src[tid];\n"
66 "}\n",
67
68 "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n"
69 "{\n"
70 " int tid = get_global_id(0);\n"
71 "\n"
72 " dst[tid] = src[tid];\n"
73 "}\n" };
74
75 static const char *int_kernel_name[] = { "test_stream_write_int", "test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" };
76
77
78 const char *stream_write_uint_kernel_code[] = {
79 "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n"
80 "{\n"
81 " int tid = get_global_id(0);\n"
82 "\n"
83 " dst[tid] = src[tid];\n"
84 "}\n",
85
86 "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
87 "{\n"
88 " int tid = get_global_id(0);\n"
89 "\n"
90 " dst[tid] = src[tid];\n"
91 "}\n",
92
93 "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
94 "{\n"
95 " int tid = get_global_id(0);\n"
96 "\n"
97 " dst[tid] = src[tid];\n"
98 "}\n",
99
100 "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
101 "{\n"
102 " int tid = get_global_id(0);\n"
103 "\n"
104 " dst[tid] = src[tid];\n"
105 "}\n",
106
107 "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
108 "{\n"
109 " int tid = get_global_id(0);\n"
110 "\n"
111 " dst[tid] = src[tid];\n"
112 "}\n" };
113
114 static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" };
115
116
117 const char *stream_write_ushort_kernel_code[] = {
118 "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n"
119 "{\n"
120 " int tid = get_global_id(0);\n"
121 "\n"
122 " dst[tid] = src[tid];\n"
123 "}\n",
124
125 "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
126 "{\n"
127 " int tid = get_global_id(0);\n"
128 "\n"
129 " dst[tid] = src[tid];\n"
130 "}\n",
131
132 "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
133 "{\n"
134 " int tid = get_global_id(0);\n"
135 "\n"
136 " dst[tid] = src[tid];\n"
137 "}\n",
138
139 "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
140 "{\n"
141 " int tid = get_global_id(0);\n"
142 "\n"
143 " dst[tid] = src[tid];\n"
144 "}\n",
145
146 "__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
147 "{\n"
148 " int tid = get_global_id(0);\n"
149 "\n"
150 " dst[tid] = src[tid];\n"
151 "}\n" };
152
153 static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" };
154
155
156
157 const char *stream_write_short_kernel_code[] = {
158 "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n"
159 "{\n"
160 " int tid = get_global_id(0);\n"
161 "\n"
162 " dst[tid] = src[tid];\n"
163 "}\n",
164
165 "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n"
166 "{\n"
167 " int tid = get_global_id(0);\n"
168 "\n"
169 " dst[tid] = src[tid];\n"
170 "}\n",
171
172 "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n"
173 "{\n"
174 " int tid = get_global_id(0);\n"
175 "\n"
176 " dst[tid] = src[tid];\n"
177 "}\n",
178
179 "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n"
180 "{\n"
181 " int tid = get_global_id(0);\n"
182 "\n"
183 " dst[tid] = src[tid];\n"
184 "}\n",
185
186 "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n"
187 "{\n"
188 " int tid = get_global_id(0);\n"
189 "\n"
190 " dst[tid] = src[tid];\n"
191 "}\n" };
192
193 static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" };
194
195
196 const char *stream_write_char_kernel_code[] = {
197 "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n"
198 "{\n"
199 " int tid = get_global_id(0);\n"
200 "\n"
201 " dst[tid] = src[tid];\n"
202 "}\n",
203
204 "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n"
205 "{\n"
206 " int tid = get_global_id(0);\n"
207 "\n"
208 " dst[tid] = src[tid];\n"
209 "}\n",
210
211 "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n"
212 "{\n"
213 " int tid = get_global_id(0);\n"
214 "\n"
215 " dst[tid] = src[tid];\n"
216 "}\n",
217
218 "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n"
219 "{\n"
220 " int tid = get_global_id(0);\n"
221 "\n"
222 " dst[tid] = src[tid];\n"
223 "}\n",
224
225 "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n"
226 "{\n"
227 " int tid = get_global_id(0);\n"
228 "\n"
229 " dst[tid] = src[tid];\n"
230 "}\n" };
231
232 static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" };
233
234
235 const char *stream_write_uchar_kernel_code[] = {
236 "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n"
237 "{\n"
238 " int tid = get_global_id(0);\n"
239 "\n"
240 " dst[tid] = src[tid];\n"
241 "}\n",
242
243 "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
244 "{\n"
245 " int tid = get_global_id(0);\n"
246 "\n"
247 " dst[tid] = src[tid];\n"
248 "}\n",
249
250 "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
251 "{\n"
252 " int tid = get_global_id(0);\n"
253 "\n"
254 " dst[tid] = src[tid];\n"
255 "}\n",
256
257 "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
258 "{\n"
259 " int tid = get_global_id(0);\n"
260 "\n"
261 " dst[tid] = src[tid];\n"
262 "}\n",
263
264 "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
265 "{\n"
266 " int tid = get_global_id(0);\n"
267 "\n"
268 " dst[tid] = src[tid];\n"
269 "}\n" };
270
271 static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" };
272
273
274 const char *stream_write_float_kernel_code[] = {
275 "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n"
276 "{\n"
277 " int tid = get_global_id(0);\n"
278 "\n"
279 " dst[tid] = src[tid];\n"
280 "}\n",
281
282 "__kernel void test_stream_write_float2(__global float2 *src, __global float2 *dst)\n"
283 "{\n"
284 " int tid = get_global_id(0);\n"
285 "\n"
286 " dst[tid] = src[tid];\n"
287 "}\n",
288
289 "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n"
290 "{\n"
291 " int tid = get_global_id(0);\n"
292 "\n"
293 " dst[tid] = src[tid];\n"
294 "}\n",
295
296 "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n"
297 "{\n"
298 " int tid = get_global_id(0);\n"
299 "\n"
300 " dst[tid] = src[tid];\n"
301 "}\n",
302
303 "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n"
304 "{\n"
305 " int tid = get_global_id(0);\n"
306 "\n"
307 " dst[tid] = src[tid];\n"
308 "}\n" };
309
310 static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" };
311
312
313 const char *stream_write_half_kernel_code[] = {
314 "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n"
315 "{\n"
316 " int tid = get_global_id(0);\n"
317 "\n"
318 " dst[tid] = vload_half( tid * 2, src );\n"
319 "}\n",
320
321 "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n"
322 "{\n"
323 " int tid = get_global_id(0);\n"
324 "\n"
325 " dst[tid] = vload_half2( tid * 2, src );\n"
326 "}\n",
327
328 "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n"
329 "{\n"
330 " int tid = get_global_id(0);\n"
331 "\n"
332 " dst[tid] = vload_half4( tid * 2, src );\n"
333 "}\n",
334
335 "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n"
336 "{\n"
337 " int tid = get_global_id(0);\n"
338 "\n"
339 " dst[tid] = vload_half8( tid * 2, src );\n"
340 "}\n",
341
342 "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n"
343 "{\n"
344 " int tid = get_global_id(0);\n"
345 "\n"
346 " dst[tid] = vload_half16( tid * 2, src );\n"
347 "}\n" };
348
349 static const char *half_kernel_name[] = { "test_stream_write_half", "test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" };
350
351
352 const char *stream_write_long_kernel_code[] = {
353 "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n"
354 "{\n"
355 " int tid = get_global_id(0);\n"
356 "\n"
357 " dst[tid] = src[tid];\n"
358 "}\n",
359
360 "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n"
361 "{\n"
362 " int tid = get_global_id(0);\n"
363 "\n"
364 " dst[tid] = src[tid];\n"
365 "}\n",
366
367 "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n"
368 "{\n"
369 " int tid = get_global_id(0);\n"
370 "\n"
371 " dst[tid] = src[tid];\n"
372 "}\n",
373
374 "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n"
375 "{\n"
376 " int tid = get_global_id(0);\n"
377 "\n"
378 " dst[tid] = src[tid];\n"
379 "}\n",
380
381 "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n"
382 "{\n"
383 " int tid = get_global_id(0);\n"
384 "\n"
385 " dst[tid] = src[tid];\n"
386 "}\n" };
387
388 static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" };
389
390
391 const char *stream_write_ulong_kernel_code[] = {
392 "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n"
393 "{\n"
394 " int tid = get_global_id(0);\n"
395 "\n"
396 " dst[tid] = src[tid];\n"
397 "}\n",
398
399 "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
400 "{\n"
401 " int tid = get_global_id(0);\n"
402 "\n"
403 " dst[tid] = src[tid];\n"
404 "}\n",
405
406 "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
407 "{\n"
408 " int tid = get_global_id(0);\n"
409 "\n"
410 " dst[tid] = src[tid];\n"
411 "}\n",
412
413 "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
414 "{\n"
415 " int tid = get_global_id(0);\n"
416 "\n"
417 " dst[tid] = src[tid];\n"
418 "}\n",
419
420 "__kernel void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
421 "{\n"
422 " int tid = get_global_id(0);\n"
423 "\n"
424 " dst[tid] = src[tid];\n"
425 "}\n" };
426
427 static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" };
428
429
430 static const char *stream_write_struct_kernel_code[] = {
431 "typedef struct{\n"
432 "int a;\n"
433 "float b;\n"
434 "} TestStruct;\n"
435 "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
436 "{\n"
437 " int tid = get_global_id(0);\n"
438 "\n"
439 " dst[tid].a = src[tid].a;\n"
440 " dst[tid].b = src[tid].b;\n"
441 "}\n" };
442
443 static const char *struct_kernel_name[] = { "read_write_struct" };
444
445
verify_write_int(void * ptr1,void * ptr2,int n)446 static int verify_write_int( void *ptr1, void *ptr2, int n )
447 {
448 int i;
449 int *inptr = (int *)ptr1;
450 int *outptr = (int *)ptr2;
451
452 for (i=0; i<n; i++){
453 if( outptr[i] != inptr[i] )
454 return -1;
455 }
456
457 return 0;
458 }
459
460
verify_write_uint(void * ptr1,void * ptr2,int n)461 static int verify_write_uint( void *ptr1, void *ptr2, int n )
462 {
463 int i;
464 cl_uint *inptr = (cl_uint *)ptr1;
465 cl_uint *outptr = (cl_uint *)ptr2;
466
467 for (i=0; i<n; i++){
468 if( outptr[i] != inptr[i] )
469 return -1;
470 }
471
472 return 0;
473 }
474
475
verify_write_short(void * ptr1,void * ptr2,int n)476 static int verify_write_short( void *ptr1, void *ptr2, int n )
477 {
478 int i;
479 short *inptr = (short *)ptr1;
480 short *outptr = (short *)ptr2;
481
482 for (i=0; i<n; i++){
483 if( outptr[i] != inptr[i] )
484 return -1;
485 }
486
487 return 0;
488 }
489
490
verify_write_ushort(void * ptr1,void * ptr2,int n)491 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
492 {
493 int i;
494 cl_ushort *inptr = (cl_ushort *)ptr1;
495 cl_ushort *outptr = (cl_ushort *)ptr2;
496
497 for (i=0; i<n; i++){
498 if( outptr[i] != inptr[i] )
499 return -1;
500 }
501
502 return 0;
503 }
504
505
verify_write_char(void * ptr1,void * ptr2,int n)506 static int verify_write_char( void *ptr1, void *ptr2, int n )
507 {
508 int i;
509 char *inptr = (char *)ptr1;
510 char *outptr = (char *)ptr2;
511
512 for (i=0; i<n; i++){
513 if( outptr[i] != inptr[i] )
514 return -1;
515 }
516
517 return 0;
518 }
519
520
verify_write_uchar(void * ptr1,void * ptr2,int n)521 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
522 {
523 int i;
524 uchar *inptr = (uchar *)ptr1;
525 uchar *outptr = (uchar *)ptr2;
526
527 for (i=0; i<n; i++){
528 if( outptr[i] != inptr[i] )
529 return -1;
530 }
531
532 return 0;
533 }
534
535
verify_write_float(void * ptr1,void * ptr2,int n)536 static int verify_write_float( void *ptr1, void *ptr2, int n )
537 {
538 int i;
539 float *inptr = (float *)ptr1;
540 float *outptr = (float *)ptr2;
541
542 for (i=0; i<n; i++){
543 if( outptr[i] != inptr[i] )
544 return -1;
545 }
546
547 return 0;
548 }
549
550
verify_write_half(void * ptr1,void * ptr2,int n)551 static int verify_write_half( void *ptr1, void *ptr2, int n )
552 {
553 int i;
554 cl_half *inptr = (cl_half *)ptr1;
555 cl_half *outptr = (cl_half *)ptr2;
556
557 for( i = 0; i < n; i++ ){
558 if( outptr[i] != inptr[i] )
559 return -1;
560 }
561
562 return 0;
563 }
564
565
verify_write_long(void * ptr1,void * ptr2,int n)566 static int verify_write_long( void *ptr1, void *ptr2, int n )
567 {
568 int i;
569 cl_long *inptr = (cl_long *)ptr1;
570 cl_long *outptr = (cl_long *)ptr2;
571
572 for (i=0; i<n; i++){
573 if( outptr[i] != inptr[i] )
574 return -1;
575 }
576
577 return 0;
578 }
579
580
verify_write_ulong(void * ptr1,void * ptr2,int n)581 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
582 {
583 int i;
584 cl_ulong *inptr = (cl_ulong *)ptr1;
585 cl_ulong *outptr = (cl_ulong *)ptr2;
586
587 for (i=0; i<n; i++){
588 if( outptr[i] != inptr[i] )
589 return -1;
590 }
591
592 return 0;
593 }
594
595
verify_write_struct(void * ptr1,void * ptr2,int n)596 static int verify_write_struct( void *ptr1, void *ptr2, int n )
597 {
598 int i;
599 TestStruct *inptr = (TestStruct *)ptr1;
600 TestStruct *outptr = (TestStruct *)ptr2;
601
602 for (i=0; i<n; i++){
603 if( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
604 return -1;
605 }
606
607 return 0;
608 }
609
610
test_stream_write(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)611 int test_stream_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
612 void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
613 {
614 cl_mem streams[10];
615 void *outptr[5];
616 cl_program program[5];
617 cl_kernel kernel[5];
618 cl_event writeEvent;
619 cl_ulong queueStart, submitStart, writeStart, writeEnd;
620 size_t ptrSizes[5], outPtrSizes[5];
621 size_t threads[1];
622 int err, err_count = 0;
623 int i, ii;
624
625 threads[0] = (size_t)num_elements;
626
627 ptrSizes[0] = size;
628 ptrSizes[1] = ptrSizes[0] << 1;
629 ptrSizes[2] = ptrSizes[1] << 1;
630 ptrSizes[3] = ptrSizes[2] << 1;
631 ptrSizes[4] = ptrSizes[3] << 1;
632
633 loops = ( loops < 5 ? loops : 5 );
634
635 for( i = 0; i < loops; i++ )
636 {
637 outPtrSizes[i] = ptrSizes[i];
638 }
639
640 for( i = 0; i < loops; i++ ){
641 ii = i << 1;
642 streams[ii] = clCreateBuffer(context, CL_MEM_READ_WRITE,
643 ptrSizes[i] * num_elements, NULL, &err);
644 if( ! streams[ii] ){
645 free( outptr[i] );
646 log_error( " clCreateBuffer failed\n" );
647 return -1;
648 }
649 if( ! strcmp( type, "half" ) ){
650 outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
651 streams[ii + 1] =
652 clCreateBuffer(context, CL_MEM_READ_WRITE,
653 outPtrSizes[i] * 2 * num_elements, NULL, &err);
654 }
655 else{
656 outptr[i] = malloc( outPtrSizes[i] * num_elements );
657 streams[ii + 1] =
658 clCreateBuffer(context, CL_MEM_READ_WRITE,
659 outPtrSizes[i] * num_elements, NULL, &err);
660 }
661 if( ! streams[ii+1] ){
662 clReleaseMemObject(streams[ii]);
663 free( outptr[i] );
664 log_error( " clCreateBuffer failed\n" );
665 return -1;
666 }
667
668 err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent );
669 if( err != CL_SUCCESS ){
670 clReleaseMemObject( streams[ii] );
671 clReleaseMemObject( streams[ii+1] );
672 free( outptr[i] );
673 print_error( err, " clWriteArray failed" );
674 return -1;
675 }
676
677 // This synchronization point is needed in order to assume the data is valid.
678 // Getting profiling information is not a synchronization point.
679 err = clWaitForEvents( 1, &writeEvent );
680 if( err != CL_SUCCESS )
681 {
682 print_error( err, "Unable to wait for event completion" );
683 clReleaseEvent(writeEvent);
684 clReleaseMemObject( streams[ii] );
685 clReleaseMemObject( streams[ii+1] );
686 free( outptr[i] );
687 return -1;
688 }
689
690 // test profiling
691 while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
692 CL_PROFILING_INFO_NOT_AVAILABLE );
693 if( err != CL_SUCCESS ){
694 print_error( err, "clGetEventProfilingInfo failed" );
695 clReleaseEvent(writeEvent);
696 clReleaseMemObject( streams[ii] );
697 clReleaseMemObject( streams[ii+1] );
698 free( outptr[i] );
699 return -1;
700 }
701
702 while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
703 CL_PROFILING_INFO_NOT_AVAILABLE );
704 if( err != CL_SUCCESS ){
705 print_error( err, "clGetEventProfilingInfo failed" );
706 clReleaseEvent(writeEvent);
707 clReleaseMemObject( streams[ii] );
708 clReleaseMemObject( streams[ii+1] );
709 free( outptr[i] );
710 return -1;
711 }
712
713 err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
714 if( err != CL_SUCCESS ){
715 print_error( err, "clGetEventProfilingInfo failed" );
716 clReleaseEvent(writeEvent);
717 clReleaseMemObject( streams[ii] );
718 clReleaseMemObject( streams[ii+1] );
719 free( outptr[i] );
720 return -1;
721 }
722
723 err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
724 if( err != CL_SUCCESS ){
725 print_error( err, "clGetEventProfilingInfo failed" );
726 clReleaseEvent(writeEvent);
727 clReleaseMemObject( streams[ii] );
728 clReleaseMemObject( streams[ii+1] );
729 free( outptr[i] );
730 return -1;
731 }
732
733
734 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
735 if( err ){
736 clReleaseEvent(writeEvent);
737 clReleaseMemObject(streams[ii]);
738 clReleaseMemObject(streams[ii+1]);
739 free( outptr[i] );
740 log_error( " Error creating program for %s\n", type );
741 return -1;
742 }
743
744 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
745 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
746 if (err != CL_SUCCESS){
747 clReleaseEvent(writeEvent);
748 clReleaseKernel( kernel[i] );
749 clReleaseProgram( program[i] );
750 clReleaseMemObject( streams[ii] );
751 clReleaseMemObject( streams[ii+1] );
752 free( outptr[i] );
753 print_error( err, " clSetKernelArg failed" );
754 return -1;
755 }
756
757 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
758
759 if( err != CL_SUCCESS ){
760 print_error( err, " clEnqueueNDRangeKernel failed" );
761 clReleaseEvent(writeEvent);
762 clReleaseKernel( kernel[i] );
763 clReleaseProgram( program[i] );
764 clReleaseMemObject( streams[ii] );
765 clReleaseMemObject( streams[ii+1] );
766 free( outptr[i] );
767 return -1;
768 }
769
770 if( ! strcmp( type, "half" ) ){
771 err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
772 }
773 else{
774 err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
775 }
776 if( err != CL_SUCCESS ){
777 clReleaseEvent(writeEvent);
778 clReleaseKernel( kernel[i] );
779 clReleaseProgram( program[i] );
780 clReleaseMemObject( streams[ii] );
781 clReleaseMemObject( streams[ii+1] );
782 free( outptr[i] );
783 print_error( err, " clEnqueueReadBuffer failed" );
784 return -1;
785 }
786
787 char *inP = (char *)inptr[i];
788 char *outP = (char *)outptr[i];
789 int err2 = 0;
790 for( size_t p = 0; p < (size_t)num_elements; p++ )
791 {
792 if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) )
793 {
794 log_error( " %s%d data failed to verify\n", type, 1<<i );
795 err2 = -1;
796 err_count++;
797 }
798 inP += ptrSizes[i];
799 outP += outPtrSizes[i];
800 }
801 if( !err2 )
802 {
803 log_info(" %s%d data verified\n", type, 1 << i);
804 }
805 err = err2;
806
807 if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
808 err_count++;
809
810 // cleanup
811 clReleaseEvent(writeEvent);
812 clReleaseKernel( kernel[i] );
813 clReleaseProgram( program[i] );
814 clReleaseMemObject( streams[ii] );
815 clReleaseMemObject( streams[ii+1] );
816 free( outptr[i] );
817 }
818
819 return err_count;
820
821 } // end test_stream_write()
822
823
test_write_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)824 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
825 {
826 int *inptr[5];
827 size_t ptrSizes[5];
828 int i, j, err;
829 int (*foo)(void *,void *,int);
830 MTdata d = init_genrand( gRandomSeed );
831 foo = verify_write_int;
832
833 ptrSizes[0] = sizeof(cl_int);
834 ptrSizes[1] = ptrSizes[0] << 1;
835 ptrSizes[2] = ptrSizes[1] << 1;
836 ptrSizes[3] = ptrSizes[2] << 1;
837 ptrSizes[4] = ptrSizes[3] << 1;
838
839 for( i = 0; i < 5; i++ ){
840 inptr[i] = (int *)malloc(ptrSizes[i] * num_elements);
841
842 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
843 inptr[i][j] = genrand_int32(d);
844 }
845
846 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_int ), "int", 5, (void**)inptr,
847 stream_write_int_kernel_code, int_kernel_name, foo, d );
848
849 for( i = 0; i < 5; i++ ){
850 free( (void *)inptr[i] );
851 }
852
853 free_mtdata(d);
854
855 return err;
856
857 } // end write_int_array()
858
859
test_write_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)860 int test_write_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
861 {
862 cl_uint *inptr[5];
863 size_t ptrSizes[5];
864 int i, j, err;
865 int (*foo)(void *,void *,int);
866 MTdata d = init_genrand( gRandomSeed );
867 foo = verify_write_uint;
868
869 ptrSizes[0] = sizeof(cl_uint);
870 ptrSizes[1] = ptrSizes[0] << 1;
871 ptrSizes[2] = ptrSizes[1] << 1;
872 ptrSizes[3] = ptrSizes[2] << 1;
873 ptrSizes[4] = ptrSizes[3] << 1;
874
875 for( i = 0; i < 5; i++ ){
876 inptr[i] = (cl_uint *)malloc(ptrSizes[i] * num_elements);
877
878 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
879 inptr[i][j] = genrand_int32(d);
880 }
881
882 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5, (void **)inptr,
883 stream_write_uint_kernel_code, uint_kernel_name, foo, d );
884
885 for( i = 0; i < 5; i++ ){
886 free( (void *)inptr[i] );
887 }
888
889 free_mtdata(d);
890 return err;
891
892 } // end write_uint_array()
893
894
test_write_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)895 int test_write_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
896 {
897 short *inptr[5];
898 size_t ptrSizes[5];
899 int i, j, err;
900 int (*foo)(void *,void *,int);
901 MTdata d = init_genrand( gRandomSeed );
902 foo = verify_write_short;
903
904 ptrSizes[0] = sizeof(cl_short);
905 ptrSizes[1] = ptrSizes[0] << 1;
906 ptrSizes[2] = ptrSizes[1] << 1;
907 ptrSizes[3] = ptrSizes[2] << 1;
908 ptrSizes[4] = ptrSizes[3] << 1;
909
910 for( i = 0; i < 5; i++ ){
911 inptr[i] = (short *)malloc(ptrSizes[i] * num_elements);
912
913 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
914 inptr[i][j] = (short)genrand_int32(d);
915 }
916
917 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_short ), "short", 5, (void **)inptr,
918 stream_write_short_kernel_code, short_kernel_name, foo, d );
919
920 for( i = 0; i < 5; i++ ){
921 free( (void *)inptr[i] );
922 }
923
924 free_mtdata(d);
925 return err;
926
927 } // end write_short_array()
928
929
test_write_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)930 int test_write_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
931 {
932 cl_ushort *inptr[5];
933 size_t ptrSizes[5];
934 int i, j, err;
935 int (*foo)(void *,void *,int);
936 MTdata d = init_genrand( gRandomSeed );
937 foo = verify_write_ushort;
938
939 ptrSizes[0] = sizeof(cl_ushort);
940 ptrSizes[1] = ptrSizes[0] << 1;
941 ptrSizes[2] = ptrSizes[1] << 1;
942 ptrSizes[3] = ptrSizes[2] << 1;
943 ptrSizes[4] = ptrSizes[3] << 1;
944
945 for( i = 0; i < 5; i++ ){
946 inptr[i] = (cl_ushort *)malloc(ptrSizes[i] * num_elements);
947
948 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
949 inptr[i][j] = (cl_ushort)genrand_int32(d);
950 }
951
952 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5, (void **)inptr,
953 stream_write_ushort_kernel_code, ushort_kernel_name, foo, d );
954
955 for( i = 0; i < 5; i++ ){
956 free( (void *)inptr[i] );
957 }
958
959 free_mtdata(d);
960 return err;
961
962 } // end write_ushort_array()
963
964
test_write_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)965 int test_write_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
966 {
967 char *inptr[5];
968 size_t ptrSizes[5];
969 int i, j, err;
970 int (*foo)(void *,void *,int);
971 MTdata d = init_genrand( gRandomSeed );
972 foo = verify_write_char;
973
974 ptrSizes[0] = sizeof(cl_char);
975 ptrSizes[1] = ptrSizes[0] << 1;
976 ptrSizes[2] = ptrSizes[1] << 1;
977 ptrSizes[3] = ptrSizes[2] << 1;
978 ptrSizes[4] = ptrSizes[3] << 1;
979
980 for( i = 0; i < 5; i++ ){
981 inptr[i] = (char *)malloc(ptrSizes[i] * num_elements);
982
983 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
984 inptr[i][j] = (char)genrand_int32(d);
985 }
986
987 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_char ), "char", 5, (void **)inptr,
988 stream_write_char_kernel_code, char_kernel_name, foo, d );
989
990 for( i = 0; i < 5; i++ ){
991 free( (void *)inptr[i] );
992 }
993
994 free_mtdata(d);
995 return err;
996
997 } // end write_char_array()
998
999
test_write_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1000 int test_write_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1001 {
1002 uchar *inptr[5];
1003 size_t ptrSizes[5];
1004 int i, j, err;
1005 int (*foo)(void *,void *,int);
1006 MTdata d = init_genrand( gRandomSeed );
1007 foo = verify_write_uchar;
1008
1009 ptrSizes[0] = sizeof(cl_uchar);
1010 ptrSizes[1] = ptrSizes[0] << 1;
1011 ptrSizes[2] = ptrSizes[1] << 1;
1012 ptrSizes[3] = ptrSizes[2] << 1;
1013 ptrSizes[4] = ptrSizes[3] << 1;
1014
1015 for( i = 0; i < 5; i++ ){
1016 inptr[i] = (uchar *)malloc(ptrSizes[i] * num_elements);
1017
1018 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1019 inptr[i][j] = (uchar)genrand_int32(d);
1020 }
1021
1022 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5, (void **)inptr,
1023 stream_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1024
1025 for( i = 0; i < 5; i++ ){
1026 free( (void *)inptr[i] );
1027 }
1028
1029 free_mtdata(d);
1030 return err;
1031
1032 } // end write_uchar_array()
1033
1034
test_write_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1035 int test_write_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1036 {
1037 float *inptr[5];
1038 size_t ptrSizes[5];
1039 int i, j, err;
1040 int (*foo)(void *,void *,int);
1041 MTdata d = init_genrand( gRandomSeed );
1042 foo = verify_write_float;
1043
1044 ptrSizes[0] = sizeof(cl_float);
1045 ptrSizes[1] = ptrSizes[0] << 1;
1046 ptrSizes[2] = ptrSizes[1] << 1;
1047 ptrSizes[3] = ptrSizes[2] << 1;
1048 ptrSizes[4] = ptrSizes[3] << 1;
1049
1050 for( i = 0; i < 5; i++ ){
1051 inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1052
1053 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1054 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1055 }
1056
1057 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_float ), "float", 5, (void **)inptr,
1058 stream_write_float_kernel_code, float_kernel_name, foo, d );
1059
1060 for( i = 0; i < 5; i++ ){
1061 free( (void *)inptr[i] );
1062 }
1063
1064 free_mtdata(d);
1065 return err;
1066
1067 } // end write_float_array()
1068
1069
test_write_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1070 int test_write_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1071 {
1072 float *inptr[5];
1073 size_t ptrSizes[5];
1074 int i, j, err;
1075 int (*foo)(void *,void *,int);
1076 MTdata d = init_genrand( gRandomSeed );
1077 foo = verify_write_half;
1078
1079 ptrSizes[0] = sizeof( cl_half );
1080 ptrSizes[1] = ptrSizes[0] << 1;
1081 ptrSizes[2] = ptrSizes[1] << 1;
1082 ptrSizes[3] = ptrSizes[2] << 1;
1083 ptrSizes[4] = ptrSizes[3] << 1;
1084
1085 for( i = 0; i < 5; i++ ){
1086 inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1087
1088 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1089 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1090 }
1091
1092 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_half ), "half", 5, (void **)inptr,
1093 stream_write_half_kernel_code, half_kernel_name, foo, d );
1094
1095 for( i = 0; i < 5; i++ ){
1096 free( (void *)inptr[i] );
1097 }
1098
1099 free_mtdata(d);
1100 return err;
1101
1102 } // end write_half_array()
1103
1104
test_write_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1105 int test_write_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1106 {
1107 cl_long *inptr[5];
1108 size_t ptrSizes[5];
1109 int i, j, err;
1110 int (*foo)(void *,void *,int);
1111 MTdata d = init_genrand( gRandomSeed );
1112 foo = verify_write_long;
1113
1114 if (!gHasLong)
1115 {
1116 log_info("write_long_array: Long types unsupported, skipping.");
1117 return CL_SUCCESS;
1118 }
1119
1120 ptrSizes[0] = sizeof(cl_long);
1121 ptrSizes[1] = ptrSizes[0] << 1;
1122 ptrSizes[2] = ptrSizes[1] << 1;
1123 ptrSizes[3] = ptrSizes[2] << 1;
1124 ptrSizes[4] = ptrSizes[3] << 1;
1125
1126 for( i = 0; i < 5; i++ ){
1127 inptr[i] = (cl_long *)malloc(ptrSizes[i] * num_elements);
1128
1129 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1130 inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1131 }
1132
1133 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_long ), "cl_long", 5, (void **)inptr,
1134 stream_write_long_kernel_code, long_kernel_name, foo, d );
1135
1136 for( i = 0; i < 5; i++ ){
1137 free( (void *)inptr[i] );
1138 }
1139
1140 free_mtdata(d);
1141 return err;
1142
1143 } // end write_long_array()
1144
1145
test_write_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1146 int test_write_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1147 {
1148 cl_ulong *inptr[5];
1149 size_t ptrSizes[5];
1150 int i, j, err;
1151 int (*foo)(void *,void *,int);
1152 MTdata d = init_genrand( gRandomSeed );
1153 foo = verify_write_ulong;
1154
1155 if (!gHasLong)
1156 {
1157 log_info("write_long_array: Long types unsupported, skipping.");
1158 return CL_SUCCESS;
1159 }
1160
1161 ptrSizes[0] = sizeof(cl_ulong);
1162 ptrSizes[1] = ptrSizes[0] << 1;
1163 ptrSizes[2] = ptrSizes[1] << 1;
1164 ptrSizes[3] = ptrSizes[2] << 1;
1165 ptrSizes[4] = ptrSizes[3] << 1;
1166
1167 for( i = 0; i < 5; i++ ){
1168 inptr[i] = (cl_ulong *)malloc(ptrSizes[i] * num_elements);
1169
1170 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1171 inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1172 }
1173
1174 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong long", 5, (void **)inptr,
1175 stream_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1176
1177 for( i = 0; i < 5; i++ ){
1178 free( (void *)inptr[i] );
1179 }
1180
1181 free_mtdata(d);
1182 return err;
1183
1184 } // end write_ulong_array()
1185
1186
test_write_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1187 int test_write_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1188 {
1189 TestStruct *inptr[1];
1190 size_t ptrSizes[1];
1191 int j, err;
1192 int (*foo)(void *,void *,int);
1193 MTdata d = init_genrand( gRandomSeed );
1194 foo = verify_write_struct;
1195
1196 ptrSizes[0] = sizeof( TestStruct );
1197
1198 inptr[0] = (TestStruct *)malloc( ptrSizes[0] * num_elements );
1199
1200 for( j = 0; (unsigned int)j < ptrSizes[0] * num_elements / ptrSizes[0]; j++ ){
1201 inptr[0][j].a = (int)genrand_int32(d);
1202 inptr[0][j].b = get_random_float( 0.f, 1.844674407370954e+19f, d );
1203 }
1204
1205 err = test_stream_write( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1, (void **)inptr,
1206 stream_write_struct_kernel_code, struct_kernel_name, foo, d );
1207
1208 free( (void *)inptr[0] );
1209
1210 free_mtdata(d);
1211 return err;
1212
1213 } // end write_struct_array()
1214
1215
1216
1217
1218
1219