xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/profiling/writeArray.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 #include "harness/testHarness.h"
25 #include "harness/errorHelpers.h"
26 #include "harness/conversions.h"
27 
28 #ifndef uchar
29 typedef unsigned char uchar;
30 #endif
31 
32 #ifndef TestStruct
33 typedef struct{
34     int        a;
35     float    b;
36 } TestStruct;
37 #endif
38 
39 const char *stream_write_int_kernel_code[] = {
40     "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n"
41     "{\n"
42     "    int  tid = get_global_id(0);\n"
43     "\n"
44     "    dst[tid] = src[tid];\n"
45     "}\n",
46 
47     "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n"
48     "{\n"
49     "    int  tid = get_global_id(0);\n"
50     "\n"
51     "    dst[tid] = src[tid];\n"
52     "}\n",
53 
54     "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n"
55     "{\n"
56     "    int  tid = get_global_id(0);\n"
57     "\n"
58     "    dst[tid] = src[tid];\n"
59     "}\n",
60 
61     "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n"
62     "{\n"
63     "    int  tid = get_global_id(0);\n"
64     "\n"
65     "    dst[tid] = src[tid];\n"
66     "}\n",
67 
68     "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n"
69     "{\n"
70     "    int  tid = get_global_id(0);\n"
71     "\n"
72     "    dst[tid] = src[tid];\n"
73     "}\n" };
74 
75 static const char *int_kernel_name[] = { "test_stream_write_int", "test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" };
76 
77 
78 const char *stream_write_uint_kernel_code[] = {
79     "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n"
80     "{\n"
81     "    int  tid = get_global_id(0);\n"
82     "\n"
83     "    dst[tid] = src[tid];\n"
84     "}\n",
85 
86     "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
87     "{\n"
88     "    int  tid = get_global_id(0);\n"
89     "\n"
90     "    dst[tid] = src[tid];\n"
91     "}\n",
92 
93     "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
94     "{\n"
95     "    int  tid = get_global_id(0);\n"
96     "\n"
97     "    dst[tid] = src[tid];\n"
98     "}\n",
99 
100     "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
101     "{\n"
102     "    int  tid = get_global_id(0);\n"
103     "\n"
104     "    dst[tid] = src[tid];\n"
105     "}\n",
106 
107     "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
108     "{\n"
109     "    int  tid = get_global_id(0);\n"
110     "\n"
111     "    dst[tid] = src[tid];\n"
112     "}\n" };
113 
114 static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" };
115 
116 
117 const char *stream_write_ushort_kernel_code[] = {
118     "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n"
119     "{\n"
120     "    int  tid = get_global_id(0);\n"
121     "\n"
122     "    dst[tid] = src[tid];\n"
123     "}\n",
124 
125     "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
126     "{\n"
127     "    int  tid = get_global_id(0);\n"
128     "\n"
129     "    dst[tid] = src[tid];\n"
130     "}\n",
131 
132     "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
133     "{\n"
134     "    int  tid = get_global_id(0);\n"
135     "\n"
136     "    dst[tid] = src[tid];\n"
137     "}\n",
138 
139     "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
140     "{\n"
141     "    int  tid = get_global_id(0);\n"
142     "\n"
143     "    dst[tid] = src[tid];\n"
144     "}\n",
145 
146     "__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
147     "{\n"
148     "    int  tid = get_global_id(0);\n"
149     "\n"
150     "    dst[tid] = src[tid];\n"
151     "}\n" };
152 
153 static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" };
154 
155 
156 
157 const char *stream_write_short_kernel_code[] = {
158     "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n"
159     "{\n"
160     "    int  tid = get_global_id(0);\n"
161     "\n"
162     "    dst[tid] = src[tid];\n"
163     "}\n",
164 
165     "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n"
166     "{\n"
167     "    int  tid = get_global_id(0);\n"
168     "\n"
169     "    dst[tid] = src[tid];\n"
170     "}\n",
171 
172     "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n"
173     "{\n"
174     "    int  tid = get_global_id(0);\n"
175     "\n"
176     "    dst[tid] = src[tid];\n"
177     "}\n",
178 
179     "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n"
180     "{\n"
181     "    int  tid = get_global_id(0);\n"
182     "\n"
183     "    dst[tid] = src[tid];\n"
184     "}\n",
185 
186     "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n"
187     "{\n"
188     "    int  tid = get_global_id(0);\n"
189     "\n"
190     "    dst[tid] = src[tid];\n"
191     "}\n" };
192 
193 static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" };
194 
195 
196 const char *stream_write_char_kernel_code[] = {
197     "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n"
198     "{\n"
199     "    int  tid = get_global_id(0);\n"
200     "\n"
201     "    dst[tid] = src[tid];\n"
202     "}\n",
203 
204     "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n"
205     "{\n"
206     "    int  tid = get_global_id(0);\n"
207     "\n"
208     "    dst[tid] = src[tid];\n"
209     "}\n",
210 
211     "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n"
212     "{\n"
213     "    int  tid = get_global_id(0);\n"
214     "\n"
215     "    dst[tid] = src[tid];\n"
216     "}\n",
217 
218     "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n"
219     "{\n"
220     "    int  tid = get_global_id(0);\n"
221     "\n"
222     "    dst[tid] = src[tid];\n"
223     "}\n",
224 
225     "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n"
226     "{\n"
227     "    int  tid = get_global_id(0);\n"
228     "\n"
229     "    dst[tid] = src[tid];\n"
230     "}\n" };
231 
232 static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" };
233 
234 
235 const char *stream_write_uchar_kernel_code[] = {
236     "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n"
237     "{\n"
238     "    int  tid = get_global_id(0);\n"
239     "\n"
240     "    dst[tid] = src[tid];\n"
241     "}\n",
242 
243     "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
244     "{\n"
245     "    int  tid = get_global_id(0);\n"
246     "\n"
247     "    dst[tid] = src[tid];\n"
248     "}\n",
249 
250     "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
251     "{\n"
252     "    int  tid = get_global_id(0);\n"
253     "\n"
254     "    dst[tid] = src[tid];\n"
255     "}\n",
256 
257     "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
258     "{\n"
259     "    int  tid = get_global_id(0);\n"
260     "\n"
261     "    dst[tid] = src[tid];\n"
262     "}\n",
263 
264     "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
265     "{\n"
266     "    int  tid = get_global_id(0);\n"
267     "\n"
268     "    dst[tid] = src[tid];\n"
269     "}\n" };
270 
271 static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" };
272 
273 
274 const char *stream_write_float_kernel_code[] = {
275     "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n"
276     "{\n"
277     "    int  tid = get_global_id(0);\n"
278     "\n"
279     "    dst[tid] = src[tid];\n"
280     "}\n",
281 
282     "__kernel void test_stream_write_float2(__global float2 *src, __global float2 *dst)\n"
283     "{\n"
284     "    int  tid = get_global_id(0);\n"
285     "\n"
286     "    dst[tid] = src[tid];\n"
287     "}\n",
288 
289     "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n"
290     "{\n"
291     "    int  tid = get_global_id(0);\n"
292     "\n"
293     "    dst[tid] = src[tid];\n"
294     "}\n",
295 
296     "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n"
297     "{\n"
298     "    int  tid = get_global_id(0);\n"
299     "\n"
300     "    dst[tid] = src[tid];\n"
301     "}\n",
302 
303     "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n"
304     "{\n"
305     "    int  tid = get_global_id(0);\n"
306     "\n"
307     "    dst[tid] = src[tid];\n"
308     "}\n" };
309 
310 static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" };
311 
312 
313 const char *stream_write_half_kernel_code[] = {
314     "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n"
315     "{\n"
316     "    int  tid = get_global_id(0);\n"
317     "\n"
318     "    dst[tid] = vload_half( tid * 2, src );\n"
319     "}\n",
320 
321     "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n"
322     "{\n"
323     "    int  tid = get_global_id(0);\n"
324     "\n"
325     "    dst[tid] = vload_half2( tid * 2, src );\n"
326     "}\n",
327 
328     "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n"
329     "{\n"
330     "    int  tid = get_global_id(0);\n"
331     "\n"
332     "    dst[tid] = vload_half4( tid * 2, src );\n"
333     "}\n",
334 
335     "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n"
336     "{\n"
337     "    int  tid = get_global_id(0);\n"
338     "\n"
339     "    dst[tid] = vload_half8( tid * 2, src );\n"
340     "}\n",
341 
342     "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n"
343     "{\n"
344     "    int  tid = get_global_id(0);\n"
345     "\n"
346     "    dst[tid] = vload_half16( tid * 2, src );\n"
347     "}\n" };
348 
349 static const char *half_kernel_name[] = { "test_stream_write_half", "test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" };
350 
351 
352 const char *stream_write_long_kernel_code[] = {
353     "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n"
354     "{\n"
355     "    int  tid = get_global_id(0);\n"
356     "\n"
357     "    dst[tid] = src[tid];\n"
358     "}\n",
359 
360     "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n"
361     "{\n"
362     "    int  tid = get_global_id(0);\n"
363     "\n"
364     "    dst[tid] = src[tid];\n"
365     "}\n",
366 
367     "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n"
368     "{\n"
369     "    int  tid = get_global_id(0);\n"
370     "\n"
371     "    dst[tid] = src[tid];\n"
372     "}\n",
373 
374     "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n"
375     "{\n"
376     "    int  tid = get_global_id(0);\n"
377     "\n"
378     "    dst[tid] = src[tid];\n"
379     "}\n",
380 
381     "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n"
382     "{\n"
383     "    int  tid = get_global_id(0);\n"
384     "\n"
385     "    dst[tid] = src[tid];\n"
386     "}\n" };
387 
388 static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" };
389 
390 
391 const char *stream_write_ulong_kernel_code[] = {
392     "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n"
393     "{\n"
394     "    int  tid = get_global_id(0);\n"
395     "\n"
396     "    dst[tid] = src[tid];\n"
397     "}\n",
398 
399     "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
400     "{\n"
401     "    int  tid = get_global_id(0);\n"
402     "\n"
403     "    dst[tid] = src[tid];\n"
404     "}\n",
405 
406     "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
407     "{\n"
408     "    int  tid = get_global_id(0);\n"
409     "\n"
410     "    dst[tid] = src[tid];\n"
411     "}\n",
412 
413     "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
414     "{\n"
415     "    int  tid = get_global_id(0);\n"
416     "\n"
417     "    dst[tid] = src[tid];\n"
418     "}\n",
419 
420     "__kernel void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
421     "{\n"
422     "    int  tid = get_global_id(0);\n"
423     "\n"
424     "    dst[tid] = src[tid];\n"
425     "}\n" };
426 
427 static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" };
428 
429 
430 static const char *stream_write_struct_kernel_code[] = {
431     "typedef struct{\n"
432     "int    a;\n"
433     "float    b;\n"
434     "} TestStruct;\n"
435     "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
436     "{\n"
437     "    int  tid = get_global_id(0);\n"
438     "\n"
439     "    dst[tid].a = src[tid].a;\n"
440     "     dst[tid].b = src[tid].b;\n"
441     "}\n" };
442 
443 static const char *struct_kernel_name[] = { "read_write_struct" };
444 
445 
verify_write_int(void * ptr1,void * ptr2,int n)446 static int verify_write_int( void *ptr1, void *ptr2, int n )
447 {
448     int        i;
449     int        *inptr = (int *)ptr1;
450     int        *outptr = (int *)ptr2;
451 
452     for (i=0; i<n; i++){
453         if( outptr[i] != inptr[i] )
454             return -1;
455     }
456 
457     return 0;
458 }
459 
460 
verify_write_uint(void * ptr1,void * ptr2,int n)461 static int verify_write_uint( void *ptr1, void *ptr2, int n )
462 {
463     int        i;
464     cl_uint    *inptr = (cl_uint *)ptr1;
465     cl_uint    *outptr = (cl_uint *)ptr2;
466 
467     for (i=0; i<n; i++){
468         if( outptr[i] != inptr[i] )
469             return -1;
470     }
471 
472     return 0;
473 }
474 
475 
verify_write_short(void * ptr1,void * ptr2,int n)476 static int verify_write_short( void *ptr1, void *ptr2, int n )
477 {
478     int        i;
479     short    *inptr = (short *)ptr1;
480     short    *outptr = (short *)ptr2;
481 
482     for (i=0; i<n; i++){
483         if( outptr[i] != inptr[i] )
484             return -1;
485     }
486 
487     return 0;
488 }
489 
490 
verify_write_ushort(void * ptr1,void * ptr2,int n)491 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
492 {
493     int        i;
494     cl_ushort    *inptr = (cl_ushort *)ptr1;
495     cl_ushort    *outptr = (cl_ushort *)ptr2;
496 
497     for (i=0; i<n; i++){
498         if( outptr[i] != inptr[i] )
499             return -1;
500     }
501 
502     return 0;
503 }
504 
505 
verify_write_char(void * ptr1,void * ptr2,int n)506 static int verify_write_char( void *ptr1, void *ptr2, int n )
507 {
508     int        i;
509     char    *inptr = (char *)ptr1;
510     char    *outptr = (char *)ptr2;
511 
512     for (i=0; i<n; i++){
513         if( outptr[i] != inptr[i] )
514             return -1;
515     }
516 
517     return 0;
518 }
519 
520 
verify_write_uchar(void * ptr1,void * ptr2,int n)521 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
522 {
523     int        i;
524     uchar    *inptr = (uchar *)ptr1;
525     uchar    *outptr = (uchar *)ptr2;
526 
527     for (i=0; i<n; i++){
528         if( outptr[i] != inptr[i] )
529             return -1;
530     }
531 
532     return 0;
533 }
534 
535 
verify_write_float(void * ptr1,void * ptr2,int n)536 static int verify_write_float( void *ptr1, void *ptr2, int n )
537 {
538     int        i;
539     float    *inptr = (float *)ptr1;
540     float    *outptr = (float *)ptr2;
541 
542     for (i=0; i<n; i++){
543         if( outptr[i] != inptr[i] )
544             return -1;
545     }
546 
547     return 0;
548 }
549 
550 
verify_write_half(void * ptr1,void * ptr2,int n)551 static int verify_write_half( void *ptr1, void *ptr2, int n )
552 {
553     int        i;
554     cl_half *inptr = (cl_half *)ptr1;
555     cl_half *outptr = (cl_half *)ptr2;
556 
557     for( i = 0; i < n; i++ ){
558         if( outptr[i] != inptr[i] )
559             return -1;
560     }
561 
562     return 0;
563 }
564 
565 
verify_write_long(void * ptr1,void * ptr2,int n)566 static int verify_write_long( void *ptr1, void *ptr2, int n )
567 {
568     int        i;
569     cl_long    *inptr = (cl_long *)ptr1;
570     cl_long    *outptr = (cl_long *)ptr2;
571 
572     for (i=0; i<n; i++){
573         if( outptr[i] != inptr[i] )
574             return -1;
575     }
576 
577     return 0;
578 }
579 
580 
verify_write_ulong(void * ptr1,void * ptr2,int n)581 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
582 {
583     int        i;
584     cl_ulong    *inptr = (cl_ulong *)ptr1;
585     cl_ulong    *outptr = (cl_ulong *)ptr2;
586 
587     for (i=0; i<n; i++){
588         if( outptr[i] != inptr[i] )
589             return -1;
590     }
591 
592     return 0;
593 }
594 
595 
verify_write_struct(void * ptr1,void * ptr2,int n)596 static int verify_write_struct( void *ptr1, void *ptr2, int n )
597 {
598     int            i;
599     TestStruct    *inptr = (TestStruct *)ptr1;
600     TestStruct    *outptr = (TestStruct *)ptr2;
601 
602     for (i=0; i<n; i++){
603         if( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
604             return -1;
605     }
606 
607     return 0;
608 }
609 
610 
test_stream_write(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)611 int test_stream_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
612                       void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
613 {
614     cl_mem            streams[10];
615     void            *outptr[5];
616     cl_program        program[5];
617     cl_kernel        kernel[5];
618     cl_event        writeEvent;
619     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
620     size_t            ptrSizes[5], outPtrSizes[5];
621     size_t            threads[1];
622     int                err, err_count = 0;
623     int                i, ii;
624 
625     threads[0] = (size_t)num_elements;
626 
627     ptrSizes[0] = size;
628     ptrSizes[1] = ptrSizes[0] << 1;
629     ptrSizes[2] = ptrSizes[1] << 1;
630     ptrSizes[3] = ptrSizes[2] << 1;
631     ptrSizes[4] = ptrSizes[3] << 1;
632 
633     loops = ( loops < 5 ? loops : 5 );
634 
635     for( i = 0; i < loops; i++ )
636     {
637         outPtrSizes[i] = ptrSizes[i];
638     }
639 
640     for( i = 0; i < loops; i++ ){
641         ii = i << 1;
642         streams[ii] = clCreateBuffer(context, CL_MEM_READ_WRITE,
643                                      ptrSizes[i] * num_elements, NULL, &err);
644         if( ! streams[ii] ){
645             free( outptr[i] );
646             log_error( " clCreateBuffer failed\n" );
647             return -1;
648         }
649         if( ! strcmp( type, "half" ) ){
650             outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
651             streams[ii + 1] =
652                 clCreateBuffer(context, CL_MEM_READ_WRITE,
653                                outPtrSizes[i] * 2 * num_elements, NULL, &err);
654         }
655         else{
656             outptr[i] = malloc( outPtrSizes[i] * num_elements );
657             streams[ii + 1] =
658                 clCreateBuffer(context, CL_MEM_READ_WRITE,
659                                outPtrSizes[i] * num_elements, NULL, &err);
660         }
661         if( ! streams[ii+1] ){
662             clReleaseMemObject(streams[ii]);
663             free( outptr[i] );
664             log_error( " clCreateBuffer failed\n" );
665             return -1;
666         }
667 
668         err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent );
669         if( err != CL_SUCCESS ){
670             clReleaseMemObject( streams[ii] );
671             clReleaseMemObject( streams[ii+1] );
672             free( outptr[i] );
673             print_error( err, " clWriteArray failed" );
674             return -1;
675         }
676 
677         // This synchronization point is needed in order to assume the data is valid.
678         // Getting profiling information is not a synchronization point.
679         err = clWaitForEvents( 1, &writeEvent );
680         if( err != CL_SUCCESS )
681         {
682             print_error( err, "Unable to wait for event completion" );
683             clReleaseEvent(writeEvent);
684             clReleaseMemObject( streams[ii] );
685             clReleaseMemObject( streams[ii+1] );
686             free( outptr[i] );
687             return -1;
688         }
689 
690         // test profiling
691         while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
692               CL_PROFILING_INFO_NOT_AVAILABLE );
693         if( err != CL_SUCCESS ){
694             print_error( err, "clGetEventProfilingInfo failed" );
695             clReleaseEvent(writeEvent);
696             clReleaseMemObject( streams[ii] );
697             clReleaseMemObject( streams[ii+1] );
698             free( outptr[i] );
699             return -1;
700         }
701 
702         while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
703               CL_PROFILING_INFO_NOT_AVAILABLE );
704         if( err != CL_SUCCESS ){
705             print_error( err, "clGetEventProfilingInfo failed" );
706             clReleaseEvent(writeEvent);
707             clReleaseMemObject( streams[ii] );
708             clReleaseMemObject( streams[ii+1] );
709             free( outptr[i] );
710             return -1;
711         }
712 
713         err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
714         if( err != CL_SUCCESS ){
715             print_error( err, "clGetEventProfilingInfo failed" );
716             clReleaseEvent(writeEvent);
717             clReleaseMemObject( streams[ii] );
718             clReleaseMemObject( streams[ii+1] );
719             free( outptr[i] );
720             return -1;
721         }
722 
723         err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
724         if( err != CL_SUCCESS ){
725             print_error( err, "clGetEventProfilingInfo failed" );
726             clReleaseEvent(writeEvent);
727             clReleaseMemObject( streams[ii] );
728             clReleaseMemObject( streams[ii+1] );
729             free( outptr[i] );
730             return -1;
731         }
732 
733 
734         err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
735         if( err ){
736             clReleaseEvent(writeEvent);
737             clReleaseMemObject(streams[ii]);
738             clReleaseMemObject(streams[ii+1]);
739             free( outptr[i] );
740             log_error( " Error creating program for %s\n", type );
741             return -1;
742         }
743 
744         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
745         err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
746         if (err != CL_SUCCESS){
747             clReleaseEvent(writeEvent);
748             clReleaseKernel( kernel[i] );
749             clReleaseProgram( program[i] );
750             clReleaseMemObject( streams[ii] );
751             clReleaseMemObject( streams[ii+1] );
752             free( outptr[i] );
753             print_error( err, " clSetKernelArg failed" );
754             return -1;
755         }
756 
757         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
758 
759         if( err != CL_SUCCESS ){
760             print_error( err, " clEnqueueNDRangeKernel failed" );
761             clReleaseEvent(writeEvent);
762             clReleaseKernel( kernel[i] );
763             clReleaseProgram( program[i] );
764             clReleaseMemObject( streams[ii] );
765             clReleaseMemObject( streams[ii+1] );
766             free( outptr[i] );
767             return -1;
768         }
769 
770         if( ! strcmp( type, "half" ) ){
771             err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
772         }
773         else{
774             err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
775         }
776         if( err != CL_SUCCESS ){
777             clReleaseEvent(writeEvent);
778             clReleaseKernel( kernel[i] );
779             clReleaseProgram( program[i] );
780             clReleaseMemObject( streams[ii] );
781             clReleaseMemObject( streams[ii+1] );
782             free( outptr[i] );
783             print_error( err, " clEnqueueReadBuffer failed" );
784             return -1;
785         }
786 
787         char *inP = (char *)inptr[i];
788         char *outP = (char *)outptr[i];
789         int err2 = 0;
790         for( size_t p = 0; p < (size_t)num_elements; p++ )
791         {
792             if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) )
793             {
794                 log_error( " %s%d data failed to verify\n", type, 1<<i );
795                 err2 = -1;
796                 err_count++;
797             }
798             inP += ptrSizes[i];
799             outP += outPtrSizes[i];
800         }
801         if( !err2 )
802         {
803             log_info(" %s%d data verified\n", type, 1 << i);
804         }
805         err = err2;
806 
807         if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
808             err_count++;
809 
810         // cleanup
811         clReleaseEvent(writeEvent);
812         clReleaseKernel( kernel[i] );
813         clReleaseProgram( program[i] );
814         clReleaseMemObject( streams[ii] );
815         clReleaseMemObject( streams[ii+1] );
816         free( outptr[i] );
817     }
818 
819     return err_count;
820 
821 }    // end test_stream_write()
822 
823 
test_write_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)824 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
825 {
826     int    *inptr[5];
827     size_t    ptrSizes[5];
828     int        i, j, err;
829     int    (*foo)(void *,void *,int);
830     MTdata d = init_genrand( gRandomSeed );
831     foo = verify_write_int;
832 
833     ptrSizes[0] = sizeof(cl_int);
834     ptrSizes[1] = ptrSizes[0] << 1;
835     ptrSizes[2] = ptrSizes[1] << 1;
836     ptrSizes[3] = ptrSizes[2] << 1;
837     ptrSizes[4] = ptrSizes[3] << 1;
838 
839     for( i = 0; i < 5; i++ ){
840         inptr[i] = (int *)malloc(ptrSizes[i] * num_elements);
841 
842         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
843             inptr[i][j] = genrand_int32(d);
844     }
845 
846     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_int ), "int", 5, (void**)inptr,
847                             stream_write_int_kernel_code, int_kernel_name, foo, d );
848 
849     for( i = 0; i < 5; i++ ){
850         free( (void *)inptr[i] );
851     }
852 
853     free_mtdata(d);
854 
855     return err;
856 
857 }    // end write_int_array()
858 
859 
test_write_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)860 int test_write_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
861 {
862     cl_uint    *inptr[5];
863     size_t    ptrSizes[5];
864     int        i, j, err;
865     int    (*foo)(void *,void *,int);
866     MTdata d = init_genrand( gRandomSeed );
867     foo = verify_write_uint;
868 
869     ptrSizes[0] = sizeof(cl_uint);
870     ptrSizes[1] = ptrSizes[0] << 1;
871     ptrSizes[2] = ptrSizes[1] << 1;
872     ptrSizes[3] = ptrSizes[2] << 1;
873     ptrSizes[4] = ptrSizes[3] << 1;
874 
875     for( i = 0; i < 5; i++ ){
876         inptr[i] = (cl_uint *)malloc(ptrSizes[i] * num_elements);
877 
878         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
879             inptr[i][j] = genrand_int32(d);
880     }
881 
882     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5, (void **)inptr,
883                             stream_write_uint_kernel_code, uint_kernel_name, foo, d );
884 
885     for( i = 0; i < 5; i++ ){
886         free( (void *)inptr[i] );
887     }
888 
889     free_mtdata(d);
890     return err;
891 
892 }    // end write_uint_array()
893 
894 
test_write_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)895 int test_write_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
896 {
897     short    *inptr[5];
898     size_t    ptrSizes[5];
899     int        i, j, err;
900     int    (*foo)(void *,void *,int);
901     MTdata d = init_genrand( gRandomSeed );
902     foo = verify_write_short;
903 
904     ptrSizes[0] = sizeof(cl_short);
905     ptrSizes[1] = ptrSizes[0] << 1;
906     ptrSizes[2] = ptrSizes[1] << 1;
907     ptrSizes[3] = ptrSizes[2] << 1;
908     ptrSizes[4] = ptrSizes[3] << 1;
909 
910     for( i = 0; i < 5; i++ ){
911         inptr[i] = (short *)malloc(ptrSizes[i] * num_elements);
912 
913         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
914             inptr[i][j] = (short)genrand_int32(d);
915     }
916 
917     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_short ), "short", 5, (void **)inptr,
918                             stream_write_short_kernel_code, short_kernel_name, foo, d );
919 
920     for( i = 0; i < 5; i++ ){
921         free( (void *)inptr[i] );
922     }
923 
924     free_mtdata(d);
925     return err;
926 
927 }    // end write_short_array()
928 
929 
test_write_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)930 int test_write_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
931 {
932     cl_ushort    *inptr[5];
933     size_t    ptrSizes[5];
934     int        i, j, err;
935     int    (*foo)(void *,void *,int);
936     MTdata d = init_genrand( gRandomSeed );
937     foo = verify_write_ushort;
938 
939     ptrSizes[0] = sizeof(cl_ushort);
940     ptrSizes[1] = ptrSizes[0] << 1;
941     ptrSizes[2] = ptrSizes[1] << 1;
942     ptrSizes[3] = ptrSizes[2] << 1;
943     ptrSizes[4] = ptrSizes[3] << 1;
944 
945     for( i = 0; i < 5; i++ ){
946         inptr[i] = (cl_ushort *)malloc(ptrSizes[i] * num_elements);
947 
948         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
949             inptr[i][j] = (cl_ushort)genrand_int32(d);
950     }
951 
952     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5, (void **)inptr,
953                             stream_write_ushort_kernel_code, ushort_kernel_name, foo, d );
954 
955     for( i = 0; i < 5; i++ ){
956         free( (void *)inptr[i] );
957     }
958 
959     free_mtdata(d);
960     return err;
961 
962 }    // end write_ushort_array()
963 
964 
test_write_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)965 int test_write_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
966 {
967     char    *inptr[5];
968     size_t    ptrSizes[5];
969     int        i, j, err;
970     int    (*foo)(void *,void *,int);
971     MTdata d = init_genrand( gRandomSeed );
972     foo = verify_write_char;
973 
974     ptrSizes[0] = sizeof(cl_char);
975     ptrSizes[1] = ptrSizes[0] << 1;
976     ptrSizes[2] = ptrSizes[1] << 1;
977     ptrSizes[3] = ptrSizes[2] << 1;
978     ptrSizes[4] = ptrSizes[3] << 1;
979 
980     for( i = 0; i < 5; i++ ){
981         inptr[i] = (char *)malloc(ptrSizes[i] * num_elements);
982 
983         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
984             inptr[i][j] = (char)genrand_int32(d);
985     }
986 
987     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_char ), "char", 5, (void **)inptr,
988                             stream_write_char_kernel_code, char_kernel_name, foo, d );
989 
990     for( i = 0; i < 5; i++ ){
991         free( (void *)inptr[i] );
992     }
993 
994     free_mtdata(d);
995     return err;
996 
997 }    // end write_char_array()
998 
999 
test_write_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1000 int test_write_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1001 {
1002     uchar    *inptr[5];
1003     size_t    ptrSizes[5];
1004     int        i, j, err;
1005     int    (*foo)(void *,void *,int);
1006     MTdata d = init_genrand( gRandomSeed );
1007     foo = verify_write_uchar;
1008 
1009     ptrSizes[0] = sizeof(cl_uchar);
1010     ptrSizes[1] = ptrSizes[0] << 1;
1011     ptrSizes[2] = ptrSizes[1] << 1;
1012     ptrSizes[3] = ptrSizes[2] << 1;
1013     ptrSizes[4] = ptrSizes[3] << 1;
1014 
1015     for( i = 0; i < 5; i++ ){
1016         inptr[i] = (uchar *)malloc(ptrSizes[i] * num_elements);
1017 
1018         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1019             inptr[i][j] = (uchar)genrand_int32(d);
1020     }
1021 
1022     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5, (void **)inptr,
1023                             stream_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1024 
1025     for( i = 0; i < 5; i++ ){
1026         free( (void *)inptr[i] );
1027     }
1028 
1029     free_mtdata(d);
1030     return err;
1031 
1032 }    // end write_uchar_array()
1033 
1034 
test_write_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1035 int test_write_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1036 {
1037     float    *inptr[5];
1038     size_t    ptrSizes[5];
1039     int        i, j, err;
1040     int    (*foo)(void *,void *,int);
1041     MTdata d = init_genrand( gRandomSeed );
1042     foo = verify_write_float;
1043 
1044     ptrSizes[0] = sizeof(cl_float);
1045     ptrSizes[1] = ptrSizes[0] << 1;
1046     ptrSizes[2] = ptrSizes[1] << 1;
1047     ptrSizes[3] = ptrSizes[2] << 1;
1048     ptrSizes[4] = ptrSizes[3] << 1;
1049 
1050     for( i = 0; i < 5; i++ ){
1051         inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1052 
1053         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1054             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1055     }
1056 
1057     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_float ), "float", 5, (void **)inptr,
1058                             stream_write_float_kernel_code, float_kernel_name, foo, d );
1059 
1060     for( i = 0; i < 5; i++ ){
1061         free( (void *)inptr[i] );
1062     }
1063 
1064     free_mtdata(d);
1065     return err;
1066 
1067 }    // end write_float_array()
1068 
1069 
test_write_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1070 int test_write_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1071 {
1072     float    *inptr[5];
1073     size_t    ptrSizes[5];
1074     int        i, j, err;
1075     int    (*foo)(void *,void *,int);
1076     MTdata d = init_genrand( gRandomSeed );
1077     foo = verify_write_half;
1078 
1079     ptrSizes[0] = sizeof( cl_half );
1080     ptrSizes[1] = ptrSizes[0] << 1;
1081     ptrSizes[2] = ptrSizes[1] << 1;
1082     ptrSizes[3] = ptrSizes[2] << 1;
1083     ptrSizes[4] = ptrSizes[3] << 1;
1084 
1085     for( i = 0; i < 5; i++ ){
1086         inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1087 
1088         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1089             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1090     }
1091 
1092     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_half ), "half", 5, (void **)inptr,
1093                             stream_write_half_kernel_code, half_kernel_name, foo, d );
1094 
1095     for( i = 0; i < 5; i++ ){
1096         free( (void *)inptr[i] );
1097     }
1098 
1099     free_mtdata(d);
1100     return err;
1101 
1102 }    // end write_half_array()
1103 
1104 
test_write_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1105 int test_write_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1106 {
1107     cl_long    *inptr[5];
1108     size_t        ptrSizes[5];
1109     int            i, j, err;
1110     int    (*foo)(void *,void *,int);
1111     MTdata d = init_genrand( gRandomSeed );
1112     foo = verify_write_long;
1113 
1114     if (!gHasLong)
1115     {
1116         log_info("write_long_array: Long types unsupported, skipping.");
1117         return CL_SUCCESS;
1118     }
1119 
1120     ptrSizes[0] = sizeof(cl_long);
1121     ptrSizes[1] = ptrSizes[0] << 1;
1122     ptrSizes[2] = ptrSizes[1] << 1;
1123     ptrSizes[3] = ptrSizes[2] << 1;
1124     ptrSizes[4] = ptrSizes[3] << 1;
1125 
1126     for( i = 0; i < 5; i++ ){
1127         inptr[i] = (cl_long *)malloc(ptrSizes[i] * num_elements);
1128 
1129         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1130             inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1131     }
1132 
1133     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_long ), "cl_long", 5, (void **)inptr,
1134                             stream_write_long_kernel_code, long_kernel_name, foo, d );
1135 
1136     for( i = 0; i < 5; i++ ){
1137         free( (void *)inptr[i] );
1138     }
1139 
1140     free_mtdata(d);
1141     return err;
1142 
1143 }    // end write_long_array()
1144 
1145 
test_write_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1146 int test_write_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1147 {
1148     cl_ulong    *inptr[5];
1149     size_t                ptrSizes[5];
1150     int                    i, j, err;
1151     int    (*foo)(void *,void *,int);
1152     MTdata d = init_genrand( gRandomSeed );
1153     foo = verify_write_ulong;
1154 
1155     if (!gHasLong)
1156     {
1157         log_info("write_long_array: Long types unsupported, skipping.");
1158         return CL_SUCCESS;
1159     }
1160 
1161     ptrSizes[0] = sizeof(cl_ulong);
1162     ptrSizes[1] = ptrSizes[0] << 1;
1163     ptrSizes[2] = ptrSizes[1] << 1;
1164     ptrSizes[3] = ptrSizes[2] << 1;
1165     ptrSizes[4] = ptrSizes[3] << 1;
1166 
1167     for( i = 0; i < 5; i++ ){
1168         inptr[i] = (cl_ulong *)malloc(ptrSizes[i] * num_elements);
1169 
1170         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1171             inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1172     }
1173 
1174     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong long", 5, (void **)inptr,
1175                             stream_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1176 
1177     for( i = 0; i < 5; i++ ){
1178         free( (void *)inptr[i] );
1179     }
1180 
1181     free_mtdata(d);
1182     return err;
1183 
1184 }    // end write_ulong_array()
1185 
1186 
test_write_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1187 int test_write_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1188 {
1189     TestStruct            *inptr[1];
1190     size_t                ptrSizes[1];
1191     int                    j, err;
1192     int    (*foo)(void *,void *,int);
1193     MTdata d = init_genrand( gRandomSeed );
1194     foo = verify_write_struct;
1195 
1196     ptrSizes[0] = sizeof( TestStruct );
1197 
1198     inptr[0] = (TestStruct *)malloc( ptrSizes[0] * num_elements );
1199 
1200     for( j = 0; (unsigned int)j < ptrSizes[0] * num_elements / ptrSizes[0]; j++ ){
1201         inptr[0][j].a = (int)genrand_int32(d);
1202         inptr[0][j].b = get_random_float( 0.f, 1.844674407370954e+19f, d );
1203     }
1204 
1205     err = test_stream_write( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1, (void **)inptr,
1206                             stream_write_struct_kernel_code, struct_kernel_name, foo, d );
1207 
1208     free( (void *)inptr[0] );
1209 
1210     free_mtdata(d);
1211     return err;
1212 
1213 }    // end write_struct_array()
1214 
1215 
1216 
1217 
1218 
1219