1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24
25 #include "procs.h"
26
27 const char *hostptr_kernel_code =
28 "__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34
verify_hostptr(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n)35 static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
36 {
37 cl_float r;
38 int i;
39
40 for (i=0; i<n; i++)
41 {
42 r = inptrA[i] + inptrB[i];
43 if (r != outptr[i])
44 {
45 return -1;
46 }
47 }
48 return 0;
49 }
50
make_random_data(unsigned count,float * ptr,MTdata d)51 static void make_random_data(unsigned count, float *ptr, MTdata d)
52 {
53 cl_uint i;
54 for (i=0; i<count; i++)
55 ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
56 }
57
58 static unsigned char *
generate_rgba8_image(int w,int h,MTdata d)59 generate_rgba8_image(int w, int h, MTdata d)
60 {
61 unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
62 int i;
63
64 for (i=0; i<w*h*4; i++)
65 ptr[i] = (unsigned char)genrand_int32(d);
66
67 return ptr;
68 }
69
70 static unsigned char *
randomize_rgba8_image(unsigned char * ptr,int w,int h,MTdata d)71 randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
72 {
73 int i;
74
75 for (i=0; i<w*h*4; i++)
76 ptr[i] = (unsigned char)genrand_int32(d);
77
78 return ptr;
79 }
80
81 static int
verify_rgba8_image(unsigned char * image,unsigned char * outptr,int w,int h)82 verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
83 {
84 int i;
85
86 for (i=0; i<w*h*4; i++)
87 {
88 if (outptr[i] != image[i])
89 return -1;
90 }
91
92 return 0;
93 }
94
95 int
test_hostptr(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)96 test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
97 {
98 cl_float *input_ptr[2], *output_ptr;
99 cl_program program;
100 cl_kernel kernel;
101 size_t threads[3]={0,0,0};
102 cl_image_format img_format;
103 cl_uchar *rgba8_inptr, *rgba8_outptr;
104 void *lock_buffer;
105 int img_width = 512;
106 int img_height = 512;
107 cl_int err;
108 MTdata d;
109 RoundingMode oldRoundMode;
110 int isRTZ = 0;
111
112 // Block to mark deletion of streams before deletion of host_ptr
113 {
114 clMemWrapper streams[7];
115
116 PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
117
118 // Alloc buffers
119 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
120 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
121 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
122
123 d = init_genrand( gRandomSeed );
124 rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
125 rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
126
127 // Random data
128 make_random_data(num_elements, input_ptr[0], d);
129 make_random_data(num_elements, input_ptr[1], d);
130
131 // Create host-side input
132 streams[0] =
133 clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
134 sizeof(cl_float) * num_elements, input_ptr[0], &err);
135 test_error(err, "clCreateBuffer 0 failed");
136
137 // Create a copied input
138 streams[1] =
139 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
140 sizeof(cl_float) * num_elements, input_ptr[1], &err);
141 test_error(err, "clCreateBuffer 1 failed");
142
143 // Create a host-side output
144 streams[2] =
145 clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
146 sizeof(cl_float) * num_elements, output_ptr, &err);
147 test_error(err, "clCreateBuffer 2 failed");
148
149 // Create a host-side input
150 img_format.image_channel_order = CL_RGBA;
151 img_format.image_channel_data_type = CL_UNORM_INT8;
152 streams[3] =
153 create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
154 img_width, img_height, 0, rgba8_inptr, &err);
155 test_error(err, "create_image_2d 3 failed");
156
157 // Create a copied input
158 img_format.image_channel_order = CL_RGBA;
159 img_format.image_channel_data_type = CL_UNORM_INT8;
160 streams[4] =
161 create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
162 img_width, img_height, 0, rgba8_inptr, &err);
163 test_error(err, "create_image_2d 4 failed");
164
165 // Create a host-side output
166 img_format.image_channel_order = CL_RGBA;
167 img_format.image_channel_data_type = CL_UNORM_INT8;
168 streams[5] =
169 create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
170 img_width, img_height, 0, rgba8_outptr, &err);
171 test_error(err, "create_image_2d 5 failed");
172
173 // Create a copied output
174 img_format.image_channel_data_type = CL_RGBA;
175 img_format.image_channel_data_type = CL_UNORM_INT8;
176 streams[6] =
177 create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
178 img_width, img_height, 0, rgba8_outptr, &err);
179 test_error(err, "create_image_2d 6 failed");
180
181 err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
182 test_error(err, "create_single_kernel_helper failed");
183
184 // Execute kernel
185 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
186 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
187 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
188 test_error(err, "clSetKernelArg failed");
189
190 threads[0] = (size_t)num_elements;
191 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
192 test_error(err, "clEnqueueNDRangeKernel failed");
193
194 cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
195 test_error( err, "clEnqueueMapBuffer failed" );
196
197 //If we only support rtz mode
198 if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
199 {
200 oldRoundMode = set_round(kRoundTowardZero, kfloat);
201 isRTZ = 1;
202 }
203
204 if (isRTZ)
205 oldRoundMode = set_round(kRoundTowardZero, kfloat);
206
207 // Verify that we got the expected results back on the host side
208 err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
209 if (err)
210 {
211 log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
212 "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
213 } else {
214 log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
215 "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
216 }
217
218 if (isRTZ)
219 set_round(oldRoundMode, kfloat);
220
221 err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
222 test_error( err, "clEnqueueUnmapMemObject failed" );
223
224 size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
225 randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
226 free_mtdata(d); d = NULL;
227
228 // Copy from host-side to host-side
229 log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
230 err = clEnqueueCopyImage(queue, streams[3], streams[5],
231 origin, origin, region, 0, NULL, NULL);
232 test_error(err, "clEnqueueCopyImage failed");
233 log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
234
235 // test the lock buffer interface
236 log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
237 size_t row_pitch;
238 lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
239 CL_MAP_READ, origin, region,
240 &row_pitch, NULL,
241 0, NULL, NULL, &err);
242 test_error(err, "clEnqueueMapImage failed");
243
244 err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
245 if (err != CL_SUCCESS)
246 {
247 log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
248 return -1;
249 }
250 log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
251
252 err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
253 test_error(err, "clEnqueueUnmapMemObject failed");
254
255 // Copy host-side to device-side and read back
256 log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
257 err = clEnqueueCopyImage(queue, streams[3], streams[5],
258 origin, origin, region,
259 0, NULL, NULL);
260 test_error(err, "clEnqueueCopyImage failed");
261
262 err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
263 test_error(err, "clEnqueueReadImage failed");
264
265 err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
266 if (err != CL_SUCCESS)
267 {
268 log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
269 return -1;
270 }
271 log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
272 }
273 // cleanup
274 clReleaseKernel(kernel);
275 clReleaseProgram(program);
276 free(input_ptr[0]);
277 free(input_ptr[1]);
278 free(output_ptr);
279
280 free(rgba8_inptr);
281 free(rgba8_outptr);
282
283 return err;
284 }
285
286
287
288
289
290