1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24
25 #include "procs.h"
26
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 " int tid = get_global_id(0);\n"
39 "\n"
40 " dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 " int tid = get_global_id(0);\n"
47 "\n"
48 " dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50
51 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)52 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
53 {
54 int i;
55
56 float * reference_ptr = (float *)malloc(n * sizeof(float));
57
58 for (i=0; i<n; i++)
59 {
60 reference_ptr[i] = inptrA[i] + inptrB[i];
61 }
62
63 for (i=0; i<n; i++)
64 {
65 if (reference_ptr[i] != outptr[i])
66 {
67 log_error("FP_ADD float test failed\n");
68 return -1;
69 }
70 }
71
72 free(reference_ptr);
73
74 log_info("FP_ADD float test passed\n");
75 return 0;
76 }
77
78 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)79 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
80 {
81 int i;
82
83 float * reference_ptr = (float *)malloc(n * sizeof(float));
84
85 for (i=0; i<n; i++)
86 {
87 reference_ptr[i] = inptrA[i] - inptrB[i];
88 }
89
90 for (i=0; i<n; i++)
91 {
92 if (reference_ptr[i] != outptr[i])
93 {
94 log_error("FP_SUB float test failed\n");
95 return -1;
96 }
97 }
98
99 free(reference_ptr);
100
101 log_info("FP_SUB float test passed\n");
102 return 0;
103 }
104
105 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)106 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
107 {
108 int i;
109
110 float * reference_ptr = (float *)malloc(n * sizeof(float));
111
112 for (i=0; i<n; i++)
113 {
114 reference_ptr[i] = inptrA[i] * inptrB[i];
115 }
116
117 for (i=0; i<n; i++)
118 {
119 if (reference_ptr[i] != outptr[i])
120 {
121 log_error("FP_MUL float test failed\n");
122 return -1;
123 }
124 }
125
126 free(reference_ptr);
127
128 log_info("FP_MUL float test passed\n");
129 return 0;
130 }
131
132 #if defined( __APPLE__ )
133
test_queue_priority(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)134 int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
135 {
136 int err;
137 int command_queue_priority = 0;
138 int command_queue_select_compute_units = 0;
139
140 cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, 0, 0, 0, 0, 0, 0 };
141 int idx = 2;
142
143 // Check to see if queue priority is supported
144 if (((command_queue_priority = is_extension_available(device, "cl_APPLE_command_queue_priority"))) == 0)
145 {
146 log_info("cl_APPLE_command_queue_priority extension is not supported - skipping test\n");
147 }
148
149 // Check to see if selecting the number of compute units is supported
150 if (((command_queue_select_compute_units = is_extension_available(device, "cl_APPLE_command_queue_select_compute_units"))) == 0)
151 {
152 log_info("cl_APPLE_command_queue_select_compute_units extension is not supported - skipping test\n");
153 }
154
155 // If neither extension is supported, skip the test
156 if (!command_queue_priority && !command_queue_select_compute_units)
157 return 0;
158
159 // Setup the queue properties
160 #ifdef cl_APPLE_command_queue_priority
161 if (command_queue_priority) {
162 queue_properties[idx++] = CL_QUEUE_PRIORITY_APPLE;
163 queue_properties[idx++] = CL_QUEUE_PRIORITY_BACKGROUND_APPLE;
164 }
165 #endif
166
167 #ifdef cl_APPLE_command_queue_select_compute_units
168 // Check the number of compute units on the device
169 cl_uint num_compute_units = 0;
170 err = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( num_compute_units ), &num_compute_units, NULL );
171 if (err) {
172 log_error("clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed: %d", err);
173 return -1;
174 }
175
176 if (command_queue_select_compute_units) {
177 queue_properties[idx++] = CL_QUEUE_NUM_COMPUTE_UNITS_APPLE;
178 queue_properties[idx++] = num_compute_units/2;
179 }
180 #endif
181 queue_properties[idx++] = 0;
182
183 // Create the command queue
184 cl_command_queue background_queue = clCreateCommandQueueWithProperties(context, device, queue_properties, &err);
185 if (err) {
186 log_error("clCreateCommandQueueWithPropertiesAPPLE failed: %d", err);
187 return -1;
188 }
189
190 // Test the command queue
191 cl_mem streams[4];
192 cl_program program[3];
193 cl_kernel kernel[3];
194 cl_event marker_event;
195
196 float *input_ptr[3], *output_ptr, *p;
197 size_t threads[1];
198 int i;
199 MTdata d = init_genrand( gRandomSeed );
200 size_t length = sizeof(cl_float) * num_elements;
201 int isRTZ = 0;
202 RoundingMode oldMode = kDefaultRoundingMode;
203
204 // check for floating point capabilities
205 cl_device_fp_config single_config = 0;
206 err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
207 if (err) {
208 log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
209 return -1;
210 }
211 //If we only support rtz mode
212 if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
213 {
214 //Check to make sure we are an embedded device
215 char profile[32];
216 err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
217 if( err )
218 {
219 log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
220 return -1;
221 }
222 if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
223 {
224 log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
225 return -1;
226 }
227
228 isRTZ = 1;
229 oldMode = get_round();
230 }
231
232 input_ptr[0] = (cl_float *)malloc(length);
233 input_ptr[1] = (cl_float *)malloc(length);
234 input_ptr[2] = (cl_float *)malloc(length);
235 output_ptr = (cl_float *)malloc(length);
236
237 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
238 test_error( err, "clCreateBuffer failed.");
239 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
240 test_error( err, "clCreateBuffer failed.");
241 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
242 test_error( err, "clCreateBuffer failed.");
243 streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
244 test_error( err, "clCreateBuffer failed.");
245
246 p = input_ptr[0];
247 for (i=0; i<num_elements; i++)
248 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
249 p = input_ptr[1];
250 for (i=0; i<num_elements; i++)
251 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
252 p = input_ptr[2];
253 for (i=0; i<num_elements; i++)
254 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
255
256 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
257 test_error( err, "clEnqueueWriteBuffer failed.");
258
259 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
260 test_error( err, "clEnqueueWriteBuffer failed.");
261
262 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
263 test_error( err, "clEnqueueWriteBuffer failed.");
264
265 err = clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker_event);
266 test_error( err, "clEnqueueMarkerWithWaitList failed.");
267 clFlush(queue);
268
269 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
270 test_error( err, "create_single_kernel_helper failed");
271
272 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
273 test_error( err, "create_single_kernel_helper failed");
274
275 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
276 test_error( err, "create_single_kernel_helper failed");
277
278
279 err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
280 err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
281 err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
282 test_error( err, "clSetKernelArgs failed.");
283
284 err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
285 err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
286 err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
287 test_error( err, "clSetKernelArgs failed.");
288
289 err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
290 err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
291 err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
292 test_error( err, "clSetKernelArgs failed.");
293
294 threads[0] = (unsigned int)num_elements;
295 for (i=0; i<3; i++)
296 {
297 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 1, &marker_event, NULL);
298 test_error( err, "clEnqueueNDRangeKernel failed.");
299
300 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
301 test_error( err, "clEnqueueReadBuffer failed.");
302
303 if( isRTZ )
304 set_round( kRoundTowardZero, kfloat );
305
306 switch (i)
307 {
308 case 0:
309 err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
310 break;
311 case 1:
312 err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
313 break;
314 case 2:
315 err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
316 break;
317 }
318
319 if( isRTZ )
320 set_round( oldMode, kfloat );
321 }
322
323 // cleanup
324 clReleaseCommandQueue(background_queue);
325 clReleaseEvent(marker_event);
326 clReleaseMemObject(streams[0]);
327 clReleaseMemObject(streams[1]);
328 clReleaseMemObject(streams[2]);
329 clReleaseMemObject(streams[3]);
330 for (i=0; i<3; i++)
331 {
332 clReleaseKernel(kernel[i]);
333 clReleaseProgram(program[i]);
334 }
335 free(input_ptr[0]);
336 free(input_ptr[1]);
337 free(input_ptr[2]);
338 free(output_ptr);
339 free_mtdata( d );
340
341 return err;
342 }
343
344
345
346 #endif
347
348