1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <vector>
22
23 #include "harness/deviceInfo.h"
24 #include "harness/typeWrappers.h"
25 #include "harness/stringHelpers.h"
26
27 #include "procs.h"
28 #include "test_base.h"
29
30 const char *binary_fn_code_pattern =
31 "%s\n" /* optional pragma */
32 "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n"
33 "{\n"
34 " int tid = get_global_id(0);\n"
35 "\n"
36 " dst[tid] = %s(x[tid], y[tid]);\n"
37 "}\n";
38
39 const char *binary_fn_code_pattern_v3 =
40 "%s\n" /* optional pragma */
41 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
42 "{\n"
43 " int tid = get_global_id(0);\n"
44 "\n"
45 " vstore3(%s(vload3(tid,x), vload3(tid,y) ), tid, dst);\n"
46 "}\n";
47
48 const char *binary_fn_code_pattern_v3_scalar =
49 "%s\n" /* optional pragma */
50 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
51 "{\n"
52 " int tid = get_global_id(0);\n"
53 "\n"
54 " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
55 "}\n";
56
57 template <typename T>
test_binary_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,const std::string & fnName,bool vecSecParam,VerifyFuncBinary<T> verifyFn)58 int test_binary_fn(cl_device_id device, cl_context context,
59 cl_command_queue queue, int n_elems,
60 const std::string& fnName, bool vecSecParam,
61 VerifyFuncBinary<T> verifyFn)
62 {
63 clMemWrapper streams[3];
64 std::vector<T> input_ptr[2], output_ptr;
65
66 std::vector<clProgramWrapper> programs;
67 std::vector<clKernelWrapper> kernels;
68 int err, i, j;
69 MTdataHolder d = MTdataHolder(gRandomSeed);
70
71 assert(BaseFunctionTest::type2name.find(sizeof(T))
72 != BaseFunctionTest::type2name.end());
73 auto tname = BaseFunctionTest::type2name[sizeof(T)];
74
75 programs.resize(kTotalVecCount);
76 kernels.resize(kTotalVecCount);
77
78 int num_elements = n_elems * (1 << (kTotalVecCount - 1));
79
80 for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
81 output_ptr.resize(num_elements);
82
83 for( i = 0; i < 3; i++ )
84 {
85 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
86 sizeof(T) * num_elements, NULL, &err);
87 test_error( err, "clCreateBuffer failed");
88 }
89
90 std::string pragma_str;
91 if (std::is_same<T, float>::value)
92 {
93 for (j = 0; j < num_elements; j++)
94 {
95 input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
96 input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
97 }
98 }
99 else if (std::is_same<T, double>::value)
100 {
101 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
102 for (j = 0; j < num_elements; j++)
103 {
104 input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
105 input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
106 }
107 }
108 else if (std::is_same<T, half>::value)
109 {
110 const float fval = CL_HALF_MAX;
111 pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
112 for (int j = 0; j < num_elements; j++)
113 {
114 input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
115 input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
116 }
117 }
118
119 for (i = 0; i < 2; i++)
120 {
121 err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
122 sizeof(T) * num_elements,
123 &input_ptr[i].front(), 0, NULL, NULL);
124 test_error(err, "Unable to write input buffer");
125 }
126
127 char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
128
129 for (i = 0; i < kTotalVecCount; i++)
130 {
131 std::string kernelSource;
132 if (i >= kVectorSizeCount)
133 {
134 if (vecSecParam)
135 {
136 std::string str = binary_fn_code_pattern_v3;
137 kernelSource =
138 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
139 tname.c_str(), tname.c_str(), fnName.c_str());
140 }
141 else
142 {
143 std::string str = binary_fn_code_pattern_v3_scalar;
144 kernelSource =
145 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
146 tname.c_str(), tname.c_str(), fnName.c_str());
147 }
148 }
149 else
150 {
151 // do regular
152 std::string str = binary_fn_code_pattern;
153 kernelSource = str_sprintf(
154 str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i],
155 tname.c_str(), vecSecParam ? vecSizeNames[i] : "",
156 tname.c_str(), vecSizeNames[i], fnName.c_str());
157 }
158 const char* programPtr = kernelSource.c_str();
159 err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
160 (const char**)&programPtr, "test_fn");
161 test_error(err, "Unable to create kernel");
162
163 for( j = 0; j < 3; j++ )
164 {
165 err =
166 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
167 test_error( err, "Unable to set kernel argument" );
168 }
169
170 size_t threads = (size_t)n_elems;
171
172 err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
173 0, NULL, NULL);
174 test_error( err, "Unable to execute kernel" );
175
176 err = clEnqueueReadBuffer(queue, streams[2], true, 0,
177 sizeof(T) * num_elements, &output_ptr[0], 0,
178 NULL, NULL);
179 test_error( err, "Unable to read results" );
180
181 if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(),
182 &output_ptr[0], n_elems, g_arrVecSizes[i],
183 vecSecParam ? 1 : 0))
184 {
185 log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(),
186 ((g_arrVecSizes[i])),
187 vecSecParam ? "" : std::string(", " + tname).c_str());
188 err = -1;
189 }
190 else
191 {
192 log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(),
193 ((g_arrVecSizes[i])),
194 vecSecParam ? "" : std::string(", " + tname).c_str());
195 err = 0;
196 }
197
198 if (err)
199 break;
200 }
201 return err;
202 }
203
204 namespace {
205
206 template <typename T>
max_verify(const T * const x,const T * const y,const T * const out,int numElements,int vecSize,int vecParam)207 int max_verify(const T* const x, const T* const y, const T* const out,
208 int numElements, int vecSize, int vecParam)
209 {
210 for (int i = 0; i < numElements; i++)
211 {
212 for (int j = 0; j < vecSize; j++)
213 {
214 int k = i * vecSize + j;
215 int l = (k * vecParam + i * (1 - vecParam));
216 T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k];
217 if (v != out[k])
218 {
219 if (std::is_same<T, half>::value)
220 log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
221 "(index %d is "
222 "vector %d, element %d, for vector size %d)\n",
223 k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
224 conv_to_flt(out[k]), v, k, i, j, vecSize);
225 else
226 log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
227 "(index %d is "
228 "vector %d, element %d, for vector size %d)\n",
229 k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
230 return -1;
231 }
232 }
233 }
234 return 0;
235 }
236
237 template <typename T>
min_verify(const T * const x,const T * const y,const T * const out,int numElements,int vecSize,int vecParam)238 int min_verify(const T* const x, const T* const y, const T* const out,
239 int numElements, int vecSize, int vecParam)
240 {
241 for (int i = 0; i < numElements; i++)
242 {
243 for (int j = 0; j < vecSize; j++)
244 {
245 int k = i * vecSize + j;
246 int l = (k * vecParam + i * (1 - vecParam));
247 T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k];
248 if (v != out[k])
249 {
250 if (std::is_same<T, half>::value)
251 log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
252 "(index %d is "
253 "vector %d, element %d, for vector size %d)\n",
254 k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
255 conv_to_flt(out[k]), v, k, i, j, vecSize);
256 else
257 log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
258 "(index %d is "
259 "vector %d, element %d, for vector size %d)\n",
260 k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
261 return -1;
262 }
263 }
264 }
265 return 0;
266 }
267
268 }
269
Run()270 cl_int MaxTest::Run()
271 {
272 cl_int error = CL_SUCCESS;
273 if (is_extension_available(device, "cl_khr_fp16"))
274 {
275 error = test_binary_fn<cl_half>(device, context, queue, num_elems,
276 fnName.c_str(), vecParam,
277 max_verify<cl_half>);
278 test_error(error, "MaxTest::Run<cl_half> failed");
279 }
280
281 error = test_binary_fn<float>(device, context, queue, num_elems,
282 fnName.c_str(), vecParam, max_verify<float>);
283 test_error(error, "MaxTest::Run<float> failed");
284
285 if (is_extension_available(device, "cl_khr_fp64"))
286 {
287 error = test_binary_fn<double>(device, context, queue, num_elems,
288 fnName.c_str(), vecParam,
289 max_verify<double>);
290 test_error(error, "MaxTest::Run<double> failed");
291 }
292
293 return error;
294 }
295
Run()296 cl_int MinTest::Run()
297 {
298 cl_int error = CL_SUCCESS;
299 if (is_extension_available(device, "cl_khr_fp16"))
300 {
301 error = test_binary_fn<cl_half>(device, context, queue, num_elems,
302 fnName.c_str(), vecParam,
303 min_verify<cl_half>);
304 test_error(error, "MinTest::Run<cl_half> failed");
305 }
306
307 error = test_binary_fn<float>(device, context, queue, num_elems,
308 fnName.c_str(), vecParam, min_verify<float>);
309 test_error(error, "MinTest::Run<float> failed");
310
311 if (is_extension_available(device, "cl_khr_fp64"))
312 {
313 error = test_binary_fn<double>(device, context, queue, num_elems,
314 fnName.c_str(), vecParam,
315 min_verify<double>);
316 test_error(error, "MinTest::Run<double> failed");
317 }
318
319 return error;
320 }
321
test_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)322 int test_min(cl_device_id device, cl_context context, cl_command_queue queue,
323 int n_elems)
324 {
325 return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
326 true);
327 }
328
test_minf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)329 int test_minf(cl_device_id device, cl_context context, cl_command_queue queue,
330 int n_elems)
331 {
332 return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
333 false);
334 }
335
test_fmin(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)336 int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue,
337 int n_elems)
338 {
339 return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
340 true);
341 }
342
test_fminf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)343 int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue,
344 int n_elems)
345 {
346 return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
347 false);
348 }
349
test_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)350 int test_max(cl_device_id device, cl_context context, cl_command_queue queue,
351 int n_elems)
352 {
353 return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
354 true);
355 }
356
test_maxf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)357 int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue,
358 int n_elems)
359 {
360 return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
361 false);
362 }
363
test_fmax(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)364 int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue,
365 int n_elems)
366 {
367 return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
368 true);
369 }
370
test_fmaxf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)371 int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue,
372 int n_elems)
373 {
374 return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
375 false);
376 }
377