xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/commonfns/test_binary_fn.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <vector>
22 
23 #include "harness/deviceInfo.h"
24 #include "harness/typeWrappers.h"
25 #include "harness/stringHelpers.h"
26 
27 #include "procs.h"
28 #include "test_base.h"
29 
30 const char *binary_fn_code_pattern =
31 "%s\n" /* optional pragma */
32 "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n"
33 "{\n"
34 "    int  tid = get_global_id(0);\n"
35 "\n"
36 "    dst[tid] = %s(x[tid], y[tid]);\n"
37 "}\n";
38 
39 const char *binary_fn_code_pattern_v3 =
40 "%s\n" /* optional pragma */
41 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
42 "{\n"
43 "    int  tid = get_global_id(0);\n"
44 "\n"
45 "    vstore3(%s(vload3(tid,x), vload3(tid,y) ), tid, dst);\n"
46 "}\n";
47 
48 const char *binary_fn_code_pattern_v3_scalar =
49 "%s\n" /* optional pragma */
50 "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
51 "{\n"
52 "    int  tid = get_global_id(0);\n"
53 "\n"
54 "    vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
55 "}\n";
56 
57 template <typename T>
test_binary_fn(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems,const std::string & fnName,bool vecSecParam,VerifyFuncBinary<T> verifyFn)58 int test_binary_fn(cl_device_id device, cl_context context,
59                    cl_command_queue queue, int n_elems,
60                    const std::string& fnName, bool vecSecParam,
61                    VerifyFuncBinary<T> verifyFn)
62 {
63     clMemWrapper streams[3];
64     std::vector<T> input_ptr[2], output_ptr;
65 
66     std::vector<clProgramWrapper> programs;
67     std::vector<clKernelWrapper> kernels;
68     int err, i, j;
69     MTdataHolder d = MTdataHolder(gRandomSeed);
70 
71     assert(BaseFunctionTest::type2name.find(sizeof(T))
72            != BaseFunctionTest::type2name.end());
73     auto tname = BaseFunctionTest::type2name[sizeof(T)];
74 
75     programs.resize(kTotalVecCount);
76     kernels.resize(kTotalVecCount);
77 
78     int num_elements = n_elems * (1 << (kTotalVecCount - 1));
79 
80     for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
81     output_ptr.resize(num_elements);
82 
83     for( i = 0; i < 3; i++ )
84     {
85         streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
86                                     sizeof(T) * num_elements, NULL, &err);
87         test_error( err, "clCreateBuffer failed");
88     }
89 
90     std::string pragma_str;
91     if (std::is_same<T, float>::value)
92     {
93         for (j = 0; j < num_elements; j++)
94         {
95             input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
96             input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
97         }
98     }
99     else if (std::is_same<T, double>::value)
100     {
101         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
102         for (j = 0; j < num_elements; j++)
103         {
104             input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
105             input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
106         }
107     }
108     else if (std::is_same<T, half>::value)
109     {
110         const float fval = CL_HALF_MAX;
111         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
112         for (int j = 0; j < num_elements; j++)
113         {
114             input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
115             input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
116         }
117     }
118 
119     for (i = 0; i < 2; i++)
120     {
121         err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
122                                    sizeof(T) * num_elements,
123                                    &input_ptr[i].front(), 0, NULL, NULL);
124         test_error(err, "Unable to write input buffer");
125     }
126 
127     char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
128 
129     for (i = 0; i < kTotalVecCount; i++)
130     {
131         std::string kernelSource;
132         if (i >= kVectorSizeCount)
133         {
134             if (vecSecParam)
135             {
136                 std::string str = binary_fn_code_pattern_v3;
137                 kernelSource =
138                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
139                                 tname.c_str(), tname.c_str(), fnName.c_str());
140             }
141             else
142             {
143                 std::string str = binary_fn_code_pattern_v3_scalar;
144                 kernelSource =
145                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
146                                 tname.c_str(), tname.c_str(), fnName.c_str());
147             }
148         }
149         else
150         {
151             // do regular
152             std::string str = binary_fn_code_pattern;
153             kernelSource = str_sprintf(
154                 str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i],
155                 tname.c_str(), vecSecParam ? vecSizeNames[i] : "",
156                 tname.c_str(), vecSizeNames[i], fnName.c_str());
157         }
158         const char* programPtr = kernelSource.c_str();
159         err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
160                                           (const char**)&programPtr, "test_fn");
161         test_error(err, "Unable to create kernel");
162 
163         for( j = 0; j < 3; j++ )
164         {
165             err =
166                 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
167             test_error( err, "Unable to set kernel argument" );
168         }
169 
170         size_t threads = (size_t)n_elems;
171 
172         err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
173                                      0, NULL, NULL);
174         test_error( err, "Unable to execute kernel" );
175 
176         err = clEnqueueReadBuffer(queue, streams[2], true, 0,
177                                   sizeof(T) * num_elements, &output_ptr[0], 0,
178                                   NULL, NULL);
179         test_error( err, "Unable to read results" );
180 
181         if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(),
182                      &output_ptr[0], n_elems, g_arrVecSizes[i],
183                      vecSecParam ? 1 : 0))
184         {
185             log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(),
186                       ((g_arrVecSizes[i])),
187                       vecSecParam ? "" : std::string(", " + tname).c_str());
188             err = -1;
189         }
190         else
191         {
192             log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(),
193                      ((g_arrVecSizes[i])),
194                      vecSecParam ? "" : std::string(", " + tname).c_str());
195             err = 0;
196         }
197 
198         if (err)
199             break;
200     }
201     return err;
202 }
203 
204 namespace {
205 
206 template <typename T>
max_verify(const T * const x,const T * const y,const T * const out,int numElements,int vecSize,int vecParam)207 int max_verify(const T* const x, const T* const y, const T* const out,
208                int numElements, int vecSize, int vecParam)
209 {
210     for (int i = 0; i < numElements; i++)
211     {
212         for (int j = 0; j < vecSize; j++)
213         {
214             int k = i * vecSize + j;
215             int l = (k * vecParam + i * (1 - vecParam));
216             T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k];
217             if (v != out[k])
218             {
219                 if (std::is_same<T, half>::value)
220                     log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
221                               "(index %d is "
222                               "vector %d, element %d, for vector size %d)\n",
223                               k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
224                               conv_to_flt(out[k]), v, k, i, j, vecSize);
225                 else
226                     log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
227                               "(index %d is "
228                               "vector %d, element %d, for vector size %d)\n",
229                               k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
230                 return -1;
231             }
232         }
233     }
234     return 0;
235 }
236 
237 template <typename T>
min_verify(const T * const x,const T * const y,const T * const out,int numElements,int vecSize,int vecParam)238 int min_verify(const T* const x, const T* const y, const T* const out,
239                int numElements, int vecSize, int vecParam)
240 {
241     for (int i = 0; i < numElements; i++)
242     {
243         for (int j = 0; j < vecSize; j++)
244         {
245             int k = i * vecSize + j;
246             int l = (k * vecParam + i * (1 - vecParam));
247             T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k];
248             if (v != out[k])
249             {
250                 if (std::is_same<T, half>::value)
251                     log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
252                               "(index %d is "
253                               "vector %d, element %d, for vector size %d)\n",
254                               k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
255                               conv_to_flt(out[k]), v, k, i, j, vecSize);
256                 else
257                     log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
258                               "(index %d is "
259                               "vector %d, element %d, for vector size %d)\n",
260                               k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
261                 return -1;
262             }
263         }
264     }
265     return 0;
266 }
267 
268 }
269 
Run()270 cl_int MaxTest::Run()
271 {
272     cl_int error = CL_SUCCESS;
273     if (is_extension_available(device, "cl_khr_fp16"))
274     {
275         error = test_binary_fn<cl_half>(device, context, queue, num_elems,
276                                         fnName.c_str(), vecParam,
277                                         max_verify<cl_half>);
278         test_error(error, "MaxTest::Run<cl_half> failed");
279     }
280 
281     error = test_binary_fn<float>(device, context, queue, num_elems,
282                                   fnName.c_str(), vecParam, max_verify<float>);
283     test_error(error, "MaxTest::Run<float> failed");
284 
285     if (is_extension_available(device, "cl_khr_fp64"))
286     {
287         error = test_binary_fn<double>(device, context, queue, num_elems,
288                                        fnName.c_str(), vecParam,
289                                        max_verify<double>);
290         test_error(error, "MaxTest::Run<double> failed");
291     }
292 
293     return error;
294 }
295 
Run()296 cl_int MinTest::Run()
297 {
298     cl_int error = CL_SUCCESS;
299     if (is_extension_available(device, "cl_khr_fp16"))
300     {
301         error = test_binary_fn<cl_half>(device, context, queue, num_elems,
302                                         fnName.c_str(), vecParam,
303                                         min_verify<cl_half>);
304         test_error(error, "MinTest::Run<cl_half> failed");
305     }
306 
307     error = test_binary_fn<float>(device, context, queue, num_elems,
308                                   fnName.c_str(), vecParam, min_verify<float>);
309     test_error(error, "MinTest::Run<float> failed");
310 
311     if (is_extension_available(device, "cl_khr_fp64"))
312     {
313         error = test_binary_fn<double>(device, context, queue, num_elems,
314                                        fnName.c_str(), vecParam,
315                                        min_verify<double>);
316         test_error(error, "MinTest::Run<double> failed");
317     }
318 
319     return error;
320 }
321 
test_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)322 int test_min(cl_device_id device, cl_context context, cl_command_queue queue,
323              int n_elems)
324 {
325     return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
326                                    true);
327 }
328 
test_minf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)329 int test_minf(cl_device_id device, cl_context context, cl_command_queue queue,
330               int n_elems)
331 {
332     return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
333                                    false);
334 }
335 
test_fmin(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)336 int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue,
337               int n_elems)
338 {
339     return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
340                                    true);
341 }
342 
test_fminf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)343 int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue,
344                int n_elems)
345 {
346     return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
347                                    false);
348 }
349 
test_max(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)350 int test_max(cl_device_id device, cl_context context, cl_command_queue queue,
351              int n_elems)
352 {
353     return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
354                                    true);
355 }
356 
test_maxf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)357 int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue,
358               int n_elems)
359 {
360     return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
361                                    false);
362 }
363 
test_fmax(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)364 int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue,
365               int n_elems)
366 {
367     return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
368                                    true);
369 }
370 
test_fmaxf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)371 int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue,
372                int n_elems)
373 {
374     return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
375                                    false);
376 }
377