xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/commonfns/test_smoothstep.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2023 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 
21 #include "harness/stringHelpers.h"
22 
23 #include "procs.h"
24 #include "test_base.h"
25 
26 const char *smoothstep_fn_code_pattern =
27     "%s\n" /* optional pragma */
28     "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s "
29     "*x, __global %s%s *dst)\n"
30     "{\n"
31     "    int  tid = get_global_id(0);\n"
32     "\n"
33     "    dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n"
34     "}\n";
35 
36 const char *smoothstep_fn_code_pattern_v3 =
37     "%s\n" /* optional pragma */
38     "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
39     "__global %s *dst)\n"
40     "{\n"
41     "    int  tid = get_global_id(0);\n"
42     "\n"
43     "    vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), "
44     "tid, dst);\n"
45     "}\n";
46 
47 const char *smoothstep_fn_code_pattern_v3_scalar =
48     "%s\n" /* optional pragma */
49     "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
50     "__global %s *dst)\n"
51     "{\n"
52     "    int  tid = get_global_id(0);\n"
53     "\n"
54     "    vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n"
55     "}\n";
56 
57 #define MAX_ERR (1e-5f)
58 
59 namespace {
60 
61 template <typename T>
verify_smoothstep(const T * const edge0,const T * const edge1,const T * const x,const T * const outptr,const int n,const int veclen,const bool vecParam)62 int verify_smoothstep(const T *const edge0, const T *const edge1,
63                       const T *const x, const T *const outptr, const int n,
64                       const int veclen, const bool vecParam)
65 {
66     double r, t;
67     float delta = 0, max_delta = 0;
68 
69     if (vecParam)
70     {
71         for (int i = 0; i < n * veclen; i++)
72         {
73             t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i]))
74                 / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
75             if (t < 0.0)
76                 t = 0.0;
77             else if (t > 1.0)
78                 t = 1.0;
79             r = t * t * (3.0 - 2.0 * t);
80             delta = (float)fabs(r - conv_to_dbl(outptr[i]));
81             if (!std::is_same<T, half>::value)
82             {
83                 if (delta > MAX_ERR)
84                 {
85                     log_error(
86                         "%d) verification error: smoothstep(%a, %a, %a) = "
87                         "*%a vs. %a\n",
88                         i, x[i], edge0[i], edge1[i], r, outptr[i]);
89                     return -1;
90                 }
91             }
92             else
93                 max_delta = std::max(max_delta, delta);
94         }
95     }
96     else
97     {
98         for (int i = 0; i < n; ++i)
99         {
100             int ii = i / veclen;
101             int vi = i * veclen;
102             for (int j = 0; j < veclen; ++j, ++vi)
103             {
104                 t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i]))
105                     / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
106                 if (t < 0.0)
107                     t = 0.0;
108                 else if (t > 1.0)
109                     t = 1.0;
110                 r = t * t * (3.0 - 2.0 * t);
111                 delta = (float)fabs(r - conv_to_dbl(outptr[vi]));
112 
113                 if (!std::is_same<T, half>::value)
114                 {
115                     if (delta > MAX_ERR)
116                     {
117                         log_error("{%d, element %d}) verification error: "
118                                   "smoothstep(%a, %a, %a) = *%a vs. %a\n",
119                                   ii, j, x[vi], edge0[i], edge1[i], r,
120                                   outptr[vi]);
121                         return -1;
122                     }
123                 }
124                 else
125                     max_delta = std::max(max_delta, delta);
126             }
127         }
128     }
129 
130     // due to the fact that accuracy of smoothstep for cl_khr_fp16 is
131     // implementation defined this test only reports maximum error without
132     // testing maximum error threshold
133     if (std::is_same<T, half>::value)
134         log_error("smoothstep half verification result, max delta: %a\n",
135                   max_delta);
136 
137     return 0;
138 }
139 
140 }
141 
142 template <typename T>
test_smoothstep_fn(cl_device_id device,cl_context context,cl_command_queue queue,const int n_elems,const bool vecParam)143 int test_smoothstep_fn(cl_device_id device, cl_context context,
144                        cl_command_queue queue, const int n_elems,
145                        const bool vecParam)
146 {
147     clMemWrapper streams[4];
148     std::vector<T> input_ptr[3], output_ptr;
149 
150     std::vector<clProgramWrapper> programs;
151     std::vector<clKernelWrapper> kernels;
152 
153     int err, i;
154     MTdataHolder d = MTdataHolder(gRandomSeed);
155 
156     assert(BaseFunctionTest::type2name.find(sizeof(T))
157            != BaseFunctionTest::type2name.end());
158     auto tname = BaseFunctionTest::type2name[sizeof(T)];
159 
160     programs.resize(kTotalVecCount);
161     kernels.resize(kTotalVecCount);
162 
163     int num_elements = n_elems * (1 << (kTotalVecCount - 1));
164 
165     for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
166     output_ptr.resize(num_elements);
167 
168     for (i = 0; i < 4; i++)
169     {
170         streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
171                                     sizeof(T) * num_elements, NULL, &err);
172         test_error(err, "clCreateBuffer failed");
173     }
174 
175     std::string pragma_str;
176     if (std::is_same<T, float>::value)
177     {
178         for (i = 0; i < num_elements; i++)
179         {
180             input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d);
181             input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d);
182             input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d);
183         }
184     }
185     else if (std::is_same<T, double>::value)
186     {
187         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
188         for (i = 0; i < num_elements; i++)
189         {
190             input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d);
191             input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d);
192             input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d);
193         }
194     }
195     else if (std::is_same<T, half>::value)
196     {
197         pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
198         for (i = 0; i < num_elements; i++)
199         {
200             input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d));
201             input_ptr[1][i] = conv_to_half(
202                 get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d));
203             input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d));
204         }
205     }
206 
207     for (i = 0; i < 3; i++)
208     {
209         err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
210                                    sizeof(T) * num_elements,
211                                    &input_ptr[i].front(), 0, NULL, NULL);
212         test_error(err, "Unable to write input buffer");
213     }
214 
215     const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
216 
217     for (i = 0; i < kTotalVecCount; i++)
218     {
219         std::string kernelSource;
220         if (i >= kVectorSizeCount)
221         {
222             if (vecParam)
223             {
224                 std::string str = smoothstep_fn_code_pattern_v3;
225                 kernelSource =
226                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
227                                 tname.c_str(), tname.c_str(), tname.c_str());
228             }
229             else
230             {
231                 std::string str = smoothstep_fn_code_pattern_v3_scalar;
232                 kernelSource =
233                     str_sprintf(str, pragma_str.c_str(), tname.c_str(),
234                                 tname.c_str(), tname.c_str(), tname.c_str());
235             }
236         }
237         else
238         {
239             // regular path
240             std::string str = smoothstep_fn_code_pattern;
241             kernelSource =
242                 str_sprintf(str, pragma_str.c_str(), tname.c_str(),
243                             vecParam ? vecSizeNames[i] : "", tname.c_str(),
244                             vecParam ? vecSizeNames[i] : "", tname.c_str(),
245                             vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
246         }
247 
248         const char *programPtr = kernelSource.c_str();
249         err =
250             create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
251                                         (const char **)&programPtr, "test_fn");
252         test_error(err, "Unable to create kernel");
253 
254         for (int j = 0; j < 4; j++)
255         {
256             err =
257                 clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
258             test_error(err, "Unable to set kernel argument");
259         }
260 
261         size_t threads = (size_t)n_elems;
262 
263         err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
264                                      0, NULL, NULL);
265         test_error(err, "Unable to execute kernel");
266 
267         err = clEnqueueReadBuffer(queue, streams[3], true, 0,
268                                   sizeof(T) * num_elements, &output_ptr[0], 0,
269                                   NULL, NULL);
270         test_error(err, "Unable to read results");
271 
272         if (verify_smoothstep((T *)&input_ptr[0].front(),
273                               (T *)&input_ptr[1].front(),
274                               (T *)&input_ptr[2].front(), &output_ptr[0],
275                               n_elems, g_arrVecSizes[i], vecParam))
276         {
277             log_error("smoothstep %s%d%s test failed\n", tname.c_str(),
278                       ((g_arrVecSizes[i])),
279                       vecParam ? "" : std::string(", " + tname).c_str());
280             err = -1;
281         }
282         else
283         {
284             log_info("smoothstep %s%d%s test passed\n", tname.c_str(),
285                      ((g_arrVecSizes[i])),
286                      vecParam ? "" : std::string(", " + tname).c_str());
287             err = 0;
288         }
289 
290         if (err) break;
291     }
292 
293     return err;
294 }
295 
Run()296 cl_int SmoothstepTest::Run()
297 {
298     cl_int error = CL_SUCCESS;
299     if (is_extension_available(device, "cl_khr_fp16"))
300     {
301         error = test_smoothstep_fn<half>(device, context, queue, num_elems,
302                                          vecParam);
303         test_error(error, "SmoothstepTest::Run<cl_half> failed");
304     }
305 
306     error =
307         test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam);
308     test_error(error, "SmoothstepTest::Run<float> failed");
309 
310     if (is_extension_available(device, "cl_khr_fp64"))
311     {
312         error = test_smoothstep_fn<double>(device, context, queue, num_elems,
313                                            vecParam);
314         test_error(error, "SmoothstepTest::Run<double> failed");
315     }
316 
317     return error;
318 }
319 
test_smoothstep(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)320 int test_smoothstep(cl_device_id device, cl_context context,
321                     cl_command_queue queue, int n_elems)
322 {
323     return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
324                                           "smoothstep", true);
325 }
326 
test_smoothstepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)327 int test_smoothstepf(cl_device_id device, cl_context context,
328                      cl_command_queue queue, int n_elems)
329 {
330     return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
331                                           "smoothstep", false);
332 }
333