xref: /aosp_15_r20/external/OpenCL-CTS/test_conformance/math_brute_force/unary_two_results_double.cpp (revision 6467f958c7de8070b317fc65bcb0f6472e388d82)
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cinttypes>
23 #include <cstring>
24 
25 namespace {
26 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29     BuildKernelInfo &info = *(BuildKernelInfo *)p;
30     auto generator = [](const std::string &kernel_name, const char *builtin,
31                         cl_uint vector_size_index) {
32         return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
33                               ParameterType::Double, ParameterType::Double,
34                               vector_size_index);
35     };
36     return BuildKernels(info, job_id, generator);
37 }
38 
39 } // anonymous namespace
40 
TestFunc_Double2_Double(const Func * f,MTdata d,bool relaxedMode)41 int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
42 {
43     int error;
44     Programs programs;
45     const unsigned thread_id = 0; // Test is currently not multithreaded.
46     KernelMatrix kernels;
47     float maxError0 = 0.0f;
48     float maxError1 = 0.0f;
49     int ftz = f->ftz || gForceFTZ;
50     double maxErrorVal0 = 0.0f;
51     double maxErrorVal1 = 0.0f;
52     uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
53     int scale =
54         (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);
55 
56     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
57 
58     Force64BitFPUPrecision();
59 
60     // Init the kernels
61     BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
62                                 relaxedMode };
63     if ((error = ThreadPool_Do(BuildKernelFn,
64                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
65                                &build_info)))
66         return error;
67 
68     for (uint64_t i = 0; i < (1ULL << 32); i += step)
69     {
70         // Init input array
71         double *p = (double *)gIn;
72         if (gWimpyMode)
73         {
74             for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
75                 p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
76         }
77         else
78         {
79             for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
80                 p[j] = DoubleFromUInt32((uint32_t)i + j);
81         }
82         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
83                                           BUFFER_SIZE, gIn, 0, NULL, NULL)))
84         {
85             vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
86             return error;
87         }
88 
89         // Write garbage into output arrays
90         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
91         {
92             uint32_t pattern = 0xffffdead;
93             if (gHostFill)
94             {
95                 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
96                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
97                                                   CL_FALSE, 0, BUFFER_SIZE,
98                                                   gOut[j], 0, NULL, NULL)))
99                 {
100                     vlog_error(
101                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
102                         error, j);
103                     return error;
104                 }
105 
106                 memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
107                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
108                                                   CL_FALSE, 0, BUFFER_SIZE,
109                                                   gOut2[j], 0, NULL, NULL)))
110                 {
111                     vlog_error(
112                         "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
113                         error, j);
114                     return error;
115                 }
116             }
117             else
118             {
119                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
120                                                  &pattern, sizeof(pattern), 0,
121                                                  BUFFER_SIZE, 0, NULL, NULL)))
122                 {
123                     vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
124                                error);
125                     return error;
126                 }
127 
128                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
129                                                  &pattern, sizeof(pattern), 0,
130                                                  BUFFER_SIZE, 0, NULL, NULL)))
131                 {
132                     vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
133                                error);
134                     return error;
135                 }
136             }
137         }
138 
139         // Run the kernels
140         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
141         {
142             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
143             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
144             if ((error = clSetKernelArg(kernels[j][thread_id], 0,
145                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
146             {
147                 LogBuildError(programs[j]);
148                 return error;
149             }
150             if ((error =
151                      clSetKernelArg(kernels[j][thread_id], 1,
152                                     sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
153             {
154                 LogBuildError(programs[j]);
155                 return error;
156             }
157             if ((error = clSetKernelArg(kernels[j][thread_id], 2,
158                                         sizeof(gInBuffer), &gInBuffer)))
159             {
160                 LogBuildError(programs[j]);
161                 return error;
162             }
163 
164             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
165                                                 1, NULL, &localCount, NULL, 0,
166                                                 NULL, NULL)))
167             {
168                 vlog_error("FAILED -- could not execute kernel\n");
169                 return error;
170             }
171         }
172 
173         // Get that moving
174         if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
175 
176         // Calculate the correctly rounded reference result
177         double *r = (double *)gOut_Ref;
178         double *r2 = (double *)gOut_Ref2;
179         double *s = (double *)gIn;
180         for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
181         {
182             long double dd;
183             r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
184             r2[j] = (double)dd;
185         }
186 
187         // Read the data back
188         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
189         {
190             if ((error =
191                      clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
192                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
193             {
194                 vlog_error("ReadArray failed %d\n", error);
195                 return error;
196             }
197             if ((error =
198                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
199                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
200             {
201                 vlog_error("ReadArray2 failed %d\n", error);
202                 return error;
203             }
204         }
205 
206         if (gSkipCorrectnessTesting) break;
207 
208         // Verify data
209         uint64_t *t = (uint64_t *)gOut_Ref;
210         uint64_t *t2 = (uint64_t *)gOut_Ref2;
211         for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
212         {
213             for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
214             {
215                 uint64_t *q = (uint64_t *)(gOut[k]);
216                 uint64_t *q2 = (uint64_t *)(gOut2[k]);
217 
218                 // If we aren't getting the correctly rounded result
219                 if (t[j] != q[j] || t2[j] != q2[j])
220                 {
221                     double test = ((double *)q)[j];
222                     double test2 = ((double *)q2)[j];
223                     long double correct2;
224                     long double correct = f->dfunc.f_fpf(s[j], &correct2);
225                     float err = Bruteforce_Ulp_Error_Double(test, correct);
226                     float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
227                     int fail = !(fabsf(err) <= f->double_ulps
228                                  && fabsf(err2) <= f->double_ulps);
229                     if (ftz || relaxedMode)
230                     {
231                         // retry per section 6.5.3.2
232                         if (IsDoubleResultSubnormal(correct, f->double_ulps))
233                         {
234                             if (IsDoubleResultSubnormal(correct2,
235                                                         f->double_ulps))
236                             {
237                                 fail = fail && !(test == 0.0f && test2 == 0.0f);
238                                 if (!fail)
239                                 {
240                                     err = 0.0f;
241                                     err2 = 0.0f;
242                                 }
243                             }
244                             else
245                             {
246                                 fail = fail
247                                     && !(test == 0.0f
248                                          && fabsf(err2) <= f->double_ulps);
249                                 if (!fail) err = 0.0f;
250                             }
251                         }
252                         else if (IsDoubleResultSubnormal(correct2,
253                                                          f->double_ulps))
254                         {
255                             fail = fail
256                                 && !(test2 == 0.0f
257                                      && fabsf(err) <= f->double_ulps);
258                             if (!fail) err2 = 0.0f;
259                         }
260 
261                         // retry per section 6.5.3.3
262                         if (IsDoubleSubnormal(s[j]))
263                         {
264                             long double correct2p, correct2n;
265                             long double correctp =
266                                 f->dfunc.f_fpf(0.0, &correct2p);
267                             long double correctn =
268                                 f->dfunc.f_fpf(-0.0, &correct2n);
269                             float errp =
270                                 Bruteforce_Ulp_Error_Double(test, correctp);
271                             float err2p =
272                                 Bruteforce_Ulp_Error_Double(test, correct2p);
273                             float errn =
274                                 Bruteforce_Ulp_Error_Double(test, correctn);
275                             float err2n =
276                                 Bruteforce_Ulp_Error_Double(test, correct2n);
277                             fail = fail
278                                 && ((!(fabsf(errp) <= f->double_ulps))
279                                     && (!(fabsf(err2p) <= f->double_ulps))
280                                     && ((!(fabsf(errn) <= f->double_ulps))
281                                         && (!(fabsf(err2n)
282                                               <= f->double_ulps))));
283                             if (fabsf(errp) < fabsf(err)) err = errp;
284                             if (fabsf(errn) < fabsf(err)) err = errn;
285                             if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
286                             if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
287 
288                             // retry per section 6.5.3.4
289                             if (IsDoubleResultSubnormal(correctp,
290                                                         f->double_ulps)
291                                 || IsDoubleResultSubnormal(correctn,
292                                                            f->double_ulps))
293                             {
294                                 if (IsDoubleResultSubnormal(correct2p,
295                                                             f->double_ulps)
296                                     || IsDoubleResultSubnormal(correct2n,
297                                                                f->double_ulps))
298                                 {
299                                     fail = fail
300                                         && !(test == 0.0f && test2 == 0.0f);
301                                     if (!fail) err = err2 = 0.0f;
302                                 }
303                                 else
304                                 {
305                                     fail = fail
306                                         && !(test == 0.0f
307                                              && fabsf(err2) <= f->double_ulps);
308                                     if (!fail) err = 0.0f;
309                                 }
310                             }
311                             else if (IsDoubleResultSubnormal(correct2p,
312                                                              f->double_ulps)
313                                      || IsDoubleResultSubnormal(correct2n,
314                                                                 f->double_ulps))
315                             {
316                                 fail = fail
317                                     && !(test2 == 0.0f
318                                          && (fabsf(err) <= f->double_ulps));
319                                 if (!fail) err2 = 0.0f;
320                             }
321                         }
322                     }
323                     if (fabsf(err) > maxError0)
324                     {
325                         maxError0 = fabsf(err);
326                         maxErrorVal0 = s[j];
327                     }
328                     if (fabsf(err2) > maxError1)
329                     {
330                         maxError1 = fabsf(err2);
331                         maxErrorVal1 = s[j];
332                     }
333                     if (fail)
334                     {
335                         vlog_error(
336                             "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: "
337                             "*{%.13la, %.13la} vs. {%.13la, %.13la}\n",
338                             f->name, sizeNames[k], err, err2,
339                             ((double *)gIn)[j], ((double *)gOut_Ref)[j],
340                             ((double *)gOut_Ref2)[j], test, test2);
341                         return -1;
342                     }
343                 }
344             }
345         }
346 
347         if (0 == (i & 0x0fffffff))
348         {
349             if (gVerboseBruteForce)
350             {
351                 vlog("base:%14" PRIu64 " step:%10" PRIu64
352                      "  bufferSize:%10d \n",
353                      i, step, BUFFER_SIZE);
354             }
355             else
356             {
357                 vlog(".");
358             }
359             fflush(stdout);
360         }
361     }
362 
363     if (!gSkipCorrectnessTesting)
364     {
365         if (gWimpyMode)
366             vlog("Wimp pass");
367         else
368             vlog("passed");
369 
370         vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
371              maxErrorVal1);
372     }
373 
374     vlog("\n");
375 
376     return CL_SUCCESS;
377 }
378