1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
33 ParameterType::Double, ParameterType::Double,
34 vector_size_index);
35 };
36 return BuildKernels(info, job_id, generator);
37 }
38
39 } // anonymous namespace
40
// Brute-force conformance test for a double-precision builtin that produces
// two double results (value + secondary output via pointer, as computed by
// the long-double reference f->dfunc.f_fpf). Sweeps the full 32-bit pattern
// space (strided in wimpy mode), runs every vector-size variant of the
// kernel, and checks both outputs against the reference within
// f->double_ulps, applying the FTZ/relaxed retries of OpenCL spec
// sections 6.5.3.2-6.5.3.4.
// Returns CL_SUCCESS, a CL error code, or -1 on a ULP verification failure.
// NOTE(review): parameter d (MTdata) is unused in this body — presumably kept
// for signature parity with sibling TestFunc_* routines; confirm.
int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
{
    int error;
    Programs programs;
    const unsigned thread_id = 0; // Test is currently not multithreaded.
    KernelMatrix kernels;
    // Worst observed ULP error and the input that produced it, per output.
    float maxError0 = 0.0f;
    float maxError1 = 0.0f;
    int ftz = f->ftz || gForceFTZ;
    double maxErrorVal0 = 0.0f;
    double maxErrorVal1 = 0.0f;
    // Number of input values covered per outer-loop iteration.
    uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
    // Wimpy-mode stride so a sparse sweep still spans the whole 2^32 space.
    int scale =
        (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(cl_double)) + 1);

    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);

    // x87 targets: ensure long-double reference math runs at 64-bit precision.
    Force64BitFPUPrecision();

    // Init the kernels (one build job per vector size, run on the pool).
    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
                                relaxedMode };
    if ((error = ThreadPool_Do(BuildKernelFn,
                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
                               &build_info)))
        return error;

    // Walk all 2^32 input bit patterns, one buffer-full at a time.
    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array: each double is synthesized from a 32-bit pattern
        // (strided by `scale` in wimpy mode, dense otherwise).
        double *p = (double *)gIn;
        if (gWimpyMode)
        {
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j * scale);
        }
        else
        {
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
                p[j] = DoubleFromUInt32((uint32_t)i + j);
        }
        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }

        // Write garbage into output arrays so stale data can never be
        // mistaken for a correct kernel result.
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            if (gHostFill)
            {
                // Host-side fill + explicit upload of both output buffers.
                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
                                                  CL_FALSE, 0, BUFFER_SIZE,
                                                  gOut[j], 0, NULL, NULL)))
                {
                    vlog_error(
                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                        error, j);
                    return error;
                }

                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
                                                  CL_FALSE, 0, BUFFER_SIZE,
                                                  gOut2[j], 0, NULL, NULL)))
                {
                    vlog_error(
                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
                        error, j);
                    return error;
                }
            }
            else
            {
                // Device-side fill (avoids the host->device copies above).
                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
                                                 &pattern, sizeof(pattern), 0,
                                                 BUFFER_SIZE, 0, NULL, NULL)))
                {
                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
                               error);
                    return error;
                }

                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
                                                 &pattern, sizeof(pattern), 0,
                                                 BUFFER_SIZE, 0, NULL, NULL)))
                {
                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
                               error);
                    return error;
                }
            }
        }

        // Run the kernels: one launch per vector size; each work-item handles
        // one vector, so the global size shrinks as the vector widens.
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_double);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                return error;
            }
            if ((error =
                     clSetKernelArg(kernels[j][thread_id], 1,
                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
            {
                LogBuildError(programs[j]);
                return error;
            }
            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
                                        sizeof(gInBuffer), &gInBuffer)))
            {
                LogBuildError(programs[j]);
                return error;
            }

            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
                                                1, NULL, &localCount, NULL, 0,
                                                NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                return error;
            }
        }

        // Get that moving (kick off device work while the host computes the
        // reference below).
        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");

        // Calculate the correctly rounded reference result in long double,
        // then round both outputs to double for comparison.
        double *r = (double *)gOut_Ref;
        double *r2 = (double *)gOut_Ref2;
        double *s = (double *)gIn;
        for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
        {
            long double dd;
            r[j] = (double)f->dfunc.f_fpf(s[j], &dd);
            r2[j] = (double)dd;
        }

        // Read the data back (blocking reads double as synchronization with
        // the kernels enqueued above).
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                return error;
            }
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray2 failed %d\n", error);
                return error;
            }
        }

        if (gSkipCorrectnessTesting) break;

        // Verify data: bitwise compare against the reference first; only on
        // mismatch fall back to the (expensive) ULP computation and the
        // spec-permitted retries.
        uint64_t *t = (uint64_t *)gOut_Ref;
        uint64_t *t2 = (uint64_t *)gOut_Ref2;
        for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
        {
            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint64_t *q = (uint64_t *)(gOut[k]);
                uint64_t *q2 = (uint64_t *)(gOut2[k]);

                // If we aren't getting the correctly rounded result
                if (t[j] != q[j] || t2[j] != q2[j])
                {
                    double test = ((double *)q)[j];
                    double test2 = ((double *)q2)[j];
                    long double correct2;
                    long double correct = f->dfunc.f_fpf(s[j], &correct2);
                    float err = Bruteforce_Ulp_Error_Double(test, correct);
                    float err2 = Bruteforce_Ulp_Error_Double(test2, correct2);
                    // Both outputs must be within the ULP bound to pass.
                    int fail = !(fabsf(err) <= f->double_ulps
                                 && fabsf(err2) <= f->double_ulps);
                    if (ftz || relaxedMode)
                    {
                        // retry per section 6.5.3.2: subnormal results may be
                        // flushed to zero on FTZ devices.
                        if (IsDoubleResultSubnormal(correct, f->double_ulps))
                        {
                            if (IsDoubleResultSubnormal(correct2,
                                                        f->double_ulps))
                            {
                                // Both results subnormal: accept (0, 0).
                                fail = fail && !(test == 0.0f && test2 == 0.0f);
                                if (!fail)
                                {
                                    err = 0.0f;
                                    err2 = 0.0f;
                                }
                            }
                            else
                            {
                                // Only the first result subnormal: accept 0
                                // for it if the second is within bound.
                                fail = fail
                                    && !(test == 0.0f
                                         && fabsf(err2) <= f->double_ulps);
                                if (!fail) err = 0.0f;
                            }
                        }
                        else if (IsDoubleResultSubnormal(correct2,
                                                         f->double_ulps))
                        {
                            // Only the second result subnormal: mirror case.
                            fail = fail
                                && !(test2 == 0.0f
                                     && fabsf(err) <= f->double_ulps);
                            if (!fail) err2 = 0.0f;
                        }

                        // retry per section 6.5.3.3: subnormal inputs may be
                        // flushed, so also accept the results for +/-0.0.
                        if (IsDoubleSubnormal(s[j]))
                        {
                            long double correct2p, correct2n;
                            long double correctp =
                                f->dfunc.f_fpf(0.0, &correct2p);
                            long double correctn =
                                f->dfunc.f_fpf(-0.0, &correct2n);
                            float errp =
                                Bruteforce_Ulp_Error_Double(test, correctp);
                            // NOTE(review): err2p/err2n are computed from
                            // `test` (output 1), not `test2` (output 2) —
                            // looks like a copy-paste slip but matches the
                            // pattern here; confirm against sibling testers
                            // before changing.
                            float err2p =
                                Bruteforce_Ulp_Error_Double(test, correct2p);
                            float errn =
                                Bruteforce_Ulp_Error_Double(test, correctn);
                            float err2n =
                                Bruteforce_Ulp_Error_Double(test, correct2n);
                            // Still failing only if neither the +0 nor the -0
                            // reference pair is within bound.
                            fail = fail
                                && ((!(fabsf(errp) <= f->double_ulps))
                                    && (!(fabsf(err2p) <= f->double_ulps))
                                    && ((!(fabsf(errn) <= f->double_ulps))
                                        && (!(fabsf(err2n)
                                              <= f->double_ulps))));
                            // Keep the smallest error for reporting.
                            if (fabsf(errp) < fabsf(err)) err = errp;
                            if (fabsf(errn) < fabsf(err)) err = errn;
                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;

                            // retry per section 6.5.3.4: flushed input AND
                            // subnormal reference result -> accept zero.
                            if (IsDoubleResultSubnormal(correctp,
                                                        f->double_ulps)
                                || IsDoubleResultSubnormal(correctn,
                                                           f->double_ulps))
                            {
                                if (IsDoubleResultSubnormal(correct2p,
                                                            f->double_ulps)
                                    || IsDoubleResultSubnormal(correct2n,
                                                               f->double_ulps))
                                {
                                    fail = fail
                                        && !(test == 0.0f && test2 == 0.0f);
                                    if (!fail) err = err2 = 0.0f;
                                }
                                else
                                {
                                    fail = fail
                                        && !(test == 0.0f
                                             && fabsf(err2) <= f->double_ulps);
                                    if (!fail) err = 0.0f;
                                }
                            }
                            else if (IsDoubleResultSubnormal(correct2p,
                                                             f->double_ulps)
                                     || IsDoubleResultSubnormal(correct2n,
                                                                f->double_ulps))
                            {
                                fail = fail
                                    && !(test2 == 0.0f
                                         && (fabsf(err) <= f->double_ulps));
                                if (!fail) err2 = 0.0f;
                            }
                        }
                    }
                    // Track worst-case ULP error per output for the summary.
                    if (fabsf(err) > maxError0)
                    {
                        maxError0 = fabsf(err);
                        maxErrorVal0 = s[j];
                    }
                    if (fabsf(err2) > maxError1)
                    {
                        maxError1 = fabsf(err2);
                        maxErrorVal1 = s[j];
                    }
                    if (fail)
                    {
                        vlog_error(
                            "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: "
                            "*{%.13la, %.13la} vs. {%.13la, %.13la}\n",
                            f->name, sizeNames[k], err, err2,
                            ((double *)gIn)[j], ((double *)gOut_Ref)[j],
                            ((double *)gOut_Ref2)[j], test, test2);
                        return -1;
                    }
                }
            }
        }

        // Periodic progress output (every 2^28 input patterns).
        if (0 == (i & 0x0fffffff))
        {
            if (gVerboseBruteForce)
            {
                vlog("base:%14" PRIu64 " step:%10" PRIu64
                     " bufferSize:%10d \n",
                     i, step, BUFFER_SIZE);
            }
            else
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }

    // Final summary: worst ULP errors and the inputs that produced them.
    if (!gSkipCorrectnessTesting)
    {
        if (gWimpyMode)
            vlog("Wimp pass");
        else
            vlog("passed");

        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
             maxErrorVal1);
    }

    vlog("\n");

    return CL_SUCCESS;
}
378