1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Tester Core
3 * ----------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Adjustable-precision floating point operations.
22 *//*--------------------------------------------------------------------*/
23
24 #include "tcuFloatFormat.hpp"
25
26 #include "deMath.h"
27 #include "deUniquePtr.hpp"
28
29 #include <sstream>
30 #include <iomanip>
31 #include <limits>
32
33 namespace tcu
34 {
35 namespace
36 {
37
chooseInterval(YesNoMaybe choice,const Interval & no,const Interval & yes)38 Interval chooseInterval(YesNoMaybe choice, const Interval &no, const Interval &yes)
39 {
40 switch (choice)
41 {
42 case NO:
43 return no;
44 case YES:
45 return yes;
46 case MAYBE:
47 return no | yes;
48 default:
49 DE_FATAL("Impossible case");
50 }
51
52 return Interval();
53 }
54
computeMaxValue(int maxExp,int fractionBits)55 double computeMaxValue(int maxExp, int fractionBits)
56 {
57 return (deLdExp(1.0, maxExp) + deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
58 }
59
60 } // namespace
61
FloatFormat(int minExp,int maxExp,int fractionBits,bool exactPrecision,YesNoMaybe hasSubnormal_,YesNoMaybe hasInf_,YesNoMaybe hasNaN_)62 FloatFormat::FloatFormat(int minExp, int maxExp, int fractionBits, bool exactPrecision, YesNoMaybe hasSubnormal_,
63 YesNoMaybe hasInf_, YesNoMaybe hasNaN_)
64 : m_minExp(minExp)
65 , m_maxExp(maxExp)
66 , m_fractionBits(fractionBits)
67 , m_hasSubnormal(hasSubnormal_)
68 , m_hasInf(hasInf_)
69 , m_hasNaN(hasNaN_)
70 , m_exactPrecision(exactPrecision)
71 , m_maxValue(computeMaxValue(maxExp, fractionBits))
72 {
73 DE_ASSERT(minExp <= maxExp);
74 }
75
76 /*-------------------------------------------------------------------------
77 * On the definition of ULP
78 *
79 * The GLSL spec does not define ULP. However, it refers to IEEE 754, which
80 * (reportedly) uses Harrison's definition:
81 *
82 * ULP(x) is the distance between the closest floating point numbers
83 * a and be such that a <= x <= b and a != b
84 *
85 * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the
86 * distance to the next lowest float, not next highest.
87 *
88 * Furthermore, it is assumed that ULP is calculated relative to the exact
89 * value, not the approximation. This is because otherwise a less accurate
90 * approximation could be closer in ULPs, because its ULPs are bigger.
91 *
92 * For details, see "On the definition of ulp(x)" by Jean-Michel Muller
93 *
94 *-----------------------------------------------------------------------*/
95
ulp(double x,double count) const96 double FloatFormat::ulp(double x, double count) const
97 {
98 int exp = 0;
99 const double frac = deFractExp(deAbs(x), &exp);
100
101 if (deIsNaN(frac))
102 return TCU_NAN;
103 else if (deIsInf(frac))
104 return deLdExp(1.0, m_maxExp - m_fractionBits);
105 else if (frac == 1.0)
106 {
107 // Harrison's ULP: choose distance to closest (i.e. next lower) at binade
108 // boundary.
109 --exp;
110 }
111 else if (frac == 0.0)
112 exp = m_minExp;
113
114 // ULP cannot be lower than the smallest quantum.
115 exp = de::max(exp, m_minExp);
116
117 {
118 const double oneULP = deLdExp(1.0, exp - m_fractionBits);
119 ScopedRoundingMode ctx(DE_ROUNDINGMODE_TO_POSITIVE_INF);
120
121 return oneULP * count;
122 }
123 }
124
125 //! Return the difference between the given nominal exponent and
126 //! the exponent of the lowest significand bit of the
127 //! representation of a number with this format.
128 //! For normal numbers this is the number of significand bits, but
129 //! for subnormals it is less and for values of exp where 2^exp is too
130 //! small to represent it is <0
exponentShift(int exp) const131 int FloatFormat::exponentShift(int exp) const
132 {
133 return m_fractionBits - de::max(m_minExp - exp, 0);
134 }
135
136 //! Return the number closest to `d` that is exactly representable with the
137 //! significand bits and minimum exponent of the floatformat. Round up if
138 //! `upward` is true, otherwise down.
round(double d,bool upward) const139 double FloatFormat::round(double d, bool upward) const
140 {
141 int exp = 0;
142 const double frac = deFractExp(d, &exp);
143 const int shift = exponentShift(exp);
144 const double shiftFrac = deLdExp(frac, shift);
145 const double roundFrac = upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
146
147 return deLdExp(roundFrac, exp - shift);
148 }
149
150 //! Return the range of numbers that `d` might be converted to in the
151 //! floatformat, given its limitations with infinities, subnormals and maximum
152 //! exponent.
clampValue(double d) const153 Interval FloatFormat::clampValue(double d) const
154 {
155 const double rSign = deSign(d);
156 int rExp = 0;
157
158 DE_ASSERT(!deIsNaN(d));
159
160 deFractExp(d, &rExp);
161 if (rExp < m_minExp)
162 return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
163 else if (deIsInf(d) || rExp > m_maxExp)
164 return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
165
166 return Interval(d);
167 }
168
169 //! Return the range of numbers that might be used with this format to
170 //! represent a number within `x`.
convert(const Interval & x) const171 Interval FloatFormat::convert(const Interval &x) const
172 {
173 Interval ret;
174 Interval tmp = x;
175
176 if (x.hasNaN())
177 {
178 // If NaN might be supported, NaN is a legal return value
179 if (m_hasNaN != NO)
180 ret |= TCU_NAN;
181
182 // If NaN might not be supported, any (non-NaN) value is legal,
183 // _subject_ to clamping. Hence we modify tmp, not ret.
184 if (m_hasNaN != YES)
185 tmp = Interval::unbounded();
186 }
187
188 // Round both bounds _inwards_ to closest representable values.
189 if (!tmp.empty())
190 ret |= clampValue(round(tmp.lo(), false)) | clampValue(round(tmp.hi(), true));
191
192 // If this format's precision is not exact, the (possibly out-of-bounds)
193 // original value is also a possible result.
194 if (!m_exactPrecision)
195 ret |= x;
196
197 return ret;
198 }
199
roundOut(double d,bool upward,bool roundUnderOverflow) const200 double FloatFormat::roundOut(double d, bool upward, bool roundUnderOverflow) const
201 {
202 int exp = 0;
203 deFractExp(d, &exp);
204
205 if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
206 return deSign(d) * getMaxValue();
207 else
208 return round(d, upward);
209 }
210
211 //! Round output of an operation.
212 //! \param roundUnderOverflow Can +/-inf rounded to min/max representable;
213 //! should be false if any of operands was inf, true otherwise.
roundOut(const Interval & x,bool roundUnderOverflow) const214 Interval FloatFormat::roundOut(const Interval &x, bool roundUnderOverflow) const
215 {
216 Interval ret = x.nan();
217
218 if (!x.empty())
219 ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow), roundOut(x.hi(), true, roundUnderOverflow));
220
221 return ret;
222 }
223
floatToHex(double x) const224 std::string FloatFormat::floatToHex(double x) const
225 {
226 if (deIsNaN(x))
227 return "NaN";
228 else if (deIsInf(x))
229 return (x < 0.0 ? "-" : "+") + std::string("inf");
230 else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
231 return "0.0";
232
233 int exp = 0;
234 const double frac = deFractExp(deAbs(x), &exp);
235 const int shift = exponentShift(exp);
236 const uint64_t bits = uint64_t(deLdExp(frac, shift));
237 const uint64_t whole = bits >> m_fractionBits;
238 const uint64_t fraction = bits & ((uint64_t(1) << m_fractionBits) - 1);
239 const int exponent = exp + m_fractionBits - shift;
240 const int numDigits = (m_fractionBits + 3) / 4;
241 const uint64_t aligned = fraction << (numDigits * 4 - m_fractionBits);
242 std::ostringstream oss;
243
244 oss << (x < 0 ? "-" : "") << "0x" << whole << "." << std::hex << std::setw(numDigits) << std::setfill('0')
245 << aligned << "p" << std::dec << std::setw(0) << exponent;
246
247 return oss.str();
248 }
249
intervalToHex(const Interval & interval) const250 std::string FloatFormat::intervalToHex(const Interval &interval) const
251 {
252 if (interval.empty())
253 return interval.hasNaN() ? "{ NaN }" : "{}";
254
255 else if (interval.lo() == interval.hi())
256 return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") + floatToHex(interval.lo()) + " }");
257 else if (interval == Interval::unbounded(true))
258 return "<any>";
259
260 return (std::string(interval.hasNaN() ? "{ NaN } | " : "") + "[" + floatToHex(interval.lo()) + ", " +
261 floatToHex(interval.hi()) + "]");
262 }
263
264 template <typename T>
nativeFormat(void)265 static FloatFormat nativeFormat(void)
266 {
267 typedef std::numeric_limits<T> Limits;
268
269 DE_ASSERT(Limits::radix == 2);
270
271 return FloatFormat(Limits::min_exponent - 1, // These have a built-in offset of one
272 Limits::max_exponent - 1,
273 Limits::digits - 1, // don't count the hidden bit
274 Limits::has_denorm != std::denorm_absent, Limits::has_infinity ? YES : NO,
275 Limits::has_quiet_NaN ? YES : NO,
276 ((Limits::has_denorm == std::denorm_present) ? YES :
277 (Limits::has_denorm == std::denorm_absent) ? NO :
278 MAYBE));
279 }
280
nativeFloat(void)281 FloatFormat FloatFormat::nativeFloat(void)
282 {
283 return nativeFormat<float>();
284 }
285
nativeDouble(void)286 FloatFormat FloatFormat::nativeDouble(void)
287 {
288 return nativeFormat<double>();
289 }
290
NormalizedFormat(int fractionBits)291 NormalizedFormat::NormalizedFormat(int fractionBits) : FloatFormat(0, 0, fractionBits, true, tcu::YES)
292 {
293 }
294
round(double d,bool upward) const295 double NormalizedFormat::round(double d, bool upward) const
296 {
297 const int fractionBits = getFractionBits();
298
299 if (fractionBits <= 0)
300 return d;
301
302 const int maxIntValue = (1 << fractionBits) - 1;
303 const double value = d * maxIntValue;
304 const double normValue = upward ? deCeil(value) : deFloor(value);
305 return normValue / maxIntValue;
306 }
307
ulp(double x,double count) const308 double NormalizedFormat::ulp(double x, double count) const
309 {
310 (void)x;
311
312 const int maxIntValue = (1 << getFractionBits()) - 1;
313 const double precision = 1.0 / maxIntValue;
314 return precision * count;
315 }
316
roundOut(double d,bool upward,bool roundUnderOverflow) const317 double NormalizedFormat::roundOut(double d, bool upward, bool roundUnderOverflow) const
318 {
319 if (roundUnderOverflow && deAbs(d) > 1.0 && (upward == (d < 0.0)))
320 return deSign(d);
321 else
322 return round(d, upward);
323 }
324
325 namespace
326 {
327
328 using de::MovePtr;
329 using de::UniquePtr;
330 using std::ostringstream;
331 using std::string;
332
333 class Test
334 {
335 protected:
Test(MovePtr<FloatFormat> fmt)336 Test(MovePtr<FloatFormat> fmt) : m_fmt(fmt)
337 {
338 }
p(int e) const339 double p(int e) const
340 {
341 return deLdExp(1.0, e);
342 }
343 void check(const string &expr, double result, double reference) const;
344 void testULP(double arg, double ref) const;
345 void testRound(double arg, double refDown, double refUp) const;
346
347 UniquePtr<FloatFormat> m_fmt;
348 };
349
check(const string & expr,double result,double reference) const350 void Test::check(const string &expr, double result, double reference) const
351 {
352 if (result != reference)
353 {
354 ostringstream oss;
355 oss << expr << " returned " << result << ", expected " << reference;
356 TCU_FAIL(oss.str().c_str());
357 }
358 }
359
testULP(double arg,double ref) const360 void Test::testULP(double arg, double ref) const
361 {
362 ostringstream oss;
363
364 oss << "ulp(" << arg << ")";
365 check(oss.str(), m_fmt->ulp(arg), ref);
366 }
367
testRound(double arg,double refDown,double refUp) const368 void Test::testRound(double arg, double refDown, double refUp) const
369 {
370 {
371 ostringstream oss;
372 oss << "round(" << arg << ", false)";
373 check(oss.str(), m_fmt->round(arg, false), refDown);
374 }
375 {
376 ostringstream oss;
377 oss << "round(" << arg << ", true)";
378 check(oss.str(), m_fmt->round(arg, true), refUp);
379 }
380 }
381
382 class TestBinary32 : public Test
383 {
384 public:
TestBinary32(void)385 TestBinary32(void) : Test(MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true)))
386 {
387 }
388
389 void runTest(void) const;
390 };
391
runTest(void) const392 void TestBinary32::runTest(void) const
393 {
394 testULP(p(0), p(-24));
395 testULP(p(0) + p(-23), p(-23));
396 testULP(p(-124), p(-148));
397 testULP(p(-125), p(-149));
398 testULP(p(-125) + p(-140), p(-148));
399 testULP(p(-126), p(-149));
400 testULP(p(-130), p(-149));
401
402 testRound(p(0) + p(-20) + p(-40), p(0) + p(-20), p(0) + p(-20) + p(-23));
403 testRound(p(-126) - p(-150), p(-126) - p(-149), p(-126));
404
405 TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
406 TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
407 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
408 TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
409 TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
410 }
411
412 } // namespace
413
FloatFormat_selfTest(void)414 void FloatFormat_selfTest(void)
415 {
416 TestBinary32 test32;
417 test32.runTest();
418 }
419
420 } // namespace tcu
421