1 // Copyright 2019 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "System/CPUID.hpp"
16 #include "System/Half.hpp"
17 #include "System/Math.hpp"
18
19 #include <gmock/gmock.h>
20 #include <gtest/gtest.h>
21
22 #include <cstdlib>
23 #include <cmath>
24
25 using std::isnan;
26 using std::isinf;
27 using std::signbit;
28
29 using namespace sw;
30
31 // Implementation of frexp() which satisfies C++ <cmath> requirements.
fast_frexp(float val,int * exp)32 float fast_frexp(float val, int *exp)
33 {
34 int isNotZero = (val != 0.0f) ? 0xFFFFFFFF : 0x00000000;
35 int v = bit_cast<int>(val);
36 int isInfOrNaN = (v & 0x7F800000) == 0x7F800000 ? 0xFFFFFFFF : 0x00000000;
37
38 // When val is a subnormal value we can't directly use its mantissa to construct the significand in
39 // the range [0.5, 1.0). We need to multiply it by a factor that makes it normalized. For large
40 // values the factor must avoid overflow to inifity.
41 int factor = ((127 + 23) << 23) - (v & 0x3F800000);
42 int nval = bit_cast<int>(val * bit_cast<float>(factor));
43
44 // Extract the exponent of the normalized value and subtract the exponent of the normalizing factor.
45 int exponent = ((((nval & 0x7F800000) - factor) >> 23) + 1) & isNotZero;
46
47 // Substitute the exponent of 0.5f (if not zero) to obtain the significand.
48 float significand = bit_cast<float>((nval & 0x807FFFFF) | (0x3F000000 & isNotZero) | (0x7F800000 & isInfOrNaN));
49
50 *exp = exponent;
51 return significand;
52 }
53
// Checks fast_frexp() against std::frexp() for ordinary and special inputs, with
// denormals-are-zero / flush-to-zero both disabled and enabled.
TEST(MathTest, Frexp)
{
	for(bool flush : { false, true })
	{
		CPUID::setDenormalsAreZero(flush);
		CPUID::setFlushToZero(flush);

		std::vector<float> a = {
			2.3f,
			0.1f,
			0.7f,
			1.7f,
			0.0f,
			-2.3f,
			-0.1f,
			-0.7f,
			-1.7f,
			-0.0f,
			100000000.0f,
			-100000000.0f,
			0.000000001f,
			-0.000000001f,
			FLT_MIN,
			-FLT_MIN,
			FLT_MAX,
			-FLT_MAX,
			FLT_TRUE_MIN,
			-FLT_TRUE_MIN,
			INFINITY,
			-INFINITY,
			NAN,
			bit_cast<float>(0x007FFFFF),  // Largest subnormal
			bit_cast<float>(0x807FFFFF),
			bit_cast<float>(0x00000001),  // Smallest subnormal
			bit_cast<float>(0x80000001),
		};

		for(float f : a)
		{
			// Sentinel value, so a function that never writes the exponent is detected.
			int exp = -1000;
			float sig = fast_frexp(f, &exp);

			if(f == 0.0f)  // Could be subnormal if `flush` is true
			{
				// We don't rely on std::frexp here to produce a reference result because it may
				// return non-zero significands and exponents for subnormal arguments, while our
				// implementation is meant to respect denormals-are-zero / flush-to-zero.

				ASSERT_EQ(sig, 0.0f) << "Argument: " << std::hexfloat << f;
				ASSERT_TRUE(signbit(sig) == signbit(f)) << "Argument: " << std::hexfloat << f;
				ASSERT_EQ(exp, 0) << "Argument: " << std::hexfloat << f;
			}
			else
			{
				int ref_exp = -1000;
				float ref_sig = std::frexp(f, &ref_exp);

				if(!isnan(f))
				{
					ASSERT_EQ(sig, ref_sig) << "Argument: " << std::hexfloat << f;
				}
				else
				{
					// NaN never compares equal, so only check that NaN is propagated.
					ASSERT_TRUE(isnan(sig)) << "Significand: " << std::hexfloat << sig;
				}

				if(!isinf(f) && !isnan(f))  // If the argument is NaN or Inf the exponent is unspecified.
				{
					ASSERT_EQ(exp, ref_exp) << "Argument: " << std::hexfloat << f;
				}
			}
		}
	}

	// The last iteration leaves flushing enabled. Switch it off again, like the
	// other tests in this file do, so the mode doesn't leak into later tests.
	CPUID::setDenormalsAreZero(false);
	CPUID::setFlushToZero(false);
}
128
129 // Returns the whole-number ULP error of `a` relative to `x`.
130 // Use the doouble-precision version below. This just illustrates the principle.
ULP_32(float x,float a)131 [[deprecated]] float ULP_32(float x, float a)
132 {
133 // Flip the last mantissa bit to compute the 'unit in the last place' error.
134 float x1 = bit_cast<float>(bit_cast<uint32_t>(x) ^ 0x00000001);
135 float ulp = abs(x1 - x);
136
137 return abs(a - x) / ulp;
138 }
139
ULP_32(double x,double a)140 double ULP_32(double x, double a)
141 {
142 // binary64 has 52 mantissa bits, while binary32 has 23, so the ULP for the latter is 29 bits shifted.
143 double x1 = bit_cast<double>(bit_cast<uint64_t>(x) ^ 0x0000000020000000ull);
144 double ulp = abs(x1 - x);
145
146 return abs(a - x) / ulp;
147 }
148
ULP_16(float x,float a)149 float ULP_16(float x, float a)
150 {
151 // binary32 has 23 mantissa bits, while binary16 has 10, so the ULP for the latter is 13 bits shifted.
152 double x1 = bit_cast<float>(bit_cast<uint32_t>(x) ^ 0x00002000);
153 float ulp = abs(x1 - x);
154
155 return abs(a - x) / ulp;
156 }
157
// Degree-2 correction polynomial used by Log2Relaxed(). Its argument is the
// unnormalized mantissa, in the range [0, 2^23].
// lolremez --float -d 2 -r "0:2^23" "(log2(x/2^23+1)-x/2^23)/x" "1/x"
// ULP-16: 0.797363281, abs: 0.0991751999
float f(float x)
{
	constexpr float c2 = 2.8017103e-22f;
	constexpr float c1 = -8.373131e-15f;
	constexpr float c0 = 5.0615534e-8f;

	// Horner's scheme.
	return (c2 * x + c1) * x + c0;
}
166
Log2Relaxed(float x)167 float Log2Relaxed(float x)
168 {
169 // Reinterpretation as an integer provides a piecewise linear
170 // approximation of log2(). Scale to the radix and subtract exponent bias.
171 int im = bit_cast<int>(x);
172 float y = (float)im * (1.0f / (1 << 23)) - 127.0f;
173
174 // Handle log2(inf) = inf.
175 if(im == 0x7F800000) y = INFINITY;
176
177 float m = (float)(im & 0x007FFFFF); // Unnormalized mantissa of x.
178
179 // Add a polynomial approximation of log2(m+1)-m to the result's mantissa.
180 return f(m) * m + y;
181 }
182
// Walks every float from 0 to +infinity (with denormals flushed) and compares
// Log2Relaxed() against a double-precision log2() reference.
TEST(MathTest, Log2RelaxedExhaustive)
{
	CPUID::setDenormalsAreZero(true);
	CPUID::setFlushToZero(true);

	// Worst case of the ULP-based check.
	float worst_margin = 0;
	float worst_ulp = 0;
	float worst_x = 0;
	float worst_val = 0;
	float worst_ref = 0;

	// Worst case of the absolute-error check, tracked separately so that a
	// failure of that check reports the sample which actually caused it.
	float worst_abs = 0;
	float worst_abs_x = 0;
	float worst_abs_val = 0;
	float worst_abs_ref = 0;

	for(float x = 0.0f; x <= INFINITY; x = inc(x))
	{
		float val = Log2Relaxed(x);

		double ref = log2((double)x);

		// NOTE(review): ref is -inf for x == 0 and +inf for x == INFINITY, which
		// are outside the range of int - confirm the intended handling of these.
		if(ref == (int)ref)
		{
			// Results which are whole numbers must be exact.
			ASSERT_EQ(val, ref);
		}
		else if(x >= 0.5f && x <= 2.0f)
		{
			const float tolerance = pow(2.0f, -7.0f);  // Absolute

			float margin = std::abs(val - ref) / tolerance;

			if(margin > worst_abs)
			{
				worst_abs = margin;
				worst_abs_x = x;
				worst_abs_val = val;
				worst_abs_ref = ref;
			}
		}
		else
		{
			const float tolerance = 3;  // ULP

			float ulp = (float)ULP_16(ref, (double)val);
			float margin = ulp / tolerance;

			if(margin > worst_margin)
			{
				worst_margin = margin;
				worst_ulp = ulp;
				worst_x = x;
				worst_val = val;
				worst_ref = ref;
			}
		}
	}

	ASSERT_TRUE(worst_margin < 1.0f) << " worst_x " << worst_x << " worst_val " << worst_val << " worst_ref " << worst_ref << " worst_ulp " << worst_ulp;
	ASSERT_TRUE(worst_abs <= 1.0f) << " worst_x " << worst_abs_x << " worst_val " << worst_abs_val << " worst_ref " << worst_abs_ref << " worst_abs_margin " << worst_abs;

	CPUID::setDenormalsAreZero(false);
	CPUID::setFlushToZero(false);
}
241
// Degree-2 correction polynomial used by Exp2Relaxed(). Its argument is the
// fractional part of the exponent.
// lolremez --float -d 2 -r "0:1" "(2^x-x-1)/x" "1/x"
// ULP-16: 0.130859017
float Pr(float x)
{
	constexpr float c2 = 7.8145574e-2f;
	constexpr float c1 = 2.2617357e-1f;
	constexpr float c0 = -3.0444314e-1f;

	// Horner's scheme.
	return (c2 * x + c1) * x + c0;
}
250
Exp2Relaxed(float x)251 float Exp2Relaxed(float x)
252 {
253 x = min(x, 128.0f);
254 x = max(x, bit_cast<float>(int(0xC2FDFFFF))); // -126.999992
255
256 // 2^f - f - 1 as P(f) * f
257 // This is a correction term to be added to 1+x to obtain 2^x.
258 float f = x - floor(x);
259 float y = Pr(f) * f + x;
260
261 // bit_cast<float>(int(x * 2^23)) is a piecewise linear approximation of 2^(x-127).
262 // See "Fast Exponential Computation on SIMD Architectures" by Malossi et al.
263 return bit_cast<float>(int((1 << 23) * y + (127 << 23)));
264 }
265
// Walks every float in [-10, 10] (with denormals flushed) and compares
// Exp2Relaxed() against a double-precision exp2() reference.
TEST(MathTest, Exp2RelaxedExhaustive)
{
	CPUID::setDenormalsAreZero(true);
	CPUID::setFlushToZero(true);

	// Sample with the largest error relative to its tolerance.
	float worst_margin = 0;
	float worst_ulp = 0;
	float worst_x = 0;
	float worst_val = 0;
	float worst_ref = 0;

	for(float x = -10; x <= 10; x = inc(x))
	{
		const float val = Exp2Relaxed(x);
		const double ref = exp2((double)x);

		// For whole-number arguments the result must match the reference exactly.
		if(x == (int)x)
		{
			ASSERT_EQ(val, ref);
		}

		// The permitted binary16 ULP error grows with the argument's magnitude.
		const float allowed_ulp = (1 + 2 * abs(x));
		const float error_ulp = ULP_16((float)ref, val);
		const float error_ratio = error_ulp / allowed_ulp;

		if(!(error_ratio > worst_margin))
		{
			continue;
		}

		worst_margin = error_ratio;
		worst_ulp = error_ulp;
		worst_x = x;
		worst_val = val;
		worst_ref = ref;
	}

	ASSERT_TRUE(worst_margin <= 1.0f) << " worst_x " << worst_x << " worst_val " << worst_val << " worst_ref " << worst_ref << " worst_ulp " << worst_ulp;

	CPUID::setDenormalsAreZero(false);
	CPUID::setFlushToZero(false);
}
307
// Degree-7 correction polynomial used by Log2(). Its argument is the normalized
// mantissa.
// lolremez --float -d 7 -r "0:1" "(log2(x+1)-x)/x" "1/x"
// ULP-32: 1.69571960, abs: 0.360798746
float Pl(float x)
{
	// Coefficients, highest order first.
	constexpr float coefficients[] = {
		-9.3091638e-3f,
		5.2059003e-2f,
		-1.3752135e-1f,
		2.4186478e-1f,
		-3.4730109e-1f,
		4.786837e-1f,
		-7.2116581e-1f,
		4.4268988e-1f,
	};
	constexpr int count = sizeof(coefficients) / sizeof(coefficients[0]);

	// Horner's scheme.
	float u = coefficients[0];
	for(int i = 1; i < count; i++)
	{
		u = u * x + coefficients[i];
	}

	return u;
}
321
Log2(float x)322 float Log2(float x)
323 {
324 // Reinterpretation as an integer provides a piecewise linear
325 // approximation of log2(). Scale to the radix and subtract exponent bias.
326 int im = bit_cast<int>(x);
327 float y = (float)(im - (127 << 23)) * (1.0f / (1 << 23));
328
329 // Handle log2(inf) = inf.
330 if(im == 0x7F800000) y = INFINITY;
331
332 float m = (float)(im & 0x007FFFFF) * (1.0f / (1 << 23)); // Normalized mantissa of x.
333
334 // Add a polynomial approximation of log2(m+1)-m to the result's mantissa.
335 return Pl(m) * m + y;
336 }
337
// Walks every float from 0 to +infinity (with denormals flushed) and compares
// Log2() against a double-precision log2() reference.
TEST(MathTest, Log2Exhaustive)
{
	CPUID::setDenormalsAreZero(true);
	CPUID::setFlushToZero(true);

	// Worst case of the ULP-based check.
	float worst_margin = 0;
	float worst_ulp = 0;
	float worst_x = 0;
	float worst_val = 0;
	float worst_ref = 0;

	// Worst case of the absolute-error check, tracked separately so that a
	// failure of that check reports the sample which actually caused it.
	float worst_abs = 0;
	float worst_abs_x = 0;
	float worst_abs_val = 0;
	float worst_abs_ref = 0;

	for(float x = 0.0f; x <= INFINITY; x = inc(x))
	{
		float val = Log2(x);

		double ref = log2((double)x);

		// NOTE(review): ref is -inf for x == 0 and +inf for x == INFINITY, which
		// are outside the range of int - confirm the intended handling of these.
		if(ref == (int)ref)
		{
			// Results which are whole numbers must be exact.
			ASSERT_EQ(val, ref);
		}
		else if(x >= 0.5f && x <= 2.0f)
		{
			const float tolerance = pow(2.0f, -21.0f);  // Absolute

			float margin = std::abs(val - ref) / tolerance;

			if(margin > worst_abs)
			{
				worst_abs = margin;
				worst_abs_x = x;
				worst_abs_val = val;
				worst_abs_ref = ref;
			}
		}
		else
		{
			const float tolerance = 3;  // ULP

			float ulp = (float)ULP_32(ref, (double)val);
			float margin = ulp / tolerance;

			if(margin > worst_margin)
			{
				worst_margin = margin;
				worst_ulp = ulp;
				worst_x = x;
				worst_val = val;
				worst_ref = ref;
			}
		}
	}

	ASSERT_TRUE(worst_margin < 1.0f) << " worst_x " << worst_x << " worst_val " << worst_val << " worst_ref " << worst_ref << " worst_ulp " << worst_ulp;
	ASSERT_TRUE(worst_abs <= 1.0f) << " worst_x " << worst_abs_x << " worst_val " << worst_abs_val << " worst_ref " << worst_abs_ref << " worst_abs_margin " << worst_abs;

	CPUID::setDenormalsAreZero(false);
	CPUID::setFlushToZero(false);
}
396
// Degree-4 correction polynomial used by Exp2(). Its argument is the fractional
// part of the exponent.
// lolremez --float -d 4 -r "0:1" "(2^x-x-1)/x" "1/x"
// ULP_32: 2.14694786, Vulkan margin: 0.686957061
float P(float x)
{
	// Coefficients, highest order first.
	constexpr float coefficients[] = {
		1.8852974e-3f,
		8.9733787e-3f,
		5.5835927e-2f,
		2.4015281e-1f,
		-3.0684753e-1f,
	};
	constexpr int count = sizeof(coefficients) / sizeof(coefficients[0]);

	// Horner's scheme.
	float u = coefficients[0];
	for(int i = 1; i < count; i++)
	{
		u = u * x + coefficients[i];
	}

	return u;
}
407
Exp2(float x)408 float Exp2(float x)
409 {
410 x = min(x, 128.0f);
411 x = max(x, bit_cast<float>(0xC2FDFFFF)); // -126.999992
412
413 // 2^f - f - 1 as P(f) * f
414 // This is a correction term to be added to 1+x to obtain 2^x.
415 float f = x - floor(x);
416 float y = P(f) * f + x;
417
418 // bit_cast<float>(int(x * 2^23)) is a piecewise linear approximation of 2^(x-127).
419 // See "Fast Exponential Computation on SIMD Architectures" by Malossi et al.
420 return bit_cast<float>(int(y * (1 << 23)) + (127 << 23));
421 }
422
// Walks every float in [-10, 10] (with denormals flushed) and compares Exp2()
// against a double-precision exp2() reference.
TEST(MathTest, Exp2Exhaustive)
{
	CPUID::setDenormalsAreZero(true);
	CPUID::setFlushToZero(true);

	// Sample with the largest error relative to its tolerance.
	float worst_margin = 0;
	float worst_ulp = 0;
	float worst_x = 0;
	float worst_val = 0;
	float worst_ref = 0;

	for(float x = -10; x <= 10; x = inc(x))
	{
		const float val = Exp2(x);
		const double ref = exp2((double)x);

		// For whole-number arguments the result must match the reference exactly.
		if(x == (int)x)
		{
			ASSERT_EQ(val, ref);
		}

		// The permitted binary32 ULP error grows with the argument's magnitude.
		const float allowed_ulp = (3 + 2 * abs(x));
		const float error_ulp = (float)ULP_32(ref, (double)val);
		const float error_ratio = error_ulp / allowed_ulp;

		if(!(error_ratio > worst_margin))
		{
			continue;
		}

		worst_margin = error_ratio;
		worst_ulp = error_ulp;
		worst_x = x;
		worst_val = val;
		worst_ref = ref;
	}

	ASSERT_TRUE(worst_margin <= 1.0f) << " worst_x " << worst_x << " worst_val " << worst_val << " worst_ref " << worst_ref << " worst_ulp " << worst_ulp;

	CPUID::setDenormalsAreZero(false);
	CPUID::setFlushToZero(false);
}
464
// Polynomial approximation of order 5 for sin(x * 2 * pi) in the range [-1/4, 1/4].
// The argument is an angle expressed in turns.
static float sin5(float x)
{
	// The polynomial is odd: A * x^5 + B * x^3 + C * x
	// It is exact at x = 0, 1/12, 1/6, 1/4, and their negatives, which correspond
	// to x * 2 * pi = 0, pi/6, pi/3, pi/2.
	const double root3 = sqrt(3.0);

	const float A = (36288 - 20736 * root3) / 5;
	const float B = 288 * root3 - 540;
	const float C = (47 - 9 * root3) / 5;

	// Evaluate in terms of x^2 and multiply by x once at the end.
	const float xx = x * x;
	const float even = (A * xx + B) * xx + C;

	return even * x;
}
478
// Compares the sin5()-based sine against sinf() for every float in [-pi, pi].
TEST(MathTest, SinExhaustive)
{
	const float tolerance = powf(2.0f, -12.0f);  // Vulkan requires absolute error <= 2^-11 inside the range [-pi, pi]
	const float pi = 3.1415926535f;

	float x = -pi;
	while(x <= pi)
	{
		// Range reduction and mirroring: convert the angle to turns with a
		// quarter-turn offset, then fold it into the [-1/4, 1/4] range of sin5().
		const float turns = 0.25f - x * (0.5f / pi);
		const float z = 0.25f - fabs(turns - round(turns));

		float val = sin5(z);

		ASSERT_NEAR(val, sinf(x), tolerance);

		x = inc(x);
	}
}
495
// Compares the sin5()-based cosine against cosf() for every float in [-pi, pi].
TEST(MathTest, CosExhaustive)
{
	const float tolerance = powf(2.0f, -12.0f);  // Vulkan requires absolute error <= 2^-11 inside the range [-pi, pi]
	const float pi = 3.1415926535f;

	float x = -pi;
	while(x <= pi)
	{
		// Phase shift, range reduction, and mirroring: convert the angle to
		// turns, then fold it into the [-1/4, 1/4] range of sin5().
		const float turns = x * (0.5f / pi);
		const float z = 0.25f - fabs(turns - round(turns));

		float val = sin5(z);

		ASSERT_NEAR(val, cosf(x), tolerance);

		x = inc(x);
	}
}
512
// Checks the float32 to unsigned 11-bit and 10-bit float conversions for pairs of
// adjacent inputs where the expected result changes from 0 to 1.
TEST(MathTest, UnsignedFloat11_10)
{
	// Test the largest value which causes underflow to 0, and the smallest value
	// which produces a denormalized result.

	// 11-bit format: the two inputs are adjacent float32 bit patterns.
	EXPECT_EQ(R11G11B10F::float32ToFloat11(bit_cast<float>(0x3500007F)), 0x0000);
	EXPECT_EQ(R11G11B10F::float32ToFloat11(bit_cast<float>(0x35000080)), 0x0001);

	// 10-bit format: the two inputs are adjacent float32 bit patterns.
	EXPECT_EQ(R11G11B10F::float32ToFloat10(bit_cast<float>(0x3580003F)), 0x0000);
	EXPECT_EQ(R11G11B10F::float32ToFloat10(bit_cast<float>(0x35800040)), 0x0001);
}
524
// Clamps to the [0, hi] range. NaN input produces 0, hi must be non-NaN.
float clamp0hi(float x, float hi)
{
	// The comparison is false for NaN as well as for zero and negative values,
	// all of which produce 0.
	const bool isPositive = x > 0;

	// A positive x is never NaN, so std::min() is safe for non-NaN hi.
	return isPositive ? std::min(x, hi) : 0.0f;
}
537
RGB9E5_reference(float r,float g,float b)538 unsigned int RGB9E5_reference(float r, float g, float b)
539 {
540 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
541
542 // B is the exponent bias (15)
543 constexpr int g_sharedexp_bias = 15;
544
545 // N is the number of mantissa bits per component (9)
546 constexpr int g_sharedexp_mantissabits = 9;
547
548 // Emax is the maximum allowed biased exponent value (31)
549 constexpr int g_sharedexp_maxexponent = 31;
550
551 constexpr float g_sharedexp_max =
552 ((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
553 static_cast<float>(1 << g_sharedexp_mantissabits)) *
554 static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));
555
556 const float red_c = clamp0hi(r, g_sharedexp_max);
557 const float green_c = clamp0hi(g, g_sharedexp_max);
558 const float blue_c = clamp0hi(b, g_sharedexp_max);
559
560 const float max_c = fmax(fmax(red_c, green_c), blue_c);
561 const float exp_p = fmax(-g_sharedexp_bias - 1, floor(log2(max_c))) + 1 + g_sharedexp_bias;
562 const int max_s = static_cast<int>(floor((max_c / exp2(exp_p - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
563 const int exp_s = static_cast<int>((max_s < exp2(g_sharedexp_mantissabits)) ? exp_p : exp_p + 1);
564
565 unsigned int R = static_cast<unsigned int>(floor((red_c / exp2(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
566 unsigned int G = static_cast<unsigned int>(floor((green_c / exp2(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
567 unsigned int B = static_cast<unsigned int>(floor((blue_c / exp2(exp_s - g_sharedexp_bias - g_sharedexp_mantissabits)) + 0.5f));
568 unsigned int E = exp_s;
569
570 return (E << 27) | (B << 18) | (G << 9) | R;
571 }
572
// Compares RGB9E5() against RGB9E5_reference() for every 0x400th 32-bit float
// bit pattern in the red channel, with green and blue set to zero.
TEST(MathTest, SharedExponentSparse)
{
	// The counter is 64-bit only so that the loop can terminate after covering
	// the whole 32-bit range.
	for(uint64_t i = 0; i < 0x0000000100000000; i += 0x400)
	{
		// Narrow to 32 bits first so the bit_cast is between equally sized types,
		// instead of depending on which half of the 64-bit counter gets read.
		float f = bit_cast<float>(static_cast<uint32_t>(i));

		unsigned int ref = RGB9E5_reference(f, 0.0f, 0.0f);
		unsigned int val = RGB9E5(f, 0.0f, 0.0f);

		EXPECT_EQ(ref, val);
	}
}
585
// Compares RGB9E5() against RGB9E5_reference() for ten million pseudo-random
// combinations of red, green and blue bit patterns.
TEST(MathTest, SharedExponentRandom)
{
	// Fixed seed, so every run uses the same sequence.
	srand(0);

	// Bit patterns of the red, green and blue inputs, in that order.
	unsigned int bits[3] = { 0, 0, 0 };

	for(int i = 0; i < 10000000; i++)
	{
		const float r = bit_cast<float>(bits[0]);
		const float g = bit_cast<float>(bits[1]);
		const float b = bit_cast<float>(bits[2]);

		unsigned int ref = RGB9E5_reference(r, g, b);
		unsigned int val = RGB9E5(r, g, b);

		EXPECT_EQ(ref, val);

		// Advance each pattern by a random amount; unsigned arithmetic wraps around.
		for(unsigned int &pattern : bits)
		{
			pattern += rand();
		}
	}
}
610
// Compares RGB9E5() against RGB9E5_reference() for every 32-bit float bit pattern
// in the red channel, with green and blue set to zero.
TEST(MathTest, SharedExponentExhaustive)
{
	// The counter is 64-bit only so that the loop can terminate after covering
	// the whole 32-bit range.
	for(uint64_t i = 0; i < 0x0000000100000000; i += 1)
	{
		// Narrow to 32 bits first so the bit_cast is between equally sized types,
		// instead of depending on which half of the 64-bit counter gets read.
		float f = bit_cast<float>(static_cast<uint32_t>(i));

		unsigned int ref = RGB9E5_reference(f, 0.0f, 0.0f);
		unsigned int val = RGB9E5(f, 0.0f, 0.0f);

		EXPECT_EQ(ref, val);
	}
}
623