xref: /aosp_15_r20/external/llvm-libc/src/__support/FPUtil/NormalFloat.h (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11 
12 #include "FPBits.h"
13 
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/common.h"
16 #include "src/__support/macros/config.h"
17 
18 #include <stdint.h>
19 
20 namespace LIBC_NAMESPACE_DECL {
21 namespace fputil {
22 
// A class which stores the normalized form of a floating point value.
// The special IEEE-754 bit patterns of zero, infinity and NaNs are
// not handled by this class.
//
// A normalized floating point number is of this form:
//    (-1)^sign * 2^exponent * <mantissa>
// where <mantissa> is of the form 1.<...>.
30 template <typename T> struct NormalFloat {
31   static_assert(
32       cpp::is_floating_point_v<T>,
33       "NormalFloat template parameter has to be a floating point type.");
34 
35   using StorageType = typename FPBits<T>::StorageType;
36   static constexpr StorageType ONE =
37       (StorageType(1) << FPBits<T>::FRACTION_LEN);
38 
39   // Unbiased exponent value.
40   int32_t exponent;
41 
42   StorageType mantissa;
43   // We want |StorageType| to have atleast one bit more than the actual mantissa
44   // bit width to accommodate the implicit 1 value.
45   static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
46                 "Bad type for mantissa in NormalFloat.");
47 
48   Sign sign = Sign::POS;
49 
NormalFloatNormalFloat50   LIBC_INLINE NormalFloat(Sign s, int32_t e, StorageType m)
51       : exponent(e), mantissa(m), sign(s) {
52     if (mantissa >= ONE)
53       return;
54 
55     unsigned normalization_shift = evaluate_normalization_shift(mantissa);
56     mantissa <<= normalization_shift;
57     exponent -= normalization_shift;
58   }
59 
NormalFloatNormalFloat60   LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(FPBits<T>(x)); }
61 
NormalFloatNormalFloat62   LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
63 
64   // Compares this normalized number with another normalized number.
65   // Returns -1 is this number is less than |other|, 0 if this number is equal
66   // to |other|, and 1 if this number is greater than |other|.
cmpNormalFloat67   LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
68     const int result = sign.is_neg() ? -1 : 1;
69     if (sign != other.sign)
70       return result;
71 
72     if (exponent > other.exponent) {
73       return result;
74     } else if (exponent == other.exponent) {
75       if (mantissa > other.mantissa)
76         return result;
77       else if (mantissa == other.mantissa)
78         return 0;
79       else
80         return -result;
81     } else {
82       return -result;
83     }
84   }
85 
86   // Returns a new normalized floating point number which is equal in value
87   // to this number multiplied by 2^e. That is:
88   //     new = this *  2^e
mul2NormalFloat89   LIBC_INLINE NormalFloat<T> mul2(int e) const {
90     NormalFloat<T> result = *this;
91     result.exponent += e;
92     return result;
93   }
94 
TNormalFloat95   LIBC_INLINE operator T() const {
96     int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
97     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
98     constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
99     if (biased_exponent > MAX_EXPONENT_VALUE) {
100       return FPBits<T>::inf(sign).get_val();
101     }
102 
103     FPBits<T> result(T(0.0));
104     result.set_sign(sign);
105 
106     constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
107     if (exponent < SUBNORMAL_EXPONENT) {
108       unsigned shift = SUBNORMAL_EXPONENT - exponent;
109       // Since exponent > subnormalExponent, shift is strictly greater than
110       // zero.
111       if (shift <= FPBits<T>::FRACTION_LEN + 1) {
112         // Generate a subnormal number. Might lead to loss of precision.
113         // We round to nearest and round halfway cases to even.
114         const StorageType shift_out_mask =
115             static_cast<StorageType>(StorageType(1) << shift) - 1;
116         const StorageType shift_out_value = mantissa & shift_out_mask;
117         const StorageType halfway_value =
118             static_cast<StorageType>(StorageType(1) << (shift - 1));
119         result.set_biased_exponent(0);
120         result.set_mantissa(mantissa >> shift);
121         StorageType new_mantissa = result.get_mantissa();
122         if (shift_out_value > halfway_value) {
123           new_mantissa += 1;
124         } else if (shift_out_value == halfway_value) {
125           // Round to even.
126           if (result.get_mantissa() & 0x1)
127             new_mantissa += 1;
128         }
129         result.set_mantissa(new_mantissa);
130         // Adding 1 to mantissa can lead to overflow. This can only happen if
131         // mantissa was all ones (0b111..11). For such a case, we will carry
132         // the overflow into the exponent.
133         if (new_mantissa == ONE)
134           result.set_biased_exponent(1);
135         return result.get_val();
136       } else {
137         return result.get_val();
138       }
139     }
140 
141     result.set_biased_exponent(
142         static_cast<StorageType>(exponent + FPBits<T>::EXP_BIAS));
143     result.set_mantissa(mantissa);
144     return result.get_val();
145   }
146 
147 private:
init_from_bitsNormalFloat148   LIBC_INLINE void init_from_bits(FPBits<T> bits) {
149     sign = bits.sign();
150 
151     if (bits.is_inf_or_nan() || bits.is_zero()) {
152       // Ignore special bit patterns. Implementations deal with them separately
153       // anyway so this should not be a problem.
154       exponent = 0;
155       mantissa = 0;
156       return;
157     }
158 
159     // Normalize subnormal numbers.
160     if (bits.is_subnormal()) {
161       unsigned shift = evaluate_normalization_shift(bits.get_mantissa());
162       mantissa = static_cast<StorageType>(bits.get_mantissa() << shift);
163       exponent = 1 - FPBits<T>::EXP_BIAS - shift;
164     } else {
165       exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
166       mantissa = ONE | bits.get_mantissa();
167     }
168   }
169 
evaluate_normalization_shiftNormalFloat170   LIBC_INLINE unsigned evaluate_normalization_shift(StorageType m) {
171     unsigned shift = 0;
172     for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
173          m <<= 1, ++shift)
174       ;
175     return shift;
176   }
177 };
178 
179 #ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
180 template <>
181 LIBC_INLINE void
init_from_bits(FPBits<long double> bits)182 NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
183   sign = bits.sign();
184 
185   if (bits.is_inf_or_nan() || bits.is_zero()) {
186     // Ignore special bit patterns. Implementations deal with them separately
187     // anyway so this should not be a problem.
188     exponent = 0;
189     mantissa = 0;
190     return;
191   }
192 
193   if (bits.is_subnormal()) {
194     if (bits.get_implicit_bit() == 0) {
195       // Since we ignore zero value, the mantissa in this case is non-zero.
196       int normalization_shift =
197           evaluate_normalization_shift(bits.get_mantissa());
198       exponent = -16382 - normalization_shift;
199       mantissa = (bits.get_mantissa() << normalization_shift);
200     } else {
201       exponent = -16382;
202       mantissa = ONE | bits.get_mantissa();
203     }
204   } else {
205     if (bits.get_implicit_bit() == 0) {
206       // Invalid number so just store 0 similar to a NaN.
207       exponent = 0;
208       mantissa = 0;
209     } else {
210       exponent = bits.get_biased_exponent() - 16383;
211       mantissa = ONE | bits.get_mantissa();
212     }
213   }
214 }
215 
216 template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
217   using LDBits = FPBits<long double>;
218   int biased_exponent = exponent + LDBits::EXP_BIAS;
219   // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
220   constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
221   if (biased_exponent > MAX_EXPONENT_VALUE) {
222     return LDBits::inf(sign).get_val();
223   }
224 
225   FPBits<long double> result(0.0l);
226   result.set_sign(sign);
227 
228   constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
229   if (exponent < SUBNORMAL_EXPONENT) {
230     unsigned shift = SUBNORMAL_EXPONENT - exponent;
231     if (shift <= LDBits::FRACTION_LEN + 1) {
232       // Generate a subnormal number. Might lead to loss of precision.
233       // We round to nearest and round halfway cases to even.
234       const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
235       const StorageType shift_out_value = mantissa & shift_out_mask;
236       const StorageType halfway_value = StorageType(1) << (shift - 1);
237       result.set_biased_exponent(0);
238       result.set_mantissa(mantissa >> shift);
239       StorageType new_mantissa = result.get_mantissa();
240       if (shift_out_value > halfway_value) {
241         new_mantissa += 1;
242       } else if (shift_out_value == halfway_value) {
243         // Round to even.
244         if (result.get_mantissa() & 0x1)
245           new_mantissa += 1;
246       }
247       result.set_mantissa(new_mantissa);
248       // Adding 1 to mantissa can lead to overflow. This can only happen if
249       // mantissa was all ones (0b111..11). For such a case, we will carry
250       // the overflow into the exponent and set the implicit bit to 1.
251       if (new_mantissa == ONE) {
252         result.set_biased_exponent(1);
253         result.set_implicit_bit(1);
254       } else {
255         result.set_implicit_bit(0);
256       }
257       return result.get_val();
258     } else {
259       return result.get_val();
260     }
261   }
262 
263   result.set_biased_exponent(biased_exponent);
264   result.set_mantissa(mantissa);
265   result.set_implicit_bit(1);
266   return result.get_val();
267 }
268 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
269 
270 } // namespace fputil
271 } // namespace LIBC_NAMESPACE_DECL
272 
273 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
274