xref: /aosp_15_r20/external/abseil-cpp/absl/strings/numbers.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This file contains string processing functions related to
16 // numeric values.
17 
18 #include "absl/strings/numbers.h"
19 
20 #include <algorithm>
21 #include <cassert>
22 #include <cfloat>  // for DBL_DIG and FLT_DIG
23 #include <cmath>   // for HUGE_VAL
24 #include <cstdint>
25 #include <cstdio>
26 #include <cstdlib>
27 #include <cstring>
28 #include <iterator>
29 #include <limits>
30 #include <system_error>  // NOLINT(build/c++11)
31 #include <utility>
32 
33 #include "absl/base/attributes.h"
34 #include "absl/base/config.h"
35 #include "absl/base/internal/endian.h"
36 #include "absl/base/internal/raw_logging.h"
37 #include "absl/base/nullability.h"
38 #include "absl/base/optimization.h"
39 #include "absl/numeric/bits.h"
40 #include "absl/numeric/int128.h"
41 #include "absl/strings/ascii.h"
42 #include "absl/strings/charconv.h"
43 #include "absl/strings/match.h"
44 #include "absl/strings/string_view.h"
45 
46 namespace absl {
47 ABSL_NAMESPACE_BEGIN
48 
SimpleAtof(absl::string_view str,absl::Nonnull<float * > out)49 bool SimpleAtof(absl::string_view str, absl::Nonnull<float*> out) {
50   *out = 0.0;
51   str = StripAsciiWhitespace(str);
52   // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one
53   // is present, skip it, while avoiding accepting "+-0" as valid.
54   if (!str.empty() && str[0] == '+') {
55     str.remove_prefix(1);
56     if (!str.empty() && str[0] == '-') {
57       return false;
58     }
59   }
60   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
61   if (result.ec == std::errc::invalid_argument) {
62     return false;
63   }
64   if (result.ptr != str.data() + str.size()) {
65     // not all non-whitespace characters consumed
66     return false;
67   }
68   // from_chars() with DR 3081's current wording will return max() on
69   // overflow.  SimpleAtof returns infinity instead.
70   if (result.ec == std::errc::result_out_of_range) {
71     if (*out > 1.0) {
72       *out = std::numeric_limits<float>::infinity();
73     } else if (*out < -1.0) {
74       *out = -std::numeric_limits<float>::infinity();
75     }
76   }
77   return true;
78 }
79 
SimpleAtod(absl::string_view str,absl::Nonnull<double * > out)80 bool SimpleAtod(absl::string_view str, absl::Nonnull<double*> out) {
81   *out = 0.0;
82   str = StripAsciiWhitespace(str);
83   // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one
84   // is present, skip it, while avoiding accepting "+-0" as valid.
85   if (!str.empty() && str[0] == '+') {
86     str.remove_prefix(1);
87     if (!str.empty() && str[0] == '-') {
88       return false;
89     }
90   }
91   auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
92   if (result.ec == std::errc::invalid_argument) {
93     return false;
94   }
95   if (result.ptr != str.data() + str.size()) {
96     // not all non-whitespace characters consumed
97     return false;
98   }
99   // from_chars() with DR 3081's current wording will return max() on
100   // overflow.  SimpleAtod returns infinity instead.
101   if (result.ec == std::errc::result_out_of_range) {
102     if (*out > 1.0) {
103       *out = std::numeric_limits<double>::infinity();
104     } else if (*out < -1.0) {
105       *out = -std::numeric_limits<double>::infinity();
106     }
107   }
108   return true;
109 }
110 
SimpleAtob(absl::string_view str,absl::Nonnull<bool * > out)111 bool SimpleAtob(absl::string_view str, absl::Nonnull<bool*> out) {
112   ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
113   if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
114       EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
115       EqualsIgnoreCase(str, "1")) {
116     *out = true;
117     return true;
118   }
119   if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
120       EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
121       EqualsIgnoreCase(str, "0")) {
122     *out = false;
123     return true;
124   }
125   return false;
126 }
127 
128 // ----------------------------------------------------------------------
129 // FastIntToBuffer() overloads
130 //
131 // Like the Fast*ToBuffer() functions above, these are intended for speed.
132 // Unlike the Fast*ToBuffer() functions, however, these functions write
133 // their output to the beginning of the buffer.  The caller is responsible
134 // for ensuring that the buffer has enough space to hold the output.
135 //
136 // Returns a pointer to the end of the string (i.e. the null character
137 // terminating the string).
138 // ----------------------------------------------------------------------
139 
140 namespace {
141 
142 // Various routines to encode integers to strings.
143 
144 // We split data encodings into a group of 2 digits, 4 digits, 8 digits as
145 // it's easier to combine powers of two into scalar arithmetic.
146 
147 // Previous implementation used a lookup table of 200 bytes for every 2 bytes
148 // and it was memory bound, any L1 cache miss would result in a much slower
149 // result. When benchmarking with a cache eviction rate of several percent,
150 // this implementation proved to be better.
151 
152 // These constants represent '00', '0000' and '00000000' as ascii strings in
153 // integers. We can add these numbers if we encode to bytes from 0 to 9. as
154 // 'i' = '0' + i for 0 <= i <= 9.
155 constexpr uint32_t kTwoZeroBytes = 0x0101 * '0';
156 constexpr uint64_t kFourZeroBytes = 0x01010101 * '0';
157 constexpr uint64_t kEightZeroBytes = 0x0101010101010101ull * '0';
158 
159 // * 103 / 1024 is a division by 10 for values from 0 to 99. It's also a
160 // division of a structure [k takes 2 bytes][m takes 2 bytes], then * 103 / 1024
161 // will be [k / 10][m / 10]. It allows parallel division.
162 constexpr uint64_t kDivisionBy10Mul = 103u;
163 constexpr uint64_t kDivisionBy10Div = 1 << 10;
164 
165 // * 10486 / 1048576 is a division by 100 for values from 0 to 9999.
166 constexpr uint64_t kDivisionBy100Mul = 10486u;
167 constexpr uint64_t kDivisionBy100Div = 1 << 20;
168 
169 // Encode functions write the ASCII output of input `n` to `out_str`.
EncodeHundred(uint32_t n,absl::Nonnull<char * > out_str)170 inline char* EncodeHundred(uint32_t n, absl::Nonnull<char*> out_str) {
171   int num_digits = static_cast<int>(n - 10) >> 8;
172   uint32_t div10 = (n * kDivisionBy10Mul) / kDivisionBy10Div;
173   uint32_t mod10 = n - 10u * div10;
174   uint32_t base = kTwoZeroBytes + div10 + (mod10 << 8);
175   base >>= num_digits & 8;
176   little_endian::Store16(out_str, static_cast<uint16_t>(base));
177   return out_str + 2 + num_digits;
178 }
179 
EncodeTenThousand(uint32_t n,absl::Nonnull<char * > out_str)180 inline char* EncodeTenThousand(uint32_t n, absl::Nonnull<char*> out_str) {
181   // We split lower 2 digits and upper 2 digits of n into 2 byte consecutive
182   // blocks. 123 ->  [\0\1][\0\23]. We divide by 10 both blocks
183   // (it's 1 division + zeroing upper bits), and compute modulo 10 as well "in
184   // parallel". Then we combine both results to have both ASCII digits,
185   // strip trailing zeros, add ASCII '0000' and return.
186   uint32_t div100 = (n * kDivisionBy100Mul) / kDivisionBy100Div;
187   uint32_t mod100 = n - 100ull * div100;
188   uint32_t hundreds = (mod100 << 16) + div100;
189   uint32_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
190   tens &= (0xFull << 16) | 0xFull;
191   tens += (hundreds - 10ull * tens) << 8;
192   ABSL_ASSUME(tens != 0);
193   // The result can contain trailing zero bits, we need to strip them to a first
194   // significant byte in a final representation. For example, for n = 123, we
195   // have tens to have representation \0\1\2\3. We do `& -8` to round
196   // to a multiple to 8 to strip zero bytes, not all zero bits.
197   // countr_zero to help.
198   // 0 minus 8 to make MSVC happy.
199   uint32_t zeroes = static_cast<uint32_t>(absl::countr_zero(tens)) & (0 - 8u);
200   tens += kFourZeroBytes;
201   tens >>= zeroes;
202   little_endian::Store32(out_str, tens);
203   return out_str + sizeof(tens) - zeroes / 8;
204 }
205 
206 // Helper function to produce an ASCII representation of `i`.
207 //
208 // Function returns an 8-byte integer which when summed with `kEightZeroBytes`,
209 // can be treated as a printable buffer with ascii representation of `i`,
210 // possibly with leading zeros.
211 //
212 // Example:
213 //
214 //  uint64_t buffer = PrepareEightDigits(102030) + kEightZeroBytes;
215 //  char* ascii = reinterpret_cast<char*>(&buffer);
216 //  // Note two leading zeros:
217 //  EXPECT_EQ(absl::string_view(ascii, 8), "00102030");
218 //
219 // Pre-condition: `i` must be less than 100000000.
PrepareEightDigits(uint32_t i)220 inline uint64_t PrepareEightDigits(uint32_t i) {
221   ABSL_ASSUME(i < 10000'0000);
222   // Prepare 2 blocks of 4 digits "in parallel".
223   uint32_t hi = i / 10000;
224   uint32_t lo = i % 10000;
225   uint64_t merged = hi | (uint64_t{lo} << 32);
226   uint64_t div100 = ((merged * kDivisionBy100Mul) / kDivisionBy100Div) &
227                     ((0x7Full << 32) | 0x7Full);
228   uint64_t mod100 = merged - 100ull * div100;
229   uint64_t hundreds = (mod100 << 16) + div100;
230   uint64_t tens = (hundreds * kDivisionBy10Mul) / kDivisionBy10Div;
231   tens &= (0xFull << 48) | (0xFull << 32) | (0xFull << 16) | 0xFull;
232   tens += (hundreds - 10ull * tens) << 8;
233   return tens;
234 }
235 
EncodeFullU32(uint32_t n,absl::Nonnull<char * > out_str)236 inline ABSL_ATTRIBUTE_ALWAYS_INLINE absl::Nonnull<char*> EncodeFullU32(
237     uint32_t n, absl::Nonnull<char*> out_str) {
238   if (n < 10) {
239     *out_str = static_cast<char>('0' + n);
240     return out_str + 1;
241   }
242   if (n < 100'000'000) {
243     uint64_t bottom = PrepareEightDigits(n);
244     ABSL_ASSUME(bottom != 0);
245     // 0 minus 8 to make MSVC happy.
246     uint32_t zeroes =
247         static_cast<uint32_t>(absl::countr_zero(bottom)) & (0 - 8u);
248     little_endian::Store64(out_str, (bottom + kEightZeroBytes) >> zeroes);
249     return out_str + sizeof(bottom) - zeroes / 8;
250   }
251   uint32_t div08 = n / 100'000'000;
252   uint32_t mod08 = n % 100'000'000;
253   uint64_t bottom = PrepareEightDigits(mod08) + kEightZeroBytes;
254   out_str = EncodeHundred(div08, out_str);
255   little_endian::Store64(out_str, bottom);
256   return out_str + sizeof(bottom);
257 }
258 
EncodeFullU64(uint64_t i,char * buffer)259 inline ABSL_ATTRIBUTE_ALWAYS_INLINE char* EncodeFullU64(uint64_t i,
260                                                         char* buffer) {
261   if (i <= std::numeric_limits<uint32_t>::max()) {
262     return EncodeFullU32(static_cast<uint32_t>(i), buffer);
263   }
264   uint32_t mod08;
265   if (i < 1'0000'0000'0000'0000ull) {
266     uint32_t div08 = static_cast<uint32_t>(i / 100'000'000ull);
267     mod08 =  static_cast<uint32_t>(i % 100'000'000ull);
268     buffer = EncodeFullU32(div08, buffer);
269   } else {
270     uint64_t div08 = i / 100'000'000ull;
271     mod08 =  static_cast<uint32_t>(i % 100'000'000ull);
272     uint32_t div016 = static_cast<uint32_t>(div08 / 100'000'000ull);
273     uint32_t div08mod08 = static_cast<uint32_t>(div08 % 100'000'000ull);
274     uint64_t mid_result = PrepareEightDigits(div08mod08) + kEightZeroBytes;
275     buffer = EncodeTenThousand(div016, buffer);
276     little_endian::Store64(buffer, mid_result);
277     buffer += sizeof(mid_result);
278   }
279   uint64_t mod_result = PrepareEightDigits(mod08) + kEightZeroBytes;
280   little_endian::Store64(buffer, mod_result);
281   return buffer + sizeof(mod_result);
282 }
283 
284 }  // namespace
285 
PutTwoDigits(uint32_t i,absl::Nonnull<char * > buf)286 void numbers_internal::PutTwoDigits(uint32_t i, absl::Nonnull<char*> buf) {
287   assert(i < 100);
288   uint32_t base = kTwoZeroBytes;
289   uint32_t div10 = (i * kDivisionBy10Mul) / kDivisionBy10Div;
290   uint32_t mod10 = i - 10u * div10;
291   base += div10 + (mod10 << 8);
292   little_endian::Store16(buf, static_cast<uint16_t>(base));
293 }
294 
FastIntToBuffer(uint32_t n,absl::Nonnull<char * > out_str)295 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
296     uint32_t n, absl::Nonnull<char*> out_str) {
297   out_str = EncodeFullU32(n, out_str);
298   *out_str = '\0';
299   return out_str;
300 }
301 
FastIntToBuffer(int32_t i,absl::Nonnull<char * > buffer)302 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
303     int32_t i, absl::Nonnull<char*> buffer) {
304   uint32_t u = static_cast<uint32_t>(i);
305   if (i < 0) {
306     *buffer++ = '-';
307     // We need to do the negation in modular (i.e., "unsigned")
308     // arithmetic; MSVC++ apparently warns for plain "-u", so
309     // we write the equivalent expression "0 - u" instead.
310     u = 0 - u;
311   }
312   buffer = EncodeFullU32(u, buffer);
313   *buffer = '\0';
314   return buffer;
315 }
316 
FastIntToBuffer(uint64_t i,absl::Nonnull<char * > buffer)317 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
318     uint64_t i, absl::Nonnull<char*> buffer) {
319   buffer = EncodeFullU64(i, buffer);
320   *buffer = '\0';
321   return buffer;
322 }
323 
FastIntToBuffer(int64_t i,absl::Nonnull<char * > buffer)324 absl::Nonnull<char*> numbers_internal::FastIntToBuffer(
325     int64_t i, absl::Nonnull<char*> buffer) {
326   uint64_t u = static_cast<uint64_t>(i);
327   if (i < 0) {
328     *buffer++ = '-';
329     // We need to do the negation in modular (i.e., "unsigned")
330     // arithmetic; MSVC++ apparently warns for plain "-u", so
331     // we write the equivalent expression "0 - u" instead.
332     u = 0 - u;
333   }
334   buffer = EncodeFullU64(u, buffer);
335   *buffer = '\0';
336   return buffer;
337 }
338 
339 // Given a 128-bit number expressed as a pair of uint64_t, high half first,
340 // return that number multiplied by the given 32-bit value.  If the result is
341 // too large to fit in a 128-bit number, divide it by 2 until it fits.
Mul32(std::pair<uint64_t,uint64_t> num,uint32_t mul)342 static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
343                                            uint32_t mul) {
344   uint64_t bits0_31 = num.second & 0xFFFFFFFF;
345   uint64_t bits32_63 = num.second >> 32;
346   uint64_t bits64_95 = num.first & 0xFFFFFFFF;
347   uint64_t bits96_127 = num.first >> 32;
348 
349   // The picture so far: each of these 64-bit values has only the lower 32 bits
350   // filled in.
351   // bits96_127:          [ 00000000 xxxxxxxx ]
352   // bits64_95:                    [ 00000000 xxxxxxxx ]
353   // bits32_63:                             [ 00000000 xxxxxxxx ]
354   // bits0_31:                                       [ 00000000 xxxxxxxx ]
355 
356   bits0_31 *= mul;
357   bits32_63 *= mul;
358   bits64_95 *= mul;
359   bits96_127 *= mul;
360 
361   // Now the top halves may also have value, though all 64 of their bits will
362   // never be set at the same time, since they are a result of a 32x32 bit
363   // multiply.  This makes the carry calculation slightly easier.
364   // bits96_127:          [ mmmmmmmm | mmmmmmmm ]
365   // bits64_95:                    [ | mmmmmmmm mmmmmmmm | ]
366   // bits32_63:                      |        [ mmmmmmmm | mmmmmmmm ]
367   // bits0_31:                       |                 [ | mmmmmmmm mmmmmmmm ]
368   // eventually:        [ bits128_up | ...bits64_127.... | ..bits0_63... ]
369 
370   uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
371   uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
372                         (bits0_63 < bits0_31);
373   uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
374   if (bits128_up == 0) return {bits64_127, bits0_63};
375 
376   auto shift = static_cast<unsigned>(bit_width(bits128_up));
377   uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
378   uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
379   return {hi, lo};
380 }
381 
382 // Compute num * 5 ^ expfive, and return the first 128 bits of the result,
383 // where the first bit is always a one.  So PowFive(1, 0) starts 0b100000,
384 // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
PowFive(uint64_t num,int expfive)385 static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
386   std::pair<uint64_t, uint64_t> result = {num, 0};
387   while (expfive >= 13) {
388     // 5^13 is the highest power of five that will fit in a 32-bit integer.
389     result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
390     expfive -= 13;
391   }
392   constexpr uint32_t powers_of_five[13] = {
393       1,
394       5,
395       5 * 5,
396       5 * 5 * 5,
397       5 * 5 * 5 * 5,
398       5 * 5 * 5 * 5 * 5,
399       5 * 5 * 5 * 5 * 5 * 5,
400       5 * 5 * 5 * 5 * 5 * 5 * 5,
401       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
402       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
403       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
404       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
405       5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
406   result = Mul32(result, powers_of_five[expfive & 15]);
407   int shift = countl_zero(result.first);
408   if (shift != 0) {
409     result.first = (result.first << shift) + (result.second >> (64 - shift));
410     result.second = (result.second << shift);
411   }
412   return result;
413 }
414 
415 struct ExpDigits {
416   int32_t exponent;
417   char digits[6];
418 };
419 
420 // SplitToSix converts value, a positive double-precision floating-point number,
421 // into a base-10 exponent and 6 ASCII digits, where the first digit is never
422 // zero.  For example, SplitToSix(1) returns an exponent of zero and a digits
423 // array of {'1', '0', '0', '0', '0', '0'}.  If value is exactly halfway between
424 // two possible representations, e.g. value = 100000.5, then "round to even" is
425 // performed.
SplitToSix(const double value)426 static ExpDigits SplitToSix(const double value) {
427   ExpDigits exp_dig;
428   int exp = 5;
429   double d = value;
430   // First step: calculate a close approximation of the output, where the
431   // value d will be between 100,000 and 999,999, representing the digits
432   // in the output ASCII array, and exp is the base-10 exponent.  It would be
433   // faster to use a table here, and to look up the base-2 exponent of value,
434   // however value is an IEEE-754 64-bit number, so the table would have 2,000
435   // entries, which is not cache-friendly.
436   if (d >= 999999.5) {
437     if (d >= 1e+261) exp += 256, d *= 1e-256;
438     if (d >= 1e+133) exp += 128, d *= 1e-128;
439     if (d >= 1e+69) exp += 64, d *= 1e-64;
440     if (d >= 1e+37) exp += 32, d *= 1e-32;
441     if (d >= 1e+21) exp += 16, d *= 1e-16;
442     if (d >= 1e+13) exp += 8, d *= 1e-8;
443     if (d >= 1e+9) exp += 4, d *= 1e-4;
444     if (d >= 1e+7) exp += 2, d *= 1e-2;
445     if (d >= 1e+6) exp += 1, d *= 1e-1;
446   } else {
447     if (d < 1e-250) exp -= 256, d *= 1e256;
448     if (d < 1e-122) exp -= 128, d *= 1e128;
449     if (d < 1e-58) exp -= 64, d *= 1e64;
450     if (d < 1e-26) exp -= 32, d *= 1e32;
451     if (d < 1e-10) exp -= 16, d *= 1e16;
452     if (d < 1e-2) exp -= 8, d *= 1e8;
453     if (d < 1e+2) exp -= 4, d *= 1e4;
454     if (d < 1e+4) exp -= 2, d *= 1e2;
455     if (d < 1e+5) exp -= 1, d *= 1e1;
456   }
457   // At this point, d is in the range [99999.5..999999.5) and exp is in the
458   // range [-324..308]. Since we need to round d up, we want to add a half
459   // and truncate.
460   // However, the technique above may have lost some precision, due to its
461   // repeated multiplication by constants that each may be off by half a bit
462   // of precision.  This only matters if we're close to the edge though.
463   // Since we'd like to know if the fractional part of d is close to a half,
464   // we multiply it by 65536 and see if the fractional part is close to 32768.
465   // (The number doesn't have to be a power of two,but powers of two are faster)
466   uint64_t d64k = d * 65536;
467   uint32_t dddddd;  // A 6-digit decimal integer.
468   if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
469     // OK, it's fairly likely that precision was lost above, which is
470     // not a surprise given only 52 mantissa bits are available.  Therefore
471     // redo the calculation using 128-bit numbers.  (64 bits are not enough).
472 
473     // Start out with digits rounded down; maybe add one below.
474     dddddd = static_cast<uint32_t>(d64k / 65536);
475 
476     // mantissa is a 64-bit integer representing M.mmm... * 2^63.  The actual
477     // value we're representing, of course, is M.mmm... * 2^exp2.
478     int exp2;
479     double m = std::frexp(value, &exp2);
480     uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
481     // std::frexp returns an m value in the range [0.5, 1.0), however we
482     // can't multiply it by 2^64 and convert to an integer because some FPUs
483     // throw an exception when converting an number higher than 2^63 into an
484     // integer - even an unsigned 64-bit integer!  Fortunately it doesn't matter
485     // since m only has 52 significant bits anyway.
486     mantissa <<= 1;
487     exp2 -= 64;  // not needed, but nice for debugging
488 
489     // OK, we are here to compare:
490     //     (dddddd + 0.5) * 10^(exp-5)  vs.  mantissa * 2^exp2
491     // so we can round up dddddd if appropriate.  Those values span the full
492     // range of 600 orders of magnitude of IEE 64-bit floating-point.
493     // Fortunately, we already know they are very close, so we don't need to
494     // track the base-2 exponent of both sides.  This greatly simplifies the
495     // the math since the 2^exp2 calculation is unnecessary and the power-of-10
496     // calculation can become a power-of-5 instead.
497 
498     std::pair<uint64_t, uint64_t> edge, val;
499     if (exp >= 6) {
500       // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
501       // Since we're tossing powers of two, 2 * dddddd + 1 is the
502       // same as dddddd + 0.5
503       edge = PowFive(2 * dddddd + 1, exp - 5);
504 
505       val.first = mantissa;
506       val.second = 0;
507     } else {
508       // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
509       // above because (exp - 5) is negative.  So we compare (dddddd + 0.5) to
510       // mantissa * 5 ^ (5 - exp)
511       edge = PowFive(2 * dddddd + 1, 0);
512 
513       val = PowFive(mantissa, 5 - exp);
514     }
515     // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
516     //        val.second, edge.first, edge.second);
517     if (val > edge) {
518       dddddd++;
519     } else if (val == edge) {
520       dddddd += (dddddd & 1);
521     }
522   } else {
523     // Here, we are not close to the edge.
524     dddddd = static_cast<uint32_t>((d64k + 32768) / 65536);
525   }
526   if (dddddd == 1000000) {
527     dddddd = 100000;
528     exp += 1;
529   }
530   exp_dig.exponent = exp;
531 
532   uint32_t two_digits = dddddd / 10000;
533   dddddd -= two_digits * 10000;
534   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]);
535 
536   two_digits = dddddd / 100;
537   dddddd -= two_digits * 100;
538   numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]);
539 
540   numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]);
541   return exp_dig;
542 }
543 
544 // Helper function for fast formatting of floating-point.
545 // The result is the same as "%g", a.k.a. "%.6g".
SixDigitsToBuffer(double d,absl::Nonnull<char * > const buffer)546 size_t numbers_internal::SixDigitsToBuffer(double d,
547                                            absl::Nonnull<char*> const buffer) {
548   static_assert(std::numeric_limits<float>::is_iec559,
549                 "IEEE-754/IEC-559 support only");
550 
551   char* out = buffer;  // we write data to out, incrementing as we go, but
552                        // FloatToBuffer always returns the address of the buffer
553                        // passed in.
554 
555   if (std::isnan(d)) {
556     strcpy(out, "nan");  // NOLINT(runtime/printf)
557     return 3;
558   }
559   if (d == 0) {  // +0 and -0 are handled here
560     if (std::signbit(d)) *out++ = '-';
561     *out++ = '0';
562     *out = 0;
563     return static_cast<size_t>(out - buffer);
564   }
565   if (d < 0) {
566     *out++ = '-';
567     d = -d;
568   }
569   if (d > std::numeric_limits<double>::max()) {
570     strcpy(out, "inf");  // NOLINT(runtime/printf)
571     return static_cast<size_t>(out + 3 - buffer);
572   }
573 
574   auto exp_dig = SplitToSix(d);
575   int exp = exp_dig.exponent;
576   const char* digits = exp_dig.digits;
577   out[0] = '0';
578   out[1] = '.';
579   switch (exp) {
580     case 5:
581       memcpy(out, &digits[0], 6), out += 6;
582       *out = 0;
583       return static_cast<size_t>(out - buffer);
584     case 4:
585       memcpy(out, &digits[0], 5), out += 5;
586       if (digits[5] != '0') {
587         *out++ = '.';
588         *out++ = digits[5];
589       }
590       *out = 0;
591       return static_cast<size_t>(out - buffer);
592     case 3:
593       memcpy(out, &digits[0], 4), out += 4;
594       if ((digits[5] | digits[4]) != '0') {
595         *out++ = '.';
596         *out++ = digits[4];
597         if (digits[5] != '0') *out++ = digits[5];
598       }
599       *out = 0;
600       return static_cast<size_t>(out - buffer);
601     case 2:
602       memcpy(out, &digits[0], 3), out += 3;
603       *out++ = '.';
604       memcpy(out, &digits[3], 3);
605       out += 3;
606       while (out[-1] == '0') --out;
607       if (out[-1] == '.') --out;
608       *out = 0;
609       return static_cast<size_t>(out - buffer);
610     case 1:
611       memcpy(out, &digits[0], 2), out += 2;
612       *out++ = '.';
613       memcpy(out, &digits[2], 4);
614       out += 4;
615       while (out[-1] == '0') --out;
616       if (out[-1] == '.') --out;
617       *out = 0;
618       return static_cast<size_t>(out - buffer);
619     case 0:
620       memcpy(out, &digits[0], 1), out += 1;
621       *out++ = '.';
622       memcpy(out, &digits[1], 5);
623       out += 5;
624       while (out[-1] == '0') --out;
625       if (out[-1] == '.') --out;
626       *out = 0;
627       return static_cast<size_t>(out - buffer);
628     case -4:
629       out[2] = '0';
630       ++out;
631       ABSL_FALLTHROUGH_INTENDED;
632     case -3:
633       out[2] = '0';
634       ++out;
635       ABSL_FALLTHROUGH_INTENDED;
636     case -2:
637       out[2] = '0';
638       ++out;
639       ABSL_FALLTHROUGH_INTENDED;
640     case -1:
641       out += 2;
642       memcpy(out, &digits[0], 6);
643       out += 6;
644       while (out[-1] == '0') --out;
645       *out = 0;
646       return static_cast<size_t>(out - buffer);
647   }
648   assert(exp < -4 || exp >= 6);
649   out[0] = digits[0];
650   assert(out[1] == '.');
651   out += 2;
652   memcpy(out, &digits[1], 5), out += 5;
653   while (out[-1] == '0') --out;
654   if (out[-1] == '.') --out;
655   *out++ = 'e';
656   if (exp > 0) {
657     *out++ = '+';
658   } else {
659     *out++ = '-';
660     exp = -exp;
661   }
662   if (exp > 99) {
663     int dig1 = exp / 100;
664     exp -= dig1 * 100;
665     *out++ = '0' + static_cast<char>(dig1);
666   }
667   PutTwoDigits(static_cast<uint32_t>(exp), out);
668   out += 2;
669   *out = 0;
670   return static_cast<size_t>(out - buffer);
671 }
672 
673 namespace {
674 // Represents integer values of digits.
675 // Uses 36 to indicate an invalid character since we support
676 // bases up to 36.
677 static const int8_t kAsciiToInt[256] = {
678     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,  // 16 36s.
679     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
680     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,
681     6,  7,  8,  9,  36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
682     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
683     36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
684     24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
685     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
686     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
687     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
688     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
689     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
690     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
691     36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
692 
693 // Parse the sign and optional hex or oct prefix in text.
safe_parse_sign_and_base(absl::Nonnull<absl::string_view * > text,absl::Nonnull<int * > base_ptr,absl::Nonnull<bool * > negative_ptr)694 inline bool safe_parse_sign_and_base(
695     absl::Nonnull<absl::string_view*> text /*inout*/,
696     absl::Nonnull<int*> base_ptr /*inout*/,
697     absl::Nonnull<bool*> negative_ptr /*output*/) {
698   if (text->data() == nullptr) {
699     return false;
700   }
701 
702   const char* start = text->data();
703   const char* end = start + text->size();
704   int base = *base_ptr;
705 
706   // Consume whitespace.
707   while (start < end &&
708          absl::ascii_isspace(static_cast<unsigned char>(start[0]))) {
709     ++start;
710   }
711   while (start < end &&
712          absl::ascii_isspace(static_cast<unsigned char>(end[-1]))) {
713     --end;
714   }
715   if (start >= end) {
716     return false;
717   }
718 
719   // Consume sign.
720   *negative_ptr = (start[0] == '-');
721   if (*negative_ptr || start[0] == '+') {
722     ++start;
723     if (start >= end) {
724       return false;
725     }
726   }
727 
728   // Consume base-dependent prefix.
729   //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
730   //  base 16: "0x" -> base 16
731   // Also validate the base.
732   if (base == 0) {
733     if (end - start >= 2 && start[0] == '0' &&
734         (start[1] == 'x' || start[1] == 'X')) {
735       base = 16;
736       start += 2;
737       if (start >= end) {
738         // "0x" with no digits after is invalid.
739         return false;
740       }
741     } else if (end - start >= 1 && start[0] == '0') {
742       base = 8;
743       start += 1;
744     } else {
745       base = 10;
746     }
747   } else if (base == 16) {
748     if (end - start >= 2 && start[0] == '0' &&
749         (start[1] == 'x' || start[1] == 'X')) {
750       start += 2;
751       if (start >= end) {
752         // "0x" with no digits after is invalid.
753         return false;
754       }
755     }
756   } else if (base >= 2 && base <= 36) {
757     // okay
758   } else {
759     return false;
760   }
761   *text = absl::string_view(start, static_cast<size_t>(end - start));
762   *base_ptr = base;
763   return true;
764 }
765 
766 // Consume digits.
767 //
768 // The classic loop:
769 //
770 //   for each digit
771 //     value = value * base + digit
772 //   value *= sign
773 //
774 // The classic loop needs overflow checking.  It also fails on the most
775 // negative integer, -2147483648 in 32-bit two's complement representation.
776 //
777 // My improved loop:
778 //
779 //  if (!negative)
780 //    for each digit
781 //      value = value * base
782 //      value = value + digit
783 //  else
784 //    for each digit
785 //      value = value * base
786 //      value = value - digit
787 //
788 // Overflow checking becomes simple.
789 
790 // Lookup tables per IntType:
791 // vmax/base and vmin/base are precomputed because division costs at least 8ns.
792 // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
793 // struct of arrays) would probably be better in terms of d-cache for the most
794 // commonly used bases.
795 template <typename IntType>
796 struct LookupTables {
797   ABSL_CONST_INIT static const IntType kVmaxOverBase[];
798   ABSL_CONST_INIT static const IntType kVminOverBase[];
799 };
800 
801 // An array initializer macro for X/base where base in [0, 36].
802 // However, note that lookups for base in [0, 1] should never happen because
803 // base has been validated to be in [2, 36] by safe_parse_sign_and_base().
804 #define X_OVER_BASE_INITIALIZER(X)                                        \
805   {                                                                       \
806     0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
807         X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18,   \
808         X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26,   \
809         X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34,   \
810         X / 35, X / 36,                                                   \
811   }
812 
813 // This kVmaxOverBase is generated with
814 //  for (int base = 2; base < 37; ++base) {
815 //    absl::uint128 max = std::numeric_limits<absl::uint128>::max();
816 //    auto result = max / base;
817 //    std::cout << "    MakeUint128(" << absl::Uint128High64(result) << "u, "
818 //              << absl::Uint128Low64(result) << "u),\n";
819 //  }
820 // See https://godbolt.org/z/aneYsb
821 //
822 // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting
823 // array to avoid a static initializer.
824 template <>
825 ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = {
826     0,
827     0,
828     MakeUint128(9223372036854775807u, 18446744073709551615u),
829     MakeUint128(6148914691236517205u, 6148914691236517205u),
830     MakeUint128(4611686018427387903u, 18446744073709551615u),
831     MakeUint128(3689348814741910323u, 3689348814741910323u),
832     MakeUint128(3074457345618258602u, 12297829382473034410u),
833     MakeUint128(2635249153387078802u, 5270498306774157604u),
834     MakeUint128(2305843009213693951u, 18446744073709551615u),
835     MakeUint128(2049638230412172401u, 14347467612885206812u),
836     MakeUint128(1844674407370955161u, 11068046444225730969u),
837     MakeUint128(1676976733973595601u, 8384883669867978007u),
838     MakeUint128(1537228672809129301u, 6148914691236517205u),
839     MakeUint128(1418980313362273201u, 4256940940086819603u),
840     MakeUint128(1317624576693539401u, 2635249153387078802u),
841     MakeUint128(1229782938247303441u, 1229782938247303441u),
842     MakeUint128(1152921504606846975u, 18446744073709551615u),
843     MakeUint128(1085102592571150095u, 1085102592571150095u),
844     MakeUint128(1024819115206086200u, 16397105843297379214u),
845     MakeUint128(970881267037344821u, 16504981539634861972u),
846     MakeUint128(922337203685477580u, 14757395258967641292u),
847     MakeUint128(878416384462359600u, 14054662151397753612u),
848     MakeUint128(838488366986797800u, 13415813871788764811u),
849     MakeUint128(802032351030850070u, 4812194106185100421u),
850     MakeUint128(768614336404564650u, 12297829382473034410u),
851     MakeUint128(737869762948382064u, 11805916207174113034u),
852     MakeUint128(709490156681136600u, 11351842506898185609u),
853     MakeUint128(683212743470724133u, 17080318586768103348u),
854     MakeUint128(658812288346769700u, 10540996613548315209u),
855     MakeUint128(636094623231363848u, 15266270957552732371u),
856     MakeUint128(614891469123651720u, 9838263505978427528u),
857     MakeUint128(595056260442243600u, 9520900167075897608u),
858     MakeUint128(576460752303423487u, 18446744073709551615u),
859     MakeUint128(558992244657865200u, 8943875914525843207u),
860     MakeUint128(542551296285575047u, 9765923333140350855u),
861     MakeUint128(527049830677415760u, 8432797290838652167u),
862     MakeUint128(512409557603043100u, 8198552921648689607u),
863 };
864 
865 // This kVmaxOverBase generated with
866 //   for (int base = 2; base < 37; ++base) {
867 //    absl::int128 max = std::numeric_limits<absl::int128>::max();
868 //    auto result = max / base;
869 //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
870 //              << absl::Int128Low64(result) << "u),\n";
871 //  }
872 // See https://godbolt.org/z/7djYWz
873 //
874 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
875 // to avoid a static initializer.
876 template <>
877 ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = {
878     0,
879     0,
880     MakeInt128(4611686018427387903, 18446744073709551615u),
881     MakeInt128(3074457345618258602, 12297829382473034410u),
882     MakeInt128(2305843009213693951, 18446744073709551615u),
883     MakeInt128(1844674407370955161, 11068046444225730969u),
884     MakeInt128(1537228672809129301, 6148914691236517205u),
885     MakeInt128(1317624576693539401, 2635249153387078802u),
886     MakeInt128(1152921504606846975, 18446744073709551615u),
887     MakeInt128(1024819115206086200, 16397105843297379214u),
888     MakeInt128(922337203685477580, 14757395258967641292u),
889     MakeInt128(838488366986797800, 13415813871788764811u),
890     MakeInt128(768614336404564650, 12297829382473034410u),
891     MakeInt128(709490156681136600, 11351842506898185609u),
892     MakeInt128(658812288346769700, 10540996613548315209u),
893     MakeInt128(614891469123651720, 9838263505978427528u),
894     MakeInt128(576460752303423487, 18446744073709551615u),
895     MakeInt128(542551296285575047, 9765923333140350855u),
896     MakeInt128(512409557603043100, 8198552921648689607u),
897     MakeInt128(485440633518672410, 17475862806672206794u),
898     MakeInt128(461168601842738790, 7378697629483820646u),
899     MakeInt128(439208192231179800, 7027331075698876806u),
900     MakeInt128(419244183493398900, 6707906935894382405u),
901     MakeInt128(401016175515425035, 2406097053092550210u),
902     MakeInt128(384307168202282325, 6148914691236517205u),
903     MakeInt128(368934881474191032, 5902958103587056517u),
904     MakeInt128(354745078340568300, 5675921253449092804u),
905     MakeInt128(341606371735362066, 17763531330238827482u),
906     MakeInt128(329406144173384850, 5270498306774157604u),
907     MakeInt128(318047311615681924, 7633135478776366185u),
908     MakeInt128(307445734561825860, 4919131752989213764u),
909     MakeInt128(297528130221121800, 4760450083537948804u),
910     MakeInt128(288230376151711743, 18446744073709551615u),
911     MakeInt128(279496122328932600, 4471937957262921603u),
912     MakeInt128(271275648142787523, 14106333703424951235u),
913     MakeInt128(263524915338707880, 4216398645419326083u),
914     MakeInt128(256204778801521550, 4099276460824344803u),
915 };
916 
917 // This kVminOverBase generated with
918 //  for (int base = 2; base < 37; ++base) {
919 //    absl::int128 min = std::numeric_limits<absl::int128>::min();
920 //    auto result = min / base;
921 //    std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", "
922 //              << absl::Int128Low64(result) << "u),\n";
923 //  }
924 //
925 // See https://godbolt.org/z/7djYWz
926 //
927 // int128& operator/=(int128) is not constexpr, so hardcode the resulting array
928 // to avoid a static initializer.
929 template <>
930 ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = {
931     0,
932     0,
933     MakeInt128(-4611686018427387904, 0u),
934     MakeInt128(-3074457345618258603, 6148914691236517206u),
935     MakeInt128(-2305843009213693952, 0u),
936     MakeInt128(-1844674407370955162, 7378697629483820647u),
937     MakeInt128(-1537228672809129302, 12297829382473034411u),
938     MakeInt128(-1317624576693539402, 15811494920322472814u),
939     MakeInt128(-1152921504606846976, 0u),
940     MakeInt128(-1024819115206086201, 2049638230412172402u),
941     MakeInt128(-922337203685477581, 3689348814741910324u),
942     MakeInt128(-838488366986797801, 5030930201920786805u),
943     MakeInt128(-768614336404564651, 6148914691236517206u),
944     MakeInt128(-709490156681136601, 7094901566811366007u),
945     MakeInt128(-658812288346769701, 7905747460161236407u),
946     MakeInt128(-614891469123651721, 8608480567731124088u),
947     MakeInt128(-576460752303423488, 0u),
948     MakeInt128(-542551296285575048, 8680820740569200761u),
949     MakeInt128(-512409557603043101, 10248191152060862009u),
950     MakeInt128(-485440633518672411, 970881267037344822u),
951     MakeInt128(-461168601842738791, 11068046444225730970u),
952     MakeInt128(-439208192231179801, 11419412998010674810u),
953     MakeInt128(-419244183493398901, 11738837137815169211u),
954     MakeInt128(-401016175515425036, 16040647020617001406u),
955     MakeInt128(-384307168202282326, 12297829382473034411u),
956     MakeInt128(-368934881474191033, 12543785970122495099u),
957     MakeInt128(-354745078340568301, 12770822820260458812u),
958     MakeInt128(-341606371735362067, 683212743470724134u),
959     MakeInt128(-329406144173384851, 13176245766935394012u),
960     MakeInt128(-318047311615681925, 10813608594933185431u),
961     MakeInt128(-307445734561825861, 13527612320720337852u),
962     MakeInt128(-297528130221121801, 13686293990171602812u),
963     MakeInt128(-288230376151711744, 0u),
964     MakeInt128(-279496122328932601, 13974806116446630013u),
965     MakeInt128(-271275648142787524, 4340410370284600381u),
966     MakeInt128(-263524915338707881, 14230345428290225533u),
967     MakeInt128(-256204778801521551, 14347467612885206813u),
968 };
969 
970 template <typename IntType>
971 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] =
972     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
973 
974 template <typename IntType>
975 ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] =
976     X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
977 
978 #undef X_OVER_BASE_INITIALIZER
979 
980 template <typename IntType>
safe_parse_positive_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)981 inline bool safe_parse_positive_int(absl::string_view text, int base,
982                                     absl::Nonnull<IntType*> value_p) {
983   IntType value = 0;
984   const IntType vmax = std::numeric_limits<IntType>::max();
985   assert(vmax > 0);
986   assert(base >= 0);
987   const IntType base_inttype = static_cast<IntType>(base);
988   assert(vmax >= base_inttype);
989   const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
990   assert(base < 2 ||
991          std::numeric_limits<IntType>::max() / base_inttype == vmax_over_base);
992   const char* start = text.data();
993   const char* end = start + text.size();
994   // loop over digits
995   for (; start < end; ++start) {
996     unsigned char c = static_cast<unsigned char>(start[0]);
997     IntType digit = static_cast<IntType>(kAsciiToInt[c]);
998     if (digit >= base_inttype) {
999       *value_p = value;
1000       return false;
1001     }
1002     if (value > vmax_over_base) {
1003       *value_p = vmax;
1004       return false;
1005     }
1006     value *= base_inttype;
1007     if (value > vmax - digit) {
1008       *value_p = vmax;
1009       return false;
1010     }
1011     value += digit;
1012   }
1013   *value_p = value;
1014   return true;
1015 }
1016 
1017 template <typename IntType>
safe_parse_negative_int(absl::string_view text,int base,absl::Nonnull<IntType * > value_p)1018 inline bool safe_parse_negative_int(absl::string_view text, int base,
1019                                     absl::Nonnull<IntType*> value_p) {
1020   IntType value = 0;
1021   const IntType vmin = std::numeric_limits<IntType>::min();
1022   assert(vmin < 0);
1023   assert(vmin <= 0 - base);
1024   IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
1025   assert(base < 2 ||
1026          std::numeric_limits<IntType>::min() / base == vmin_over_base);
1027   // 2003 c++ standard [expr.mul]
1028   // "... the sign of the remainder is implementation-defined."
1029   // Although (vmin/base)*base + vmin%base is always vmin.
1030   // 2011 c++ standard tightens the spec but we cannot rely on it.
1031   // TODO(junyer): Handle this in the lookup table generation.
1032   if (vmin % base > 0) {
1033     vmin_over_base += 1;
1034   }
1035   const char* start = text.data();
1036   const char* end = start + text.size();
1037   // loop over digits
1038   for (; start < end; ++start) {
1039     unsigned char c = static_cast<unsigned char>(start[0]);
1040     int digit = kAsciiToInt[c];
1041     if (digit >= base) {
1042       *value_p = value;
1043       return false;
1044     }
1045     if (value < vmin_over_base) {
1046       *value_p = vmin;
1047       return false;
1048     }
1049     value *= base;
1050     if (value < vmin + digit) {
1051       *value_p = vmin;
1052       return false;
1053     }
1054     value -= digit;
1055   }
1056   *value_p = value;
1057   return true;
1058 }
1059 
1060 // Input format based on POSIX.1-2008 strtol
1061 // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
1062 template <typename IntType>
safe_int_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1063 inline bool safe_int_internal(absl::string_view text,
1064                               absl::Nonnull<IntType*> value_p, int base) {
1065   *value_p = 0;
1066   bool negative;
1067   if (!safe_parse_sign_and_base(&text, &base, &negative)) {
1068     return false;
1069   }
1070   if (!negative) {
1071     return safe_parse_positive_int(text, base, value_p);
1072   } else {
1073     return safe_parse_negative_int(text, base, value_p);
1074   }
1075 }
1076 
1077 template <typename IntType>
safe_uint_internal(absl::string_view text,absl::Nonnull<IntType * > value_p,int base)1078 inline bool safe_uint_internal(absl::string_view text,
1079                                absl::Nonnull<IntType*> value_p, int base) {
1080   *value_p = 0;
1081   bool negative;
1082   if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
1083     return false;
1084   }
1085   return safe_parse_positive_int(text, base, value_p);
1086 }
1087 }  // anonymous namespace
1088 
1089 namespace numbers_internal {
1090 
1091 // Digit conversion.
1092 ABSL_CONST_INIT ABSL_DLL const char kHexChar[] =
1093     "0123456789abcdef";
1094 
1095 ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] =
1096     "000102030405060708090a0b0c0d0e0f"
1097     "101112131415161718191a1b1c1d1e1f"
1098     "202122232425262728292a2b2c2d2e2f"
1099     "303132333435363738393a3b3c3d3e3f"
1100     "404142434445464748494a4b4c4d4e4f"
1101     "505152535455565758595a5b5c5d5e5f"
1102     "606162636465666768696a6b6c6d6e6f"
1103     "707172737475767778797a7b7c7d7e7f"
1104     "808182838485868788898a8b8c8d8e8f"
1105     "909192939495969798999a9b9c9d9e9f"
1106     "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
1107     "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
1108     "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
1109     "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
1110     "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
1111     "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
1112 
safe_strto32_base(absl::string_view text,absl::Nonnull<int32_t * > value,int base)1113 bool safe_strto32_base(absl::string_view text, absl::Nonnull<int32_t*> value,
1114                        int base) {
1115   return safe_int_internal<int32_t>(text, value, base);
1116 }
1117 
safe_strto64_base(absl::string_view text,absl::Nonnull<int64_t * > value,int base)1118 bool safe_strto64_base(absl::string_view text, absl::Nonnull<int64_t*> value,
1119                        int base) {
1120   return safe_int_internal<int64_t>(text, value, base);
1121 }
1122 
safe_strto128_base(absl::string_view text,absl::Nonnull<int128 * > value,int base)1123 bool safe_strto128_base(absl::string_view text, absl::Nonnull<int128*> value,
1124                         int base) {
1125   return safe_int_internal<absl::int128>(text, value, base);
1126 }
1127 
safe_strtou32_base(absl::string_view text,absl::Nonnull<uint32_t * > value,int base)1128 bool safe_strtou32_base(absl::string_view text, absl::Nonnull<uint32_t*> value,
1129                         int base) {
1130   return safe_uint_internal<uint32_t>(text, value, base);
1131 }
1132 
safe_strtou64_base(absl::string_view text,absl::Nonnull<uint64_t * > value,int base)1133 bool safe_strtou64_base(absl::string_view text, absl::Nonnull<uint64_t*> value,
1134                         int base) {
1135   return safe_uint_internal<uint64_t>(text, value, base);
1136 }
1137 
safe_strtou128_base(absl::string_view text,absl::Nonnull<uint128 * > value,int base)1138 bool safe_strtou128_base(absl::string_view text, absl::Nonnull<uint128*> value,
1139                          int base) {
1140   return safe_uint_internal<absl::uint128>(text, value, base);
1141 }
1142 
1143 }  // namespace numbers_internal
1144 ABSL_NAMESPACE_END
1145 }  // namespace absl
1146