xref: /aosp_15_r20/external/llvm-libc/src/stdio/scanf_core/float_converter.cpp (revision 71db0c75aadcf003ffe3238005f61d7618a3fead)
1 //===-- Float type specifier converters for scanf ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "src/stdio/scanf_core/float_converter.h"
10 
11 #include "src/__support/CPP/limits.h"
12 #include "src/__support/char_vector.h"
13 #include "src/__support/ctype_utils.h"
14 #include "src/__support/macros/config.h"
15 #include "src/stdio/scanf_core/converter_utils.h"
16 #include "src/stdio/scanf_core/core_structs.h"
17 #include "src/stdio/scanf_core/reader.h"
18 
19 #include <stddef.h>
20 
21 namespace LIBC_NAMESPACE_DECL {
22 namespace scanf_core {
23 
24 // All of the floating point conversions are the same for scanf, every name will
25 // accept every style.
convert_float(Reader * reader,const FormatSection & to_conv)26 int convert_float(Reader *reader, const FormatSection &to_conv) {
27   // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
28   // infinity, or NaN, whose format is the same as expected for the subject
29   // sequence of the strtod function. The corresponding argument shall be a
30   // pointer to floating."
31 
32   CharVector out_str = CharVector();
33   bool is_number = false;
34 
35   size_t max_width = cpp::numeric_limits<size_t>::max();
36   if (to_conv.max_width > 0) {
37     max_width = to_conv.max_width;
38   }
39 
40   char cur_char = reader->getc();
41   // Handle the sign.
42   if (cur_char == '+' || cur_char == '-') {
43     if (!out_str.append(cur_char)) {
44       return ALLOCATION_FAILURE;
45     }
46     if (out_str.length() == max_width) {
47       return MATCHING_FAILURE;
48     } else {
49       cur_char = reader->getc();
50     }
51   }
52 
53   static constexpr char DECIMAL_POINT = '.';
54   static const char inf_string[] = "infinity";
55 
56   // Handle inf
57 
58   if (to_lower(cur_char) == inf_string[0]) {
59     size_t inf_index = 0;
60 
61     for (; inf_index < sizeof(inf_string) && out_str.length() < max_width &&
62            to_lower(cur_char) == inf_string[inf_index];
63          ++inf_index) {
64       if (!out_str.append(cur_char)) {
65         return ALLOCATION_FAILURE;
66       }
67       cur_char = reader->getc();
68     }
69 
70     if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
71       write_float_with_length(out_str.c_str(), to_conv);
72       return READ_OK;
73     } else {
74       return MATCHING_FAILURE;
75     }
76   }
77 
78   static const char nan_string[] = "nan";
79 
80   // Handle nan
81   if (to_lower(cur_char) == nan_string[0]) {
82     size_t nan_index = 0;
83 
84     for (; nan_index < sizeof(nan_string) && out_str.length() < max_width &&
85            to_lower(cur_char) == nan_string[nan_index];
86          ++nan_index) {
87       if (!out_str.append(cur_char)) {
88         return ALLOCATION_FAILURE;
89       }
90       cur_char = reader->getc();
91     }
92 
93     if (nan_index == sizeof(nan_string) - 1) {
94       write_float_with_length(out_str.c_str(), to_conv);
95       return READ_OK;
96     } else {
97       return MATCHING_FAILURE;
98     }
99   }
100 
101   // Assume base of 10 by default but check if it is actually base 16.
102   int base = 10;
103 
104   // If the string starts with 0 it might be in hex.
105   if (cur_char == '0') {
106     is_number = true;
107     // Read the next character to check.
108     if (!out_str.append(cur_char)) {
109       return ALLOCATION_FAILURE;
110     }
111     // If we've hit the end, then this is "0", which is valid.
112     if (out_str.length() == max_width) {
113       write_float_with_length(out_str.c_str(), to_conv);
114       return READ_OK;
115     } else {
116       cur_char = reader->getc();
117     }
118 
119     // If that next character is an 'x' then this is a hexadecimal number.
120     if (to_lower(cur_char) == 'x') {
121       base = 16;
122 
123       if (!out_str.append(cur_char)) {
124         return ALLOCATION_FAILURE;
125       }
126       // If we've hit the end here, we have "0x" which is a valid prefix to a
127       // floating point number, and will be evaluated to 0.
128       if (out_str.length() == max_width) {
129         write_float_with_length(out_str.c_str(), to_conv);
130         return READ_OK;
131       } else {
132         cur_char = reader->getc();
133       }
134     }
135   }
136 
137   const char exponent_mark = ((base == 10) ? 'e' : 'p');
138   bool after_decimal = false;
139 
140   // The format for the remaining characters at this point is DD.DDe+/-DD for
141   // base 10 and XX.XXp+/-DD for base 16
142 
143   // This handles the digits before and after the decimal point, but not the
144   // exponent.
145   while (out_str.length() < max_width) {
146     if (internal::isalnum(cur_char) &&
147         internal::b36_char_to_int(cur_char) < base) {
148       is_number = true;
149       if (!out_str.append(cur_char)) {
150         return ALLOCATION_FAILURE;
151       }
152       cur_char = reader->getc();
153     } else if (cur_char == DECIMAL_POINT && !after_decimal) {
154       after_decimal = true;
155       if (!out_str.append(cur_char)) {
156         return ALLOCATION_FAILURE;
157       }
158       cur_char = reader->getc();
159     } else {
160       break;
161     }
162   }
163 
164   // Handle the exponent, which has an exponent mark, an optional sign, and
165   // decimal digits.
166   if (to_lower(cur_char) == exponent_mark) {
167     if (!out_str.append(cur_char)) {
168       return ALLOCATION_FAILURE;
169     }
170     if (out_str.length() == max_width) {
171       // This is laid out in the standard as being a matching error (100e is not
172       // a valid float) but may conflict with existing implementations.
173       return MATCHING_FAILURE;
174     } else {
175       cur_char = reader->getc();
176     }
177 
178     if (cur_char == '+' || cur_char == '-') {
179       if (!out_str.append(cur_char)) {
180         return ALLOCATION_FAILURE;
181       }
182       if (out_str.length() == max_width) {
183         return MATCHING_FAILURE;
184       } else {
185         cur_char = reader->getc();
186       }
187     }
188 
189     // It is specified by the standard that "100er" is a matching failure since
190     // the longest prefix of a possibly valid floating-point number (which is
191     // "100e") is not a valid floating-point number. If there is an exponent
192     // mark then there must be a digit after it else the number is not valid.
193     // Some implementations will roll back two characters (to just "100") and
194     // accept that since the prefix is not valid, and some will interpret an
195     // exponent mark followed by no digits as an additional exponent of 0
196     // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
197     // by the standard, but they may be used in real code, see Hyrum's law. This
198     // code follows the standard, but may be incompatible due to code expecting
199     // these bugs.
200     if (!internal::isdigit(cur_char)) {
201       return MATCHING_FAILURE;
202     }
203 
204     while (internal::isdigit(cur_char) && out_str.length() < max_width) {
205       if (!out_str.append(cur_char)) {
206         return ALLOCATION_FAILURE;
207       }
208       cur_char = reader->getc();
209     }
210   }
211 
212   // We always read one more character than will be used, so we have to put the
213   // last one back.
214   reader->ungetc(cur_char);
215 
216   // If we haven't actually found any digits, this is a matching failure (this
217   // catches cases like "+.")
218   if (!is_number) {
219     return MATCHING_FAILURE;
220   }
221   write_float_with_length(out_str.c_str(), to_conv);
222 
223   return READ_OK;
224 }
225 
226 } // namespace scanf_core
227 } // namespace LIBC_NAMESPACE_DECL
228