1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package strconv
6
7import (
8	"errors"
9	"internal/stringslite"
10)
11
// lower folds an ASCII letter to lower case by forcing the case bit on,
// so that lower(c) == 'x' can stand in for c == 'x' || c == 'X'.
// A result equal to a lower-case letter means c was that letter in
// either case. Bytes that are not letters may be changed into other
// non-letter bytes.
func lower(c byte) byte {
	// 'x' - 'X' is the one bit that distinguishes ASCII lower case from upper case.
	const caseBit = 'x' - 'X'
	return c | caseBit
}
19
// Sentinel errors reported through the Err field of a NumError.
var (
	// ErrRange indicates that a value is out of range for the target type.
	ErrRange = errors.New("value out of range")

	// ErrSyntax indicates that a value does not have the right syntax for the target type.
	ErrSyntax = errors.New("invalid syntax")
)
25
// A NumError records a failed conversion.
//
// Its Error method renders the three fields as a single message and its
// Unwrap method returns Err.
type NumError struct {
	// Func is the failing function (ParseBool, ParseInt, ParseUint,
	// ParseFloat, ParseComplex). Atoi also reports itself here.
	Func string
	// Num is the input.
	Num string
	// Err is the reason the conversion failed (e.g. ErrRange, ErrSyntax,
	// or an "invalid base" / "invalid bit size" error).
	Err error
}
32
33func (e *NumError) Error() string {
34	return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
35}
36
37func (e *NumError) Unwrap() error { return e.Err }
38
39// All ParseXXX functions allow the input string to escape to the error value.
40// This hurts strconv.ParseXXX(string(b)) calls where b is []byte since
41// the conversion from []byte must allocate a string on the heap.
42// If we assume errors are infrequent, then we can avoid escaping the input
43// back to the output by copying it first. This allows the compiler to call
44// strconv.ParseXXX without a heap allocation for most []byte to string
45// conversions, since it can now prove that the string cannot escape Parse.
46
47func syntaxError(fn, str string) *NumError {
48	return &NumError{fn, stringslite.Clone(str), ErrSyntax}
49}
50
51func rangeError(fn, str string) *NumError {
52	return &NumError{fn, stringslite.Clone(str), ErrRange}
53}
54
55func baseError(fn, str string, base int) *NumError {
56	return &NumError{fn, stringslite.Clone(str), errors.New("invalid base " + Itoa(base))}
57}
58
59func bitSizeError(fn, str string, bitSize int) *NumError {
60	return &NumError{fn, stringslite.Clone(str), errors.New("invalid bit size " + Itoa(bitSize))}
61}
62
const (
	// intSize is the width in bits of uint on the target platform.
	// ^uint(0) >> 63 is 1 when uint has 64 bits and 0 when it has 32,
	// so the shift produces 64 or 32 respectively.
	intSize = 32 << (^uint(0) >> 63)

	// IntSize is the size in bits of an int or uint value.
	IntSize = intSize

	// maxUint64 is the largest value a uint64 can hold.
	maxUint64 = 1<<64 - 1
)
69
70// ParseUint is like [ParseInt] but for unsigned numbers.
71//
72// A sign prefix is not permitted.
73func ParseUint(s string, base int, bitSize int) (uint64, error) {
74	const fnParseUint = "ParseUint"
75
76	if s == "" {
77		return 0, syntaxError(fnParseUint, s)
78	}
79
80	base0 := base == 0
81
82	s0 := s
83	switch {
84	case 2 <= base && base <= 36:
85		// valid base; nothing to do
86
87	case base == 0:
88		// Look for octal, hex prefix.
89		base = 10
90		if s[0] == '0' {
91			switch {
92			case len(s) >= 3 && lower(s[1]) == 'b':
93				base = 2
94				s = s[2:]
95			case len(s) >= 3 && lower(s[1]) == 'o':
96				base = 8
97				s = s[2:]
98			case len(s) >= 3 && lower(s[1]) == 'x':
99				base = 16
100				s = s[2:]
101			default:
102				base = 8
103				s = s[1:]
104			}
105		}
106
107	default:
108		return 0, baseError(fnParseUint, s0, base)
109	}
110
111	if bitSize == 0 {
112		bitSize = IntSize
113	} else if bitSize < 0 || bitSize > 64 {
114		return 0, bitSizeError(fnParseUint, s0, bitSize)
115	}
116
117	// Cutoff is the smallest number such that cutoff*base > maxUint64.
118	// Use compile-time constants for common cases.
119	var cutoff uint64
120	switch base {
121	case 10:
122		cutoff = maxUint64/10 + 1
123	case 16:
124		cutoff = maxUint64/16 + 1
125	default:
126		cutoff = maxUint64/uint64(base) + 1
127	}
128
129	maxVal := uint64(1)<<uint(bitSize) - 1
130
131	underscores := false
132	var n uint64
133	for _, c := range []byte(s) {
134		var d byte
135		switch {
136		case c == '_' && base0:
137			underscores = true
138			continue
139		case '0' <= c && c <= '9':
140			d = c - '0'
141		case 'a' <= lower(c) && lower(c) <= 'z':
142			d = lower(c) - 'a' + 10
143		default:
144			return 0, syntaxError(fnParseUint, s0)
145		}
146
147		if d >= byte(base) {
148			return 0, syntaxError(fnParseUint, s0)
149		}
150
151		if n >= cutoff {
152			// n*base overflows
153			return maxVal, rangeError(fnParseUint, s0)
154		}
155		n *= uint64(base)
156
157		n1 := n + uint64(d)
158		if n1 < n || n1 > maxVal {
159			// n+d overflows
160			return maxVal, rangeError(fnParseUint, s0)
161		}
162		n = n1
163	}
164
165	if underscores && !underscoreOK(s0) {
166		return 0, syntaxError(fnParseUint, s0)
167	}
168
169	return n, nil
170}
171
172// ParseInt interprets a string s in the given base (0, 2 to 36) and
173// bit size (0 to 64) and returns the corresponding value i.
174//
175// The string may begin with a leading sign: "+" or "-".
176//
177// If the base argument is 0, the true base is implied by the string's
178// prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
179// 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
180// underscore characters are permitted as defined by the Go syntax for
181// [integer literals].
182//
183// The bitSize argument specifies the integer type
184// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
185// correspond to int, int8, int16, int32, and int64.
186// If bitSize is below 0 or above 64, an error is returned.
187//
188// The errors that ParseInt returns have concrete type [*NumError]
189// and include err.Num = s. If s is empty or contains invalid
190// digits, err.Err = [ErrSyntax] and the returned value is 0;
191// if the value corresponding to s cannot be represented by a
192// signed integer of the given size, err.Err = [ErrRange] and the
193// returned value is the maximum magnitude integer of the
194// appropriate bitSize and sign.
195//
196// [integer literals]: https://go.dev/ref/spec#Integer_literals
197func ParseInt(s string, base int, bitSize int) (i int64, err error) {
198	const fnParseInt = "ParseInt"
199
200	if s == "" {
201		return 0, syntaxError(fnParseInt, s)
202	}
203
204	// Pick off leading sign.
205	s0 := s
206	neg := false
207	if s[0] == '+' {
208		s = s[1:]
209	} else if s[0] == '-' {
210		neg = true
211		s = s[1:]
212	}
213
214	// Convert unsigned and check range.
215	var un uint64
216	un, err = ParseUint(s, base, bitSize)
217	if err != nil && err.(*NumError).Err != ErrRange {
218		err.(*NumError).Func = fnParseInt
219		err.(*NumError).Num = stringslite.Clone(s0)
220		return 0, err
221	}
222
223	if bitSize == 0 {
224		bitSize = IntSize
225	}
226
227	cutoff := uint64(1 << uint(bitSize-1))
228	if !neg && un >= cutoff {
229		return int64(cutoff - 1), rangeError(fnParseInt, s0)
230	}
231	if neg && un > cutoff {
232		return -int64(cutoff), rangeError(fnParseInt, s0)
233	}
234	n := int64(un)
235	if neg {
236		n = -n
237	}
238	return n, nil
239}
240
241// Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
242func Atoi(s string) (int, error) {
243	const fnAtoi = "Atoi"
244
245	sLen := len(s)
246	if intSize == 32 && (0 < sLen && sLen < 10) ||
247		intSize == 64 && (0 < sLen && sLen < 19) {
248		// Fast path for small integers that fit int type.
249		s0 := s
250		if s[0] == '-' || s[0] == '+' {
251			s = s[1:]
252			if len(s) < 1 {
253				return 0, syntaxError(fnAtoi, s0)
254			}
255		}
256
257		n := 0
258		for _, ch := range []byte(s) {
259			ch -= '0'
260			if ch > 9 {
261				return 0, syntaxError(fnAtoi, s0)
262			}
263			n = n*10 + int(ch)
264		}
265		if s0[0] == '-' {
266			n = -n
267		}
268		return n, nil
269	}
270
271	// Slow path for invalid, big, or underscored integers.
272	i64, err := ParseInt(s, 10, 0)
273	if nerr, ok := err.(*NumError); ok {
274		nerr.Func = fnAtoi
275	}
276	return int(i64), err
277}
278
279// underscoreOK reports whether the underscores in s are allowed.
280// Checking them in this one function lets all the parsers skip over them simply.
281// Underscore must appear only between digits or between a base prefix and a digit.
282func underscoreOK(s string) bool {
283	// saw tracks the last character (class) we saw:
284	// ^ for beginning of number,
285	// 0 for a digit or base prefix,
286	// _ for an underscore,
287	// ! for none of the above.
288	saw := '^'
289	i := 0
290
291	// Optional sign.
292	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
293		s = s[1:]
294	}
295
296	// Optional base prefix.
297	hex := false
298	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
299		i = 2
300		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
301		hex = lower(s[1]) == 'x'
302	}
303
304	// Number proper.
305	for ; i < len(s); i++ {
306		// Digits are always okay.
307		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
308			saw = '0'
309			continue
310		}
311		// Underscore must follow digit.
312		if s[i] == '_' {
313			if saw != '0' {
314				return false
315			}
316			saw = '_'
317			continue
318		}
319		// Underscore must also be followed by digit.
320		if saw == '_' {
321			return false
322		}
323		// Saw non-digit, non-underscore.
324		saw = '!'
325	}
326	return saw != '_'
327}
328