1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package strconv 6 7import ( 8 "errors" 9 "internal/stringslite" 10) 11 12// lower(c) is a lower-case letter if and only if 13// c is either that lower-case letter or the equivalent upper-case letter. 14// Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'. 15// Note that lower of non-letters can produce other non-letters. 16func lower(c byte) byte { 17 return c | ('x' - 'X') 18} 19 20// ErrRange indicates that a value is out of range for the target type. 21var ErrRange = errors.New("value out of range") 22 23// ErrSyntax indicates that a value does not have the right syntax for the target type. 24var ErrSyntax = errors.New("invalid syntax") 25 26// A NumError records a failed conversion. 27type NumError struct { 28 Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex) 29 Num string // the input 30 Err error // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.) 31} 32 33func (e *NumError) Error() string { 34 return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error() 35} 36 37func (e *NumError) Unwrap() error { return e.Err } 38 39// All ParseXXX functions allow the input string to escape to the error value. 40// This hurts strconv.ParseXXX(string(b)) calls where b is []byte since 41// the conversion from []byte must allocate a string on the heap. 42// If we assume errors are infrequent, then we can avoid escaping the input 43// back to the output by copying it first. This allows the compiler to call 44// strconv.ParseXXX without a heap allocation for most []byte to string 45// conversions, since it can now prove that the string cannot escape Parse. 46 47func syntaxError(fn, str string) *NumError { 48 return &NumError{fn, stringslite.Clone(str), ErrSyntax} 49} 50 51func rangeError(fn, str string) *NumError { 52 return &NumError{fn, stringslite.Clone(str), ErrRange} 53} 54 55func baseError(fn, str string, base int) *NumError { 56 return &NumError{fn, stringslite.Clone(str), errors.New("invalid base " + Itoa(base))} 57} 58 59func bitSizeError(fn, str string, bitSize int) *NumError { 60 return &NumError{fn, stringslite.Clone(str), errors.New("invalid bit size " + Itoa(bitSize))} 61} 62 63const intSize = 32 << (^uint(0) >> 63) 64 65// IntSize is the size in bits of an int or uint value. 66const IntSize = intSize 67 68const maxUint64 = 1<<64 - 1 69 70// ParseUint is like [ParseInt] but for unsigned numbers. 71// 72// A sign prefix is not permitted. 73func ParseUint(s string, base int, bitSize int) (uint64, error) { 74 const fnParseUint = "ParseUint" 75 76 if s == "" { 77 return 0, syntaxError(fnParseUint, s) 78 } 79 80 base0 := base == 0 81 82 s0 := s 83 switch { 84 case 2 <= base && base <= 36: 85 // valid base; nothing to do 86 87 case base == 0: 88 // Look for octal, hex prefix. 89 base = 10 90 if s[0] == '0' { 91 switch { 92 case len(s) >= 3 && lower(s[1]) == 'b': 93 base = 2 94 s = s[2:] 95 case len(s) >= 3 && lower(s[1]) == 'o': 96 base = 8 97 s = s[2:] 98 case len(s) >= 3 && lower(s[1]) == 'x': 99 base = 16 100 s = s[2:] 101 default: 102 base = 8 103 s = s[1:] 104 } 105 } 106 107 default: 108 return 0, baseError(fnParseUint, s0, base) 109 } 110 111 if bitSize == 0 { 112 bitSize = IntSize 113 } else if bitSize < 0 || bitSize > 64 { 114 return 0, bitSizeError(fnParseUint, s0, bitSize) 115 } 116 117 // Cutoff is the smallest number such that cutoff*base > maxUint64. 118 // Use compile-time constants for common cases. 119 var cutoff uint64 120 switch base { 121 case 10: 122 cutoff = maxUint64/10 + 1 123 case 16: 124 cutoff = maxUint64/16 + 1 125 default: 126 cutoff = maxUint64/uint64(base) + 1 127 } 128 129 maxVal := uint64(1)<<uint(bitSize) - 1 130 131 underscores := false 132 var n uint64 133 for _, c := range []byte(s) { 134 var d byte 135 switch { 136 case c == '_' && base0: 137 underscores = true 138 continue 139 case '0' <= c && c <= '9': 140 d = c - '0' 141 case 'a' <= lower(c) && lower(c) <= 'z': 142 d = lower(c) - 'a' + 10 143 default: 144 return 0, syntaxError(fnParseUint, s0) 145 } 146 147 if d >= byte(base) { 148 return 0, syntaxError(fnParseUint, s0) 149 } 150 151 if n >= cutoff { 152 // n*base overflows 153 return maxVal, rangeError(fnParseUint, s0) 154 } 155 n *= uint64(base) 156 157 n1 := n + uint64(d) 158 if n1 < n || n1 > maxVal { 159 // n+d overflows 160 return maxVal, rangeError(fnParseUint, s0) 161 } 162 n = n1 163 } 164 165 if underscores && !underscoreOK(s0) { 166 return 0, syntaxError(fnParseUint, s0) 167 } 168 169 return n, nil 170} 171 172// ParseInt interprets a string s in the given base (0, 2 to 36) and 173// bit size (0 to 64) and returns the corresponding value i. 174// 175// The string may begin with a leading sign: "+" or "-". 176// 177// If the base argument is 0, the true base is implied by the string's 178// prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o", 179// 16 for "0x", and 10 otherwise. Also, for argument base 0 only, 180// underscore characters are permitted as defined by the Go syntax for 181// [integer literals]. 182// 183// The bitSize argument specifies the integer type 184// that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 185// correspond to int, int8, int16, int32, and int64. 186// If bitSize is below 0 or above 64, an error is returned. 187// 188// The errors that ParseInt returns have concrete type [*NumError] 189// and include err.Num = s. If s is empty or contains invalid 190// digits, err.Err = [ErrSyntax] and the returned value is 0; 191// if the value corresponding to s cannot be represented by a 192// signed integer of the given size, err.Err = [ErrRange] and the 193// returned value is the maximum magnitude integer of the 194// appropriate bitSize and sign. 195// 196// [integer literals]: https://go.dev/ref/spec#Integer_literals 197func ParseInt(s string, base int, bitSize int) (i int64, err error) { 198 const fnParseInt = "ParseInt" 199 200 if s == "" { 201 return 0, syntaxError(fnParseInt, s) 202 } 203 204 // Pick off leading sign. 205 s0 := s 206 neg := false 207 if s[0] == '+' { 208 s = s[1:] 209 } else if s[0] == '-' { 210 neg = true 211 s = s[1:] 212 } 213 214 // Convert unsigned and check range. 215 var un uint64 216 un, err = ParseUint(s, base, bitSize) 217 if err != nil && err.(*NumError).Err != ErrRange { 218 err.(*NumError).Func = fnParseInt 219 err.(*NumError).Num = stringslite.Clone(s0) 220 return 0, err 221 } 222 223 if bitSize == 0 { 224 bitSize = IntSize 225 } 226 227 cutoff := uint64(1 << uint(bitSize-1)) 228 if !neg && un >= cutoff { 229 return int64(cutoff - 1), rangeError(fnParseInt, s0) 230 } 231 if neg && un > cutoff { 232 return -int64(cutoff), rangeError(fnParseInt, s0) 233 } 234 n := int64(un) 235 if neg { 236 n = -n 237 } 238 return n, nil 239} 240 241// Atoi is equivalent to ParseInt(s, 10, 0), converted to type int. 242func Atoi(s string) (int, error) { 243 const fnAtoi = "Atoi" 244 245 sLen := len(s) 246 if intSize == 32 && (0 < sLen && sLen < 10) || 247 intSize == 64 && (0 < sLen && sLen < 19) { 248 // Fast path for small integers that fit int type. 249 s0 := s 250 if s[0] == '-' || s[0] == '+' { 251 s = s[1:] 252 if len(s) < 1 { 253 return 0, syntaxError(fnAtoi, s0) 254 } 255 } 256 257 n := 0 258 for _, ch := range []byte(s) { 259 ch -= '0' 260 if ch > 9 { 261 return 0, syntaxError(fnAtoi, s0) 262 } 263 n = n*10 + int(ch) 264 } 265 if s0[0] == '-' { 266 n = -n 267 } 268 return n, nil 269 } 270 271 // Slow path for invalid, big, or underscored integers. 272 i64, err := ParseInt(s, 10, 0) 273 if nerr, ok := err.(*NumError); ok { 274 nerr.Func = fnAtoi 275 } 276 return int(i64), err 277} 278 279// underscoreOK reports whether the underscores in s are allowed. 280// Checking them in this one function lets all the parsers skip over them simply. 281// Underscore must appear only between digits or between a base prefix and a digit. 282func underscoreOK(s string) bool { 283 // saw tracks the last character (class) we saw: 284 // ^ for beginning of number, 285 // 0 for a digit or base prefix, 286 // _ for an underscore, 287 // ! for none of the above. 288 saw := '^' 289 i := 0 290 291 // Optional sign. 292 if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { 293 s = s[1:] 294 } 295 296 // Optional base prefix. 297 hex := false 298 if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { 299 i = 2 300 saw = '0' // base prefix counts as a digit for "underscore as digit separator" 301 hex = lower(s[1]) == 'x' 302 } 303 304 // Number proper. 305 for ; i < len(s); i++ { 306 // Digits are always okay. 307 if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { 308 saw = '0' 309 continue 310 } 311 // Underscore must follow digit. 312 if s[i] == '_' { 313 if saw != '0' { 314 return false 315 } 316 saw = '_' 317 continue 318 } 319 // Underscore must also be followed by digit. 320 if saw == '_' { 321 return false 322 } 323 // Saw non-digit, non-underscore. 324 saw = '!' 325 } 326 return saw != '_' 327} 328