// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fmt

import (
	"errors"
	"io"
	"math"
	"os"
	"reflect"
	"strconv"
	"sync"
	"unicode/utf8"
)

// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}

// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to [Scan], [Scanf], or [Scanln] that implements it.
type Scanner interface {
	Scan(state ScanState, verb rune) error
}

// Scan scans text read from standard input, storing successive
// space-separated values into successive arguments. Newlines count
// as space. It returns the number of items successfully scanned.
// If that is less than the number of arguments, err will report why.
func Scan(a ...any) (n int, err error) {
	return Fscan(os.Stdin, a...)
}

// Scanln is similar to [Scan], but stops scanning at a newline and
// after the final item there must be a newline or EOF.
func Scanln(a ...any) (n int, err error) {
	return Fscanln(os.Stdin, a...)
}

// Scanf scans text read from standard input, storing successive
// space-separated values into successive arguments as determined by
// the format. It returns the number of items successfully scanned.
// If that is less than the number of arguments, err will report why.
// Newlines in the input must match newlines in the format.
// The one exception: the verb %c always scans the next rune in the
// input, even if it is a space (or tab etc.) or newline.
func Scanf(format string, a ...any) (n int, err error) {
	return Fscanf(os.Stdin, format, a...)
}

// stringReader is a string that acts as an io.Reader by consuming
// itself from the front; it backs the Sscan family of functions.
type stringReader string

// Read copies as much of the remaining string as fits into b and drops
// the copied prefix from the receiver. It reports io.EOF whenever no
// bytes were copied.
func (r *stringReader) Read(b []byte) (n int, err error) {
	n = copy(b, *r)
	*r = (*r)[n:]
	if n == 0 {
		err = io.EOF
	}
	return
}

// Sscan scans the argument string, storing successive space-separated
// values into successive arguments. Newlines count as space. It
// returns the number of items successfully scanned. If that is less
// than the number of arguments, err will report why.
func Sscan(str string, a ...any) (n int, err error) {
	return Fscan((*stringReader)(&str), a...)
}

// Sscanln is similar to [Sscan], but stops scanning at a newline and
// after the final item there must be a newline or EOF.
func Sscanln(str string, a ...any) (n int, err error) {
	return Fscanln((*stringReader)(&str), a...)
}

// Sscanf scans the argument string, storing successive space-separated
// values into successive arguments as determined by the format. It
// returns the number of items successfully parsed.
// Newlines in the input must match newlines in the format.
func Sscanf(str string, format string, a ...any) (n int, err error) {
	return Fscanf((*stringReader)(&str), format, a...)
}

// Fscan scans text read from r, storing successive space-separated
// values into successive arguments. Newlines count as space. It
// returns the number of items successfully scanned. If that is less
// than the number of arguments, err will report why.
func Fscan(r io.Reader, a ...any) (n int, err error) {
	// nlIsSpace=true, nlIsEnd=false: newlines are ordinary white space.
	s, old := newScanState(r, true, false)
	n, err = s.doScan(a)
	s.free(old)
	return
}

// Fscanln is similar to [Fscan], but stops scanning at a newline and
// after the final item there must be a newline or EOF.
130func Fscanln(r io.Reader, a ...any) (n int, err error) { 131 s, old := newScanState(r, false, true) 132 n, err = s.doScan(a) 133 s.free(old) 134 return 135} 136 137// Fscanf scans text read from r, storing successive space-separated 138// values into successive arguments as determined by the format. It 139// returns the number of items successfully parsed. 140// Newlines in the input must match newlines in the format. 141func Fscanf(r io.Reader, format string, a ...any) (n int, err error) { 142 s, old := newScanState(r, false, false) 143 n, err = s.doScanf(format, a) 144 s.free(old) 145 return 146} 147 148// scanError represents an error generated by the scanning software. 149// It's used as a unique signature to identify such errors when recovering. 150type scanError struct { 151 err error 152} 153 154const eof = -1 155 156// ss is the internal implementation of ScanState. 157type ss struct { 158 rs io.RuneScanner // where to read input 159 buf buffer // token accumulator 160 count int // runes consumed so far. 161 atEOF bool // already read EOF 162 ssave 163} 164 165// ssave holds the parts of ss that need to be 166// saved and restored on recursive scans. 167type ssave struct { 168 validSave bool // is or was a part of an actual ss. 169 nlIsEnd bool // whether newline terminates scan 170 nlIsSpace bool // whether newline counts as white space 171 argLimit int // max value of ss.count for this arg; argLimit <= limit 172 limit int // max value of ss.count. 173 maxWid int // width of this arg. 174} 175 176// The Read method is only in ScanState so that ScanState 177// satisfies io.Reader. It will never be called when used as 178// intended, so there is no need to make it actually work. 179func (s *ss) Read(buf []byte) (n int, err error) { 180 return 0, errors.New("ScanState's Read should not be called. 
Use ReadRune") 181} 182 183func (s *ss) ReadRune() (r rune, size int, err error) { 184 if s.atEOF || s.count >= s.argLimit { 185 err = io.EOF 186 return 187 } 188 189 r, size, err = s.rs.ReadRune() 190 if err == nil { 191 s.count++ 192 if s.nlIsEnd && r == '\n' { 193 s.atEOF = true 194 } 195 } else if err == io.EOF { 196 s.atEOF = true 197 } 198 return 199} 200 201func (s *ss) Width() (wid int, ok bool) { 202 if s.maxWid == hugeWid { 203 return 0, false 204 } 205 return s.maxWid, true 206} 207 208// The public method returns an error; this private one panics. 209// If getRune reaches EOF, the return value is EOF (-1). 210func (s *ss) getRune() (r rune) { 211 r, _, err := s.ReadRune() 212 if err != nil { 213 if err == io.EOF { 214 return eof 215 } 216 s.error(err) 217 } 218 return 219} 220 221// mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF). 222// It is called in cases such as string scanning where an EOF is a 223// syntax error. 224func (s *ss) mustReadRune() (r rune) { 225 r = s.getRune() 226 if r == eof { 227 s.error(io.ErrUnexpectedEOF) 228 } 229 return 230} 231 232func (s *ss) UnreadRune() error { 233 s.rs.UnreadRune() 234 s.atEOF = false 235 s.count-- 236 return nil 237} 238 239func (s *ss) error(err error) { 240 panic(scanError{err}) 241} 242 243func (s *ss) errorString(err string) { 244 panic(scanError{errors.New(err)}) 245} 246 247func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) { 248 defer func() { 249 if e := recover(); e != nil { 250 if se, ok := e.(scanError); ok { 251 err = se.err 252 } else { 253 panic(e) 254 } 255 } 256 }() 257 if f == nil { 258 f = notSpace 259 } 260 s.buf = s.buf[:0] 261 tok = s.token(skipSpace, f) 262 return 263} 264 265// space is a copy of the unicode.White_Space ranges, 266// to avoid depending on package unicode. 
var space = [][2]uint16{
	{0x0009, 0x000d},
	{0x0020, 0x0020},
	{0x0085, 0x0085},
	{0x00a0, 0x00a0},
	{0x1680, 0x1680},
	{0x2000, 0x200a},
	{0x2028, 0x2029},
	{0x202f, 0x202f},
	{0x205f, 0x205f},
	{0x3000, 0x3000},
}

// isSpace reports whether r falls in one of the ranges of the space
// table. Runes at or above 1<<16 are never space. The table is sorted,
// so the search stops at the first range that starts beyond r.
func isSpace(r rune) bool {
	if r >= 1<<16 {
		return false
	}
	c := uint16(r)
	for i := range space {
		lo, hi := space[i][0], space[i][1]
		switch {
		case c < lo:
			return false
		case c <= hi:
			return true
		}
	}
	return false
}

// notSpace is the default token predicate used by Token: it accepts
// every rune that is not space.
func notSpace(r rune) bool {
	return !isSpace(r)
}

// readRune adapts a plain io.Reader so that UTF-8 encoded code points
// can be read (and one of them unread) from it. It is used when the
// Reader handed to the scanner does not itself implement io.RuneScanner.
type readRune struct {
	reader   io.Reader
	buf      [utf8.UTFMax]byte // used only inside ReadRune
	pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf  [utf8.UTFMax]byte // bytes left over
	peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
}

// readByte returns the next input byte. Bytes left over in pendBuf from
// an earlier ill-formed UTF-8 sequence are handed out first; otherwise
// exactly one byte is read from the underlying reader.
func (r *readRune) readByte() (b byte, err error) {
	if r.pending <= 0 {
		// Nothing left over: pull a single byte from the reader,
		// using the front of pendBuf as scratch space.
		var got int
		got, err = io.ReadFull(r.reader, r.pendBuf[:1])
		if got != 1 {
			return 0, err
		}
		return r.pendBuf[0], err
	}
	// Hand out the oldest pending byte and shift the rest down.
	b = r.pendBuf[0]
	copy(r.pendBuf[:], r.pendBuf[1:])
	r.pending--
	return b, nil
}

// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r.
330func (r *readRune) ReadRune() (rr rune, size int, err error) { 331 if r.peekRune >= 0 { 332 rr = r.peekRune 333 r.peekRune = ^r.peekRune 334 size = utf8.RuneLen(rr) 335 return 336 } 337 r.buf[0], err = r.readByte() 338 if err != nil { 339 return 340 } 341 if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case 342 rr = rune(r.buf[0]) 343 size = 1 // Known to be 1. 344 // Flip the bits of the rune so it's available to UnreadRune. 345 r.peekRune = ^rr 346 return 347 } 348 var n int 349 for n = 1; !utf8.FullRune(r.buf[:n]); n++ { 350 r.buf[n], err = r.readByte() 351 if err != nil { 352 if err == io.EOF { 353 err = nil 354 break 355 } 356 return 357 } 358 } 359 rr, size = utf8.DecodeRune(r.buf[:n]) 360 if size < n { // an error, save the bytes for the next read 361 copy(r.pendBuf[r.pending:], r.buf[size:n]) 362 r.pending += n - size 363 } 364 // Flip the bits of the rune so it's available to UnreadRune. 365 r.peekRune = ^rr 366 return 367} 368 369func (r *readRune) UnreadRune() error { 370 if r.peekRune >= 0 { 371 return errors.New("fmt: scanning called UnreadRune with no rune available") 372 } 373 // Reverse bit flip of previously read rune to obtain valid >=0 state. 374 r.peekRune = ^r.peekRune 375 return nil 376} 377 378var ssFree = sync.Pool{ 379 New: func() any { return new(ss) }, 380} 381 382// newScanState allocates a new ss struct or grab a cached one. 383func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) { 384 s = ssFree.Get().(*ss) 385 if rs, ok := r.(io.RuneScanner); ok { 386 s.rs = rs 387 } else { 388 s.rs = &readRune{reader: r, peekRune: -1} 389 } 390 s.nlIsSpace = nlIsSpace 391 s.nlIsEnd = nlIsEnd 392 s.atEOF = false 393 s.limit = hugeWid 394 s.argLimit = hugeWid 395 s.maxWid = hugeWid 396 s.validSave = true 397 s.count = 0 398 return 399} 400 401// free saves used ss structs in ssFree; avoid an allocation per invocation. 402func (s *ss) free(old ssave) { 403 // If it was used recursively, just restore the old state. 
404 if old.validSave { 405 s.ssave = old 406 return 407 } 408 // Don't hold on to ss structs with large buffers. 409 if cap(s.buf) > 1024 { 410 return 411 } 412 s.buf = s.buf[:0] 413 s.rs = nil 414 ssFree.Put(s) 415} 416 417// SkipSpace provides Scan methods the ability to skip space and newline 418// characters in keeping with the current scanning mode set by format strings 419// and [Scan]/[Scanln]. 420func (s *ss) SkipSpace() { 421 for { 422 r := s.getRune() 423 if r == eof { 424 return 425 } 426 if r == '\r' && s.peek("\n") { 427 continue 428 } 429 if r == '\n' { 430 if s.nlIsSpace { 431 continue 432 } 433 s.errorString("unexpected newline") 434 return 435 } 436 if !isSpace(r) { 437 s.UnreadRune() 438 break 439 } 440 } 441} 442 443// token returns the next space-delimited string from the input. It 444// skips white space. For Scanln, it stops at newlines. For Scan, 445// newlines are treated as spaces. 446func (s *ss) token(skipSpace bool, f func(rune) bool) []byte { 447 if skipSpace { 448 s.SkipSpace() 449 } 450 // read until white space or newline 451 for { 452 r := s.getRune() 453 if r == eof { 454 break 455 } 456 if !f(r) { 457 s.UnreadRune() 458 break 459 } 460 s.buf.writeRune(r) 461 } 462 return s.buf 463} 464 465var errComplex = errors.New("syntax error scanning complex number") 466var errBool = errors.New("syntax error scanning boolean") 467 468func indexRune(s string, r rune) int { 469 for i, c := range s { 470 if c == r { 471 return i 472 } 473 } 474 return -1 475} 476 477// consume reads the next rune in the input and reports whether it is in the ok string. 478// If accept is true, it puts the character into the input token. 
479func (s *ss) consume(ok string, accept bool) bool { 480 r := s.getRune() 481 if r == eof { 482 return false 483 } 484 if indexRune(ok, r) >= 0 { 485 if accept { 486 s.buf.writeRune(r) 487 } 488 return true 489 } 490 if r != eof && accept { 491 s.UnreadRune() 492 } 493 return false 494} 495 496// peek reports whether the next character is in the ok string, without consuming it. 497func (s *ss) peek(ok string) bool { 498 r := s.getRune() 499 if r != eof { 500 s.UnreadRune() 501 } 502 return indexRune(ok, r) >= 0 503} 504 505func (s *ss) notEOF() { 506 // Guarantee there is data to be read. 507 if r := s.getRune(); r == eof { 508 panic(io.EOF) 509 } 510 s.UnreadRune() 511} 512 513// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the 514// buffer and returns true. Otherwise it return false. 515func (s *ss) accept(ok string) bool { 516 return s.consume(ok, true) 517} 518 519// okVerb verifies that the verb is present in the list, setting s.err appropriately if not. 520func (s *ss) okVerb(verb rune, okVerbs, typ string) bool { 521 for _, v := range okVerbs { 522 if v == verb { 523 return true 524 } 525 } 526 s.errorString("bad verb '%" + string(verb) + "' for " + typ) 527 return false 528} 529 530// scanBool returns the value of the boolean represented by the next token. 531func (s *ss) scanBool(verb rune) bool { 532 s.SkipSpace() 533 s.notEOF() 534 if !s.okVerb(verb, "tv", "boolean") { 535 return false 536 } 537 // Syntax-checking a boolean is annoying. We're not fastidious about case. 
538 switch s.getRune() { 539 case '0': 540 return false 541 case '1': 542 return true 543 case 't', 'T': 544 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) { 545 s.error(errBool) 546 } 547 return true 548 case 'f', 'F': 549 if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) { 550 s.error(errBool) 551 } 552 return false 553 } 554 return false 555} 556 557// Numerical elements 558const ( 559 binaryDigits = "01" 560 octalDigits = "01234567" 561 decimalDigits = "0123456789" 562 hexadecimalDigits = "0123456789aAbBcCdDeEfF" 563 sign = "+-" 564 period = "." 565 exponent = "eEpP" 566) 567 568// getBase returns the numeric base represented by the verb and its digit string. 569func (s *ss) getBase(verb rune) (base int, digits string) { 570 s.okVerb(verb, "bdoUxXv", "integer") // sets s.err 571 base = 10 572 digits = decimalDigits 573 switch verb { 574 case 'b': 575 base = 2 576 digits = binaryDigits 577 case 'o': 578 base = 8 579 digits = octalDigits 580 case 'x', 'X', 'U': 581 base = 16 582 digits = hexadecimalDigits 583 } 584 return 585} 586 587// scanNumber returns the numerical string with specified digits starting here. 588func (s *ss) scanNumber(digits string, haveDigits bool) string { 589 if !haveDigits { 590 s.notEOF() 591 if !s.accept(digits) { 592 s.errorString("expected integer") 593 } 594 } 595 for s.accept(digits) { 596 } 597 return string(s.buf) 598} 599 600// scanRune returns the next rune value in the input. 601func (s *ss) scanRune(bitSize int) int64 { 602 s.notEOF() 603 r := s.getRune() 604 n := uint(bitSize) 605 x := (int64(r) << (64 - n)) >> (64 - n) 606 if x != int64(r) { 607 s.errorString("overflow on character value " + string(r)) 608 } 609 return int64(r) 610} 611 612// scanBasePrefix reports whether the integer begins with a base prefix 613// and returns the base, digit string, and whether a zero was found. 614// It is called only if the verb is %v. 
615func (s *ss) scanBasePrefix() (base int, digits string, zeroFound bool) { 616 if !s.peek("0") { 617 return 0, decimalDigits + "_", false 618 } 619 s.accept("0") 620 // Special cases for 0, 0b, 0o, 0x. 621 switch { 622 case s.peek("bB"): 623 s.consume("bB", true) 624 return 0, binaryDigits + "_", true 625 case s.peek("oO"): 626 s.consume("oO", true) 627 return 0, octalDigits + "_", true 628 case s.peek("xX"): 629 s.consume("xX", true) 630 return 0, hexadecimalDigits + "_", true 631 default: 632 return 0, octalDigits + "_", true 633 } 634} 635 636// scanInt returns the value of the integer represented by the next 637// token, checking for overflow. Any error is stored in s.err. 638func (s *ss) scanInt(verb rune, bitSize int) int64 { 639 if verb == 'c' { 640 return s.scanRune(bitSize) 641 } 642 s.SkipSpace() 643 s.notEOF() 644 base, digits := s.getBase(verb) 645 haveDigits := false 646 if verb == 'U' { 647 if !s.consume("U", false) || !s.consume("+", false) { 648 s.errorString("bad unicode format ") 649 } 650 } else { 651 s.accept(sign) // If there's a sign, it will be left in the token buffer. 652 if verb == 'v' { 653 base, digits, haveDigits = s.scanBasePrefix() 654 } 655 } 656 tok := s.scanNumber(digits, haveDigits) 657 i, err := strconv.ParseInt(tok, base, 64) 658 if err != nil { 659 s.error(err) 660 } 661 n := uint(bitSize) 662 x := (i << (64 - n)) >> (64 - n) 663 if x != i { 664 s.errorString("integer overflow on token " + tok) 665 } 666 return i 667} 668 669// scanUint returns the value of the unsigned integer represented 670// by the next token, checking for overflow. Any error is stored in s.err. 
671func (s *ss) scanUint(verb rune, bitSize int) uint64 { 672 if verb == 'c' { 673 return uint64(s.scanRune(bitSize)) 674 } 675 s.SkipSpace() 676 s.notEOF() 677 base, digits := s.getBase(verb) 678 haveDigits := false 679 if verb == 'U' { 680 if !s.consume("U", false) || !s.consume("+", false) { 681 s.errorString("bad unicode format ") 682 } 683 } else if verb == 'v' { 684 base, digits, haveDigits = s.scanBasePrefix() 685 } 686 tok := s.scanNumber(digits, haveDigits) 687 i, err := strconv.ParseUint(tok, base, 64) 688 if err != nil { 689 s.error(err) 690 } 691 n := uint(bitSize) 692 x := (i << (64 - n)) >> (64 - n) 693 if x != i { 694 s.errorString("unsigned integer overflow on token " + tok) 695 } 696 return i 697} 698 699// floatToken returns the floating-point number starting here, no longer than swid 700// if the width is specified. It's not rigorous about syntax because it doesn't check that 701// we have at least some digits, but Atof will do that. 702func (s *ss) floatToken() string { 703 s.buf = s.buf[:0] 704 // NaN? 705 if s.accept("nN") && s.accept("aA") && s.accept("nN") { 706 return string(s.buf) 707 } 708 // leading sign? 709 s.accept(sign) 710 // Inf? 711 if s.accept("iI") && s.accept("nN") && s.accept("fF") { 712 return string(s.buf) 713 } 714 digits := decimalDigits + "_" 715 exp := exponent 716 if s.accept("0") && s.accept("xX") { 717 digits = hexadecimalDigits + "_" 718 exp = "pP" 719 } 720 // digits? 721 for s.accept(digits) { 722 } 723 // decimal point? 724 if s.accept(period) { 725 // fraction? 726 for s.accept(digits) { 727 } 728 } 729 // exponent? 730 if s.accept(exp) { 731 // leading sign? 732 s.accept(sign) 733 // digits? 734 for s.accept(decimalDigits + "_") { 735 } 736 } 737 return string(s.buf) 738} 739 740// complexTokens returns the real and imaginary parts of the complex number starting here. 741// The number might be parenthesized and has the format (N+Ni) where N is a floating-point 742// number and there are no spaces within. 
743func (s *ss) complexTokens() (real, imag string) { 744 // TODO: accept N and Ni independently? 745 parens := s.accept("(") 746 real = s.floatToken() 747 s.buf = s.buf[:0] 748 // Must now have a sign. 749 if !s.accept("+-") { 750 s.error(errComplex) 751 } 752 // Sign is now in buffer 753 imagSign := string(s.buf) 754 imag = s.floatToken() 755 if !s.accept("i") { 756 s.error(errComplex) 757 } 758 if parens && !s.accept(")") { 759 s.error(errComplex) 760 } 761 return real, imagSign + imag 762} 763 764func hasX(s string) bool { 765 for i := 0; i < len(s); i++ { 766 if s[i] == 'x' || s[i] == 'X' { 767 return true 768 } 769 } 770 return false 771} 772 773// convertFloat converts the string to a float64value. 774func (s *ss) convertFloat(str string, n int) float64 { 775 // strconv.ParseFloat will handle "+0x1.fp+2", 776 // but we have to implement our non-standard 777 // decimal+binary exponent mix (1.2p4) ourselves. 778 if p := indexRune(str, 'p'); p >= 0 && !hasX(str) { 779 // Atof doesn't handle power-of-2 exponents, 780 // but they're easy to evaluate. 781 f, err := strconv.ParseFloat(str[:p], n) 782 if err != nil { 783 // Put full string into error. 784 if e, ok := err.(*strconv.NumError); ok { 785 e.Num = str 786 } 787 s.error(err) 788 } 789 m, err := strconv.Atoi(str[p+1:]) 790 if err != nil { 791 // Put full string into error. 792 if e, ok := err.(*strconv.NumError); ok { 793 e.Num = str 794 } 795 s.error(err) 796 } 797 return math.Ldexp(f, m) 798 } 799 f, err := strconv.ParseFloat(str, n) 800 if err != nil { 801 s.error(err) 802 } 803 return f 804} 805 806// scanComplex converts the next token to a complex128 value. 807// The atof argument is a type-specific reader for the underlying type. 808// If we're reading complex64, atof will parse float32s and convert them 809// to float64's to avoid reproducing this code for each complex type. 
810func (s *ss) scanComplex(verb rune, n int) complex128 { 811 if !s.okVerb(verb, floatVerbs, "complex") { 812 return 0 813 } 814 s.SkipSpace() 815 s.notEOF() 816 sreal, simag := s.complexTokens() 817 real := s.convertFloat(sreal, n/2) 818 imag := s.convertFloat(simag, n/2) 819 return complex(real, imag) 820} 821 822// convertString returns the string represented by the next input characters. 823// The format of the input is determined by the verb. 824func (s *ss) convertString(verb rune) (str string) { 825 if !s.okVerb(verb, "svqxX", "string") { 826 return "" 827 } 828 s.SkipSpace() 829 s.notEOF() 830 switch verb { 831 case 'q': 832 str = s.quotedString() 833 case 'x', 'X': 834 str = s.hexString() 835 default: 836 str = string(s.token(true, notSpace)) // %s and %v just return the next word 837 } 838 return 839} 840 841// quotedString returns the double- or back-quoted string represented by the next input characters. 842func (s *ss) quotedString() string { 843 s.notEOF() 844 quote := s.getRune() 845 switch quote { 846 case '`': 847 // Back-quoted: Anything goes until EOF or back quote. 848 for { 849 r := s.mustReadRune() 850 if r == quote { 851 break 852 } 853 s.buf.writeRune(r) 854 } 855 return string(s.buf) 856 case '"': 857 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes. 858 s.buf.writeByte('"') 859 for { 860 r := s.mustReadRune() 861 s.buf.writeRune(r) 862 if r == '\\' { 863 // In a legal backslash escape, no matter how long, only the character 864 // immediately after the escape can itself be a backslash or quote. 865 // Thus we only need to protect the first character after the backslash. 
866 s.buf.writeRune(s.mustReadRune()) 867 } else if r == '"' { 868 break 869 } 870 } 871 result, err := strconv.Unquote(string(s.buf)) 872 if err != nil { 873 s.error(err) 874 } 875 return result 876 default: 877 s.errorString("expected quoted string") 878 } 879 return "" 880} 881 882// hexDigit returns the value of the hexadecimal digit. 883func hexDigit(d rune) (int, bool) { 884 digit := int(d) 885 switch digit { 886 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 887 return digit - '0', true 888 case 'a', 'b', 'c', 'd', 'e', 'f': 889 return 10 + digit - 'a', true 890 case 'A', 'B', 'C', 'D', 'E', 'F': 891 return 10 + digit - 'A', true 892 } 893 return -1, false 894} 895 896// hexByte returns the next hex-encoded (two-character) byte from the input. 897// It returns ok==false if the next bytes in the input do not encode a hex byte. 898// If the first byte is hex and the second is not, processing stops. 899func (s *ss) hexByte() (b byte, ok bool) { 900 rune1 := s.getRune() 901 if rune1 == eof { 902 return 903 } 904 value1, ok := hexDigit(rune1) 905 if !ok { 906 s.UnreadRune() 907 return 908 } 909 value2, ok := hexDigit(s.mustReadRune()) 910 if !ok { 911 s.errorString("illegal hex digit") 912 return 913 } 914 return byte(value1<<4 | value2), true 915} 916 917// hexString returns the space-delimited hexpair-encoded string. 918func (s *ss) hexString() string { 919 s.notEOF() 920 for { 921 b, ok := s.hexByte() 922 if !ok { 923 break 924 } 925 s.buf.writeByte(b) 926 } 927 if len(s.buf) == 0 { 928 s.errorString("no hex data for %x string") 929 return "" 930 } 931 return string(s.buf) 932} 933 934const ( 935 floatVerbs = "beEfFgGv" 936 937 hugeWid = 1 << 30 938 939 intBits = 32 << (^uint(0) >> 63) 940 uintptrBits = 32 << (^uintptr(0) >> 63) 941) 942 943// scanPercent scans a literal percent character. 
func (s *ss) scanPercent() {
	// Leading space is skipped, and running out of input is reported
	// as EOF rather than as a missing percent.
	s.SkipSpace()
	s.notEOF()
	if !s.accept("%") {
		s.errorString("missing literal %")
	}
}

// scanOne scans a single value, deriving the scanner from the type of the argument.
// The token buffer is reset first. An argument implementing Scanner is
// handed the scan itself; otherwise the common pointer types are handled
// directly and everything else goes through reflection. Failures abort
// the scan by panicking with a scanError.
func (s *ss) scanOne(verb rune, arg any) {
	s.buf = s.buf[:0]
	var err error
	// If the parameter has its own Scan method, use that.
	if v, ok := arg.(Scanner); ok {
		err = v.Scan(s, verb)
		if err != nil {
			// A custom scanner running out of input mid-value is
			// reported as an unexpected EOF.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			s.error(err)
		}
		return
	}

	switch v := arg.(type) {
	case *bool:
		*v = s.scanBool(verb)
	case *complex64:
		*v = complex64(s.scanComplex(verb, 64))
	case *complex128:
		*v = s.scanComplex(verb, 128)
	case *int:
		*v = int(s.scanInt(verb, intBits))
	case *int8:
		*v = int8(s.scanInt(verb, 8))
	case *int16:
		*v = int16(s.scanInt(verb, 16))
	case *int32:
		*v = int32(s.scanInt(verb, 32))
	case *int64:
		*v = s.scanInt(verb, 64)
	case *uint:
		*v = uint(s.scanUint(verb, intBits))
	case *uint8:
		*v = uint8(s.scanUint(verb, 8))
	case *uint16:
		*v = uint16(s.scanUint(verb, 16))
	case *uint32:
		*v = uint32(s.scanUint(verb, 32))
	case *uint64:
		*v = s.scanUint(verb, 64)
	case *uintptr:
		*v = uintptr(s.scanUint(verb, uintptrBits))
	// Floats are tricky because you want to scan in the precision of the result, not
	// scan in high precision and convert, in order to preserve the correct error condition.
	case *float32:
		if s.okVerb(verb, floatVerbs, "float32") {
			s.SkipSpace()
			s.notEOF()
			*v = float32(s.convertFloat(s.floatToken(), 32))
		}
	case *float64:
		if s.okVerb(verb, floatVerbs, "float64") {
			s.SkipSpace()
			s.notEOF()
			*v = s.convertFloat(s.floatToken(), 64)
		}
	case *string:
		*v = s.convertString(verb)
	case *[]byte:
		// We scan to string and convert so we get a copy of the data.
		// If we scanned to bytes, the slice would point at the buffer.
		*v = []byte(s.convertString(verb))
	default:
		// Not one of the directly supported pointer types: use
		// reflection on the pointed-to value's kind.
		val := reflect.ValueOf(v)
		ptr := val
		if ptr.Kind() != reflect.Pointer {
			s.errorString("type not a pointer: " + val.Type().String())
			return
		}
		switch v := ptr.Elem(); v.Kind() {
		case reflect.Bool:
			v.SetBool(s.scanBool(verb))
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			v.SetInt(s.scanInt(verb, v.Type().Bits()))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			v.SetUint(s.scanUint(verb, v.Type().Bits()))
		case reflect.String:
			v.SetString(s.convertString(verb))
		case reflect.Slice:
			// For now, can only handle (renamed) []byte.
			typ := v.Type()
			if typ.Elem().Kind() != reflect.Uint8 {
				s.errorString("can't scan type: " + val.Type().String())
			}
			// Build a fresh slice of the named type and copy the
			// scanned bytes into it one element at a time.
			str := s.convertString(verb)
			v.Set(reflect.MakeSlice(typ, len(str), len(str)))
			for i := 0; i < len(str); i++ {
				v.Index(i).SetUint(uint64(str[i]))
			}
		case reflect.Float32, reflect.Float64:
			s.SkipSpace()
			s.notEOF()
			v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
		case reflect.Complex64, reflect.Complex128:
			v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
		default:
			s.errorString("can't scan type: " + val.Type().String())
		}
	}
}

// errorHandler turns local panics into error returns. It must be invoked
// with defer. A scanError panic stores its wrapped error in *errp, a
// panic whose value is io.EOF stores io.EOF, and any other panic is
// re-raised.
func errorHandler(errp *error) {
	if e := recover(); e != nil {
		if se, ok := e.(scanError); ok { // catch local error
			*errp = se.err
		} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
			*errp = eof
		} else {
			panic(e)
		}
	}
}

// doScan does the real work for scanning without a format string.
func (s *ss) doScan(a []any) (numProcessed int, err error) {
	// Scanner panics are converted into the returned error.
	defer errorHandler(&err)
	// Every argument is scanned as if with the %v verb.
	for _, arg := range a {
		s.scanOne('v', arg)
		numProcessed++
	}
	// Check for newline (or EOF) if required (Scanln etc.).
	if s.nlIsEnd {
		// Only space may separate the last item from the newline or EOF.
		for {
			r := s.getRune()
			if r == '\n' || r == eof {
				break
			}
			if !isSpace(r) {
				s.errorString("expected newline")
				break
			}
		}
	}
	return
}

// advance determines whether the next characters in the input match
// those of the format. It returns the number of bytes (sic) consumed
// in the format. All runs of space characters in either input or
// format behave as a single space. Newlines are special, though:
// newlines in the format must match those in the input and vice versa.
// This routine also handles the %% case. If the return value is zero,
// either format starts with a % (with no following %) or the input
// is empty. If it is negative, the input did not match the string.
func (s *ss) advance(format string) (i int) {
	for i < len(format) {
		fmtc, w := utf8.DecodeRuneInString(format[i:])

		// Space processing.
		// In the rest of this comment "space" means spaces other than newline.
		// Newline in the format matches input of zero or more spaces and then newline or end-of-input.
		// Spaces in the format before the newline are collapsed into the newline.
		// Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
		// Other spaces in the format match input of one or more spaces or end-of-input.
		if isSpace(fmtc) {
			newlines := 0
			trailingSpace := false
			// Consume the whole run of space in the format, counting its
			// newlines and noting whether space follows the last one.
			for isSpace(fmtc) && i < len(format) {
				if fmtc == '\n' {
					newlines++
					trailingSpace = false
				} else {
					trailingSpace = true
				}
				i += w
				fmtc, w = utf8.DecodeRuneInString(format[i:])
			}
			// Each newline in the format must be matched by optional
			// space and then a newline (or end of input) in the input.
			for j := 0; j < newlines; j++ {
				inputc := s.getRune()
				for isSpace(inputc) && inputc != '\n' {
					inputc = s.getRune()
				}
				if inputc != '\n' && inputc != eof {
					s.errorString("newline in format does not match input")
				}
			}
			if trailingSpace {
				inputc := s.getRune()
				if newlines == 0 {
					// If the trailing space stood alone (did not follow a newline),
					// it must find at least one space to consume.
					if !isSpace(inputc) && inputc != eof {
						s.errorString("expected space in input to match format")
					}
					if inputc == '\n' {
						s.errorString("newline in input does not match format")
					}
				}
				// Skip the rest of the space, stopping before any newline,
				// and push back the rune that ended the run.
				for isSpace(inputc) && inputc != '\n' {
					inputc = s.getRune()
				}
				if inputc != eof {
					s.UnreadRune()
				}
			}
			continue
		}

		// Verbs.
		if fmtc == '%' {
			// % at end of string is an error.
			if i+w == len(format) {
				s.errorString("missing verb: % at end of format string")
			}
			// %% acts like a real percent
			nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
			if nextc != '%' {
				// A real verb: stop here and let the caller handle it.
				return
			}
			i += w // skip the first %
		}

		// Literals.
		inputc := s.mustReadRune()
		if fmtc != inputc {
			// Mismatch: leave the input rune unread and signal failure.
			s.UnreadRune()
			return -1
		}
		i += w
	}
	return
}

// doScanf does the real work when scanning with a format string.
// At the moment, it handles only pointers to basic types.
func (s *ss) doScanf(format string, a []any) (numProcessed int, err error) {
	// Scanner panics are converted into the returned error.
	defer errorHandler(&err)
	end := len(format) - 1
	// We process one item per non-trivial format
	for i := 0; i <= end; {
		// Match literal text and space in the format against the input.
		w := s.advance(format[i:])
		if w > 0 {
			i += w
			continue
		}
		// Either we failed to advance, we have a percent character, or we ran out of input.
		if format[i] != '%' {
			// Can't advance format. Why not?
			if w < 0 {
				s.errorString("input does not match format")
			}
			// Otherwise at EOF; "too many operands" error handled below
			break
		}
		i++ // % is one byte

		// do we have 20 (width)?
		// NOTE(review): parsenum is defined outside this chunk; it is
		// presumed to parse a decimal number from format starting at i
		// and return the value, whether one was present, and the new
		// index — confirm against its definition.
		var widPresent bool
		s.maxWid, widPresent, i = parsenum(format, i, end)
		if !widPresent {
			s.maxWid = hugeWid
		}

		// c is the verb rune that follows the optional width.
		c, w := utf8.DecodeRuneInString(format[i:])
		i += w

		// Every verb except %c skips leading space in the input.
		if c != 'c' {
			s.SkipSpace()
		}
		if c == '%' {
			s.scanPercent()
			continue // Do not consume an argument.
		}
		// Restrict reads for this operand to at most maxWid runes past
		// the current position (ReadRune reports EOF at argLimit).
		s.argLimit = s.limit
		if f := s.count + s.maxWid; f < s.argLimit {
			s.argLimit = f
		}

		if numProcessed >= len(a) { // out of operands
			s.errorString("too few operands for format '%" + format[i-w:] + "'")
			break
		}
		arg := a[numProcessed]

		s.scanOne(c, arg)
		numProcessed++
		// Lift the per-operand restriction again.
		s.argLimit = s.limit
	}
	// Any operands left over once the format is exhausted are an error.
	if numProcessed < len(a) {
		s.errorString("too many operands")
	}
	return
}