1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package fmt
6
7import (
8	"errors"
9	"io"
10	"math"
11	"os"
12	"reflect"
13	"strconv"
14	"sync"
15	"unicode/utf8"
16)
17
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
type ScanState interface {
	// ReadRune reads the next rune (Unicode code point) from the input.
	// If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will
	// return EOF after returning the first '\n' or when reading beyond
	// the specified width.
	ReadRune() (r rune, size int, err error)
	// UnreadRune causes the next call to ReadRune to return the same rune
	// that the previous call returned.
	UnreadRune() error
	// SkipSpace skips space in the input. Newlines are treated appropriately
	// for the operation being performed; see the package documentation
	// for more information.
	SkipSpace()
	// Token skips space in the input if skipSpace is true, then returns the
	// run of Unicode code points c satisfying f(c).  If f is nil,
	// !unicode.IsSpace(c) is used; that is, the token will hold non-space
	// characters. Newlines are treated appropriately for the operation being
	// performed; see the package documentation for more information.
	// The returned slice points to shared data that may be overwritten
	// by the next call to Token, a call to a Scan function using the ScanState
	// as input, or when the calling Scan method returns.
	Token(skipSpace bool, f func(rune) bool) (token []byte, err error)
	// Width returns the value of the width option and whether it has been set.
	// The unit is Unicode code points.
	Width() (wid int, ok bool)
	// Read is part of the interface only so that a ScanState also
	// satisfies io.Reader.
	// Because ReadRune is implemented by the interface, Read should never be
	// called by the scanning routines and a valid implementation of
	// ScanState may choose always to return an error from Read.
	Read(buf []byte) (n int, err error)
}
50
// Scanner is implemented by any value that has a Scan method, which scans
// the input for the representation of a value and stores the result in the
// receiver, which must be a pointer to be useful. The Scan method is called
// for any argument to [Scan], [Scanf], or [Scanln] that implements it.
type Scanner interface {
	// Scan reads the representation of the value from state and stores it
	// in the receiver. verb is the verb in effect for this argument; the
	// scanning functions that take no format string pass 'v'.
	// A non-nil error aborts the scan of the current argument.
	Scan(state ScanState, verb rune) error
}
58
59// Scan scans text read from standard input, storing successive
60// space-separated values into successive arguments. Newlines count
61// as space. It returns the number of items successfully scanned.
62// If that is less than the number of arguments, err will report why.
63func Scan(a ...any) (n int, err error) {
64	return Fscan(os.Stdin, a...)
65}
66
67// Scanln is similar to [Scan], but stops scanning at a newline and
68// after the final item there must be a newline or EOF.
69func Scanln(a ...any) (n int, err error) {
70	return Fscanln(os.Stdin, a...)
71}
72
73// Scanf scans text read from standard input, storing successive
74// space-separated values into successive arguments as determined by
75// the format. It returns the number of items successfully scanned.
76// If that is less than the number of arguments, err will report why.
77// Newlines in the input must match newlines in the format.
78// The one exception: the verb %c always scans the next rune in the
79// input, even if it is a space (or tab etc.) or newline.
80func Scanf(format string, a ...any) (n int, err error) {
81	return Fscanf(os.Stdin, format, a...)
82}
83
// stringReader is a string that can be consumed through the io.Reader
// interface. The Sscan family converts a pointer to its string argument
// to *stringReader; each Read removes the bytes it delivered from the
// front of the string.
type stringReader string

// Read copies as many bytes as fit from the front of the remaining string
// into b and drops them from the string. It returns io.EOF only once the
// string is exhausted. A zero-length b therefore yields (0, nil) while data
// remains, rather than a spurious io.EOF.
func (r *stringReader) Read(b []byte) (n int, err error) {
	if len(*r) == 0 {
		return 0, io.EOF
	}
	n = copy(b, *r)
	*r = (*r)[n:]
	return n, nil
}
94
95// Sscan scans the argument string, storing successive space-separated
96// values into successive arguments. Newlines count as space. It
97// returns the number of items successfully scanned. If that is less
98// than the number of arguments, err will report why.
99func Sscan(str string, a ...any) (n int, err error) {
100	return Fscan((*stringReader)(&str), a...)
101}
102
103// Sscanln is similar to [Sscan], but stops scanning at a newline and
104// after the final item there must be a newline or EOF.
105func Sscanln(str string, a ...any) (n int, err error) {
106	return Fscanln((*stringReader)(&str), a...)
107}
108
109// Sscanf scans the argument string, storing successive space-separated
110// values into successive arguments as determined by the format. It
111// returns the number of items successfully parsed.
112// Newlines in the input must match newlines in the format.
113func Sscanf(str string, format string, a ...any) (n int, err error) {
114	return Fscanf((*stringReader)(&str), format, a...)
115}
116
117// Fscan scans text read from r, storing successive space-separated
118// values into successive arguments. Newlines count as space. It
119// returns the number of items successfully scanned. If that is less
120// than the number of arguments, err will report why.
121func Fscan(r io.Reader, a ...any) (n int, err error) {
122	s, old := newScanState(r, true, false)
123	n, err = s.doScan(a)
124	s.free(old)
125	return
126}
127
128// Fscanln is similar to [Fscan], but stops scanning at a newline and
129// after the final item there must be a newline or EOF.
130func Fscanln(r io.Reader, a ...any) (n int, err error) {
131	s, old := newScanState(r, false, true)
132	n, err = s.doScan(a)
133	s.free(old)
134	return
135}
136
137// Fscanf scans text read from r, storing successive space-separated
138// values into successive arguments as determined by the format. It
139// returns the number of items successfully parsed.
140// Newlines in the input must match newlines in the format.
141func Fscanf(r io.Reader, format string, a ...any) (n int, err error) {
142	s, old := newScanState(r, false, false)
143	n, err = s.doScanf(format, a)
144	s.free(old)
145	return
146}
147
// scanError represents an error generated by the scanning software.
// It's used as a unique signature to identify such errors when recovering:
// the scanner reports failures by panicking with a scanError, and the
// deferred handlers in Token and errorHandler turn a recovered scanError
// back into an ordinary error return.
type scanError struct {
	err error // the error to hand back to the caller of the scan
}
153
// eof is the value getRune returns in place of a rune when the input
// (or the current argument's width) is exhausted.
const eof = -1
155
// ss is the internal implementation of ScanState.
// Values are recycled through ssFree; newScanState initializes every
// field except buf, which free truncates before the value is cached.
type ss struct {
	rs    io.RuneScanner // where to read input
	buf   buffer         // token accumulator
	count int            // runes consumed so far.
	atEOF bool           // already read EOF
	// ssave holds the newline-handling flags and the width limits.
	ssave
}
164
// ssave holds the parts of ss that need to be
// saved and restored on recursive scans.
// newScanState sets the three width fields to hugeWid, which Width
// reports as "no width set".
type ssave struct {
	validSave bool // is or was a part of an actual ss.
	nlIsEnd   bool // whether newline terminates scan
	nlIsSpace bool // whether newline counts as white space
	argLimit  int  // max value of ss.count for this arg; argLimit <= limit
	limit     int  // max value of ss.count.
	maxWid    int  // width of this arg.
}
175
176// The Read method is only in ScanState so that ScanState
177// satisfies io.Reader. It will never be called when used as
178// intended, so there is no need to make it actually work.
179func (s *ss) Read(buf []byte) (n int, err error) {
180	return 0, errors.New("ScanState's Read should not be called. Use ReadRune")
181}
182
183func (s *ss) ReadRune() (r rune, size int, err error) {
184	if s.atEOF || s.count >= s.argLimit {
185		err = io.EOF
186		return
187	}
188
189	r, size, err = s.rs.ReadRune()
190	if err == nil {
191		s.count++
192		if s.nlIsEnd && r == '\n' {
193			s.atEOF = true
194		}
195	} else if err == io.EOF {
196		s.atEOF = true
197	}
198	return
199}
200
201func (s *ss) Width() (wid int, ok bool) {
202	if s.maxWid == hugeWid {
203		return 0, false
204	}
205	return s.maxWid, true
206}
207
208// The public method returns an error; this private one panics.
209// If getRune reaches EOF, the return value is EOF (-1).
210func (s *ss) getRune() (r rune) {
211	r, _, err := s.ReadRune()
212	if err != nil {
213		if err == io.EOF {
214			return eof
215		}
216		s.error(err)
217	}
218	return
219}
220
221// mustReadRune turns io.EOF into a panic(io.ErrUnexpectedEOF).
222// It is called in cases such as string scanning where an EOF is a
223// syntax error.
224func (s *ss) mustReadRune() (r rune) {
225	r = s.getRune()
226	if r == eof {
227		s.error(io.ErrUnexpectedEOF)
228	}
229	return
230}
231
232func (s *ss) UnreadRune() error {
233	s.rs.UnreadRune()
234	s.atEOF = false
235	s.count--
236	return nil
237}
238
239func (s *ss) error(err error) {
240	panic(scanError{err})
241}
242
243func (s *ss) errorString(err string) {
244	panic(scanError{errors.New(err)})
245}
246
247func (s *ss) Token(skipSpace bool, f func(rune) bool) (tok []byte, err error) {
248	defer func() {
249		if e := recover(); e != nil {
250			if se, ok := e.(scanError); ok {
251				err = se.err
252			} else {
253				panic(e)
254			}
255		}
256	}()
257	if f == nil {
258		f = notSpace
259	}
260	s.buf = s.buf[:0]
261	tok = s.token(skipSpace, f)
262	return
263}
264
// space is a copy of the unicode.White_Space ranges,
// to avoid depending on package unicode.
// Each entry is an inclusive [low, high] pair, and the entries are listed
// in increasing order, which isSpace relies on to stop early.
var space = [][2]uint16{
	{0x0009, 0x000d},
	{0x0020, 0x0020},
	{0x0085, 0x0085},
	{0x00a0, 0x00a0},
	{0x1680, 0x1680},
	{0x2000, 0x200a},
	{0x2028, 0x2029},
	{0x202f, 0x202f},
	{0x205f, 0x205f},
	{0x3000, 0x3000},
}

// isSpace reports whether r falls inside one of the ranges in space.
// Runes at or above 1<<16 are rejected immediately; everything else is
// narrowed to 16 bits and compared against the sorted table.
func isSpace(r rune) bool {
	const limit = 1 << 16
	if r >= limit {
		return false
	}
	c := uint16(r)
	for i := range space {
		lo, hi := space[i][0], space[i][1]
		switch {
		case c < lo:
			// The table is sorted, so no later range can match.
			return false
		case c <= hi:
			return true
		}
	}
	return false
}
295
296// notSpace is the default scanning function used in Token.
297func notSpace(r rune) bool {
298	return !isSpace(r)
299}
300
// readRune adapts a plain io.Reader so that UTF-8 encoded code points can
// be read, and the last one unread, a rune at a time. The scanner wraps its
// input in a readRune when that input does not already implement
// io.RuneScanner.
type readRune struct {
	reader   io.Reader
	buf      [utf8.UTFMax]byte // used only inside ReadRune
	pending  int               // number of bytes in pendBuf; only >0 for bad UTF-8
	pendBuf  [utf8.UTFMax]byte // bytes left over
	peekRune rune              // if >=0 next rune; when <0 is ^(previous Rune)
}

// readByte produces the next input byte. Bytes saved in pendBuf by an
// earlier ill-formed UTF-8 sequence are handed out first; only when none
// are pending is a single byte read from the underlying reader.
func (r *readRune) readByte() (b byte, err error) {
	if r.pending <= 0 {
		var n int
		n, err = io.ReadFull(r.reader, r.pendBuf[:1])
		if n != 1 {
			return 0, err
		}
		return r.pendBuf[0], err
	}
	// Take the first pending byte and shift the rest down.
	b = r.pendBuf[0]
	copy(r.pendBuf[:], r.pendBuf[1:])
	r.pending--
	return b, nil
}

// ReadRune returns the next UTF-8 encoded code point from the reader held
// by r, together with its size in bytes. A rune previously pushed back with
// UnreadRune is returned first. If the input ends in the middle of an
// encoding, the bytes read so far are decoded as they stand and any bytes
// not consumed by that decode are kept for the following reads.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
	if r.peekRune >= 0 {
		// Replay the unread rune and flip it back to the "previous" state.
		rr = r.peekRune
		r.peekRune = ^rr
		return rr, utf8.RuneLen(rr), nil
	}
	if r.buf[0], err = r.readByte(); err != nil {
		return 0, 0, err
	}
	if lead := r.buf[0]; lead < utf8.RuneSelf {
		// Common case: a single ASCII byte.
		rr = rune(lead)
		// Store the complement so UnreadRune can restore the rune.
		r.peekRune = ^rr
		return rr, 1, nil
	}
	have := 1 // number of bytes of the encoding gathered in r.buf
	for !utf8.FullRune(r.buf[:have]) {
		r.buf[have], err = r.readByte()
		if err == io.EOF {
			// Truncated encoding: decode what we have.
			err = nil
			break
		}
		if err != nil {
			return 0, 0, err
		}
		have++
	}
	rr, size = utf8.DecodeRune(r.buf[:have])
	if size < have {
		// Decoding used fewer bytes than were read (bad UTF-8);
		// keep the remainder for the next read.
		copy(r.pendBuf[r.pending:], r.buf[size:have])
		r.pending += have - size
	}
	// Store the complement so UnreadRune can restore the rune.
	r.peekRune = ^rr
	return rr, size, err
}

// UnreadRune arranges for the next ReadRune to return the previous rune
// again. It fails if a rune is already waiting to be re-read.
func (r *readRune) UnreadRune() error {
	if r.peekRune < 0 {
		// Undo the complement applied by ReadRune to get the rune back.
		r.peekRune = ^r.peekRune
		return nil
	}
	return errors.New("fmt: scanning called UnreadRune with no rune available")
}
377
// ssFree is the pool of reusable ss values: newScanState takes one from
// it and free puts it back, so a scan need not allocate a fresh ss.
var ssFree = sync.Pool{
	New: func() any { return new(ss) },
}
381
382// newScanState allocates a new ss struct or grab a cached one.
383func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) {
384	s = ssFree.Get().(*ss)
385	if rs, ok := r.(io.RuneScanner); ok {
386		s.rs = rs
387	} else {
388		s.rs = &readRune{reader: r, peekRune: -1}
389	}
390	s.nlIsSpace = nlIsSpace
391	s.nlIsEnd = nlIsEnd
392	s.atEOF = false
393	s.limit = hugeWid
394	s.argLimit = hugeWid
395	s.maxWid = hugeWid
396	s.validSave = true
397	s.count = 0
398	return
399}
400
401// free saves used ss structs in ssFree; avoid an allocation per invocation.
402func (s *ss) free(old ssave) {
403	// If it was used recursively, just restore the old state.
404	if old.validSave {
405		s.ssave = old
406		return
407	}
408	// Don't hold on to ss structs with large buffers.
409	if cap(s.buf) > 1024 {
410		return
411	}
412	s.buf = s.buf[:0]
413	s.rs = nil
414	ssFree.Put(s)
415}
416
417// SkipSpace provides Scan methods the ability to skip space and newline
418// characters in keeping with the current scanning mode set by format strings
419// and [Scan]/[Scanln].
420func (s *ss) SkipSpace() {
421	for {
422		r := s.getRune()
423		if r == eof {
424			return
425		}
426		if r == '\r' && s.peek("\n") {
427			continue
428		}
429		if r == '\n' {
430			if s.nlIsSpace {
431				continue
432			}
433			s.errorString("unexpected newline")
434			return
435		}
436		if !isSpace(r) {
437			s.UnreadRune()
438			break
439		}
440	}
441}
442
443// token returns the next space-delimited string from the input. It
444// skips white space. For Scanln, it stops at newlines. For Scan,
445// newlines are treated as spaces.
446func (s *ss) token(skipSpace bool, f func(rune) bool) []byte {
447	if skipSpace {
448		s.SkipSpace()
449	}
450	// read until white space or newline
451	for {
452		r := s.getRune()
453		if r == eof {
454			break
455		}
456		if !f(r) {
457			s.UnreadRune()
458			break
459		}
460		s.buf.writeRune(r)
461	}
462	return s.buf
463}
464
// Errors reported for malformed complex and boolean input.
var (
	errComplex = errors.New("syntax error scanning complex number")
	errBool    = errors.New("syntax error scanning boolean")
)
467
// indexRune returns the byte offset of the first occurrence of r in s,
// or -1 if r does not occur.
func indexRune(s string, r rune) int {
	pos := -1
	for i, c := range s {
		if c == r {
			pos = i
			break
		}
	}
	return pos
}
476
477// consume reads the next rune in the input and reports whether it is in the ok string.
478// If accept is true, it puts the character into the input token.
479func (s *ss) consume(ok string, accept bool) bool {
480	r := s.getRune()
481	if r == eof {
482		return false
483	}
484	if indexRune(ok, r) >= 0 {
485		if accept {
486			s.buf.writeRune(r)
487		}
488		return true
489	}
490	if r != eof && accept {
491		s.UnreadRune()
492	}
493	return false
494}
495
496// peek reports whether the next character is in the ok string, without consuming it.
497func (s *ss) peek(ok string) bool {
498	r := s.getRune()
499	if r != eof {
500		s.UnreadRune()
501	}
502	return indexRune(ok, r) >= 0
503}
504
505func (s *ss) notEOF() {
506	// Guarantee there is data to be read.
507	if r := s.getRune(); r == eof {
508		panic(io.EOF)
509	}
510	s.UnreadRune()
511}
512
513// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
514// buffer and returns true. Otherwise it return false.
515func (s *ss) accept(ok string) bool {
516	return s.consume(ok, true)
517}
518
519// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
520func (s *ss) okVerb(verb rune, okVerbs, typ string) bool {
521	for _, v := range okVerbs {
522		if v == verb {
523			return true
524		}
525	}
526	s.errorString("bad verb '%" + string(verb) + "' for " + typ)
527	return false
528}
529
530// scanBool returns the value of the boolean represented by the next token.
531func (s *ss) scanBool(verb rune) bool {
532	s.SkipSpace()
533	s.notEOF()
534	if !s.okVerb(verb, "tv", "boolean") {
535		return false
536	}
537	// Syntax-checking a boolean is annoying. We're not fastidious about case.
538	switch s.getRune() {
539	case '0':
540		return false
541	case '1':
542		return true
543	case 't', 'T':
544		if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
545			s.error(errBool)
546		}
547		return true
548	case 'f', 'F':
549		if s.accept("aA") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
550			s.error(errBool)
551		}
552		return false
553	}
554	return false
555}
556
// Numerical elements: the sets of characters accepted while scanning
// numbers. Each constant is used as the "ok" set passed to accept or peek.
const (
	binaryDigits      = "01"
	octalDigits       = "01234567"
	decimalDigits     = "0123456789"
	hexadecimalDigits = "0123456789aAbBcCdDeEfF"
	sign              = "+-"
	period            = "."
	exponent          = "eEpP"
)
567
568// getBase returns the numeric base represented by the verb and its digit string.
569func (s *ss) getBase(verb rune) (base int, digits string) {
570	s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
571	base = 10
572	digits = decimalDigits
573	switch verb {
574	case 'b':
575		base = 2
576		digits = binaryDigits
577	case 'o':
578		base = 8
579		digits = octalDigits
580	case 'x', 'X', 'U':
581		base = 16
582		digits = hexadecimalDigits
583	}
584	return
585}
586
587// scanNumber returns the numerical string with specified digits starting here.
588func (s *ss) scanNumber(digits string, haveDigits bool) string {
589	if !haveDigits {
590		s.notEOF()
591		if !s.accept(digits) {
592			s.errorString("expected integer")
593		}
594	}
595	for s.accept(digits) {
596	}
597	return string(s.buf)
598}
599
600// scanRune returns the next rune value in the input.
601func (s *ss) scanRune(bitSize int) int64 {
602	s.notEOF()
603	r := s.getRune()
604	n := uint(bitSize)
605	x := (int64(r) << (64 - n)) >> (64 - n)
606	if x != int64(r) {
607		s.errorString("overflow on character value " + string(r))
608	}
609	return int64(r)
610}
611
612// scanBasePrefix reports whether the integer begins with a base prefix
613// and returns the base, digit string, and whether a zero was found.
614// It is called only if the verb is %v.
615func (s *ss) scanBasePrefix() (base int, digits string, zeroFound bool) {
616	if !s.peek("0") {
617		return 0, decimalDigits + "_", false
618	}
619	s.accept("0")
620	// Special cases for 0, 0b, 0o, 0x.
621	switch {
622	case s.peek("bB"):
623		s.consume("bB", true)
624		return 0, binaryDigits + "_", true
625	case s.peek("oO"):
626		s.consume("oO", true)
627		return 0, octalDigits + "_", true
628	case s.peek("xX"):
629		s.consume("xX", true)
630		return 0, hexadecimalDigits + "_", true
631	default:
632		return 0, octalDigits + "_", true
633	}
634}
635
636// scanInt returns the value of the integer represented by the next
637// token, checking for overflow. Any error is stored in s.err.
638func (s *ss) scanInt(verb rune, bitSize int) int64 {
639	if verb == 'c' {
640		return s.scanRune(bitSize)
641	}
642	s.SkipSpace()
643	s.notEOF()
644	base, digits := s.getBase(verb)
645	haveDigits := false
646	if verb == 'U' {
647		if !s.consume("U", false) || !s.consume("+", false) {
648			s.errorString("bad unicode format ")
649		}
650	} else {
651		s.accept(sign) // If there's a sign, it will be left in the token buffer.
652		if verb == 'v' {
653			base, digits, haveDigits = s.scanBasePrefix()
654		}
655	}
656	tok := s.scanNumber(digits, haveDigits)
657	i, err := strconv.ParseInt(tok, base, 64)
658	if err != nil {
659		s.error(err)
660	}
661	n := uint(bitSize)
662	x := (i << (64 - n)) >> (64 - n)
663	if x != i {
664		s.errorString("integer overflow on token " + tok)
665	}
666	return i
667}
668
669// scanUint returns the value of the unsigned integer represented
670// by the next token, checking for overflow. Any error is stored in s.err.
671func (s *ss) scanUint(verb rune, bitSize int) uint64 {
672	if verb == 'c' {
673		return uint64(s.scanRune(bitSize))
674	}
675	s.SkipSpace()
676	s.notEOF()
677	base, digits := s.getBase(verb)
678	haveDigits := false
679	if verb == 'U' {
680		if !s.consume("U", false) || !s.consume("+", false) {
681			s.errorString("bad unicode format ")
682		}
683	} else if verb == 'v' {
684		base, digits, haveDigits = s.scanBasePrefix()
685	}
686	tok := s.scanNumber(digits, haveDigits)
687	i, err := strconv.ParseUint(tok, base, 64)
688	if err != nil {
689		s.error(err)
690	}
691	n := uint(bitSize)
692	x := (i << (64 - n)) >> (64 - n)
693	if x != i {
694		s.errorString("unsigned integer overflow on token " + tok)
695	}
696	return i
697}
698
699// floatToken returns the floating-point number starting here, no longer than swid
700// if the width is specified. It's not rigorous about syntax because it doesn't check that
701// we have at least some digits, but Atof will do that.
702func (s *ss) floatToken() string {
703	s.buf = s.buf[:0]
704	// NaN?
705	if s.accept("nN") && s.accept("aA") && s.accept("nN") {
706		return string(s.buf)
707	}
708	// leading sign?
709	s.accept(sign)
710	// Inf?
711	if s.accept("iI") && s.accept("nN") && s.accept("fF") {
712		return string(s.buf)
713	}
714	digits := decimalDigits + "_"
715	exp := exponent
716	if s.accept("0") && s.accept("xX") {
717		digits = hexadecimalDigits + "_"
718		exp = "pP"
719	}
720	// digits?
721	for s.accept(digits) {
722	}
723	// decimal point?
724	if s.accept(period) {
725		// fraction?
726		for s.accept(digits) {
727		}
728	}
729	// exponent?
730	if s.accept(exp) {
731		// leading sign?
732		s.accept(sign)
733		// digits?
734		for s.accept(decimalDigits + "_") {
735		}
736	}
737	return string(s.buf)
738}
739
740// complexTokens returns the real and imaginary parts of the complex number starting here.
741// The number might be parenthesized and has the format (N+Ni) where N is a floating-point
742// number and there are no spaces within.
743func (s *ss) complexTokens() (real, imag string) {
744	// TODO: accept N and Ni independently?
745	parens := s.accept("(")
746	real = s.floatToken()
747	s.buf = s.buf[:0]
748	// Must now have a sign.
749	if !s.accept("+-") {
750		s.error(errComplex)
751	}
752	// Sign is now in buffer
753	imagSign := string(s.buf)
754	imag = s.floatToken()
755	if !s.accept("i") {
756		s.error(errComplex)
757	}
758	if parens && !s.accept(")") {
759		s.error(errComplex)
760	}
761	return real, imagSign + imag
762}
763
// hasX reports whether s contains the byte 'x' or 'X'.
func hasX(s string) bool {
	for _, b := range []byte(s) {
		switch b {
		case 'x', 'X':
			return true
		}
	}
	return false
}
772
773// convertFloat converts the string to a float64value.
774func (s *ss) convertFloat(str string, n int) float64 {
775	// strconv.ParseFloat will handle "+0x1.fp+2",
776	// but we have to implement our non-standard
777	// decimal+binary exponent mix (1.2p4) ourselves.
778	if p := indexRune(str, 'p'); p >= 0 && !hasX(str) {
779		// Atof doesn't handle power-of-2 exponents,
780		// but they're easy to evaluate.
781		f, err := strconv.ParseFloat(str[:p], n)
782		if err != nil {
783			// Put full string into error.
784			if e, ok := err.(*strconv.NumError); ok {
785				e.Num = str
786			}
787			s.error(err)
788		}
789		m, err := strconv.Atoi(str[p+1:])
790		if err != nil {
791			// Put full string into error.
792			if e, ok := err.(*strconv.NumError); ok {
793				e.Num = str
794			}
795			s.error(err)
796		}
797		return math.Ldexp(f, m)
798	}
799	f, err := strconv.ParseFloat(str, n)
800	if err != nil {
801		s.error(err)
802	}
803	return f
804}
805
806// scanComplex converts the next token to a complex128 value.
807// The atof argument is a type-specific reader for the underlying type.
808// If we're reading complex64, atof will parse float32s and convert them
809// to float64's to avoid reproducing this code for each complex type.
810func (s *ss) scanComplex(verb rune, n int) complex128 {
811	if !s.okVerb(verb, floatVerbs, "complex") {
812		return 0
813	}
814	s.SkipSpace()
815	s.notEOF()
816	sreal, simag := s.complexTokens()
817	real := s.convertFloat(sreal, n/2)
818	imag := s.convertFloat(simag, n/2)
819	return complex(real, imag)
820}
821
822// convertString returns the string represented by the next input characters.
823// The format of the input is determined by the verb.
824func (s *ss) convertString(verb rune) (str string) {
825	if !s.okVerb(verb, "svqxX", "string") {
826		return ""
827	}
828	s.SkipSpace()
829	s.notEOF()
830	switch verb {
831	case 'q':
832		str = s.quotedString()
833	case 'x', 'X':
834		str = s.hexString()
835	default:
836		str = string(s.token(true, notSpace)) // %s and %v just return the next word
837	}
838	return
839}
840
841// quotedString returns the double- or back-quoted string represented by the next input characters.
842func (s *ss) quotedString() string {
843	s.notEOF()
844	quote := s.getRune()
845	switch quote {
846	case '`':
847		// Back-quoted: Anything goes until EOF or back quote.
848		for {
849			r := s.mustReadRune()
850			if r == quote {
851				break
852			}
853			s.buf.writeRune(r)
854		}
855		return string(s.buf)
856	case '"':
857		// Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
858		s.buf.writeByte('"')
859		for {
860			r := s.mustReadRune()
861			s.buf.writeRune(r)
862			if r == '\\' {
863				// In a legal backslash escape, no matter how long, only the character
864				// immediately after the escape can itself be a backslash or quote.
865				// Thus we only need to protect the first character after the backslash.
866				s.buf.writeRune(s.mustReadRune())
867			} else if r == '"' {
868				break
869			}
870		}
871		result, err := strconv.Unquote(string(s.buf))
872		if err != nil {
873			s.error(err)
874		}
875		return result
876	default:
877		s.errorString("expected quoted string")
878	}
879	return ""
880}
881
// hexDigit returns the numeric value of the hexadecimal digit d and true,
// or -1 and false when d is not a hexadecimal digit. Both lower-case and
// upper-case letters are accepted.
func hexDigit(d rune) (int, bool) {
	switch {
	case '0' <= d && d <= '9':
		return int(d - '0'), true
	case 'a' <= d && d <= 'f':
		return int(d-'a') + 10, true
	case 'A' <= d && d <= 'F':
		return int(d-'A') + 10, true
	}
	return -1, false
}
895
896// hexByte returns the next hex-encoded (two-character) byte from the input.
897// It returns ok==false if the next bytes in the input do not encode a hex byte.
898// If the first byte is hex and the second is not, processing stops.
899func (s *ss) hexByte() (b byte, ok bool) {
900	rune1 := s.getRune()
901	if rune1 == eof {
902		return
903	}
904	value1, ok := hexDigit(rune1)
905	if !ok {
906		s.UnreadRune()
907		return
908	}
909	value2, ok := hexDigit(s.mustReadRune())
910	if !ok {
911		s.errorString("illegal hex digit")
912		return
913	}
914	return byte(value1<<4 | value2), true
915}
916
917// hexString returns the space-delimited hexpair-encoded string.
918func (s *ss) hexString() string {
919	s.notEOF()
920	for {
921		b, ok := s.hexByte()
922		if !ok {
923			break
924		}
925		s.buf.writeByte(b)
926	}
927	if len(s.buf) == 0 {
928		s.errorString("no hex data for %x string")
929		return ""
930	}
931	return string(s.buf)
932}
933
const (
	// floatVerbs lists the verbs accepted when scanning floating-point
	// and complex values.
	floatVerbs = "beEfFgGv"

	// hugeWid is the "no limit" value for the width fields of ssave;
	// Width reports the width as unset while maxWid equals it.
	hugeWid = 1 << 30

	// intBits and uintptrBits are the sizes in bits of int and uintptr.
	// The shift count is 1 when the type is 64 bits wide and 0 otherwise,
	// giving 64 or 32.
	intBits     = 32 << (^uint(0) >> 63)
	uintptrBits = 32 << (^uintptr(0) >> 63)
)
942
943// scanPercent scans a literal percent character.
944func (s *ss) scanPercent() {
945	s.SkipSpace()
946	s.notEOF()
947	if !s.accept("%") {
948		s.errorString("missing literal %")
949	}
950}
951
// scanOne scans a single value, deriving the scanner from the type of the argument.
// The token buffer is reset first. Arguments implementing Scanner are
// delegated to their own Scan method; pointers to the built-in basic types,
// string and []byte are handled directly; any other pointer is handled
// through reflection on the kind of the value it points to.
// Failures are reported by panicking via s.error / s.errorString.
func (s *ss) scanOne(verb rune, arg any) {
	s.buf = s.buf[:0]
	var err error
	// If the parameter has its own Scan method, use that.
	if v, ok := arg.(Scanner); ok {
		err = v.Scan(s, verb)
		if err != nil {
			// A custom scanner running out of input mid-value is
			// reported as an unexpected EOF rather than a plain EOF.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			s.error(err)
		}
		return
	}

	switch v := arg.(type) {
	case *bool:
		*v = s.scanBool(verb)
	case *complex64:
		*v = complex64(s.scanComplex(verb, 64))
	case *complex128:
		*v = s.scanComplex(verb, 128)
	case *int:
		*v = int(s.scanInt(verb, intBits))
	case *int8:
		*v = int8(s.scanInt(verb, 8))
	case *int16:
		*v = int16(s.scanInt(verb, 16))
	case *int32:
		*v = int32(s.scanInt(verb, 32))
	case *int64:
		*v = s.scanInt(verb, 64)
	case *uint:
		*v = uint(s.scanUint(verb, intBits))
	case *uint8:
		*v = uint8(s.scanUint(verb, 8))
	case *uint16:
		*v = uint16(s.scanUint(verb, 16))
	case *uint32:
		*v = uint32(s.scanUint(verb, 32))
	case *uint64:
		*v = s.scanUint(verb, 64)
	case *uintptr:
		*v = uintptr(s.scanUint(verb, uintptrBits))
	// Floats are tricky because you want to scan in the precision of the result, not
	// scan in high precision and convert, in order to preserve the correct error condition.
	case *float32:
		if s.okVerb(verb, floatVerbs, "float32") {
			s.SkipSpace()
			s.notEOF()
			*v = float32(s.convertFloat(s.floatToken(), 32))
		}
	case *float64:
		if s.okVerb(verb, floatVerbs, "float64") {
			s.SkipSpace()
			s.notEOF()
			*v = s.convertFloat(s.floatToken(), 64)
		}
	case *string:
		*v = s.convertString(verb)
	case *[]byte:
		// We scan to string and convert so we get a copy of the data.
		// If we scanned to bytes, the slice would point at the buffer.
		*v = []byte(s.convertString(verb))
	default:
		// Not one of the directly supported pointer types: fall back to
		// reflection, dispatching on the kind of the pointed-to value.
		val := reflect.ValueOf(v)
		ptr := val
		if ptr.Kind() != reflect.Pointer {
			s.errorString("type not a pointer: " + val.Type().String())
			return
		}
		switch v := ptr.Elem(); v.Kind() {
		case reflect.Bool:
			v.SetBool(s.scanBool(verb))
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
			v.SetInt(s.scanInt(verb, v.Type().Bits()))
		case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
			v.SetUint(s.scanUint(verb, v.Type().Bits()))
		case reflect.String:
			v.SetString(s.convertString(verb))
		case reflect.Slice:
			// For now, can only handle (renamed) []byte.
			typ := v.Type()
			if typ.Elem().Kind() != reflect.Uint8 {
				s.errorString("can't scan type: " + val.Type().String())
			}
			str := s.convertString(verb)
			v.Set(reflect.MakeSlice(typ, len(str), len(str)))
			for i := 0; i < len(str); i++ {
				v.Index(i).SetUint(uint64(str[i]))
			}
		case reflect.Float32, reflect.Float64:
			// NOTE(review): unlike the *float32 and *float64 cases above,
			// this branch does not call okVerb, so the verb is not
			// validated here. Confirm whether that is intended.
			s.SkipSpace()
			s.notEOF()
			v.SetFloat(s.convertFloat(s.floatToken(), v.Type().Bits()))
		case reflect.Complex64, reflect.Complex128:
			v.SetComplex(s.scanComplex(verb, v.Type().Bits()))
		default:
			s.errorString("can't scan type: " + val.Type().String())
		}
	}
}
1055
1056// errorHandler turns local panics into error returns.
1057func errorHandler(errp *error) {
1058	if e := recover(); e != nil {
1059		if se, ok := e.(scanError); ok { // catch local error
1060			*errp = se.err
1061		} else if eof, ok := e.(error); ok && eof == io.EOF { // out of input
1062			*errp = eof
1063		} else {
1064			panic(e)
1065		}
1066	}
1067}
1068
1069// doScan does the real work for scanning without a format string.
1070func (s *ss) doScan(a []any) (numProcessed int, err error) {
1071	defer errorHandler(&err)
1072	for _, arg := range a {
1073		s.scanOne('v', arg)
1074		numProcessed++
1075	}
1076	// Check for newline (or EOF) if required (Scanln etc.).
1077	if s.nlIsEnd {
1078		for {
1079			r := s.getRune()
1080			if r == '\n' || r == eof {
1081				break
1082			}
1083			if !isSpace(r) {
1084				s.errorString("expected newline")
1085				break
1086			}
1087		}
1088	}
1089	return
1090}
1091
1092// advance determines whether the next characters in the input match
1093// those of the format. It returns the number of bytes (sic) consumed
1094// in the format. All runs of space characters in either input or
1095// format behave as a single space. Newlines are special, though:
1096// newlines in the format must match those in the input and vice versa.
1097// This routine also handles the %% case. If the return value is zero,
1098// either format starts with a % (with no following %) or the input
1099// is empty. If it is negative, the input did not match the string.
1100func (s *ss) advance(format string) (i int) {
1101	for i < len(format) {
1102		fmtc, w := utf8.DecodeRuneInString(format[i:])
1103
1104		// Space processing.
1105		// In the rest of this comment "space" means spaces other than newline.
1106		// Newline in the format matches input of zero or more spaces and then newline or end-of-input.
1107		// Spaces in the format before the newline are collapsed into the newline.
1108		// Spaces in the format after the newline match zero or more spaces after the corresponding input newline.
1109		// Other spaces in the format match input of one or more spaces or end-of-input.
1110		if isSpace(fmtc) {
1111			newlines := 0
1112			trailingSpace := false
1113			for isSpace(fmtc) && i < len(format) {
1114				if fmtc == '\n' {
1115					newlines++
1116					trailingSpace = false
1117				} else {
1118					trailingSpace = true
1119				}
1120				i += w
1121				fmtc, w = utf8.DecodeRuneInString(format[i:])
1122			}
1123			for j := 0; j < newlines; j++ {
1124				inputc := s.getRune()
1125				for isSpace(inputc) && inputc != '\n' {
1126					inputc = s.getRune()
1127				}
1128				if inputc != '\n' && inputc != eof {
1129					s.errorString("newline in format does not match input")
1130				}
1131			}
1132			if trailingSpace {
1133				inputc := s.getRune()
1134				if newlines == 0 {
1135					// If the trailing space stood alone (did not follow a newline),
1136					// it must find at least one space to consume.
1137					if !isSpace(inputc) && inputc != eof {
1138						s.errorString("expected space in input to match format")
1139					}
1140					if inputc == '\n' {
1141						s.errorString("newline in input does not match format")
1142					}
1143				}
1144				for isSpace(inputc) && inputc != '\n' {
1145					inputc = s.getRune()
1146				}
1147				if inputc != eof {
1148					s.UnreadRune()
1149				}
1150			}
1151			continue
1152		}
1153
1154		// Verbs.
1155		if fmtc == '%' {
1156			// % at end of string is an error.
1157			if i+w == len(format) {
1158				s.errorString("missing verb: % at end of format string")
1159			}
1160			// %% acts like a real percent
1161			nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
1162			if nextc != '%' {
1163				return
1164			}
1165			i += w // skip the first %
1166		}
1167
1168		// Literals.
1169		inputc := s.mustReadRune()
1170		if fmtc != inputc {
1171			s.UnreadRune()
1172			return -1
1173		}
1174		i += w
1175	}
1176	return
1177}
1178
1179// doScanf does the real work when scanning with a format string.
1180// At the moment, it handles only pointers to basic types.
1181func (s *ss) doScanf(format string, a []any) (numProcessed int, err error) {
1182	defer errorHandler(&err)
1183	end := len(format) - 1
1184	// We process one item per non-trivial format
1185	for i := 0; i <= end; {
1186		w := s.advance(format[i:])
1187		if w > 0 {
1188			i += w
1189			continue
1190		}
1191		// Either we failed to advance, we have a percent character, or we ran out of input.
1192		if format[i] != '%' {
1193			// Can't advance format. Why not?
1194			if w < 0 {
1195				s.errorString("input does not match format")
1196			}
1197			// Otherwise at EOF; "too many operands" error handled below
1198			break
1199		}
1200		i++ // % is one byte
1201
1202		// do we have 20 (width)?
1203		var widPresent bool
1204		s.maxWid, widPresent, i = parsenum(format, i, end)
1205		if !widPresent {
1206			s.maxWid = hugeWid
1207		}
1208
1209		c, w := utf8.DecodeRuneInString(format[i:])
1210		i += w
1211
1212		if c != 'c' {
1213			s.SkipSpace()
1214		}
1215		if c == '%' {
1216			s.scanPercent()
1217			continue // Do not consume an argument.
1218		}
1219		s.argLimit = s.limit
1220		if f := s.count + s.maxWid; f < s.argLimit {
1221			s.argLimit = f
1222		}
1223
1224		if numProcessed >= len(a) { // out of operands
1225			s.errorString("too few operands for format '%" + format[i-w:] + "'")
1226			break
1227		}
1228		arg := a[numProcessed]
1229
1230		s.scanOne(c, arg)
1231		numProcessed++
1232		s.argLimit = s.limit
1233	}
1234	if numProcessed < len(a) {
1235		s.errorString("too many operands")
1236	}
1237	return
1238}
1239