// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lex

import (
	"go/build/constraint"
	"io"
	"os"
	"strings"
	"text/scanner"
	"unicode"

	"cmd/asm/internal/flags"
	"cmd/internal/objabi"
	"cmd/internal/src"
)

// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
// for our purposes and made a TokenReader. It forms the lowest level,
// turning text from readers into tokens.
type Tokenizer struct {
	tok  ScanToken
	s    *scanner.Scanner
	base *src.PosBase
	line int
	file *os.File // If non-nil, file descriptor to close.
}

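// NewTokenizer returns a Tokenizer that reads tokens from r, recording
// name for position information. If file is non-nil, it is the *os.File
// underlying r and will be closed by Close.
//
// A minimal sketch of typical use (the input text here is hypothetical;
// Next, Text, and Close are defined below):
//
//	tok := NewTokenizer("input.s", strings.NewReader(text), nil)
//	for t := tok.Next(); t != scanner.EOF; t = tok.Next() {
//		fmt.Println(tok.Text())
//	}
//	tok.Close()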
func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
	var s scanner.Scanner
	s.Init(r)
	// Newline is like a semicolon; other space characters are fine.
	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
	// Don't skip comments: we need to count newlines.
	s.Mode = scanner.ScanChars |
		scanner.ScanFloats |
		scanner.ScanIdents |
		scanner.ScanInts |
		scanner.ScanStrings |
		scanner.ScanComments
	s.Position.Filename = name
	s.IsIdentRune = isIdentRune
	return &Tokenizer{
		s:    &s,
		base: src.NewFileBase(name, objabi.AbsFile(objabi.WorkingDir(), name, *flags.TrimPath)),
		line: 1,
		file: file,
	}
}

// We want center dot (·) and division slash (∕) to work as identifier characters.
func isIdentRune(ch rune, i int) bool {
	if unicode.IsLetter(ch) {
		return true
	}
	switch ch {
	case '_': // Underscore; traditional.
		return true
	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
		return true
	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
		return true
	}
	// Digits are OK only after the first character.
	return i > 0 && unicode.IsDigit(ch)
}

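// Text returns the text of the most recent token: the full spelling of
// the folded operators LSH, RSH, ARR, and ROT, or the scanner's token
// text for everything else.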
func (t *Tokenizer) Text() string {
	switch t.tok {
	case LSH:
		return "<<"
	case RSH:
		return ">>"
	case ARR:
		return "->"
	case ROT:
		return "@>"
	}
	return t.s.TokenText()
}

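// File returns the file name recorded in the current position base.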
func (t *Tokenizer) File() string {
	return t.base.Filename()
}

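// Base returns the current position base.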
func (t *Tokenizer) Base() *src.PosBase {
	return t.base
}

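// SetBase sets the position base, as when a #line directive changes the
// reported file and line.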
func (t *Tokenizer) SetBase(base *src.PosBase) {
	t.base = base
}

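// Line returns the current source line number.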
func (t *Tokenizer) Line() int {
	return t.line
}

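// Col returns the column of the current scanner position.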
func (t *Tokenizer) Col() int {
	return t.s.Pos().Column
}

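// Next scans and returns the next token, skipping comments except for
// //go:build lines, which are returned as BuildComment. Newlines inside
// comments are counted so that Line stays accurate, and the two-character
// operators <<, >>, ->, and @> are folded into the single tokens LSH,
// RSH, ARR, and ROT.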
func (t *Tokenizer) Next() ScanToken {
	s := t.s
	for {
		t.tok = ScanToken(s.Scan())
		if t.tok != scanner.Comment {
			break
		}
		text := s.TokenText()
		t.line += strings.Count(text, "\n")
		if constraint.IsGoBuild(text) {
			t.tok = BuildComment
			break
		}
	}
	switch t.tok {
	case '\n':
		t.line++
	case '-':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ARR
			return ARR
		}
	case '@':
		if s.Peek() == '>' {
			s.Next()
			t.tok = ROT
			return ROT
		}
	case '<':
		if s.Peek() == '<' {
			s.Next()
			t.tok = LSH
			return LSH
		}
	case '>':
		if s.Peek() == '>' {
			s.Next()
			t.tok = RSH
			return RSH
		}
	}
	return t.tok
}

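// Close closes the underlying file, if there is one.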
func (t *Tokenizer) Close() {
	if t.file != nil {
		t.file.Close()
	}
}