xref: /aosp_15_r20/external/licenseclassifier/commentparser/comment_parser.go (revision 46c4c49da23cae783fa41bf46525a6505638499a)
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*46c4c49dSIbrahim Kanouche
// Package commentparser does a basic parse over a source file and returns all
// of the comments from the code. This is useful when you want to analyze
// text written in comments (like copyright notices) but not in the code
// itself.
19*46c4c49dSIbrahim Kanouchepackage commentparser
20*46c4c49dSIbrahim Kanouche
21*46c4c49dSIbrahim Kanoucheimport (
22*46c4c49dSIbrahim Kanouche	"bytes"
23*46c4c49dSIbrahim Kanouche	"strings"
24*46c4c49dSIbrahim Kanouche	"unicode/utf8"
25*46c4c49dSIbrahim Kanouche
26*46c4c49dSIbrahim Kanouche	"github.com/google/licenseclassifier/commentparser/language"
27*46c4c49dSIbrahim Kanouche)
28*46c4c49dSIbrahim Kanouche
// Message formats describing an input that ended inside an unterminated
// construct. Each takes a single integer verb (%d).
// NOTE(review): the integer is presumably a line number, and none of these
// are referenced in this part of the file; confirm at the use site.
const (
	// eofInString: input ended inside a string literal.
	eofInString = "%d:EOF in string"
	// eofInSingleLineComment: input ended inside a single line comment.
	eofInSingleLineComment = "%d:EOF in single line comment"
	// eofInMultilineComment: input ended inside a multiline comment.
	eofInMultilineComment = "%d:EOF in multiline comment"
)
34*46c4c49dSIbrahim Kanouche
35*46c4c49dSIbrahim Kanouche// Parse parses the input data and returns the comments.
36*46c4c49dSIbrahim Kanouchefunc Parse(contents []byte, lang language.Language) Comments {
37*46c4c49dSIbrahim Kanouche	if len(contents) == 0 {
38*46c4c49dSIbrahim Kanouche		return nil
39*46c4c49dSIbrahim Kanouche	}
40*46c4c49dSIbrahim Kanouche
41*46c4c49dSIbrahim Kanouche	c := string(contents)
42*46c4c49dSIbrahim Kanouche	if !strings.HasSuffix(c, "\n") {
43*46c4c49dSIbrahim Kanouche		// Force a terminating newline if one isn't present.
44*46c4c49dSIbrahim Kanouche		c += "\n"
45*46c4c49dSIbrahim Kanouche	}
46*46c4c49dSIbrahim Kanouche	i := &input{
47*46c4c49dSIbrahim Kanouche		s:      c,
48*46c4c49dSIbrahim Kanouche		lang:   lang,
49*46c4c49dSIbrahim Kanouche		offset: 0,
50*46c4c49dSIbrahim Kanouche		pos:    position{line: 1, lineRune: []int{0}},
51*46c4c49dSIbrahim Kanouche	}
52*46c4c49dSIbrahim Kanouche	i.lex()
53*46c4c49dSIbrahim Kanouche	return i.comments
54*46c4c49dSIbrahim Kanouche}
55*46c4c49dSIbrahim Kanouche
// Comment is either a single line or multiline comment in a source code file.
// A single line comment has StartLine equal to EndLine. The lines are 1-based.
type Comment struct {
	StartLine int    // Line the comment starts on.
	EndLine   int    // Line the comment ends on.
	Text      string // Text of the comment.
}

// Comments allows us to treat a slice of comments as a unit.
type Comments []*Comment

// ChunkIterator splits the comments into runs ("chunks") of neighbouring
// comments and delivers them, in order, on the returned read-only channel.
// The channel is closed once every chunk has been sent; an empty receiver
// produces no chunks.
//
// A comment joins the chunk of the comment before it when it starts at most
// one line after that comment's start line, or exactly two lines after it
// (i.e. separated by a single line) provided both comments are single line
// comments. Anything further away begins a new chunk.
//
// The chunks are produced by a goroutine sending on an unbuffered channel, so
// the caller must drain the channel for that goroutine to finish.
func (c Comments) ChunkIterator() <-chan Comments {
	ch := make(chan Comments)
	go func() {
		defer close(ch)

		if len(c) == 0 {
			return
		}

		chunk := Comments{c[0]}
		for _, cmt := range c[1:] {
			if !continuesChunk(chunk[len(chunk)-1], cmt) {
				ch <- chunk
				chunk = nil
			}
			chunk = append(chunk, cmt)
		}
		ch <- chunk
	}()
	return ch
}

// continuesChunk reports whether next belongs to the same chunk as prev, the
// comment immediately before it.
func continuesChunk(prev, next *Comment) bool {
	switch gap := next.StartLine - prev.StartLine; {
	case gap > 2:
		return false
	case gap == 2:
		// One line lies between the two starts: only merge when both
		// comments are single line comments.
		return next.StartLine == next.EndLine && prev.StartLine == prev.EndLine
	default:
		return true
	}
}

// StartLine is the line number (1-based) the first part of the comment block
// starts on. It is 0 when there are no comments.
func (c Comments) StartLine() int {
	if len(c) == 0 {
		return 0
	}
	return c[0].StartLine
}

// String creates a string out of the text of the comments, one comment per
// line. Comment begin and end markers are removed.
func (c Comments) String() string {
	texts := make([]string, 0, len(c))
	for _, cmt := range c {
		texts = append(texts, cmt.Text)
	}
	return strings.Join(texts, "\n")
}
127*46c4c49dSIbrahim Kanouche
// position records the location of a lexeme.
type position struct {
	// line is the line number of the input: 1-based.
	line int
	// lineRune holds rune offsets from the beginning of a line: 0-based.
	// The last element is the counter for the current line; readRune
	// appends a fresh counter at each newline and unreadRune drops it
	// again when a newline is pushed back.
	lineRune []int
}
133*46c4c49dSIbrahim Kanouche
134*46c4c49dSIbrahim Kanouche// input holds the current state of the lexer.
135*46c4c49dSIbrahim Kanouchetype input struct {
136*46c4c49dSIbrahim Kanouche	s        string            // Entire input.
137*46c4c49dSIbrahim Kanouche	lang     language.Language // Source code language.
138*46c4c49dSIbrahim Kanouche	offset   int               // Offset into input.
139*46c4c49dSIbrahim Kanouche	pos      position          // Current position in the input.
140*46c4c49dSIbrahim Kanouche	comments Comments          // Comments in the source file.
141*46c4c49dSIbrahim Kanouche}
142*46c4c49dSIbrahim Kanouche
143*46c4c49dSIbrahim Kanouche// lex is called to obtain the comments.
144*46c4c49dSIbrahim Kanouchefunc (i *input) lex() {
145*46c4c49dSIbrahim Kanouche	for {
146*46c4c49dSIbrahim Kanouche		c, ok := i.peekRune()
147*46c4c49dSIbrahim Kanouche		if !ok {
148*46c4c49dSIbrahim Kanouche			break
149*46c4c49dSIbrahim Kanouche		}
150*46c4c49dSIbrahim Kanouche
151*46c4c49dSIbrahim Kanouche		switch c {
152*46c4c49dSIbrahim Kanouche		case '"', '\'', '`': // String
153*46c4c49dSIbrahim Kanouche			// Ignore strings because they could contain comment
154*46c4c49dSIbrahim Kanouche			// start or end sequences which we need to ignore.
155*46c4c49dSIbrahim Kanouche			if i.lang == language.HTML {
156*46c4c49dSIbrahim Kanouche				// Quotes in HTML-like files aren't meaningful,
157*46c4c49dSIbrahim Kanouche				// because it's basically plain text
158*46c4c49dSIbrahim Kanouche				break
159*46c4c49dSIbrahim Kanouche			}
160*46c4c49dSIbrahim Kanouche
161*46c4c49dSIbrahim Kanouche			ok, hasEscape := i.lang.QuoteCharacter(c)
162*46c4c49dSIbrahim Kanouche			if !ok {
163*46c4c49dSIbrahim Kanouche				break
164*46c4c49dSIbrahim Kanouche			}
165*46c4c49dSIbrahim Kanouche
166*46c4c49dSIbrahim Kanouche			var content bytes.Buffer
167*46c4c49dSIbrahim Kanouche			isDocString := false
168*46c4c49dSIbrahim Kanouche			quote := string(c)
169*46c4c49dSIbrahim Kanouche			if i.lang == language.Python {
170*46c4c49dSIbrahim Kanouche				if c == '\'' && i.match("'''") {
171*46c4c49dSIbrahim Kanouche					quote = "'''"
172*46c4c49dSIbrahim Kanouche					// Assume module-level docstrings start at the
173*46c4c49dSIbrahim Kanouche					// beginning of a line.  Function docstrings not
174*46c4c49dSIbrahim Kanouche					// supported.
175*46c4c49dSIbrahim Kanouche					if i.pos.lineRune[len(i.pos.lineRune)-1] == 3 {
176*46c4c49dSIbrahim Kanouche						isDocString = true
177*46c4c49dSIbrahim Kanouche					}
178*46c4c49dSIbrahim Kanouche				} else if c == '"' && i.match(`"""`) {
179*46c4c49dSIbrahim Kanouche					quote = `"""`
180*46c4c49dSIbrahim Kanouche					if i.pos.lineRune[len(i.pos.lineRune)-1] == 3 {
181*46c4c49dSIbrahim Kanouche						isDocString = true
182*46c4c49dSIbrahim Kanouche					}
183*46c4c49dSIbrahim Kanouche				} else {
184*46c4c49dSIbrahim Kanouche					i.readRune() // Eat quote.
185*46c4c49dSIbrahim Kanouche				}
186*46c4c49dSIbrahim Kanouche			} else {
187*46c4c49dSIbrahim Kanouche				i.readRune() // Eat quote.
188*46c4c49dSIbrahim Kanouche			}
189*46c4c49dSIbrahim Kanouche
190*46c4c49dSIbrahim Kanouche			startLine := i.pos.line
191*46c4c49dSIbrahim Kanouche			for {
192*46c4c49dSIbrahim Kanouche				c, ok = i.peekRune()
193*46c4c49dSIbrahim Kanouche				if !ok {
194*46c4c49dSIbrahim Kanouche					return
195*46c4c49dSIbrahim Kanouche				}
196*46c4c49dSIbrahim Kanouche				if hasEscape && c == '\\' {
197*46c4c49dSIbrahim Kanouche					i.readRune() // Eat escape.
198*46c4c49dSIbrahim Kanouche				} else if i.match(quote) {
199*46c4c49dSIbrahim Kanouche					break
200*46c4c49dSIbrahim Kanouche				} else if (i.lang == language.JavaScript || i.lang == language.Perl) && c == '\n' {
201*46c4c49dSIbrahim Kanouche					// JavaScript and Perl allow you to
202*46c4c49dSIbrahim Kanouche					// specify regexes without quotes, but
203*46c4c49dSIbrahim Kanouche					// which contain quotes. So treat the
204*46c4c49dSIbrahim Kanouche					// newline as terminating the string.
205*46c4c49dSIbrahim Kanouche					break
206*46c4c49dSIbrahim Kanouche				}
207*46c4c49dSIbrahim Kanouche				c := i.readRune()
208*46c4c49dSIbrahim Kanouche				if isDocString {
209*46c4c49dSIbrahim Kanouche					content.WriteRune(c)
210*46c4c49dSIbrahim Kanouche				}
211*46c4c49dSIbrahim Kanouche				if i.eof() {
212*46c4c49dSIbrahim Kanouche					return
213*46c4c49dSIbrahim Kanouche				}
214*46c4c49dSIbrahim Kanouche			}
215*46c4c49dSIbrahim Kanouche			if isDocString {
216*46c4c49dSIbrahim Kanouche				i.comments = append(i.comments, &Comment{
217*46c4c49dSIbrahim Kanouche					StartLine: startLine,
218*46c4c49dSIbrahim Kanouche					EndLine:   i.pos.line,
219*46c4c49dSIbrahim Kanouche					Text:      content.String(),
220*46c4c49dSIbrahim Kanouche				})
221*46c4c49dSIbrahim Kanouche			}
222*46c4c49dSIbrahim Kanouche		default:
223*46c4c49dSIbrahim Kanouche			startLine := i.pos.line
224*46c4c49dSIbrahim Kanouche			var comment bytes.Buffer
225*46c4c49dSIbrahim Kanouche			if ok, start, end := i.multiLineComment(); ok { // Multiline comment
226*46c4c49dSIbrahim Kanouche				nesting := 0
227*46c4c49dSIbrahim Kanouche				startLine := i.pos.line
228*46c4c49dSIbrahim Kanouche				for {
229*46c4c49dSIbrahim Kanouche					if i.eof() {
230*46c4c49dSIbrahim Kanouche						return
231*46c4c49dSIbrahim Kanouche					}
232*46c4c49dSIbrahim Kanouche					c := i.readRune()
233*46c4c49dSIbrahim Kanouche					comment.WriteRune(c)
234*46c4c49dSIbrahim Kanouche					if i.lang.NestedComments() && i.match(start) {
235*46c4c49dSIbrahim Kanouche						// Allows nested comments.
236*46c4c49dSIbrahim Kanouche						comment.WriteString(start)
237*46c4c49dSIbrahim Kanouche						nesting++
238*46c4c49dSIbrahim Kanouche					}
239*46c4c49dSIbrahim Kanouche					if i.match(end) {
240*46c4c49dSIbrahim Kanouche						if nesting > 0 {
241*46c4c49dSIbrahim Kanouche							comment.WriteString(end)
242*46c4c49dSIbrahim Kanouche							nesting--
243*46c4c49dSIbrahim Kanouche						} else {
244*46c4c49dSIbrahim Kanouche							break
245*46c4c49dSIbrahim Kanouche						}
246*46c4c49dSIbrahim Kanouche					}
247*46c4c49dSIbrahim Kanouche				}
248*46c4c49dSIbrahim Kanouche				i.comments = append(i.comments, &Comment{
249*46c4c49dSIbrahim Kanouche					StartLine: startLine,
250*46c4c49dSIbrahim Kanouche					EndLine:   i.pos.line,
251*46c4c49dSIbrahim Kanouche					Text:      comment.String(),
252*46c4c49dSIbrahim Kanouche				})
253*46c4c49dSIbrahim Kanouche			} else if i.singleLineComment() { // Single line comment
254*46c4c49dSIbrahim Kanouche				for {
255*46c4c49dSIbrahim Kanouche					if i.eof() {
256*46c4c49dSIbrahim Kanouche						return
257*46c4c49dSIbrahim Kanouche					}
258*46c4c49dSIbrahim Kanouche					c = i.readRune()
259*46c4c49dSIbrahim Kanouche					if c == '\n' {
260*46c4c49dSIbrahim Kanouche						i.unreadRune(c)
261*46c4c49dSIbrahim Kanouche						break
262*46c4c49dSIbrahim Kanouche					}
263*46c4c49dSIbrahim Kanouche					comment.WriteRune(c)
264*46c4c49dSIbrahim Kanouche				}
265*46c4c49dSIbrahim Kanouche				i.comments = append(i.comments, &Comment{
266*46c4c49dSIbrahim Kanouche					StartLine: startLine,
267*46c4c49dSIbrahim Kanouche					EndLine:   i.pos.line,
268*46c4c49dSIbrahim Kanouche					Text:      comment.String(),
269*46c4c49dSIbrahim Kanouche				})
270*46c4c49dSIbrahim Kanouche			}
271*46c4c49dSIbrahim Kanouche		}
272*46c4c49dSIbrahim Kanouche
273*46c4c49dSIbrahim Kanouche		i.readRune() // Ignore non-comments.
274*46c4c49dSIbrahim Kanouche	}
275*46c4c49dSIbrahim Kanouche}
276*46c4c49dSIbrahim Kanouche
277*46c4c49dSIbrahim Kanouche// singleLineComment returns 'true' if we've run across a single line comment
278*46c4c49dSIbrahim Kanouche// in the given language.
279*46c4c49dSIbrahim Kanouchefunc (i *input) singleLineComment() bool {
280*46c4c49dSIbrahim Kanouche	if i.match(i.lang.SingleLineCommentStart()) {
281*46c4c49dSIbrahim Kanouche		return true
282*46c4c49dSIbrahim Kanouche	}
283*46c4c49dSIbrahim Kanouche
284*46c4c49dSIbrahim Kanouche	if i.lang == language.SQL {
285*46c4c49dSIbrahim Kanouche		return i.match(language.MySQL.SingleLineCommentStart())
286*46c4c49dSIbrahim Kanouche	} else if i.lang == language.ObjectiveC {
287*46c4c49dSIbrahim Kanouche		return i.match(language.Matlab.SingleLineCommentStart())
288*46c4c49dSIbrahim Kanouche	}
289*46c4c49dSIbrahim Kanouche
290*46c4c49dSIbrahim Kanouche	return false
291*46c4c49dSIbrahim Kanouche}
292*46c4c49dSIbrahim Kanouche
293*46c4c49dSIbrahim Kanouche// multiLineComment returns 'true' if we've run across a multiline comment in
294*46c4c49dSIbrahim Kanouche// the given language.
295*46c4c49dSIbrahim Kanouchefunc (i *input) multiLineComment() (bool, string, string) {
296*46c4c49dSIbrahim Kanouche	if s := i.lang.MultilineCommentStart(); i.match(s) {
297*46c4c49dSIbrahim Kanouche		return true, s, i.lang.MultilineCommentEnd()
298*46c4c49dSIbrahim Kanouche	}
299*46c4c49dSIbrahim Kanouche
300*46c4c49dSIbrahim Kanouche	if i.lang == language.SQL {
301*46c4c49dSIbrahim Kanouche		if s := language.MySQL.MultilineCommentStart(); i.match(s) {
302*46c4c49dSIbrahim Kanouche			return true, s, language.MySQL.MultilineCommentEnd()
303*46c4c49dSIbrahim Kanouche		}
304*46c4c49dSIbrahim Kanouche	} else if i.lang == language.ObjectiveC {
305*46c4c49dSIbrahim Kanouche		if s := language.Matlab.MultilineCommentStart(); i.match(s) {
306*46c4c49dSIbrahim Kanouche			return true, s, language.Matlab.MultilineCommentEnd()
307*46c4c49dSIbrahim Kanouche		}
308*46c4c49dSIbrahim Kanouche	}
309*46c4c49dSIbrahim Kanouche
310*46c4c49dSIbrahim Kanouche	return false, "", ""
311*46c4c49dSIbrahim Kanouche}
312*46c4c49dSIbrahim Kanouche
313*46c4c49dSIbrahim Kanouche// match returns 'true' if the next tokens in the stream match the given
314*46c4c49dSIbrahim Kanouche// string.
315*46c4c49dSIbrahim Kanouchefunc (i *input) match(s string) bool {
316*46c4c49dSIbrahim Kanouche	if s == "" {
317*46c4c49dSIbrahim Kanouche		return false
318*46c4c49dSIbrahim Kanouche	}
319*46c4c49dSIbrahim Kanouche	saved := s
320*46c4c49dSIbrahim Kanouche	var read []rune
321*46c4c49dSIbrahim Kanouche	for len(s) > 0 && !i.eof() {
322*46c4c49dSIbrahim Kanouche		r, size := utf8.DecodeRuneInString(s)
323*46c4c49dSIbrahim Kanouche		if c, ok := i.peekRune(); ok && c == r {
324*46c4c49dSIbrahim Kanouche			read = append(read, c)
325*46c4c49dSIbrahim Kanouche		} else {
326*46c4c49dSIbrahim Kanouche			// No match. Push the tokens we read back onto the stack.
327*46c4c49dSIbrahim Kanouche			for idx := len(read) - 1; idx >= 0; idx-- {
328*46c4c49dSIbrahim Kanouche				i.unreadRune(read[idx])
329*46c4c49dSIbrahim Kanouche			}
330*46c4c49dSIbrahim Kanouche			return false
331*46c4c49dSIbrahim Kanouche		}
332*46c4c49dSIbrahim Kanouche		s = s[size:]
333*46c4c49dSIbrahim Kanouche		i.readRune() // Eat token.
334*46c4c49dSIbrahim Kanouche	}
335*46c4c49dSIbrahim Kanouche	return string(read) == saved
336*46c4c49dSIbrahim Kanouche}
337*46c4c49dSIbrahim Kanouche
338*46c4c49dSIbrahim Kanouche// eof reports whether the input has reached the end of the file.
339*46c4c49dSIbrahim Kanouchefunc (i *input) eof() bool {
340*46c4c49dSIbrahim Kanouche	return len(i.s) <= i.offset
341*46c4c49dSIbrahim Kanouche}
342*46c4c49dSIbrahim Kanouche
343*46c4c49dSIbrahim Kanouche// peekRune returns the next rune in the input without consuming it.
344*46c4c49dSIbrahim Kanouchefunc (i *input) peekRune() (rune, bool) {
345*46c4c49dSIbrahim Kanouche	if i.eof() {
346*46c4c49dSIbrahim Kanouche		return rune(0), false
347*46c4c49dSIbrahim Kanouche	}
348*46c4c49dSIbrahim Kanouche	r, _ := utf8.DecodeRuneInString(i.s[i.offset:])
349*46c4c49dSIbrahim Kanouche	return r, true
350*46c4c49dSIbrahim Kanouche}
351*46c4c49dSIbrahim Kanouche
352*46c4c49dSIbrahim Kanouche// readRune consumes and returns the next rune in the input.
353*46c4c49dSIbrahim Kanouchefunc (i *input) readRune() rune {
354*46c4c49dSIbrahim Kanouche	r, size := utf8.DecodeRuneInString(i.s[i.offset:])
355*46c4c49dSIbrahim Kanouche	if r == '\n' {
356*46c4c49dSIbrahim Kanouche		i.pos.line++
357*46c4c49dSIbrahim Kanouche		i.pos.lineRune = append(i.pos.lineRune, 0)
358*46c4c49dSIbrahim Kanouche	} else {
359*46c4c49dSIbrahim Kanouche		i.pos.lineRune[len(i.pos.lineRune)-1]++
360*46c4c49dSIbrahim Kanouche	}
361*46c4c49dSIbrahim Kanouche	i.offset += size
362*46c4c49dSIbrahim Kanouche	return r
363*46c4c49dSIbrahim Kanouche}
364*46c4c49dSIbrahim Kanouche
365*46c4c49dSIbrahim Kanouche// unreadRune winds the lexer's state back to before the rune was read.
366*46c4c49dSIbrahim Kanouchefunc (i *input) unreadRune(c rune) {
367*46c4c49dSIbrahim Kanouche	p := make([]byte, utf8.UTFMax)
368*46c4c49dSIbrahim Kanouche	size := utf8.EncodeRune(p, c)
369*46c4c49dSIbrahim Kanouche	i.offset -= size
370*46c4c49dSIbrahim Kanouche	if c == '\n' {
371*46c4c49dSIbrahim Kanouche		i.pos.line--
372*46c4c49dSIbrahim Kanouche		if len(i.pos.lineRune) > 1 {
373*46c4c49dSIbrahim Kanouche			i.pos.lineRune = i.pos.lineRune[:len(i.pos.lineRune)-1]
374*46c4c49dSIbrahim Kanouche		} else {
375*46c4c49dSIbrahim Kanouche			i.pos.lineRune[len(i.pos.lineRune)-1] = 0
376*46c4c49dSIbrahim Kanouche		}
377*46c4c49dSIbrahim Kanouche	} else {
378*46c4c49dSIbrahim Kanouche		i.pos.lineRune[len(i.pos.lineRune)-1]--
379*46c4c49dSIbrahim Kanouche	}
380*46c4c49dSIbrahim Kanouche}
381