// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package commentparser does a basic parse over a source file and returns all
// of the comments from the code. This is useful for when you want to analyze
// text written in comments (like copyright notices) but not in the code
// itself.
19*46c4c49dSIbrahim Kanouchepackage commentparser 20*46c4c49dSIbrahim Kanouche 21*46c4c49dSIbrahim Kanoucheimport ( 22*46c4c49dSIbrahim Kanouche "bytes" 23*46c4c49dSIbrahim Kanouche "strings" 24*46c4c49dSIbrahim Kanouche "unicode/utf8" 25*46c4c49dSIbrahim Kanouche 26*46c4c49dSIbrahim Kanouche "github.com/google/licenseclassifier/commentparser/language" 27*46c4c49dSIbrahim Kanouche) 28*46c4c49dSIbrahim Kanouche 29*46c4c49dSIbrahim Kanoucheconst ( 30*46c4c49dSIbrahim Kanouche eofInString = "%d:EOF in string" 31*46c4c49dSIbrahim Kanouche eofInSingleLineComment = "%d:EOF in single line comment" 32*46c4c49dSIbrahim Kanouche eofInMultilineComment = "%d:EOF in multiline comment" 33*46c4c49dSIbrahim Kanouche) 34*46c4c49dSIbrahim Kanouche 35*46c4c49dSIbrahim Kanouche// Parse parses the input data and returns the comments. 36*46c4c49dSIbrahim Kanouchefunc Parse(contents []byte, lang language.Language) Comments { 37*46c4c49dSIbrahim Kanouche if len(contents) == 0 { 38*46c4c49dSIbrahim Kanouche return nil 39*46c4c49dSIbrahim Kanouche } 40*46c4c49dSIbrahim Kanouche 41*46c4c49dSIbrahim Kanouche c := string(contents) 42*46c4c49dSIbrahim Kanouche if !strings.HasSuffix(c, "\n") { 43*46c4c49dSIbrahim Kanouche // Force a terminating newline if one isn't present. 44*46c4c49dSIbrahim Kanouche c += "\n" 45*46c4c49dSIbrahim Kanouche } 46*46c4c49dSIbrahim Kanouche i := &input{ 47*46c4c49dSIbrahim Kanouche s: c, 48*46c4c49dSIbrahim Kanouche lang: lang, 49*46c4c49dSIbrahim Kanouche offset: 0, 50*46c4c49dSIbrahim Kanouche pos: position{line: 1, lineRune: []int{0}}, 51*46c4c49dSIbrahim Kanouche } 52*46c4c49dSIbrahim Kanouche i.lex() 53*46c4c49dSIbrahim Kanouche return i.comments 54*46c4c49dSIbrahim Kanouche} 55*46c4c49dSIbrahim Kanouche 56*46c4c49dSIbrahim Kanouche// Comment is either a single line or multiline comment in a source code file. 57*46c4c49dSIbrahim Kanouche// A single line comment has StartLine equal to EndLine. The lines are 1-based. 
58*46c4c49dSIbrahim Kanouchetype Comment struct { 59*46c4c49dSIbrahim Kanouche StartLine int 60*46c4c49dSIbrahim Kanouche EndLine int 61*46c4c49dSIbrahim Kanouche Text string 62*46c4c49dSIbrahim Kanouche} 63*46c4c49dSIbrahim Kanouche 64*46c4c49dSIbrahim Kanouche// Comments allows us to treat a slice of comments as a unit. 65*46c4c49dSIbrahim Kanouchetype Comments []*Comment 66*46c4c49dSIbrahim Kanouche 67*46c4c49dSIbrahim Kanouche// ChunkIterator returns a read-only channel and generates the comments in a 68*46c4c49dSIbrahim Kanouche// goroutine, then closes the channel. 69*46c4c49dSIbrahim Kanouchefunc (c Comments) ChunkIterator() <-chan Comments { 70*46c4c49dSIbrahim Kanouche ch := make(chan Comments) 71*46c4c49dSIbrahim Kanouche go func() { 72*46c4c49dSIbrahim Kanouche defer close(ch) 73*46c4c49dSIbrahim Kanouche 74*46c4c49dSIbrahim Kanouche if len(c) == 0 { 75*46c4c49dSIbrahim Kanouche return 76*46c4c49dSIbrahim Kanouche } 77*46c4c49dSIbrahim Kanouche 78*46c4c49dSIbrahim Kanouche prevChunk := c[0] 79*46c4c49dSIbrahim Kanouche for index := 0; index < len(c); index++ { 80*46c4c49dSIbrahim Kanouche var chunk Comments 81*46c4c49dSIbrahim Kanouche for ; index < len(c); index++ { 82*46c4c49dSIbrahim Kanouche if c[index].StartLine > prevChunk.StartLine+1 { 83*46c4c49dSIbrahim Kanouche break 84*46c4c49dSIbrahim Kanouche } 85*46c4c49dSIbrahim Kanouche if c[index].StartLine == prevChunk.StartLine+2 { 86*46c4c49dSIbrahim Kanouche if c[index].StartLine != c[index].EndLine || prevChunk.StartLine != prevChunk.EndLine { 87*46c4c49dSIbrahim Kanouche break 88*46c4c49dSIbrahim Kanouche } 89*46c4c49dSIbrahim Kanouche } 90*46c4c49dSIbrahim Kanouche chunk = append(chunk, c[index]) 91*46c4c49dSIbrahim Kanouche prevChunk = c[index] 92*46c4c49dSIbrahim Kanouche } 93*46c4c49dSIbrahim Kanouche if len(chunk) == 0 { 94*46c4c49dSIbrahim Kanouche break 95*46c4c49dSIbrahim Kanouche } 96*46c4c49dSIbrahim Kanouche 97*46c4c49dSIbrahim Kanouche ch <- chunk 98*46c4c49dSIbrahim Kanouche if index 
>= len(c) { 99*46c4c49dSIbrahim Kanouche break 100*46c4c49dSIbrahim Kanouche } 101*46c4c49dSIbrahim Kanouche 102*46c4c49dSIbrahim Kanouche prevChunk = c[index] 103*46c4c49dSIbrahim Kanouche index-- 104*46c4c49dSIbrahim Kanouche } 105*46c4c49dSIbrahim Kanouche }() 106*46c4c49dSIbrahim Kanouche return ch 107*46c4c49dSIbrahim Kanouche} 108*46c4c49dSIbrahim Kanouche 109*46c4c49dSIbrahim Kanouche// StartLine is the line number (1-based) the first part of the comment block 110*46c4c49dSIbrahim Kanouche// starts on. 111*46c4c49dSIbrahim Kanouchefunc (c Comments) StartLine() int { 112*46c4c49dSIbrahim Kanouche if len(c) == 0 { 113*46c4c49dSIbrahim Kanouche return 0 114*46c4c49dSIbrahim Kanouche } 115*46c4c49dSIbrahim Kanouche return c[0].StartLine 116*46c4c49dSIbrahim Kanouche} 117*46c4c49dSIbrahim Kanouche 118*46c4c49dSIbrahim Kanouche// String creates a string out of the text of the comments. Comment begin and 119*46c4c49dSIbrahim Kanouche// end markers are removed. 120*46c4c49dSIbrahim Kanouchefunc (c Comments) String() string { 121*46c4c49dSIbrahim Kanouche var s []string 122*46c4c49dSIbrahim Kanouche for _, cmt := range c { 123*46c4c49dSIbrahim Kanouche s = append(s, cmt.Text) 124*46c4c49dSIbrahim Kanouche } 125*46c4c49dSIbrahim Kanouche return strings.Join(s, "\n") 126*46c4c49dSIbrahim Kanouche} 127*46c4c49dSIbrahim Kanouche 128*46c4c49dSIbrahim Kanouche// position records the location of a lexeme. 129*46c4c49dSIbrahim Kanouchetype position struct { 130*46c4c49dSIbrahim Kanouche line int // Line number of input: 1-based 131*46c4c49dSIbrahim Kanouche lineRune []int // Rune offset from beginning of line: 0-based 132*46c4c49dSIbrahim Kanouche} 133*46c4c49dSIbrahim Kanouche 134*46c4c49dSIbrahim Kanouche// input holds the current state of the lexer. 135*46c4c49dSIbrahim Kanouchetype input struct { 136*46c4c49dSIbrahim Kanouche s string // Entire input. 137*46c4c49dSIbrahim Kanouche lang language.Language // Source code language. 
138*46c4c49dSIbrahim Kanouche offset int // Offset into input. 139*46c4c49dSIbrahim Kanouche pos position // Current position in the input. 140*46c4c49dSIbrahim Kanouche comments Comments // Comments in the source file. 141*46c4c49dSIbrahim Kanouche} 142*46c4c49dSIbrahim Kanouche 143*46c4c49dSIbrahim Kanouche// lex is called to obtain the comments. 144*46c4c49dSIbrahim Kanouchefunc (i *input) lex() { 145*46c4c49dSIbrahim Kanouche for { 146*46c4c49dSIbrahim Kanouche c, ok := i.peekRune() 147*46c4c49dSIbrahim Kanouche if !ok { 148*46c4c49dSIbrahim Kanouche break 149*46c4c49dSIbrahim Kanouche } 150*46c4c49dSIbrahim Kanouche 151*46c4c49dSIbrahim Kanouche switch c { 152*46c4c49dSIbrahim Kanouche case '"', '\'', '`': // String 153*46c4c49dSIbrahim Kanouche // Ignore strings because they could contain comment 154*46c4c49dSIbrahim Kanouche // start or end sequences which we need to ignore. 155*46c4c49dSIbrahim Kanouche if i.lang == language.HTML { 156*46c4c49dSIbrahim Kanouche // Quotes in HTML-like files aren't meaningful, 157*46c4c49dSIbrahim Kanouche // because it's basically plain text 158*46c4c49dSIbrahim Kanouche break 159*46c4c49dSIbrahim Kanouche } 160*46c4c49dSIbrahim Kanouche 161*46c4c49dSIbrahim Kanouche ok, hasEscape := i.lang.QuoteCharacter(c) 162*46c4c49dSIbrahim Kanouche if !ok { 163*46c4c49dSIbrahim Kanouche break 164*46c4c49dSIbrahim Kanouche } 165*46c4c49dSIbrahim Kanouche 166*46c4c49dSIbrahim Kanouche var content bytes.Buffer 167*46c4c49dSIbrahim Kanouche isDocString := false 168*46c4c49dSIbrahim Kanouche quote := string(c) 169*46c4c49dSIbrahim Kanouche if i.lang == language.Python { 170*46c4c49dSIbrahim Kanouche if c == '\'' && i.match("'''") { 171*46c4c49dSIbrahim Kanouche quote = "'''" 172*46c4c49dSIbrahim Kanouche // Assume module-level docstrings start at the 173*46c4c49dSIbrahim Kanouche // beginning of a line. Function docstrings not 174*46c4c49dSIbrahim Kanouche // supported. 
175*46c4c49dSIbrahim Kanouche if i.pos.lineRune[len(i.pos.lineRune)-1] == 3 { 176*46c4c49dSIbrahim Kanouche isDocString = true 177*46c4c49dSIbrahim Kanouche } 178*46c4c49dSIbrahim Kanouche } else if c == '"' && i.match(`"""`) { 179*46c4c49dSIbrahim Kanouche quote = `"""` 180*46c4c49dSIbrahim Kanouche if i.pos.lineRune[len(i.pos.lineRune)-1] == 3 { 181*46c4c49dSIbrahim Kanouche isDocString = true 182*46c4c49dSIbrahim Kanouche } 183*46c4c49dSIbrahim Kanouche } else { 184*46c4c49dSIbrahim Kanouche i.readRune() // Eat quote. 185*46c4c49dSIbrahim Kanouche } 186*46c4c49dSIbrahim Kanouche } else { 187*46c4c49dSIbrahim Kanouche i.readRune() // Eat quote. 188*46c4c49dSIbrahim Kanouche } 189*46c4c49dSIbrahim Kanouche 190*46c4c49dSIbrahim Kanouche startLine := i.pos.line 191*46c4c49dSIbrahim Kanouche for { 192*46c4c49dSIbrahim Kanouche c, ok = i.peekRune() 193*46c4c49dSIbrahim Kanouche if !ok { 194*46c4c49dSIbrahim Kanouche return 195*46c4c49dSIbrahim Kanouche } 196*46c4c49dSIbrahim Kanouche if hasEscape && c == '\\' { 197*46c4c49dSIbrahim Kanouche i.readRune() // Eat escape. 198*46c4c49dSIbrahim Kanouche } else if i.match(quote) { 199*46c4c49dSIbrahim Kanouche break 200*46c4c49dSIbrahim Kanouche } else if (i.lang == language.JavaScript || i.lang == language.Perl) && c == '\n' { 201*46c4c49dSIbrahim Kanouche // JavaScript and Perl allow you to 202*46c4c49dSIbrahim Kanouche // specify regexes without quotes, but 203*46c4c49dSIbrahim Kanouche // which contain quotes. So treat the 204*46c4c49dSIbrahim Kanouche // newline as terminating the string. 
205*46c4c49dSIbrahim Kanouche break 206*46c4c49dSIbrahim Kanouche } 207*46c4c49dSIbrahim Kanouche c := i.readRune() 208*46c4c49dSIbrahim Kanouche if isDocString { 209*46c4c49dSIbrahim Kanouche content.WriteRune(c) 210*46c4c49dSIbrahim Kanouche } 211*46c4c49dSIbrahim Kanouche if i.eof() { 212*46c4c49dSIbrahim Kanouche return 213*46c4c49dSIbrahim Kanouche } 214*46c4c49dSIbrahim Kanouche } 215*46c4c49dSIbrahim Kanouche if isDocString { 216*46c4c49dSIbrahim Kanouche i.comments = append(i.comments, &Comment{ 217*46c4c49dSIbrahim Kanouche StartLine: startLine, 218*46c4c49dSIbrahim Kanouche EndLine: i.pos.line, 219*46c4c49dSIbrahim Kanouche Text: content.String(), 220*46c4c49dSIbrahim Kanouche }) 221*46c4c49dSIbrahim Kanouche } 222*46c4c49dSIbrahim Kanouche default: 223*46c4c49dSIbrahim Kanouche startLine := i.pos.line 224*46c4c49dSIbrahim Kanouche var comment bytes.Buffer 225*46c4c49dSIbrahim Kanouche if ok, start, end := i.multiLineComment(); ok { // Multiline comment 226*46c4c49dSIbrahim Kanouche nesting := 0 227*46c4c49dSIbrahim Kanouche startLine := i.pos.line 228*46c4c49dSIbrahim Kanouche for { 229*46c4c49dSIbrahim Kanouche if i.eof() { 230*46c4c49dSIbrahim Kanouche return 231*46c4c49dSIbrahim Kanouche } 232*46c4c49dSIbrahim Kanouche c := i.readRune() 233*46c4c49dSIbrahim Kanouche comment.WriteRune(c) 234*46c4c49dSIbrahim Kanouche if i.lang.NestedComments() && i.match(start) { 235*46c4c49dSIbrahim Kanouche // Allows nested comments. 
236*46c4c49dSIbrahim Kanouche comment.WriteString(start) 237*46c4c49dSIbrahim Kanouche nesting++ 238*46c4c49dSIbrahim Kanouche } 239*46c4c49dSIbrahim Kanouche if i.match(end) { 240*46c4c49dSIbrahim Kanouche if nesting > 0 { 241*46c4c49dSIbrahim Kanouche comment.WriteString(end) 242*46c4c49dSIbrahim Kanouche nesting-- 243*46c4c49dSIbrahim Kanouche } else { 244*46c4c49dSIbrahim Kanouche break 245*46c4c49dSIbrahim Kanouche } 246*46c4c49dSIbrahim Kanouche } 247*46c4c49dSIbrahim Kanouche } 248*46c4c49dSIbrahim Kanouche i.comments = append(i.comments, &Comment{ 249*46c4c49dSIbrahim Kanouche StartLine: startLine, 250*46c4c49dSIbrahim Kanouche EndLine: i.pos.line, 251*46c4c49dSIbrahim Kanouche Text: comment.String(), 252*46c4c49dSIbrahim Kanouche }) 253*46c4c49dSIbrahim Kanouche } else if i.singleLineComment() { // Single line comment 254*46c4c49dSIbrahim Kanouche for { 255*46c4c49dSIbrahim Kanouche if i.eof() { 256*46c4c49dSIbrahim Kanouche return 257*46c4c49dSIbrahim Kanouche } 258*46c4c49dSIbrahim Kanouche c = i.readRune() 259*46c4c49dSIbrahim Kanouche if c == '\n' { 260*46c4c49dSIbrahim Kanouche i.unreadRune(c) 261*46c4c49dSIbrahim Kanouche break 262*46c4c49dSIbrahim Kanouche } 263*46c4c49dSIbrahim Kanouche comment.WriteRune(c) 264*46c4c49dSIbrahim Kanouche } 265*46c4c49dSIbrahim Kanouche i.comments = append(i.comments, &Comment{ 266*46c4c49dSIbrahim Kanouche StartLine: startLine, 267*46c4c49dSIbrahim Kanouche EndLine: i.pos.line, 268*46c4c49dSIbrahim Kanouche Text: comment.String(), 269*46c4c49dSIbrahim Kanouche }) 270*46c4c49dSIbrahim Kanouche } 271*46c4c49dSIbrahim Kanouche } 272*46c4c49dSIbrahim Kanouche 273*46c4c49dSIbrahim Kanouche i.readRune() // Ignore non-comments. 274*46c4c49dSIbrahim Kanouche } 275*46c4c49dSIbrahim Kanouche} 276*46c4c49dSIbrahim Kanouche 277*46c4c49dSIbrahim Kanouche// singleLineComment returns 'true' if we've run across a single line comment 278*46c4c49dSIbrahim Kanouche// in the given language. 
279*46c4c49dSIbrahim Kanouchefunc (i *input) singleLineComment() bool { 280*46c4c49dSIbrahim Kanouche if i.match(i.lang.SingleLineCommentStart()) { 281*46c4c49dSIbrahim Kanouche return true 282*46c4c49dSIbrahim Kanouche } 283*46c4c49dSIbrahim Kanouche 284*46c4c49dSIbrahim Kanouche if i.lang == language.SQL { 285*46c4c49dSIbrahim Kanouche return i.match(language.MySQL.SingleLineCommentStart()) 286*46c4c49dSIbrahim Kanouche } else if i.lang == language.ObjectiveC { 287*46c4c49dSIbrahim Kanouche return i.match(language.Matlab.SingleLineCommentStart()) 288*46c4c49dSIbrahim Kanouche } 289*46c4c49dSIbrahim Kanouche 290*46c4c49dSIbrahim Kanouche return false 291*46c4c49dSIbrahim Kanouche} 292*46c4c49dSIbrahim Kanouche 293*46c4c49dSIbrahim Kanouche// multiLineComment returns 'true' if we've run across a multiline comment in 294*46c4c49dSIbrahim Kanouche// the given language. 295*46c4c49dSIbrahim Kanouchefunc (i *input) multiLineComment() (bool, string, string) { 296*46c4c49dSIbrahim Kanouche if s := i.lang.MultilineCommentStart(); i.match(s) { 297*46c4c49dSIbrahim Kanouche return true, s, i.lang.MultilineCommentEnd() 298*46c4c49dSIbrahim Kanouche } 299*46c4c49dSIbrahim Kanouche 300*46c4c49dSIbrahim Kanouche if i.lang == language.SQL { 301*46c4c49dSIbrahim Kanouche if s := language.MySQL.MultilineCommentStart(); i.match(s) { 302*46c4c49dSIbrahim Kanouche return true, s, language.MySQL.MultilineCommentEnd() 303*46c4c49dSIbrahim Kanouche } 304*46c4c49dSIbrahim Kanouche } else if i.lang == language.ObjectiveC { 305*46c4c49dSIbrahim Kanouche if s := language.Matlab.MultilineCommentStart(); i.match(s) { 306*46c4c49dSIbrahim Kanouche return true, s, language.Matlab.MultilineCommentEnd() 307*46c4c49dSIbrahim Kanouche } 308*46c4c49dSIbrahim Kanouche } 309*46c4c49dSIbrahim Kanouche 310*46c4c49dSIbrahim Kanouche return false, "", "" 311*46c4c49dSIbrahim Kanouche} 312*46c4c49dSIbrahim Kanouche 313*46c4c49dSIbrahim Kanouche// match returns 'true' if the next tokens in the stream match 
the given 314*46c4c49dSIbrahim Kanouche// string. 315*46c4c49dSIbrahim Kanouchefunc (i *input) match(s string) bool { 316*46c4c49dSIbrahim Kanouche if s == "" { 317*46c4c49dSIbrahim Kanouche return false 318*46c4c49dSIbrahim Kanouche } 319*46c4c49dSIbrahim Kanouche saved := s 320*46c4c49dSIbrahim Kanouche var read []rune 321*46c4c49dSIbrahim Kanouche for len(s) > 0 && !i.eof() { 322*46c4c49dSIbrahim Kanouche r, size := utf8.DecodeRuneInString(s) 323*46c4c49dSIbrahim Kanouche if c, ok := i.peekRune(); ok && c == r { 324*46c4c49dSIbrahim Kanouche read = append(read, c) 325*46c4c49dSIbrahim Kanouche } else { 326*46c4c49dSIbrahim Kanouche // No match. Push the tokens we read back onto the stack. 327*46c4c49dSIbrahim Kanouche for idx := len(read) - 1; idx >= 0; idx-- { 328*46c4c49dSIbrahim Kanouche i.unreadRune(read[idx]) 329*46c4c49dSIbrahim Kanouche } 330*46c4c49dSIbrahim Kanouche return false 331*46c4c49dSIbrahim Kanouche } 332*46c4c49dSIbrahim Kanouche s = s[size:] 333*46c4c49dSIbrahim Kanouche i.readRune() // Eat token. 334*46c4c49dSIbrahim Kanouche } 335*46c4c49dSIbrahim Kanouche return string(read) == saved 336*46c4c49dSIbrahim Kanouche} 337*46c4c49dSIbrahim Kanouche 338*46c4c49dSIbrahim Kanouche// eof reports whether the input has reached the end of the file. 339*46c4c49dSIbrahim Kanouchefunc (i *input) eof() bool { 340*46c4c49dSIbrahim Kanouche return len(i.s) <= i.offset 341*46c4c49dSIbrahim Kanouche} 342*46c4c49dSIbrahim Kanouche 343*46c4c49dSIbrahim Kanouche// peekRune returns the next rune in the input without consuming it. 
344*46c4c49dSIbrahim Kanouchefunc (i *input) peekRune() (rune, bool) { 345*46c4c49dSIbrahim Kanouche if i.eof() { 346*46c4c49dSIbrahim Kanouche return rune(0), false 347*46c4c49dSIbrahim Kanouche } 348*46c4c49dSIbrahim Kanouche r, _ := utf8.DecodeRuneInString(i.s[i.offset:]) 349*46c4c49dSIbrahim Kanouche return r, true 350*46c4c49dSIbrahim Kanouche} 351*46c4c49dSIbrahim Kanouche 352*46c4c49dSIbrahim Kanouche// readRune consumes and returns the next rune in the input. 353*46c4c49dSIbrahim Kanouchefunc (i *input) readRune() rune { 354*46c4c49dSIbrahim Kanouche r, size := utf8.DecodeRuneInString(i.s[i.offset:]) 355*46c4c49dSIbrahim Kanouche if r == '\n' { 356*46c4c49dSIbrahim Kanouche i.pos.line++ 357*46c4c49dSIbrahim Kanouche i.pos.lineRune = append(i.pos.lineRune, 0) 358*46c4c49dSIbrahim Kanouche } else { 359*46c4c49dSIbrahim Kanouche i.pos.lineRune[len(i.pos.lineRune)-1]++ 360*46c4c49dSIbrahim Kanouche } 361*46c4c49dSIbrahim Kanouche i.offset += size 362*46c4c49dSIbrahim Kanouche return r 363*46c4c49dSIbrahim Kanouche} 364*46c4c49dSIbrahim Kanouche 365*46c4c49dSIbrahim Kanouche// unreadRune winds the lexer's state back to before the rune was read. 366*46c4c49dSIbrahim Kanouchefunc (i *input) unreadRune(c rune) { 367*46c4c49dSIbrahim Kanouche p := make([]byte, utf8.UTFMax) 368*46c4c49dSIbrahim Kanouche size := utf8.EncodeRune(p, c) 369*46c4c49dSIbrahim Kanouche i.offset -= size 370*46c4c49dSIbrahim Kanouche if c == '\n' { 371*46c4c49dSIbrahim Kanouche i.pos.line-- 372*46c4c49dSIbrahim Kanouche if len(i.pos.lineRune) > 1 { 373*46c4c49dSIbrahim Kanouche i.pos.lineRune = i.pos.lineRune[:len(i.pos.lineRune)-1] 374*46c4c49dSIbrahim Kanouche } else { 375*46c4c49dSIbrahim Kanouche i.pos.lineRune[len(i.pos.lineRune)-1] = 0 376*46c4c49dSIbrahim Kanouche } 377*46c4c49dSIbrahim Kanouche } else { 378*46c4c49dSIbrahim Kanouche i.pos.lineRune[len(i.pos.lineRune)-1]-- 379*46c4c49dSIbrahim Kanouche } 380*46c4c49dSIbrahim Kanouche} 381