// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

// This file defines a recursive-descent parser for Starlark.
// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
//
// TODO(adonovan): use syntax.Error more systematically throughout the
// package. Verify that error positions are correct using the
// chunkedfile mechanism.

import "log"

// Enable this flag to print the token stream and log.Fatal on the first error.
const debug = false

// A Mode value is a set of flags (or 0) that controls optional parser functionality.
type Mode uint

const (
	RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
)

// Parse parses the input data and returns the corresponding parse tree.
//
// If src != nil, Parse parses the source from src and the filename
// is only used when recording position information.
// The type of the argument for the src parameter must be string,
// []byte, io.Reader, or FilePortion.
// If src == nil, Parse parses the file specified by filename.
func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
	in, err := newScanner(filename, src, mode&RetainComments != 0)
	if err != nil {
		return nil, err
	}
	p := parser{in: in}
	defer p.in.recover(&err)

	p.nextToken() // read first lookahead token
	f = p.parseFile()
	if f != nil {
		f.Path = filename
	}
	p.assignComments(f)
	return f, nil
}

// ParseCompoundStmt parses a single compound statement:
// a blank line, a def, for, while, or if statement, or a
// semicolon-separated list of simple statements followed
// by a newline. These are the units on which the REPL operates.
// ParseCompoundStmt does not consume any following input.
// The parser calls the readline function each
// time it needs a new line of input.
func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
	in, err := newScanner(filename, readline, false)
	if err != nil {
		return nil, err
	}

	p := parser{in: in}
	defer p.in.recover(&err)

	p.nextToken() // read first lookahead token

	var stmts []Stmt
	switch p.tok {
	case DEF, IF, FOR, WHILE:
		stmts = p.parseStmt(stmts)
	case NEWLINE:
		// blank line
	default:
		stmts = p.parseSimpleStmt(stmts, false)
		// Require but don't consume newline, to avoid blocking again.
		if p.tok != NEWLINE {
			p.in.errorf(p.in.pos, "invalid syntax")
		}
	}

	return &File{Path: filename, Stmts: stmts}, nil
}

// ParseExpr parses a Starlark expression.
// A comma-separated list of expressions is parsed as a tuple.
// See Parse for explanation of parameters.
func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
	in, err := newScanner(filename, src, mode&RetainComments != 0)
	if err != nil {
		return nil, err
	}
	p := parser{in: in}
	defer p.in.recover(&err)

	p.nextToken() // read first lookahead token

	// Use parseExpr, not parseTest, to permit an unparenthesized tuple.
	expr = p.parseExpr(false)

	// A following newline (e.g. "f()\n") appears outside any brackets,
	// on a non-blank line, and thus results in a NEWLINE token.
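	// Illustration (not exhaustive): for input "f()\n" the trailing NEWLINE
	// is consumed here, whereas for "f() g" a stray IDENT survives and is
	// reported as an error by the EOF check below.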
	if p.tok == NEWLINE {
		p.nextToken()
	}

	if p.tok != EOF {
		p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
	}
	p.assignComments(expr)
	return expr, nil
}

type parser struct {
	in     *scanner
	tok    Token
	tokval tokenValue
}

// nextToken advances the scanner and returns the position of the
// previous token.
func (p *parser) nextToken() Position {
	oldpos := p.tokval.pos
	p.tok = p.in.nextToken(&p.tokval)
	// enable to see the token stream
	if debug {
		log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
	}
	return oldpos
}

// file_input = (NEWLINE | stmt)* EOF
func (p *parser) parseFile() *File {
	var stmts []Stmt
	for p.tok != EOF {
		if p.tok == NEWLINE {
			p.nextToken()
			continue
		}
		stmts = p.parseStmt(stmts)
	}
	return &File{Stmts: stmts}
}

func (p *parser) parseStmt(stmts []Stmt) []Stmt {
	if p.tok == DEF {
		return append(stmts, p.parseDefStmt())
	} else if p.tok == IF {
		return append(stmts, p.parseIfStmt())
	} else if p.tok == FOR {
		return append(stmts, p.parseForStmt())
	} else if p.tok == WHILE {
		return append(stmts, p.parseWhileStmt())
	}
	return p.parseSimpleStmt(stmts, true)
}

func (p *parser) parseDefStmt() Stmt {
	defpos := p.nextToken() // consume DEF
	id := p.parseIdent()
	p.consume(LPAREN)
	params := p.parseParams()
	p.consume(RPAREN)
	p.consume(COLON)
	body := p.parseSuite()
	return &DefStmt{
		Def:    defpos,
		Name:   id,
		Params: params,
		Body:   body,
	}
}

func (p *parser) parseIfStmt() Stmt {
	ifpos := p.nextToken() // consume IF
	cond := p.parseTest()
	p.consume(COLON)
	body := p.parseSuite()
	ifStmt := &IfStmt{
		If:   ifpos,
		Cond: cond,
		True: body,
	}
	tail := ifStmt
	for p.tok == ELIF {
		elifpos := p.nextToken() // consume ELIF
		cond := p.parseTest()
		p.consume(COLON)
		body := p.parseSuite()
		elif := &IfStmt{
			If:   elifpos,
			Cond: cond,
			True: body,
		}
		tail.ElsePos = elifpos
		tail.False = []Stmt{elif}
		tail = elif
	}
	if p.tok == ELSE {
		tail.ElsePos = p.nextToken() // consume ELSE
		p.consume(COLON)
		tail.False = p.parseSuite()
	}
	return ifStmt
}

func (p *parser) parseForStmt() Stmt {
	forpos := p.nextToken() // consume FOR
	vars := p.parseForLoopVariables()
	p.consume(IN)
	x := p.parseExpr(false)
	p.consume(COLON)
	body := p.parseSuite()
	return &ForStmt{
		For:  forpos,
		Vars: vars,
		X:    x,
		Body: body,
	}
}

func (p *parser) parseWhileStmt() Stmt {
	whilepos := p.nextToken() // consume WHILE
	cond := p.parseTest()
	p.consume(COLON)
	body := p.parseSuite()
	return &WhileStmt{
		While: whilepos,
		Cond:  cond,
		Body:  body,
	}
}

// Equivalent to the 'exprlist' production in the Python grammar.
//
// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
func (p *parser) parseForLoopVariables() Expr {
	// Avoid parseExpr because it would consume the IN token
	// following x in "for x in y: ...".
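	// Illustrative sketch: for "for k, v in d.items(): ...", this parses
	// k and v as two primaries and wraps them in a TupleExpr, leaving the
	// IN token for parseForStmt to consume.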
	v := p.parsePrimaryWithSuffix()
	if p.tok != COMMA {
		return v
	}

	list := []Expr{v}
	for p.tok == COMMA {
		p.nextToken()
		if terminatesExprList(p.tok) {
			break
		}
		list = append(list, p.parsePrimaryWithSuffix())
	}
	return &TupleExpr{List: list}
}

// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
// In REPL mode, it does not consume the NEWLINE.
func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
	for {
		stmts = append(stmts, p.parseSmallStmt())
		if p.tok != SEMI {
			break
		}
		p.nextToken() // consume SEMI
		if p.tok == NEWLINE || p.tok == EOF {
			break
		}
	}
	// EOF without NEWLINE occurs in `if x: pass`, for example.
	if p.tok != EOF && consumeNL {
		p.consume(NEWLINE)
	}

	return stmts
}

// small_stmt = RETURN expr?
//            | PASS | BREAK | CONTINUE
//            | LOAD ...
//            | expr ('=' | '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr // assign
//            | expr
func (p *parser) parseSmallStmt() Stmt {
	switch p.tok {
	case RETURN:
		pos := p.nextToken() // consume RETURN
		var result Expr
		if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
			result = p.parseExpr(false)
		}
		return &ReturnStmt{Return: pos, Result: result}

	case BREAK, CONTINUE, PASS:
		tok := p.tok
		pos := p.nextToken() // consume it
		return &BranchStmt{Token: tok, TokenPos: pos}

	case LOAD:
		return p.parseLoadStmt()
	}

	// Assignment
	x := p.parseExpr(false)
	switch p.tok {
	case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
		op := p.tok
		pos := p.nextToken() // consume op
		rhs := p.parseExpr(false)
		return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
	}

	// Expression statement (e.g. function call, doc string).
	return &ExprStmt{X: x}
}

// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
func (p *parser) parseLoadStmt() *LoadStmt {
	loadPos := p.nextToken() // consume LOAD
	lparen := p.consume(LPAREN)

	if p.tok != STRING {
		p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
	}
	module := p.parsePrimary().(*Literal)

	var from, to []*Ident
	for p.tok != RPAREN && p.tok != EOF {
		p.consume(COMMA)
		if p.tok == RPAREN {
			break // allow trailing comma
		}
		switch p.tok {
		case STRING:
			// load("module", "id")
			// The local name is the same as the original name.
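			// e.g. load("module.star", "f") binds local f to the module's f;
			// the renaming form g="f" is handled by the IDENT case below.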
			lit := p.parsePrimary().(*Literal)
			id := &Ident{
				NamePos: lit.TokenPos.add(`"`),
				Name:    lit.Value.(string),
			}
			to = append(to, id)
			from = append(from, id)

		case IDENT:
			// load("module", to="from")
			id := p.parseIdent()
			to = append(to, id)
			if p.tok != EQ {
				p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
			}
			p.consume(EQ)
			if p.tok != STRING {
				p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
			}
			lit := p.parsePrimary().(*Literal)
			from = append(from, &Ident{
				NamePos: lit.TokenPos.add(`"`),
				Name:    lit.Value.(string),
			})

		case RPAREN:
			p.in.errorf(p.in.pos, "trailing comma in load statement")

		default:
			p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
		}
	}
	rparen := p.consume(RPAREN)

	if len(to) == 0 {
		p.in.errorf(lparen, "load statement must import at least 1 symbol")
	}
	return &LoadStmt{
		Load:   loadPos,
		Module: module,
		To:     to,
		From:   from,
		Rparen: rparen,
	}
}

// suite is typically what follows a COLON (e.g. after DEF or FOR).
// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
func (p *parser) parseSuite() []Stmt {
	if p.tok == NEWLINE {
		p.nextToken() // consume NEWLINE
		p.consume(INDENT)
		var stmts []Stmt
		for p.tok != OUTDENT && p.tok != EOF {
			stmts = p.parseStmt(stmts)
		}
		p.consume(OUTDENT)
		return stmts
	}

	return p.parseSimpleStmt(nil, true)
}

func (p *parser) parseIdent() *Ident {
	if p.tok != IDENT {
		p.in.error(p.in.pos, "not an identifier")
	}
	id := &Ident{
		NamePos: p.tokval.pos,
		Name:    p.tokval.raw,
	}
	p.nextToken()
	return id
}

func (p *parser) consume(t Token) Position {
	if p.tok != t {
		p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
	}
	return p.nextToken()
}

// params = (param COMMA)* param COMMA?
//        |
//
// param = IDENT
//       | IDENT EQ test
//       | STAR
//       | STAR IDENT
//       | STARSTAR IDENT
//
// parseParams parses a parameter list. The resulting expressions are of the form:
//
//	*Ident                              x
//	*Binary{Op: EQ, X: *Ident, Y: Expr} x=y
//	*Unary{Op: STAR}                    *
//	*Unary{Op: STAR, X: *Ident}         *args
//	*Unary{Op: STARSTAR, X: *Ident}     **kwargs
func (p *parser) parseParams() []Expr {
	var params []Expr
	for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
		if len(params) > 0 {
			p.consume(COMMA)
		}
		if p.tok == RPAREN {
			break
		}

		// * or *args or **kwargs
		if p.tok == STAR || p.tok == STARSTAR {
			op := p.tok
			pos := p.nextToken()
			var x Expr
			if op == STARSTAR || p.tok == IDENT {
				x = p.parseIdent()
			}
			params = append(params, &UnaryExpr{
				OpPos: pos,
				Op:    op,
				X:     x,
			})
			continue
		}

		// IDENT
		// IDENT = test
		id := p.parseIdent()
		if p.tok == EQ { // default value
			eq := p.nextToken()
			dflt := p.parseTest()
			params = append(params, &BinaryExpr{
				X:     id,
				OpPos: eq,
				Op:    EQ,
				Y:     dflt,
			})
			continue
		}

		params = append(params, id)
	}
	return params
}

// parseExpr parses an expression, possibly consisting of a
// comma-separated list of 'test' expressions.
//
// In many cases we must use parseTest to avoid ambiguity such as
// f(x, y) vs. f((x, y)).
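//
// For instance (an illustrative sketch, not exhaustive): given "1, 2",
// parseExpr returns a TupleExpr of two Literals, whereas parseTest would
// return only the first Literal and leave the COMMA unconsumed.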
func (p *parser) parseExpr(inParens bool) Expr {
	x := p.parseTest()
	if p.tok != COMMA {
		return x
	}

	// tuple
	exprs := p.parseExprs([]Expr{x}, inParens)
	return &TupleExpr{List: exprs}
}

// parseExprs parses a comma-separated list of expressions, starting with the comma.
// It is used to parse tuples and list elements.
// expr_list = (',' expr)* ','?
func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
	for p.tok == COMMA {
		pos := p.nextToken()
		if terminatesExprList(p.tok) {
			if !allowTrailingComma {
				p.in.error(pos, "unparenthesized tuple with trailing comma")
			}
			break
		}
		exprs = append(exprs, p.parseTest())
	}
	return exprs
}

// parseTest parses a 'test', a single-component expression.
func (p *parser) parseTest() Expr {
	if p.tok == LAMBDA {
		return p.parseLambda(true)
	}

	x := p.parseTestPrec(0)

	// conditional expression (t IF cond ELSE f)
	if p.tok == IF {
		ifpos := p.nextToken()
		cond := p.parseTestPrec(0)
		if p.tok != ELSE {
			p.in.error(ifpos, "conditional expression without else clause")
		}
		elsepos := p.nextToken()
		else_ := p.parseTest()
		return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
	}

	return x
}

// parseTestNoCond parses a single-component expression without
// consuming a trailing 'if expr else expr'.
func (p *parser) parseTestNoCond() Expr {
	if p.tok == LAMBDA {
		return p.parseLambda(false)
	}
	return p.parseTestPrec(0)
}

// parseLambda parses a lambda expression.
// The allowCond flag allows the body to be an 'a if b else c' conditional.
func (p *parser) parseLambda(allowCond bool) Expr {
	lambda := p.nextToken()
	var params []Expr
	if p.tok != COLON {
		params = p.parseParams()
	}
	p.consume(COLON)

	var body Expr
	if allowCond {
		body = p.parseTest()
	} else {
		body = p.parseTestNoCond()
	}

	return &LambdaExpr{
		Lambda: lambda,
		Params: params,
		Body:   body,
	}
}

func (p *parser) parseTestPrec(prec int) Expr {
	if prec >= len(preclevels) {
		return p.parsePrimaryWithSuffix()
	}

	// expr = NOT expr
	if p.tok == NOT && prec == int(precedence[NOT]) {
		pos := p.nextToken()
		x := p.parseTestPrec(prec)
		return &UnaryExpr{
			OpPos: pos,
			Op:    NOT,
			X:     x,
		}
	}

	return p.parseBinopExpr(prec)
}

// expr = test (OP test)*
// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
func (p *parser) parseBinopExpr(prec int) Expr {
	x := p.parseTestPrec(prec + 1)
	for first := true; ; first = false {
		if p.tok == NOT {
			p.nextToken() // consume NOT
			// In this context, NOT must be followed by IN.
			// Replace NOT IN by a single NOT_IN token.
			if p.tok != IN {
				p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
			}
			p.tok = NOT_IN
		}

		// Binary operator of specified precedence?
		opprec := int(precedence[p.tok])
		if opprec < prec {
			return x
		}

		// Comparisons are non-associative.
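		// For illustration: "a < b < c" reaches this point on the second '<'
		// with first == false and is rejected; the user must write explicit
		// parentheses such as "(a < b) < c".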
		if !first && opprec == int(precedence[EQL]) {
			p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
				x.(*BinaryExpr).Op, p.tok)
		}

		op := p.tok
		pos := p.nextToken()
		y := p.parseTestPrec(opprec + 1)
		x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
	}
}

// precedence maps each operator to its precedence (0-9), or -1 for other tokens.
var precedence [maxToken]int8

// preclevels groups operators of equal precedence.
// Comparisons are nonassociative; other binary operators associate to the left.
// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
var preclevels = [...][]Token{
	{OR},                                   // or
	{AND},                                  // and
	{NOT},                                  // not (unary)
	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
	{PIPE},                                 // |
	{CIRCUMFLEX},                           // ^
	{AMP},                                  // &
	{LTLT, GTGT},                           // << >>
	{MINUS, PLUS},                          // - +
	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
}

func init() {
	// populate precedence table
	for i := range precedence {
		precedence[i] = -1
	}
	for level, tokens := range preclevels {
		for _, tok := range tokens {
			precedence[tok] = int8(level)
		}
	}
}

// primary_with_suffix = primary
//                     | primary '.' IDENT
//                     | primary slice_suffix
//                     | primary call_suffix
func (p *parser) parsePrimaryWithSuffix() Expr {
	x := p.parsePrimary()
	for {
		switch p.tok {
		case DOT:
			dot := p.nextToken()
			id := p.parseIdent()
			x = &DotExpr{Dot: dot, X: x, Name: id}
		case LBRACK:
			x = p.parseSliceSuffix(x)
		case LPAREN:
			x = p.parseCallSuffix(x)
		default:
			return x
		}
	}
}

// slice_suffix = '[' expr? ':' expr? ':' expr? ']'
func (p *parser) parseSliceSuffix(x Expr) Expr {
	lbrack := p.nextToken()
	var lo, hi, step Expr
	if p.tok != COLON {
		y := p.parseExpr(false)

		// index x[y]
		if p.tok == RBRACK {
			rbrack := p.nextToken()
			return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
		}

		lo = y
	}

	// slice or substring x[lo:hi:step]
	if p.tok == COLON {
		p.nextToken()
		if p.tok != COLON && p.tok != RBRACK {
			hi = p.parseTest()
		}
	}
	if p.tok == COLON {
		p.nextToken()
		if p.tok != RBRACK {
			step = p.parseTest()
		}
	}
	rbrack := p.consume(RBRACK)
	return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
}

// call_suffix = '(' arg_list? ')'
func (p *parser) parseCallSuffix(fn Expr) Expr {
	lparen := p.consume(LPAREN)
	var rparen Position
	var args []Expr
	if p.tok == RPAREN {
		rparen = p.nextToken()
	} else {
		args = p.parseArgs()
		rparen = p.consume(RPAREN)
	}
	return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
}

// parseArgs parses a list of actual parameter values (arguments).
// It mirrors the structure of parseParams.
// arg_list = ((arg COMMA)* arg COMMA?)?
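//
// For example (a rough sketch of the result shapes, not the exact AST):
// f(1, x=2, *rest, **kw) yields a positional Literal, a BinaryExpr{Op: EQ}
// for x=2, and UnaryExpr nodes with Op STAR and STARSTAR for the spreads.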
func (p *parser) parseArgs() []Expr {
	var args []Expr
	for p.tok != RPAREN && p.tok != EOF {
		if len(args) > 0 {
			p.consume(COMMA)
		}
		if p.tok == RPAREN {
			break
		}

		// *args or **kwargs
		if p.tok == STAR || p.tok == STARSTAR {
			op := p.tok
			pos := p.nextToken()
			x := p.parseTest()
			args = append(args, &UnaryExpr{
				OpPos: pos,
				Op:    op,
				X:     x,
			})
			continue
		}

		// We use a different strategy from Bazel here to stay within LL(1).
		// Instead of looking ahead two tokens (IDENT, EQ) we parse
		// 'test = test' then check that the first was an IDENT.
		x := p.parseTest()

		if p.tok == EQ {
			// name = value
			if _, ok := x.(*Ident); !ok {
				p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
			}
			eq := p.nextToken()
			y := p.parseTest()
			x = &BinaryExpr{
				X:     x,
				OpPos: eq,
				Op:    EQ,
				Y:     y,
			}
		}

		args = append(args, x)
	}
	return args
}

// primary = IDENT
//         | INT | FLOAT | STRING | BYTES
//         | '[' ...                    // list literal or comprehension
//         | '{' ...                    // dict literal or comprehension
//         | '(' ...                    // tuple or parenthesized expression
//         | ('-'|'+'|'~') primary_with_suffix
func (p *parser) parsePrimary() Expr {
	switch p.tok {
	case IDENT:
		return p.parseIdent()

	case INT, FLOAT, STRING, BYTES:
		var val interface{}
		tok := p.tok
		switch tok {
		case INT:
			if p.tokval.bigInt != nil {
				val = p.tokval.bigInt
			} else {
				val = p.tokval.int
			}
		case FLOAT:
			val = p.tokval.float
		case STRING, BYTES:
			val = p.tokval.string
		}
		raw := p.tokval.raw
		pos := p.nextToken()
		return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}

	case LBRACK:
		return p.parseList()

	case LBRACE:
		return p.parseDict()

	case LPAREN:
		lparen := p.nextToken()
		if p.tok == RPAREN {
			// empty tuple
			rparen := p.nextToken()
			return &TupleExpr{Lparen: lparen, Rparen: rparen}
		}
		e := p.parseExpr(true) // allow trailing comma
		rparen := p.consume(RPAREN)
		return &ParenExpr{
			Lparen: lparen,
			X:      e,
			Rparen: rparen,
		}

	case MINUS, PLUS, TILDE: // unary
		tok := p.tok
		pos := p.nextToken()
		x := p.parsePrimaryWithSuffix()
		return &UnaryExpr{
			OpPos: pos,
			Op:    tok,
			X:     x,
		}
	}
	p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
	panic("unreachable")
}

// list = '[' ']'
//      | '[' expr ']'
//      | '[' expr expr_list ']'
//      | '[' expr (FOR loop_variables IN expr)+ ']'
func (p *parser) parseList() Expr {
	lbrack := p.nextToken()
	if p.tok == RBRACK {
		// empty List
		rbrack := p.nextToken()
		return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
	}

	x := p.parseTest()

	if p.tok == FOR {
		// list comprehension
		return p.parseComprehensionSuffix(lbrack, x, RBRACK)
	}

	exprs := []Expr{x}
	if p.tok == COMMA {
		// multi-item list literal
		exprs = p.parseExprs(exprs, true) // allow trailing comma
	}

	rbrack := p.consume(RBRACK)
	return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
}

// dict = '{' '}'
//      | '{' dict_entry_list '}'
//      | '{' dict_entry FOR loop_variables IN expr '}'
func (p *parser) parseDict() Expr {
	lbrace := p.nextToken()
	if p.tok == RBRACE {
		// empty dict
		rbrace := p.nextToken()
		return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
	}

	x := p.parseDictEntry()
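	// x is the first entry; e.g. in {1: "a", 2: "b"} it is the DictEntry
	// for 1: "a", while in {k: v for k in ...} it is the comprehension body.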

	if p.tok == FOR {
		// dict comprehension
		return p.parseComprehensionSuffix(lbrace, x, RBRACE)
	}

	entries := []Expr{x}
	for p.tok == COMMA {
		p.nextToken()
		if p.tok == RBRACE {
			break
		}
		entries = append(entries, p.parseDictEntry())
	}

	rbrace := p.consume(RBRACE)
	return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
}

// dict_entry = test ':' test
func (p *parser) parseDictEntry() *DictEntry {
	k := p.parseTest()
	colon := p.consume(COLON)
	v := p.parseTest()
	return &DictEntry{Key: k, Colon: colon, Value: v}
}

// comp_suffix = FOR loopvars IN expr comp_suffix
//             | IF expr comp_suffix
//             | ']' or ')'  (end)
//
// There can be multiple FOR/IF clauses; the first is always a FOR.
func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
	var clauses []Node
	for p.tok != endBrace {
		if p.tok == FOR {
			pos := p.nextToken()
			vars := p.parseForLoopVariables()
			in := p.consume(IN)
			// Following Python 3, the operand of IN cannot be:
			// - a conditional expression ('x if y else z'),
			//   due to conflicts in Python grammar
			//   ('if' is used by the comprehension);
			// - a lambda expression
			// - an unparenthesized tuple.
			x := p.parseTestPrec(0)
			clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
		} else if p.tok == IF {
			pos := p.nextToken()
			cond := p.parseTestNoCond()
			clauses = append(clauses, &IfClause{If: pos, Cond: cond})
		} else {
			p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
		}
	}
	rbrace := p.nextToken()

	return &Comprehension{
		Curly:   endBrace == RBRACE,
		Lbrack:  lbrace,
		Body:    body,
		Clauses: clauses,
		Rbrack:  rbrace,
	}
}

func terminatesExprList(tok Token) bool {
	switch tok {
	case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
		return true
	}
	return false
}

// Comment assignment.
// We build two lists of all subnodes, preorder and postorder.
// The preorder list is ordered by start location, with outer nodes first.
// The postorder list is ordered by end location, with outer nodes last.
// We use the preorder list to assign each whole-line comment to the syntax
// immediately following it, and we use the postorder list to assign each
// end-of-line comment to the syntax immediately preceding it.

// flattenAST returns the list of AST nodes, both in prefix order and in postfix
// order.
func flattenAST(root Node) (pre, post []Node) {
	stack := []Node{}
	Walk(root, func(n Node) bool {
		if n != nil {
			pre = append(pre, n)
			stack = append(stack, n)
		} else {
			post = append(post, stack[len(stack)-1])
			stack = stack[:len(stack)-1]
		}
		return true
	})
	return pre, post
}

// assignComments attaches comments to nearby syntax.
func (p *parser) assignComments(n Node) {
	// Return early if there are no comments.
	if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
		return
	}

	pre, post := flattenAST(n)

	// Assign line comments to syntax immediately following.
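	// For illustration (assuming RetainComments was set when scanning):
	// a comment on its own line directly above "x = 1" ends up in that
	// AssignStmt's Comments().Before list.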
	line := p.in.lineComments
	for _, x := range pre {
		start, _ := x.Span()

		switch x.(type) {
		case *File:
			continue
		}

		for len(line) > 0 && !start.isBefore(line[0].Start) {
			x.AllocComments()
			x.Comments().Before = append(x.Comments().Before, line[0])
			line = line[1:]
		}
	}

	// Remaining line comments go at end of file.
	if len(line) > 0 {
		n.AllocComments()
		n.Comments().After = append(n.Comments().After, line...)
	}

	// Assign suffix comments to syntax immediately before.
	suffix := p.in.suffixComments
	for i := len(post) - 1; i >= 0; i-- {
		x := post[i]

		// Do not assign suffix comments to file
		switch x.(type) {
		case *File:
			continue
		}

		_, end := x.Span()
		if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
			x.AllocComments()
			x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
			suffix = suffix[:len(suffix)-1]
		}
	}
}