xref: /aosp_15_r20/external/starlark-go/syntax/parse.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1// Copyright 2017 The Bazel Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package syntax
6
7// This file defines a recursive-descent parser for Starlark.
8// The LL(1) grammar of Starlark and the names of many productions follow Python 2.7.
9//
10// TODO(adonovan): use syntax.Error more systematically throughout the
11// package.  Verify that error positions are correct using the
12// chunkedfile mechanism.
13
14import "log"
15
16// Enable this flag to print the token stream and log.Fatal on the first error.
17const debug = false
18
19// A Mode value is a set of flags (or 0) that controls optional parser functionality.
20type Mode uint
21
22const (
23	RetainComments Mode = 1 << iota // retain comments in AST; see Node.Comments
24)
25
26// Parse parses the input data and returns the corresponding parse tree.
27//
28// If src != nil, ParseFile parses the source from src and the filename
29// is only used when recording position information.
30// The type of the argument for the src parameter must be string,
31// []byte, io.Reader, or FilePortion.
32// If src == nil, ParseFile parses the file specified by filename.
33func Parse(filename string, src interface{}, mode Mode) (f *File, err error) {
34	in, err := newScanner(filename, src, mode&RetainComments != 0)
35	if err != nil {
36		return nil, err
37	}
38	p := parser{in: in}
39	defer p.in.recover(&err)
40
41	p.nextToken() // read first lookahead token
42	f = p.parseFile()
43	if f != nil {
44		f.Path = filename
45	}
46	p.assignComments(f)
47	return f, nil
48}
49
50// ParseCompoundStmt parses a single compound statement:
51// a blank line, a def, for, while, or if statement, or a
52// semicolon-separated list of simple statements followed
53// by a newline. These are the units on which the REPL operates.
54// ParseCompoundStmt does not consume any following input.
55// The parser calls the readline function each
56// time it needs a new line of input.
57func ParseCompoundStmt(filename string, readline func() ([]byte, error)) (f *File, err error) {
58	in, err := newScanner(filename, readline, false)
59	if err != nil {
60		return nil, err
61	}
62
63	p := parser{in: in}
64	defer p.in.recover(&err)
65
66	p.nextToken() // read first lookahead token
67
68	var stmts []Stmt
69	switch p.tok {
70	case DEF, IF, FOR, WHILE:
71		stmts = p.parseStmt(stmts)
72	case NEWLINE:
73		// blank line
74	default:
75		stmts = p.parseSimpleStmt(stmts, false)
76		// Require but don't consume newline, to avoid blocking again.
77		if p.tok != NEWLINE {
78			p.in.errorf(p.in.pos, "invalid syntax")
79		}
80	}
81
82	return &File{Path: filename, Stmts: stmts}, nil
83}
84
85// ParseExpr parses a Starlark expression.
86// A comma-separated list of expressions is parsed as a tuple.
87// See Parse for explanation of parameters.
88func ParseExpr(filename string, src interface{}, mode Mode) (expr Expr, err error) {
89	in, err := newScanner(filename, src, mode&RetainComments != 0)
90	if err != nil {
91		return nil, err
92	}
93	p := parser{in: in}
94	defer p.in.recover(&err)
95
96	p.nextToken() // read first lookahead token
97
98	// Use parseExpr, not parseTest, to permit an unparenthesized tuple.
99	expr = p.parseExpr(false)
100
101	// A following newline (e.g. "f()\n") appears outside any brackets,
102	// on a non-blank line, and thus results in a NEWLINE token.
103	if p.tok == NEWLINE {
104		p.nextToken()
105	}
106
107	if p.tok != EOF {
108		p.in.errorf(p.in.pos, "got %#v after expression, want EOF", p.tok)
109	}
110	p.assignComments(expr)
111	return expr, nil
112}
113
114type parser struct {
115	in     *scanner
116	tok    Token
117	tokval tokenValue
118}
119
120// nextToken advances the scanner and returns the position of the
121// previous token.
122func (p *parser) nextToken() Position {
123	oldpos := p.tokval.pos
124	p.tok = p.in.nextToken(&p.tokval)
125	// enable to see the token stream
126	if debug {
127		log.Printf("nextToken: %-20s%+v\n", p.tok, p.tokval.pos)
128	}
129	return oldpos
130}
131
132// file_input = (NEWLINE | stmt)* EOF
133func (p *parser) parseFile() *File {
134	var stmts []Stmt
135	for p.tok != EOF {
136		if p.tok == NEWLINE {
137			p.nextToken()
138			continue
139		}
140		stmts = p.parseStmt(stmts)
141	}
142	return &File{Stmts: stmts}
143}
144
145func (p *parser) parseStmt(stmts []Stmt) []Stmt {
146	if p.tok == DEF {
147		return append(stmts, p.parseDefStmt())
148	} else if p.tok == IF {
149		return append(stmts, p.parseIfStmt())
150	} else if p.tok == FOR {
151		return append(stmts, p.parseForStmt())
152	} else if p.tok == WHILE {
153		return append(stmts, p.parseWhileStmt())
154	}
155	return p.parseSimpleStmt(stmts, true)
156}
157
158func (p *parser) parseDefStmt() Stmt {
159	defpos := p.nextToken() // consume DEF
160	id := p.parseIdent()
161	p.consume(LPAREN)
162	params := p.parseParams()
163	p.consume(RPAREN)
164	p.consume(COLON)
165	body := p.parseSuite()
166	return &DefStmt{
167		Def:    defpos,
168		Name:   id,
169		Params: params,
170		Body:   body,
171	}
172}
173
174func (p *parser) parseIfStmt() Stmt {
175	ifpos := p.nextToken() // consume IF
176	cond := p.parseTest()
177	p.consume(COLON)
178	body := p.parseSuite()
179	ifStmt := &IfStmt{
180		If:   ifpos,
181		Cond: cond,
182		True: body,
183	}
184	tail := ifStmt
185	for p.tok == ELIF {
186		elifpos := p.nextToken() // consume ELIF
187		cond := p.parseTest()
188		p.consume(COLON)
189		body := p.parseSuite()
190		elif := &IfStmt{
191			If:   elifpos,
192			Cond: cond,
193			True: body,
194		}
195		tail.ElsePos = elifpos
196		tail.False = []Stmt{elif}
197		tail = elif
198	}
199	if p.tok == ELSE {
200		tail.ElsePos = p.nextToken() // consume ELSE
201		p.consume(COLON)
202		tail.False = p.parseSuite()
203	}
204	return ifStmt
205}
206
207func (p *parser) parseForStmt() Stmt {
208	forpos := p.nextToken() // consume FOR
209	vars := p.parseForLoopVariables()
210	p.consume(IN)
211	x := p.parseExpr(false)
212	p.consume(COLON)
213	body := p.parseSuite()
214	return &ForStmt{
215		For:  forpos,
216		Vars: vars,
217		X:    x,
218		Body: body,
219	}
220}
221
222func (p *parser) parseWhileStmt() Stmt {
223	whilepos := p.nextToken() // consume WHILE
224	cond := p.parseTest()
225	p.consume(COLON)
226	body := p.parseSuite()
227	return &WhileStmt{
228		While: whilepos,
229		Cond:  cond,
230		Body:  body,
231	}
232}
233
234// Equivalent to 'exprlist' production in Python grammar.
235//
236// loop_variables = primary_with_suffix (COMMA primary_with_suffix)* COMMA?
237func (p *parser) parseForLoopVariables() Expr {
238	// Avoid parseExpr because it would consume the IN token
239	// following x in "for x in y: ...".
240	v := p.parsePrimaryWithSuffix()
241	if p.tok != COMMA {
242		return v
243	}
244
245	list := []Expr{v}
246	for p.tok == COMMA {
247		p.nextToken()
248		if terminatesExprList(p.tok) {
249			break
250		}
251		list = append(list, p.parsePrimaryWithSuffix())
252	}
253	return &TupleExpr{List: list}
254}
255
256// simple_stmt = small_stmt (SEMI small_stmt)* SEMI? NEWLINE
257// In REPL mode, it does not consume the NEWLINE.
258func (p *parser) parseSimpleStmt(stmts []Stmt, consumeNL bool) []Stmt {
259	for {
260		stmts = append(stmts, p.parseSmallStmt())
261		if p.tok != SEMI {
262			break
263		}
264		p.nextToken() // consume SEMI
265		if p.tok == NEWLINE || p.tok == EOF {
266			break
267		}
268	}
269	// EOF without NEWLINE occurs in `if x: pass`, for example.
270	if p.tok != EOF && consumeNL {
271		p.consume(NEWLINE)
272	}
273
274	return stmts
275}
276
277// small_stmt = RETURN expr?
278//            | PASS | BREAK | CONTINUE
279//            | LOAD ...
280//            | expr ('=' | '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=') expr   // assign
281//            | expr
282func (p *parser) parseSmallStmt() Stmt {
283	switch p.tok {
284	case RETURN:
285		pos := p.nextToken() // consume RETURN
286		var result Expr
287		if p.tok != EOF && p.tok != NEWLINE && p.tok != SEMI {
288			result = p.parseExpr(false)
289		}
290		return &ReturnStmt{Return: pos, Result: result}
291
292	case BREAK, CONTINUE, PASS:
293		tok := p.tok
294		pos := p.nextToken() // consume it
295		return &BranchStmt{Token: tok, TokenPos: pos}
296
297	case LOAD:
298		return p.parseLoadStmt()
299	}
300
301	// Assignment
302	x := p.parseExpr(false)
303	switch p.tok {
304	case EQ, PLUS_EQ, MINUS_EQ, STAR_EQ, SLASH_EQ, SLASHSLASH_EQ, PERCENT_EQ, AMP_EQ, PIPE_EQ, CIRCUMFLEX_EQ, LTLT_EQ, GTGT_EQ:
305		op := p.tok
306		pos := p.nextToken() // consume op
307		rhs := p.parseExpr(false)
308		return &AssignStmt{OpPos: pos, Op: op, LHS: x, RHS: rhs}
309	}
310
311	// Expression statement (e.g. function call, doc string).
312	return &ExprStmt{X: x}
313}
314
315// stmt = LOAD '(' STRING {',' (IDENT '=')? STRING} [','] ')'
316func (p *parser) parseLoadStmt() *LoadStmt {
317	loadPos := p.nextToken() // consume LOAD
318	lparen := p.consume(LPAREN)
319
320	if p.tok != STRING {
321		p.in.errorf(p.in.pos, "first operand of load statement must be a string literal")
322	}
323	module := p.parsePrimary().(*Literal)
324
325	var from, to []*Ident
326	for p.tok != RPAREN && p.tok != EOF {
327		p.consume(COMMA)
328		if p.tok == RPAREN {
329			break // allow trailing comma
330		}
331		switch p.tok {
332		case STRING:
333			// load("module", "id")
334			// To name is same as original.
335			lit := p.parsePrimary().(*Literal)
336			id := &Ident{
337				NamePos: lit.TokenPos.add(`"`),
338				Name:    lit.Value.(string),
339			}
340			to = append(to, id)
341			from = append(from, id)
342
343		case IDENT:
344			// load("module", to="from")
345			id := p.parseIdent()
346			to = append(to, id)
347			if p.tok != EQ {
348				p.in.errorf(p.in.pos, `load operand must be "%[1]s" or %[1]s="originalname" (want '=' after %[1]s)`, id.Name)
349			}
350			p.consume(EQ)
351			if p.tok != STRING {
352				p.in.errorf(p.in.pos, `original name of loaded symbol must be quoted: %s="originalname"`, id.Name)
353			}
354			lit := p.parsePrimary().(*Literal)
355			from = append(from, &Ident{
356				NamePos: lit.TokenPos.add(`"`),
357				Name:    lit.Value.(string),
358			})
359
360		case RPAREN:
361			p.in.errorf(p.in.pos, "trailing comma in load statement")
362
363		default:
364			p.in.errorf(p.in.pos, `load operand must be "name" or localname="name" (got %#v)`, p.tok)
365		}
366	}
367	rparen := p.consume(RPAREN)
368
369	if len(to) == 0 {
370		p.in.errorf(lparen, "load statement must import at least 1 symbol")
371	}
372	return &LoadStmt{
373		Load:   loadPos,
374		Module: module,
375		To:     to,
376		From:   from,
377		Rparen: rparen,
378	}
379}
380
381// suite is typically what follows a COLON (e.g. after DEF or FOR).
382// suite = simple_stmt | NEWLINE INDENT stmt+ OUTDENT
383func (p *parser) parseSuite() []Stmt {
384	if p.tok == NEWLINE {
385		p.nextToken() // consume NEWLINE
386		p.consume(INDENT)
387		var stmts []Stmt
388		for p.tok != OUTDENT && p.tok != EOF {
389			stmts = p.parseStmt(stmts)
390		}
391		p.consume(OUTDENT)
392		return stmts
393	}
394
395	return p.parseSimpleStmt(nil, true)
396}
397
398func (p *parser) parseIdent() *Ident {
399	if p.tok != IDENT {
400		p.in.error(p.in.pos, "not an identifier")
401	}
402	id := &Ident{
403		NamePos: p.tokval.pos,
404		Name:    p.tokval.raw,
405	}
406	p.nextToken()
407	return id
408}
409
410func (p *parser) consume(t Token) Position {
411	if p.tok != t {
412		p.in.errorf(p.in.pos, "got %#v, want %#v", p.tok, t)
413	}
414	return p.nextToken()
415}
416
417// params = (param COMMA)* param COMMA?
418//        |
419//
420// param = IDENT
421//       | IDENT EQ test
422//       | STAR
423//       | STAR IDENT
424//       | STARSTAR IDENT
425//
426// parseParams parses a parameter list.  The resulting expressions are of the form:
427//
428//      *Ident                                          x
429//      *Binary{Op: EQ, X: *Ident, Y: Expr}             x=y
430//      *Unary{Op: STAR}                                *
431//      *Unary{Op: STAR, X: *Ident}                     *args
432//      *Unary{Op: STARSTAR, X: *Ident}                 **kwargs
433func (p *parser) parseParams() []Expr {
434	var params []Expr
435	for p.tok != RPAREN && p.tok != COLON && p.tok != EOF {
436		if len(params) > 0 {
437			p.consume(COMMA)
438		}
439		if p.tok == RPAREN {
440			break
441		}
442
443		// * or *args or **kwargs
444		if p.tok == STAR || p.tok == STARSTAR {
445			op := p.tok
446			pos := p.nextToken()
447			var x Expr
448			if op == STARSTAR || p.tok == IDENT {
449				x = p.parseIdent()
450			}
451			params = append(params, &UnaryExpr{
452				OpPos: pos,
453				Op:    op,
454				X:     x,
455			})
456			continue
457		}
458
459		// IDENT
460		// IDENT = test
461		id := p.parseIdent()
462		if p.tok == EQ { // default value
463			eq := p.nextToken()
464			dflt := p.parseTest()
465			params = append(params, &BinaryExpr{
466				X:     id,
467				OpPos: eq,
468				Op:    EQ,
469				Y:     dflt,
470			})
471			continue
472		}
473
474		params = append(params, id)
475	}
476	return params
477}
478
479// parseExpr parses an expression, possible consisting of a
480// comma-separated list of 'test' expressions.
481//
482// In many cases we must use parseTest to avoid ambiguity such as
483// f(x, y) vs. f((x, y)).
484func (p *parser) parseExpr(inParens bool) Expr {
485	x := p.parseTest()
486	if p.tok != COMMA {
487		return x
488	}
489
490	// tuple
491	exprs := p.parseExprs([]Expr{x}, inParens)
492	return &TupleExpr{List: exprs}
493}
494
495// parseExprs parses a comma-separated list of expressions, starting with the comma.
496// It is used to parse tuples and list elements.
497// expr_list = (',' expr)* ','?
498func (p *parser) parseExprs(exprs []Expr, allowTrailingComma bool) []Expr {
499	for p.tok == COMMA {
500		pos := p.nextToken()
501		if terminatesExprList(p.tok) {
502			if !allowTrailingComma {
503				p.in.error(pos, "unparenthesized tuple with trailing comma")
504			}
505			break
506		}
507		exprs = append(exprs, p.parseTest())
508	}
509	return exprs
510}
511
512// parseTest parses a 'test', a single-component expression.
513func (p *parser) parseTest() Expr {
514	if p.tok == LAMBDA {
515		return p.parseLambda(true)
516	}
517
518	x := p.parseTestPrec(0)
519
520	// conditional expression (t IF cond ELSE f)
521	if p.tok == IF {
522		ifpos := p.nextToken()
523		cond := p.parseTestPrec(0)
524		if p.tok != ELSE {
525			p.in.error(ifpos, "conditional expression without else clause")
526		}
527		elsepos := p.nextToken()
528		else_ := p.parseTest()
529		return &CondExpr{If: ifpos, Cond: cond, True: x, ElsePos: elsepos, False: else_}
530	}
531
532	return x
533}
534
535// parseTestNoCond parses a a single-component expression without
536// consuming a trailing 'if expr else expr'.
537func (p *parser) parseTestNoCond() Expr {
538	if p.tok == LAMBDA {
539		return p.parseLambda(false)
540	}
541	return p.parseTestPrec(0)
542}
543
544// parseLambda parses a lambda expression.
545// The allowCond flag allows the body to be an 'a if b else c' conditional.
546func (p *parser) parseLambda(allowCond bool) Expr {
547	lambda := p.nextToken()
548	var params []Expr
549	if p.tok != COLON {
550		params = p.parseParams()
551	}
552	p.consume(COLON)
553
554	var body Expr
555	if allowCond {
556		body = p.parseTest()
557	} else {
558		body = p.parseTestNoCond()
559	}
560
561	return &LambdaExpr{
562		Lambda: lambda,
563		Params: params,
564		Body:   body,
565	}
566}
567
568func (p *parser) parseTestPrec(prec int) Expr {
569	if prec >= len(preclevels) {
570		return p.parsePrimaryWithSuffix()
571	}
572
573	// expr = NOT expr
574	if p.tok == NOT && prec == int(precedence[NOT]) {
575		pos := p.nextToken()
576		x := p.parseTestPrec(prec)
577		return &UnaryExpr{
578			OpPos: pos,
579			Op:    NOT,
580			X:     x,
581		}
582	}
583
584	return p.parseBinopExpr(prec)
585}
586
587// expr = test (OP test)*
588// Uses precedence climbing; see http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm#climbing.
589func (p *parser) parseBinopExpr(prec int) Expr {
590	x := p.parseTestPrec(prec + 1)
591	for first := true; ; first = false {
592		if p.tok == NOT {
593			p.nextToken() // consume NOT
594			// In this context, NOT must be followed by IN.
595			// Replace NOT IN by a single NOT_IN token.
596			if p.tok != IN {
597				p.in.errorf(p.in.pos, "got %#v, want in", p.tok)
598			}
599			p.tok = NOT_IN
600		}
601
602		// Binary operator of specified precedence?
603		opprec := int(precedence[p.tok])
604		if opprec < prec {
605			return x
606		}
607
608		// Comparisons are non-associative.
609		if !first && opprec == int(precedence[EQL]) {
610			p.in.errorf(p.in.pos, "%s does not associate with %s (use parens)",
611				x.(*BinaryExpr).Op, p.tok)
612		}
613
614		op := p.tok
615		pos := p.nextToken()
616		y := p.parseTestPrec(opprec + 1)
617		x = &BinaryExpr{OpPos: pos, Op: op, X: x, Y: y}
618	}
619}
620
621// precedence maps each operator to its precedence (0-7), or -1 for other tokens.
622var precedence [maxToken]int8
623
624// preclevels groups operators of equal precedence.
625// Comparisons are nonassociative; other binary operators associate to the left.
626// Unary MINUS, unary PLUS, and TILDE have higher precedence so are handled in parsePrimary.
627// See https://github.com/google/starlark-go/blob/master/doc/spec.md#binary-operators
628var preclevels = [...][]Token{
629	{OR},                                   // or
630	{AND},                                  // and
631	{NOT},                                  // not (unary)
632	{EQL, NEQ, LT, GT, LE, GE, IN, NOT_IN}, // == != < > <= >= in not in
633	{PIPE},                                 // |
634	{CIRCUMFLEX},                           // ^
635	{AMP},                                  // &
636	{LTLT, GTGT},                           // << >>
637	{MINUS, PLUS},                          // -
638	{STAR, PERCENT, SLASH, SLASHSLASH},     // * % / //
639}
640
641func init() {
642	// populate precedence table
643	for i := range precedence {
644		precedence[i] = -1
645	}
646	for level, tokens := range preclevels {
647		for _, tok := range tokens {
648			precedence[tok] = int8(level)
649		}
650	}
651}
652
653// primary_with_suffix = primary
654//                     | primary '.' IDENT
655//                     | primary slice_suffix
656//                     | primary call_suffix
657func (p *parser) parsePrimaryWithSuffix() Expr {
658	x := p.parsePrimary()
659	for {
660		switch p.tok {
661		case DOT:
662			dot := p.nextToken()
663			id := p.parseIdent()
664			x = &DotExpr{Dot: dot, X: x, Name: id}
665		case LBRACK:
666			x = p.parseSliceSuffix(x)
667		case LPAREN:
668			x = p.parseCallSuffix(x)
669		default:
670			return x
671		}
672	}
673}
674
675// slice_suffix = '[' expr? ':' expr?  ':' expr? ']'
676func (p *parser) parseSliceSuffix(x Expr) Expr {
677	lbrack := p.nextToken()
678	var lo, hi, step Expr
679	if p.tok != COLON {
680		y := p.parseExpr(false)
681
682		// index x[y]
683		if p.tok == RBRACK {
684			rbrack := p.nextToken()
685			return &IndexExpr{X: x, Lbrack: lbrack, Y: y, Rbrack: rbrack}
686		}
687
688		lo = y
689	}
690
691	// slice or substring x[lo:hi:step]
692	if p.tok == COLON {
693		p.nextToken()
694		if p.tok != COLON && p.tok != RBRACK {
695			hi = p.parseTest()
696		}
697	}
698	if p.tok == COLON {
699		p.nextToken()
700		if p.tok != RBRACK {
701			step = p.parseTest()
702		}
703	}
704	rbrack := p.consume(RBRACK)
705	return &SliceExpr{X: x, Lbrack: lbrack, Lo: lo, Hi: hi, Step: step, Rbrack: rbrack}
706}
707
708// call_suffix = '(' arg_list? ')'
709func (p *parser) parseCallSuffix(fn Expr) Expr {
710	lparen := p.consume(LPAREN)
711	var rparen Position
712	var args []Expr
713	if p.tok == RPAREN {
714		rparen = p.nextToken()
715	} else {
716		args = p.parseArgs()
717		rparen = p.consume(RPAREN)
718	}
719	return &CallExpr{Fn: fn, Lparen: lparen, Args: args, Rparen: rparen}
720}
721
722// parseArgs parses a list of actual parameter values (arguments).
723// It mirrors the structure of parseParams.
724// arg_list = ((arg COMMA)* arg COMMA?)?
725func (p *parser) parseArgs() []Expr {
726	var args []Expr
727	for p.tok != RPAREN && p.tok != EOF {
728		if len(args) > 0 {
729			p.consume(COMMA)
730		}
731		if p.tok == RPAREN {
732			break
733		}
734
735		// *args or **kwargs
736		if p.tok == STAR || p.tok == STARSTAR {
737			op := p.tok
738			pos := p.nextToken()
739			x := p.parseTest()
740			args = append(args, &UnaryExpr{
741				OpPos: pos,
742				Op:    op,
743				X:     x,
744			})
745			continue
746		}
747
748		// We use a different strategy from Bazel here to stay within LL(1).
749		// Instead of looking ahead two tokens (IDENT, EQ) we parse
750		// 'test = test' then check that the first was an IDENT.
751		x := p.parseTest()
752
753		if p.tok == EQ {
754			// name = value
755			if _, ok := x.(*Ident); !ok {
756				p.in.errorf(p.in.pos, "keyword argument must have form name=expr")
757			}
758			eq := p.nextToken()
759			y := p.parseTest()
760			x = &BinaryExpr{
761				X:     x,
762				OpPos: eq,
763				Op:    EQ,
764				Y:     y,
765			}
766		}
767
768		args = append(args, x)
769	}
770	return args
771}
772
773//  primary = IDENT
774//          | INT | FLOAT | STRING | BYTES
775//          | '[' ...                    // list literal or comprehension
776//          | '{' ...                    // dict literal or comprehension
777//          | '(' ...                    // tuple or parenthesized expression
778//          | ('-'|'+'|'~') primary_with_suffix
779func (p *parser) parsePrimary() Expr {
780	switch p.tok {
781	case IDENT:
782		return p.parseIdent()
783
784	case INT, FLOAT, STRING, BYTES:
785		var val interface{}
786		tok := p.tok
787		switch tok {
788		case INT:
789			if p.tokval.bigInt != nil {
790				val = p.tokval.bigInt
791			} else {
792				val = p.tokval.int
793			}
794		case FLOAT:
795			val = p.tokval.float
796		case STRING, BYTES:
797			val = p.tokval.string
798		}
799		raw := p.tokval.raw
800		pos := p.nextToken()
801		return &Literal{Token: tok, TokenPos: pos, Raw: raw, Value: val}
802
803	case LBRACK:
804		return p.parseList()
805
806	case LBRACE:
807		return p.parseDict()
808
809	case LPAREN:
810		lparen := p.nextToken()
811		if p.tok == RPAREN {
812			// empty tuple
813			rparen := p.nextToken()
814			return &TupleExpr{Lparen: lparen, Rparen: rparen}
815		}
816		e := p.parseExpr(true) // allow trailing comma
817		rparen := p.consume(RPAREN)
818		return &ParenExpr{
819			Lparen: lparen,
820			X:      e,
821			Rparen: rparen,
822		}
823
824	case MINUS, PLUS, TILDE: // unary
825		tok := p.tok
826		pos := p.nextToken()
827		x := p.parsePrimaryWithSuffix()
828		return &UnaryExpr{
829			OpPos: pos,
830			Op:    tok,
831			X:     x,
832		}
833	}
834	p.in.errorf(p.in.pos, "got %#v, want primary expression", p.tok)
835	panic("unreachable")
836}
837
838// list = '[' ']'
839//      | '[' expr ']'
840//      | '[' expr expr_list ']'
841//      | '[' expr (FOR loop_variables IN expr)+ ']'
842func (p *parser) parseList() Expr {
843	lbrack := p.nextToken()
844	if p.tok == RBRACK {
845		// empty List
846		rbrack := p.nextToken()
847		return &ListExpr{Lbrack: lbrack, Rbrack: rbrack}
848	}
849
850	x := p.parseTest()
851
852	if p.tok == FOR {
853		// list comprehension
854		return p.parseComprehensionSuffix(lbrack, x, RBRACK)
855	}
856
857	exprs := []Expr{x}
858	if p.tok == COMMA {
859		// multi-item list literal
860		exprs = p.parseExprs(exprs, true) // allow trailing comma
861	}
862
863	rbrack := p.consume(RBRACK)
864	return &ListExpr{Lbrack: lbrack, List: exprs, Rbrack: rbrack}
865}
866
867// dict = '{' '}'
868//      | '{' dict_entry_list '}'
869//      | '{' dict_entry FOR loop_variables IN expr '}'
870func (p *parser) parseDict() Expr {
871	lbrace := p.nextToken()
872	if p.tok == RBRACE {
873		// empty dict
874		rbrace := p.nextToken()
875		return &DictExpr{Lbrace: lbrace, Rbrace: rbrace}
876	}
877
878	x := p.parseDictEntry()
879
880	if p.tok == FOR {
881		// dict comprehension
882		return p.parseComprehensionSuffix(lbrace, x, RBRACE)
883	}
884
885	entries := []Expr{x}
886	for p.tok == COMMA {
887		p.nextToken()
888		if p.tok == RBRACE {
889			break
890		}
891		entries = append(entries, p.parseDictEntry())
892	}
893
894	rbrace := p.consume(RBRACE)
895	return &DictExpr{Lbrace: lbrace, List: entries, Rbrace: rbrace}
896}
897
898// dict_entry = test ':' test
899func (p *parser) parseDictEntry() *DictEntry {
900	k := p.parseTest()
901	colon := p.consume(COLON)
902	v := p.parseTest()
903	return &DictEntry{Key: k, Colon: colon, Value: v}
904}
905
906// comp_suffix = FOR loopvars IN expr comp_suffix
907//             | IF expr comp_suffix
908//             | ']'  or  ')'                              (end)
909//
910// There can be multiple FOR/IF clauses; the first is always a FOR.
911func (p *parser) parseComprehensionSuffix(lbrace Position, body Expr, endBrace Token) Expr {
912	var clauses []Node
913	for p.tok != endBrace {
914		if p.tok == FOR {
915			pos := p.nextToken()
916			vars := p.parseForLoopVariables()
917			in := p.consume(IN)
918			// Following Python 3, the operand of IN cannot be:
919			// - a conditional expression ('x if y else z'),
920			//   due to conflicts in Python grammar
921			//  ('if' is used by the comprehension);
922			// - a lambda expression
923			// - an unparenthesized tuple.
924			x := p.parseTestPrec(0)
925			clauses = append(clauses, &ForClause{For: pos, Vars: vars, In: in, X: x})
926		} else if p.tok == IF {
927			pos := p.nextToken()
928			cond := p.parseTestNoCond()
929			clauses = append(clauses, &IfClause{If: pos, Cond: cond})
930		} else {
931			p.in.errorf(p.in.pos, "got %#v, want '%s', for, or if", p.tok, endBrace)
932		}
933	}
934	rbrace := p.nextToken()
935
936	return &Comprehension{
937		Curly:   endBrace == RBRACE,
938		Lbrack:  lbrace,
939		Body:    body,
940		Clauses: clauses,
941		Rbrack:  rbrace,
942	}
943}
944
945func terminatesExprList(tok Token) bool {
946	switch tok {
947	case EOF, NEWLINE, EQ, RBRACE, RBRACK, RPAREN, SEMI:
948		return true
949	}
950	return false
951}
952
953// Comment assignment.
954// We build two lists of all subnodes, preorder and postorder.
955// The preorder list is ordered by start location, with outer nodes first.
956// The postorder list is ordered by end location, with outer nodes last.
957// We use the preorder list to assign each whole-line comment to the syntax
958// immediately following it, and we use the postorder list to assign each
959// end-of-line comment to the syntax immediately preceding it.
960
961// flattenAST returns the list of AST nodes, both in prefix order and in postfix
962// order.
963func flattenAST(root Node) (pre, post []Node) {
964	stack := []Node{}
965	Walk(root, func(n Node) bool {
966		if n != nil {
967			pre = append(pre, n)
968			stack = append(stack, n)
969		} else {
970			post = append(post, stack[len(stack)-1])
971			stack = stack[:len(stack)-1]
972		}
973		return true
974	})
975	return pre, post
976}
977
978// assignComments attaches comments to nearby syntax.
979func (p *parser) assignComments(n Node) {
980	// Leave early if there are no comments
981	if len(p.in.lineComments)+len(p.in.suffixComments) == 0 {
982		return
983	}
984
985	pre, post := flattenAST(n)
986
987	// Assign line comments to syntax immediately following.
988	line := p.in.lineComments
989	for _, x := range pre {
990		start, _ := x.Span()
991
992		switch x.(type) {
993		case *File:
994			continue
995		}
996
997		for len(line) > 0 && !start.isBefore(line[0].Start) {
998			x.AllocComments()
999			x.Comments().Before = append(x.Comments().Before, line[0])
1000			line = line[1:]
1001		}
1002	}
1003
1004	// Remaining line comments go at end of file.
1005	if len(line) > 0 {
1006		n.AllocComments()
1007		n.Comments().After = append(n.Comments().After, line...)
1008	}
1009
1010	// Assign suffix comments to syntax immediately before.
1011	suffix := p.in.suffixComments
1012	for i := len(post) - 1; i >= 0; i-- {
1013		x := post[i]
1014
1015		// Do not assign suffix comments to file
1016		switch x.(type) {
1017		case *File:
1018			continue
1019		}
1020
1021		_, end := x.Span()
1022		if len(suffix) > 0 && end.isBefore(suffix[len(suffix)-1].Start) {
1023			x.AllocComments()
1024			x.Comments().Suffix = append(x.Comments().Suffix, suffix[len(suffix)-1])
1025			suffix = suffix[:len(suffix)-1]
1026		}
1027	}
1028}
1029