1# Copyright 2019 Google LLC 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# https://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15"""module_ir contains code for generating module-level IRs from parse trees. 16 17The primary export is build_ir(), which takes a parse tree (as returned by a 18parser from lr1.py), and returns a module-level intermediate representation 19("module IR"). 20 21This module also notably exports PRODUCTIONS and START_SYMBOL, which should be 22fed to lr1.Grammar in order to create a parser for the Emboss language. 23""" 24 25import re 26import sys 27 28from compiler.util import ir_data 29from compiler.util import ir_data_utils 30from compiler.util import name_conversion 31from compiler.util import parser_types 32 33 34# Intermediate types; should not be found in the final IR. 35class _List(object): 36 """A list with source location information.""" 37 __slots__ = ('list', 'source_location') 38 39 def __init__(self, l): 40 assert isinstance(l, list), "_List object must wrap list, not '%r'" % l 41 self.list = l 42 self.source_location = ir_data.Location() 43 44 45class _ExpressionTail(object): 46 """A fragment of an expression with an operator and right-hand side. 47 48 _ExpressionTail is the tail of an expression, consisting of an operator and 49 the right-hand argument to the operator; for example, in the expression (6+8), 50 the _ExpressionTail would be "+8". 51 52 This is used as a temporary object while converting the right-recursive 53 "expression" and "times-expression" productions into left-associative 54 Expressions. 55 56 Attributes: 57 operator: An ir_data.Word of the operator's name. 58 expression: The expression on the right side of the operator. 59 source_location: The source location of the operation fragment. 60 """ 61 __slots__ = ('operator', 'expression', 'source_location') 62 63 def __init__(self, operator, expression): 64 self.operator = operator 65 self.expression = expression 66 self.source_location = ir_data.Location() 67 68 69class _FieldWithType(object): 70 """A field with zero or more types defined inline with that field.""" 71 __slots__ = ('field', 'subtypes', 'source_location') 72 73 def __init__(self, field, subtypes=None): 74 self.field = field 75 self.subtypes = subtypes or [] 76 self.source_location = ir_data.Location() 77 78 79def build_ir(parse_tree, used_productions=None): 80 r"""Builds a module-level intermediate representation from a valid parse tree. 81 82 The parse tree is precisely dictated by the exact productions in the grammar 83 used by the parser, with no semantic information. _really_build_ir transforms 84 this "raw" form into a stable, cooked representation, thereby isolating 85 subsequent steps from the exact details of the grammar. 86 87 (Probably incomplete) list of transformations: 88 89 * ParseResult and Token nodes are replaced with Module, Attribute, Struct, 90 Type, etc. objects. 91 92 * Purely syntactic tokens ('"["', '"struct"', etc.) are discarded. 93 94 * Repeated elements are transformed from tree form to list form: 95 96 a* 97 / \ 98 b a* 99 / \ 100 c a* 101 / \ 102 d a* 103 104 (where b, c, and d are nodes of type "a") becomes [b, c, d]. 105 106 * The values of numeric constants (Number, etc. tokens) are parsed. 107 108 * Different classes of names (snake_names, CamelNames, ShoutyNames) are 109 folded into a single "Name" type, since they are guaranteed to appear in 110 the correct places in the parse tree. 111 112 113 Arguments: 114 parse_tree: A parse tree. Each leaf node should be a parser_types.Token 115 object, and each non-leaf node should have a 'symbol' attribute specifying 116 which grammar symbol it represents, and a 'children' attribute containing 117 a list of child nodes. This is the format returned by the parsers 118 produced by the lr1 module, when run against tokens from the tokenizer 119 module. 120 used_productions: If specified, used_productions.add() will be called with 121 each production actually used in parsing. This can be useful when 122 developing the grammar and writing tests; in particular, it can be used to 123 figure out which productions are *not* used when parsing a particular 124 file. 125 126 Returns: 127 A module-level intermediate representation (module IR) for an Emboss module 128 (source file). This IR will not have symbols resolved; that must be done on 129 a forest of module IRs so that names from other modules can be resolved. 130 """ 131 132 # TODO(b/140259131): Refactor _really_build_ir to be less recursive/use an 133 # explicit stack. 134 old_recursion_limit = sys.getrecursionlimit() 135 sys.setrecursionlimit(16 * 1024) # ~8000 top-level entities in one module. 136 try: 137 result = _really_build_ir(parse_tree, used_productions) 138 finally: 139 sys.setrecursionlimit(old_recursion_limit) 140 return result 141 142 143def _really_build_ir(parse_tree, used_productions): 144 """Real implementation of build_ir().""" 145 if used_productions is None: 146 used_productions = set() 147 if hasattr(parse_tree, 'children'): 148 parsed_children = [_really_build_ir(child, used_productions) 149 for child in parse_tree.children] 150 used_productions.add(parse_tree.production) 151 result = _handlers[parse_tree.production](*parsed_children) 152 if parse_tree.source_location is not None: 153 if result.source_location: 154 ir_data_utils.update(result.source_location, parse_tree.source_location) 155 else: 156 result.source_location = ir_data_utils.copy(parse_tree.source_location) 157 return result 158 else: 159 # For leaf nodes, the temporary "IR" is just the token. Higher-level rules 160 # will translate it to a real IR. 161 assert isinstance(parse_tree, parser_types.Token), str(parse_tree) 162 return parse_tree 163 164# Map of productions to their handlers. 165_handlers = {} 166 167_anonymous_name_counter = 0 168 169 170def _get_anonymous_field_name(): 171 global _anonymous_name_counter 172 _anonymous_name_counter += 1 173 return 'emboss_reserved_anonymous_field_{}'.format(_anonymous_name_counter) 174 175 176def _handles(production_text): 177 """_handles marks a function as the handler for a particular production.""" 178 production = parser_types.Production.parse(production_text) 179 180 def handles(f): 181 _handlers[production] = f 182 return f 183 184 return handles 185 186 187def _make_prelude_import(position): 188 """Helper function to construct a synthetic ir_data.Import for the prelude.""" 189 location = parser_types.make_location(position, position) 190 return ir_data.Import( 191 file_name=ir_data.String(text='', source_location=location), 192 local_name=ir_data.Word(text='', source_location=location), 193 source_location=location) 194 195 196def _text_to_operator(text): 197 """Converts an operator's textual name to its corresponding enum.""" 198 operations = { 199 '+': ir_data.FunctionMapping.ADDITION, 200 '-': ir_data.FunctionMapping.SUBTRACTION, 201 '*': ir_data.FunctionMapping.MULTIPLICATION, 202 '==': ir_data.FunctionMapping.EQUALITY, 203 '!=': ir_data.FunctionMapping.INEQUALITY, 204 '&&': ir_data.FunctionMapping.AND, 205 '||': ir_data.FunctionMapping.OR, 206 '>': ir_data.FunctionMapping.GREATER, 207 '>=': ir_data.FunctionMapping.GREATER_OR_EQUAL, 208 '<': ir_data.FunctionMapping.LESS, 209 '<=': ir_data.FunctionMapping.LESS_OR_EQUAL, 210 } 211 return operations[text] 212 213 214def _text_to_function(text): 215 """Converts a function's textual name to its corresponding enum.""" 216 functions = { 217 '$max': ir_data.FunctionMapping.MAXIMUM, 218 '$present': ir_data.FunctionMapping.PRESENCE, 219 '$upper_bound': ir_data.FunctionMapping.UPPER_BOUND, 220 '$lower_bound': ir_data.FunctionMapping.LOWER_BOUND, 221 } 222 return functions[text] 223 224 225################################################################################ 226# Grammar & parse tree to IR translation. 227# 228# From here to (almost) the end of the file are functions which recursively 229# build an IR. The @_handles annotations indicate the exact grammar 230# production(s) handled by each function. The handler function should take 231# exactly one argument for each symbol in the production's RHS. 232# 233# The actual Emboss grammar is extracted directly from the @_handles 234# annotations, so this is also the grammar definition. For convenience, the 235# grammar can be viewed separately in g3doc/grammar.md. 236# 237# At the end, symbols whose names end in "*", "+", or "?" are extracted from the 238# grammar, and appropriate productions are added for zero-or-more, one-or-more, 239# or zero-or-one lists, respectively. (This is analogous to the *, +, and ? 240# operators in regex.) It is necessary for this to happen here (and not in 241# lr1.py) because the generated productions must be associated with 242# IR-generation functions. 243 244 245# A module file is a list of documentation, then imports, then top-level 246# attributes, then type definitions. Any section may be missing. 247# TODO(bolms): Should Emboss disallow completely empty files? 248@_handles('module -> comment-line* doc-line* import-line* attribute-line*' 249 ' type-definition*') 250def _file(leading_newlines, docs, imports, attributes, type_definitions): 251 """Assembles the top-level IR for a module.""" 252 del leading_newlines # Unused. 253 # Figure out the best synthetic source_location for the synthesized prelude 254 # import. 255 if imports.list: 256 position = imports.list[0].source_location.start 257 elif docs.list: 258 position = docs.list[0].source_location.end 259 elif attributes.list: 260 position = attributes.list[0].source_location.start 261 elif type_definitions.list: 262 position = type_definitions.list[0].source_location.start 263 else: 264 position = 1, 1 265 266 # If the source file is completely empty, build_ir won't automatically 267 # populate the source_location attribute for the module. 268 if (not docs.list and not imports.list and not attributes.list and 269 not type_definitions.list): 270 module_source_location = parser_types.make_location((1, 1), (1, 1)) 271 else: 272 module_source_location = None 273 274 return ir_data.Module( 275 documentation=docs.list, 276 foreign_import=[_make_prelude_import(position)] + imports.list, 277 attribute=attributes.list, 278 type=type_definitions.list, 279 source_location=module_source_location) 280 281 282@_handles('import-line ->' 283 ' "import" string-constant "as" snake-word Comment? eol') 284def _import(import_, file_name, as_, local_name, comment, eol): 285 del import_, as_, comment, eol # Unused 286 return ir_data.Import(file_name=file_name, local_name=local_name) 287 288 289@_handles('doc-line -> doc Comment? eol') 290def _doc_line(doc, comment, eol): 291 del comment, eol # Unused. 292 return doc 293 294 295@_handles('doc -> Documentation') 296def _doc(documentation): 297 # As a special case, an empty documentation string may omit the trailing 298 # space. 299 if documentation.text == '--': 300 doc_text = '-- ' 301 else: 302 doc_text = documentation.text 303 assert doc_text[0:3] == '-- ', ( 304 "Documentation token '{}' in unknown format.".format( 305 documentation.text)) 306 return ir_data.Documentation(text=doc_text[3:]) 307 308 309# A attribute-line is just a attribute on its own line. 310@_handles('attribute-line -> attribute Comment? eol') 311def _attribute_line(attr, comment, eol): 312 del comment, eol # Unused. 313 return attr 314 315 316# A attribute is [name = value]. 317@_handles('attribute -> "[" attribute-context? "$default"?' 318 ' snake-word ":" attribute-value "]"') 319def _attribute(open_bracket, context_specifier, default_specifier, name, colon, 320 attribute_value, close_bracket): 321 del open_bracket, colon, close_bracket # Unused. 322 if context_specifier.list: 323 return ir_data.Attribute(name=name, 324 value=attribute_value, 325 is_default=bool(default_specifier.list), 326 back_end=context_specifier.list[0]) 327 else: 328 return ir_data.Attribute(name=name, 329 value=attribute_value, 330 is_default=bool(default_specifier.list)) 331 332 333@_handles('attribute-context -> "(" snake-word ")"') 334def _attribute_context(open_paren, context_name, close_paren): 335 del open_paren, close_paren # Unused. 336 return context_name 337 338 339@_handles('attribute-value -> expression') 340def _attribute_value_expression(expression): 341 return ir_data.AttributeValue(expression=expression) 342 343 344@_handles('attribute-value -> string-constant') 345def _attribute_value_string(string): 346 return ir_data.AttributeValue(string_constant=string) 347 348 349@_handles('boolean-constant -> BooleanConstant') 350def _boolean_constant(boolean): 351 return ir_data.BooleanConstant(value=(boolean.text == 'true')) 352 353 354@_handles('string-constant -> String') 355def _string_constant(string): 356 """Turns a String token into an ir_data.String, with proper unescaping. 357 358 Arguments: 359 string: A String token. 360 361 Returns: 362 An ir_data.String with the "text" field set to the unescaped value of 363 string.text. 364 """ 365 # TODO(bolms): If/when this logic becomes more complex (e.g., to handle \NNN 366 # or \xNN escapes), extract this into a separate module with separate tests. 367 assert string.text[0] == '"' 368 assert string.text[-1] == '"' 369 assert len(string.text) >= 2 370 result = [] 371 for substring in re.split(r'(\\.)', string.text[1:-1]): 372 if substring and substring[0] == '\\': 373 assert len(substring) == 2 374 result.append({'\\': '\\', '"': '"', 'n': '\n'}[substring[1]]) 375 else: 376 result.append(substring) 377 return ir_data.String(text=''.join(result)) 378 379 380# In Emboss, '&&' and '||' may not be mixed without parentheses. These are all 381# fine: 382# 383# x && y && z 384# x || y || z 385# (x || y) && z 386# x || (y && z) 387# 388# These are syntax errors: 389# 390# x || y && z 391# x && y || z 392# 393# This is accomplished by making && and || separate-but-equal in the precedence 394# hierarchy. Instead of the more traditional: 395# 396# logical-expression -> or-expression 397# or-expression -> and-expression or-expression-right* 398# or-expression-right -> '||' and-expression 399# and-expression -> equality-expression and-expression-right* 400# and-expression-right -> '&&' equality-expression 401# 402# Or, using yacc-style precedence specifiers: 403# 404# %left "||" 405# %left "&&" 406# expression -> expression 407# | expression '||' expression 408# | expression '&&' expression 409# 410# Emboss uses a slightly more complex grammar, in which '&&' and '||' are 411# parallel, but unmixable: 412# 413# logical-expression -> and-expression 414# | or-expression 415# | equality-expression 416# or-expression -> equality-expression or-expression-right+ 417# or-expression-right -> '||' equality-expression 418# and-expression -> equality-expression and-expression-right+ 419# and-expression-right -> '&&' equality-expression 420# 421# In either case, explicit parenthesization is handled elsewhere in the grammar. 422@_handles('logical-expression -> and-expression') 423@_handles('logical-expression -> or-expression') 424@_handles('logical-expression -> comparison-expression') 425@_handles('choice-expression -> logical-expression') 426@_handles('expression -> choice-expression') 427def _expression(expression): 428 return expression 429 430 431# The `logical-expression`s here means that ?: can't be chained without 432# parentheses. `x < 0 ? -1 : (x == 0 ? 0 : 1)` is OK, but `x < 0 ? -1 : x == 0 433# ? 0 : 1` is not. Parentheses are also needed in the middle: `x <= 0 ? x < 0 ? 434# -1 : 0 : 1` is not syntactically valid. 435@_handles('choice-expression -> logical-expression "?" logical-expression' 436 ' ":" logical-expression') 437def _choice_expression(condition, question, if_true, colon, if_false): 438 location = parser_types.make_location( 439 condition.source_location.start, if_false.source_location.end) 440 operator_location = parser_types.make_location( 441 question.source_location.start, colon.source_location.end) 442 # The function_name is a bit weird, but should suffice for any error messages 443 # that might need it. 444 return ir_data.Expression( 445 function=ir_data.Function(function=ir_data.FunctionMapping.CHOICE, 446 args=[condition, if_true, if_false], 447 function_name=ir_data.Word( 448 text='?:', 449 source_location=operator_location), 450 source_location=location)) 451 452 453@_handles('comparison-expression -> additive-expression') 454def _no_op_comparative_expression(expression): 455 return expression 456 457 458@_handles('comparison-expression ->' 459 ' additive-expression inequality-operator additive-expression') 460def _comparative_expression(left, operator, right): 461 location = parser_types.make_location( 462 left.source_location.start, right.source_location.end) 463 return ir_data.Expression( 464 function=ir_data.Function(function=_text_to_operator(operator.text), 465 args=[left, right], 466 function_name=operator, 467 source_location=location)) 468 469 470@_handles('additive-expression -> times-expression additive-expression-right*') 471@_handles('times-expression -> negation-expression times-expression-right*') 472@_handles('and-expression -> comparison-expression and-expression-right+') 473@_handles('or-expression -> comparison-expression or-expression-right+') 474def _binary_operator_expression(expression, expression_right): 475 """Builds the IR for a chain of equal-precedence left-associative operations. 476 477 _binary_operator_expression transforms a right-recursive list of expression 478 tails into a left-associative Expression tree. For example, given the 479 arguments: 480 481 6, (Tail("+", 7), Tail("-", 8), Tail("+", 10)) 482 483 _expression produces a structure like: 484 485 Expression(Expression(Expression(6, "+", 7), "-", 8), "+", 10) 486 487 This transformation is necessary because strict LR(1) grammars do not allow 488 left recursion. 489 490 Note that this method is used for several productions; each of those 491 productions handles a different precedence level, but are identical in form. 492 493 Arguments: 494 expression: An ir_data.Expression which is the head of the (expr, operator, 495 expr, operator, expr, ...) list. 496 expression_right: A list of _ExpressionTails corresponding to the (operator, 497 expr, operator, expr, ...) list that comes after expression. 498 499 Returns: 500 An ir_data.Expression with the correct recursive structure to represent a 501 list of left-associative operations. 502 """ 503 e = expression 504 for right in expression_right.list: 505 location = parser_types.make_location( 506 e.source_location.start, right.source_location.end) 507 e = ir_data.Expression( 508 function=ir_data.Function( 509 function=_text_to_operator(right.operator.text), 510 args=[e, right.expression], 511 function_name=right.operator, 512 source_location=location), 513 source_location=location) 514 return e 515 516 517@_handles('comparison-expression ->' 518 ' additive-expression equality-expression-right+') 519@_handles('comparison-expression ->' 520 ' additive-expression less-expression-right-list') 521@_handles('comparison-expression ->' 522 ' additive-expression greater-expression-right-list') 523def _chained_comparison_expression(expression, expression_right): 524 """Builds the IR for a chain of comparisons, like a == b == c. 525 526 Like _binary_operator_expression, _chained_comparison_expression transforms a 527 right-recursive list of expression tails into a left-associative Expression 528 tree. Unlike _binary_operator_expression, extra AND nodes are added. For 529 example, the following expression: 530 531 0 <= b <= 64 532 533 must be translated to the conceptually-equivalent expression: 534 535 0 <= b && b <= 64 536 537 (The middle subexpression is duplicated -- this would be a problem in a 538 programming language like C where expressions like `x++` have side effects, 539 but side effects do not make sense in a data definition language like Emboss.) 540 541 _chained_comparison_expression receives a left-hand head expression and a list 542 of tails, like: 543 544 6, (Tail("<=", b), Tail("<=", 64)) 545 546 which it translates to a structure like: 547 548 Expression(Expression(6, "<=", b), "&&", Expression(b, "<=", 64)) 549 550 The Emboss grammar is constructed such that sequences of "<", "<=", and "==" 551 comparisons may be chained, and sequences of ">", ">=", and "==" can be 552 chained, but greater and less-than comparisons may not; e.g., "b < 64 > a" is 553 not allowed. 554 555 Arguments: 556 expression: An ir_data.Expression which is the head of the (expr, operator, 557 expr, operator, expr, ...) list. 558 expression_right: A list of _ExpressionTails corresponding to the (operator, 559 expr, operator, expr, ...) list that comes after expression. 560 561 Returns: 562 An ir_data.Expression with the correct recursive structure to represent a 563 chain of left-associative comparison operations. 564 """ 565 sequence = [expression] 566 for right in expression_right.list: 567 sequence.append(right.operator) 568 sequence.append(right.expression) 569 comparisons = [] 570 for i in range(0, len(sequence) - 1, 2): 571 left, operator, right = sequence[i:i+3] 572 location = parser_types.make_location( 573 left.source_location.start, right.source_location.end) 574 comparisons.append(ir_data.Expression( 575 function=ir_data.Function( 576 function=_text_to_operator(operator.text), 577 args=[left, right], 578 function_name=operator, 579 source_location=location), 580 source_location=location)) 581 e = comparisons[0] 582 for comparison in comparisons[1:]: 583 location = parser_types.make_location( 584 e.source_location.start, comparison.source_location.end) 585 e = ir_data.Expression( 586 function=ir_data.Function( 587 function=ir_data.FunctionMapping.AND, 588 args=[e, comparison], 589 function_name=ir_data.Word( 590 text='&&', 591 source_location=comparison.function.args[0].source_location), 592 source_location=location), 593 source_location=location) 594 return e 595 596 597# _chained_comparison_expression, above, handles three types of chains: `a == b 598# == c`, `a < b <= c`, and `a > b >= c`. 599# 600# This requires a bit of subtlety in the productions for 601# `x-expression-right-list`, because the `==` operator may be freely mixed into 602# greater-than or less-than chains, like `a < b == c <= d` or `a > b == c >= d`, 603# but greater-than and less-than may not be mixed; i.e., `a < b >= c` is 604# disallowed. 605# 606# In order to keep the grammar unambiguous -- that is, in order to ensure that 607# every valid input can only be parsed in exactly one way -- the languages 608# defined by `equality-expression-right*`, `greater-expression-right-list`, and 609# `less-expression-right-list` cannot overlap. 610# 611# `equality-expression-right*`, by definition, only contains `== n` elements. 612# By forcing `greater-expression-right-list` to contain at least one 613# `greater-expression-right`, we can ensure that a chain like `== n == m` cannot 614# be parsed as a `greater-expression-right-list`. Similar logic applies in the 615# less-than case. 616# 617# There is another potential source of ambiguity here: if 618# `greater-expression-right-list` were 619# 620# greater-expression-right-list -> 621# equality-or-greater-expression-right* greater-expression-right 622# equality-or-greater-expression-right* 623# 624# then a sequence like '> b > c > d' could be parsed as any of: 625# 626# () (> b) ((> c) (> d)) 627# ((> b)) (> c) ((> d)) 628# ((> b) (> c)) (> d) () 629# 630# By using `equality-expression-right*` for the first symbol, only the first 631# parse is possible. 632@_handles('greater-expression-right-list ->' 633 ' equality-expression-right* greater-expression-right' 634 ' equality-or-greater-expression-right*') 635@_handles('less-expression-right-list ->' 636 ' equality-expression-right* less-expression-right' 637 ' equality-or-less-expression-right*') 638def _chained_comparison_tails(start, middle, end): 639 return _List(start.list + [middle] + end.list) 640 641 642@_handles('equality-or-greater-expression-right -> equality-expression-right') 643@_handles('equality-or-greater-expression-right -> greater-expression-right') 644@_handles('equality-or-less-expression-right -> equality-expression-right') 645@_handles('equality-or-less-expression-right -> less-expression-right') 646def _equality_or_less_or_greater(right): 647 return right 648 649 650@_handles('and-expression-right -> and-operator comparison-expression') 651@_handles('or-expression-right -> or-operator comparison-expression') 652@_handles('additive-expression-right -> additive-operator times-expression') 653@_handles('equality-expression-right -> equality-operator additive-expression') 654@_handles('greater-expression-right -> greater-operator additive-expression') 655@_handles('less-expression-right -> less-operator additive-expression') 656@_handles('times-expression-right ->' 657 ' multiplicative-operator negation-expression') 658def _expression_right_production(operator, expression): 659 return _ExpressionTail(operator, expression) 660 661 662# This supports a single layer of unary plus/minus, so "+5" and "-value" are 663# allowed, but "+-5" or "-+-something" are not. 664@_handles('negation-expression -> additive-operator bottom-expression') 665def _negation_expression_with_operator(operator, expression): 666 phantom_zero_location = ir_data.Location(start=operator.source_location.start, 667 end=operator.source_location.start) 668 return ir_data.Expression( 669 function=ir_data.Function( 670 function=_text_to_operator(operator.text), 671 args=[ir_data.Expression( 672 constant=ir_data.NumericConstant( 673 value='0', 674 source_location=phantom_zero_location), 675 source_location=phantom_zero_location), expression], 676 function_name=operator, 677 source_location=ir_data.Location( 678 start=operator.source_location.start, 679 end=expression.source_location.end))) 680 681 682@_handles('negation-expression -> bottom-expression') 683def _negation_expression(expression): 684 return expression 685 686 687@_handles('bottom-expression -> "(" expression ")"') 688def _bottom_expression_parentheses(open_paren, expression, close_paren): 689 del open_paren, close_paren # Unused. 690 return expression 691 692 693@_handles('bottom-expression -> function-name "(" argument-list ")"') 694def _bottom_expression_function(function, open_paren, arguments, close_paren): 695 del open_paren # Unused. 696 return ir_data.Expression( 697 function=ir_data.Function( 698 function=_text_to_function(function.text), 699 args=arguments.list, 700 function_name=function, 701 source_location=ir_data.Location( 702 start=function.source_location.start, 703 end=close_paren.source_location.end))) 704 705 706@_handles('comma-then-expression -> "," expression') 707def _comma_then_expression(comma, expression): 708 del comma # Unused. 709 return expression 710 711 712@_handles('argument-list -> expression comma-then-expression*') 713def _argument_list(head, tail): 714 tail.list.insert(0, head) 715 return tail 716 717 718@_handles('argument-list ->') 719def _empty_argument_list(): 720 return _List([]) 721 722 723@_handles('bottom-expression -> numeric-constant') 724def _bottom_expression_from_numeric_constant(constant): 725 return ir_data.Expression(constant=constant) 726 727 728@_handles('bottom-expression -> constant-reference') 729def _bottom_expression_from_constant_reference(reference): 730 return ir_data.Expression(constant_reference=reference) 731 732 733@_handles('bottom-expression -> builtin-reference') 734def _bottom_expression_from_builtin(reference): 735 return ir_data.Expression(builtin_reference=reference) 736 737 738@_handles('bottom-expression -> boolean-constant') 739def _bottom_expression_from_boolean_constant(boolean): 740 return ir_data.Expression(boolean_constant=boolean) 741 742 743@_handles('bottom-expression -> field-reference') 744def _bottom_expression_from_reference(reference): 745 return reference 746 747 748@_handles('field-reference -> snake-reference field-reference-tail*') 749def _indirect_field_reference(field_reference, field_references): 750 if field_references.source_location.HasField('end'): 751 end_location = field_references.source_location.end 752 else: 753 end_location = field_reference.source_location.end 754 return ir_data.Expression(field_reference=ir_data.FieldReference( 755 path=[field_reference] + field_references.list, 756 source_location=parser_types.make_location( 757 field_reference.source_location.start, end_location))) 758 759 760# If "Type.field" ever becomes syntactically valid, it will be necessary to 761# check that enum values are compile-time constants. 762@_handles('field-reference-tail -> "." snake-reference') 763def _field_reference_tail(dot, reference): 764 del dot # Unused. 765 return reference 766 767 768@_handles('numeric-constant -> Number') 769def _numeric_constant(number): 770 # All types of numeric constant tokenize to the same symbol, because they are 771 # interchangeable in source code. 772 if number.text[0:2] == '0b': 773 n = int(number.text.replace('_', '')[2:], 2) 774 elif number.text[0:2] == '0x': 775 n = int(number.text.replace('_', '')[2:], 16) 776 else: 777 n = int(number.text.replace('_', ''), 10) 778 return ir_data.NumericConstant(value=str(n)) 779 780 781@_handles('type-definition -> struct') 782@_handles('type-definition -> bits') 783@_handles('type-definition -> enum') 784@_handles('type-definition -> external') 785def _type_definition(type_definition): 786 return type_definition 787 788 789# struct StructureName: 790# ... fields ... 791# bits BitName: 792# ... fields ... 793@_handles('struct -> "struct" type-name delimited-parameter-definition-list?' 794 ' ":" Comment? eol struct-body') 795@_handles('bits -> "bits" type-name delimited-parameter-definition-list? ":"' 796 ' Comment? eol bits-body') 797def _structure(struct, name, parameters, colon, comment, newline, struct_body): 798 """Composes the top-level IR for an Emboss structure.""" 799 del colon, comment, newline # Unused. 800 ir_data_utils.builder(struct_body.structure).source_location.start.CopyFrom( 801 struct.source_location.start) 802 ir_data_utils.builder(struct_body.structure).source_location.end.CopyFrom( 803 struct_body.source_location.end) 804 if struct_body.name: 805 ir_data_utils.update(struct_body.name, name) 806 else: 807 struct_body.name = ir_data_utils.copy(name) 808 if parameters.list: 809 struct_body.runtime_parameter.extend(parameters.list[0].list) 810 return struct_body 811 812 813@_handles('delimited-parameter-definition-list ->' 814 ' "(" parameter-definition-list ")"') 815def _delimited_parameter_definition_list(open_paren, parameters, close_paren): 816 del open_paren, close_paren # Unused 817 return parameters 818 819 820@_handles('parameter-definition -> snake-name ":" type') 821def _parameter_definition(name, double_colon, parameter_type): 822 del double_colon # Unused 823 return ir_data.RuntimeParameter(name=name, physical_type_alias=parameter_type) 824 825 826@_handles('parameter-definition-list-tail -> "," parameter-definition') 827def _parameter_definition_list_tail(comma, parameter): 828 del comma # Unused. 829 return parameter 830 831 832@_handles('parameter-definition-list -> parameter-definition' 833 ' parameter-definition-list-tail*') 834def _parameter_definition_list(head, tail): 835 tail.list.insert(0, head) 836 return tail 837 838 839@_handles('parameter-definition-list ->') 840def _empty_parameter_definition_list(): 841 return _List([]) 842 843 844# The body of a struct: basically, the part after the first line. 845@_handles('struct-body -> Indent doc-line* attribute-line*' 846 ' type-definition* struct-field-block Dedent') 847def _struct_body(indent, docs, attributes, types, fields, dedent): 848 del indent, dedent # Unused. 849 return _structure_body(docs, attributes, types, fields, 850 ir_data.AddressableUnit.BYTE) 851 852 853def _structure_body(docs, attributes, types, fields, addressable_unit): 854 """Constructs the body of a structure (bits or struct) definition.""" 855 return ir_data.TypeDefinition( 856 structure=ir_data.Structure(field=[field.field for field in fields.list]), 857 documentation=docs.list, 858 attribute=attributes.list, 859 subtype=types.list + [subtype for field in fields.list for subtype in 860 field.subtypes], 861 addressable_unit=addressable_unit) 862 863 864@_handles('struct-field-block ->') 865@_handles('bits-field-block ->') 866@_handles('anonymous-bits-field-block ->') 867def _empty_field_block(): 868 return _List([]) 869 870 871@_handles('struct-field-block ->' 872 ' conditional-struct-field-block struct-field-block') 873@_handles('bits-field-block ->' 874 ' conditional-bits-field-block bits-field-block') 875@_handles('anonymous-bits-field-block -> conditional-anonymous-bits-field-block' 876 ' anonymous-bits-field-block') 877def _conditional_block_plus_field_block(conditional_block, block): 878 return _List(conditional_block.list + block.list) 879 880 881@_handles('struct-field-block ->' 882 ' unconditional-struct-field struct-field-block') 883@_handles('bits-field-block ->' 884 ' unconditional-bits-field bits-field-block') 885@_handles('anonymous-bits-field-block ->' 886 ' unconditional-anonymous-bits-field anonymous-bits-field-block') 887def _unconditional_block_plus_field_block(field, block): 888 """Prepends an unconditional field to block.""" 889 ir_data_utils.builder(field.field).existence_condition.source_location.CopyFrom( 890 field.source_location) 891 ir_data_utils.builder(field.field).existence_condition.boolean_constant.source_location.CopyFrom( 892 field.source_location) 893 ir_data_utils.builder(field.field).existence_condition.boolean_constant.value = True 894 return _List([field] + block.list) 895 896 897# Struct "fields" are regular fields, inline enums, bits, or structs, anonymous 898# inline bits, or virtual fields. 899@_handles('unconditional-struct-field -> field') 900@_handles('unconditional-struct-field -> inline-enum-field-definition') 901@_handles('unconditional-struct-field -> inline-bits-field-definition') 902@_handles('unconditional-struct-field -> inline-struct-field-definition') 903@_handles('unconditional-struct-field -> anonymous-bits-field-definition') 904@_handles('unconditional-struct-field -> virtual-field') 905# Bits fields are "regular" fields, inline enums or bits, or virtual fields. 906# 907# Inline structs and anonymous inline bits are not allowed inside of bits: 908# anonymous inline bits are pointless, and inline structs do not make sense, 909# since a struct cannot be a part of a bits. 910# 911# Anonymous inline bits may not include virtual fields; instead, the virtual 912# field should be a direct part of the enclosing structure. 913@_handles('unconditional-anonymous-bits-field -> field') 914@_handles('unconditional-anonymous-bits-field -> inline-enum-field-definition') 915@_handles('unconditional-anonymous-bits-field -> inline-bits-field-definition') 916@_handles('unconditional-bits-field -> unconditional-anonymous-bits-field') 917@_handles('unconditional-bits-field -> virtual-field') 918def _unconditional_field(field): 919 """Handles the unifying grammar production for a struct or bits field.""" 920 return field 921 922 923# TODO(bolms): Add 'elif' and 'else' support. 924# TODO(bolms): Should nested 'if' blocks be allowed? 925@_handles('conditional-struct-field-block ->' 926 ' "if" expression ":" Comment? eol' 927 ' Indent unconditional-struct-field+ Dedent') 928@_handles('conditional-bits-field-block ->' 929 ' "if" expression ":" Comment? eol' 930 ' Indent unconditional-bits-field+ Dedent') 931@_handles('conditional-anonymous-bits-field-block ->' 932 ' "if" expression ":" Comment? eol' 933 ' Indent unconditional-anonymous-bits-field+ Dedent') 934def _conditional_field_block(if_keyword, expression, colon, comment, newline, 935 indent, fields, dedent): 936 """Applies an existence_condition to each element of fields.""" 937 del if_keyword, newline, colon, comment, indent, dedent # Unused. 938 for field in fields.list: 939 condition = ir_data_utils.builder(field.field).existence_condition 940 condition.CopyFrom(expression) 941 condition.source_location.is_disjoint_from_parent = True 942 return fields 943 944 945# The body of a bit field definition: basically, the part after the first line. 946@_handles('bits-body -> Indent doc-line* attribute-line*' 947 ' type-definition* bits-field-block Dedent') 948def _bits_body(indent, docs, attributes, types, fields, dedent): 949 del indent, dedent # Unused. 950 return _structure_body(docs, attributes, types, fields, 951 ir_data.AddressableUnit.BIT) 952 953 954# Inline bits (defined as part of a field) are more restricted than standalone 955# bits. 956@_handles('anonymous-bits-body ->' 957 ' Indent attribute-line* anonymous-bits-field-block Dedent') 958def _anonymous_bits_body(indent, attributes, fields, dedent): 959 del indent, dedent # Unused. 960 return _structure_body(_List([]), attributes, _List([]), fields, 961 ir_data.AddressableUnit.BIT) 962 963 964# A field is: 965# range type name (abbr) [attr: value] [attr2: value] -- doc 966# -- doc 967# -- doc 968# [attr3: value] 969# [attr4: value] 970@_handles('field ->' 971 ' field-location type snake-name abbreviation? attribute* doc?' 972 ' Comment? eol field-body?') 973def _field(location, field_type, name, abbreviation, attributes, doc, comment, 974 newline, field_body): 975 """Constructs an ir_data.Field from the given components.""" 976 del comment # Unused 977 field_ir = ir_data.Field(location=location, 978 type=field_type, 979 name=name, 980 attribute=attributes.list, 981 documentation=doc.list) 982 field = ir_data_utils.builder(field_ir) 983 if field_body.list: 984 field.attribute.extend(field_body.list[0].attribute) 985 field.documentation.extend(field_body.list[0].documentation) 986 if abbreviation.list: 987 field.abbreviation.CopyFrom(abbreviation.list[0]) 988 field.source_location.start.CopyFrom(location.source_location.start) 989 if field_body.source_location.HasField('end'): 990 field.source_location.end.CopyFrom(field_body.source_location.end) 991 else: 992 field.source_location.end.CopyFrom(newline.source_location.end) 993 return _FieldWithType(field=field_ir) 994 995 996# A "virtual field" is: 997# let name = value 998# -- doc 999# -- doc 1000# [attr1: value] 1001# [attr2: value] 1002@_handles('virtual-field ->' 1003 ' "let" snake-name "=" expression Comment? eol field-body?') 1004def _virtual_field(let, name, equals, value, comment, newline, field_body): 1005 """Constructs an ir_data.Field from the given components.""" 1006 del equals, comment # Unused 1007 field_ir = ir_data.Field(read_transform=value, name=name) 1008 field = ir_data_utils.builder(field_ir) 1009 if field_body.list: 1010 field.attribute.extend(field_body.list[0].attribute) 1011 field.documentation.extend(field_body.list[0].documentation) 1012 field.source_location.start.CopyFrom(let.source_location.start) 1013 if field_body.source_location.HasField('end'): 1014 field.source_location.end.CopyFrom(field_body.source_location.end) 1015 else: 1016 field.source_location.end.CopyFrom(newline.source_location.end) 1017 return _FieldWithType(field=field_ir) 1018 1019 1020# An inline enum is: 1021# range "enum" name (abbr): 1022# -- doc 1023# -- doc 1024# [attr3: value] 1025# [attr4: value] 1026# NAME = 10 1027# NAME2 = 20 1028@_handles('inline-enum-field-definition ->' 1029 ' field-location "enum" snake-name abbreviation? ":" Comment? eol' 1030 ' enum-body') 1031def _inline_enum_field(location, enum, name, abbreviation, colon, comment, 1032 newline, enum_body): 1033 """Constructs an ir_data.Field for an inline enum field.""" 1034 del enum, colon, comment, newline # Unused. 1035 return _inline_type_field(location, name, abbreviation, enum_body) 1036 1037 1038@_handles( 1039 'inline-struct-field-definition ->' 1040 ' field-location "struct" snake-name abbreviation? ":" Comment? eol' 1041 ' struct-body') 1042def _inline_struct_field(location, struct, name, abbreviation, colon, comment, 1043 newline, struct_body): 1044 del struct, colon, comment, newline # Unused. 1045 return _inline_type_field(location, name, abbreviation, struct_body) 1046 1047 1048@_handles('inline-bits-field-definition ->' 1049 ' field-location "bits" snake-name abbreviation? ":" Comment? eol' 1050 ' bits-body') 1051def _inline_bits_field(location, bits, name, abbreviation, colon, comment, 1052 newline, bits_body): 1053 del bits, colon, comment, newline # Unused. 1054 return _inline_type_field(location, name, abbreviation, bits_body) 1055 1056 1057def _inline_type_field(location, name, abbreviation, body): 1058 """Shared implementation of _inline_enum_field and _anonymous_bit_field.""" 1059 field_ir = ir_data.Field(location=location, 1060 name=name, 1061 attribute=body.attribute, 1062 documentation=body.documentation) 1063 field = ir_data_utils.builder(field_ir) 1064 # All attributes should be attached to the field, not the type definition: if 1065 # the user wants to use type attributes, they should create a separate type 1066 # definition and reference it. 1067 del body.attribute[:] 1068 type_name = ir_data_utils.copy(name) 1069 ir_data_utils.builder(type_name).name.text = name_conversion.snake_to_camel(type_name.name.text) 1070 field.type.atomic_type.reference.source_name.extend([type_name.name]) 1071 field.type.atomic_type.reference.source_location.CopyFrom( 1072 type_name.source_location) 1073 field.type.atomic_type.reference.is_local_name = True 1074 field.type.atomic_type.source_location.CopyFrom(type_name.source_location) 1075 field.type.source_location.CopyFrom(type_name.source_location) 1076 if abbreviation.list: 1077 field.abbreviation.CopyFrom(abbreviation.list[0]) 1078 field.source_location.start.CopyFrom(location.source_location.start) 1079 ir_data_utils.builder(body.source_location).start.CopyFrom(location.source_location.start) 1080 if body.HasField('enumeration'): 1081 ir_data_utils.builder(body.enumeration).source_location.CopyFrom(body.source_location) 1082 else: 1083 assert body.HasField('structure') 1084 ir_data_utils.builder(body.structure).source_location.CopyFrom(body.source_location) 1085 ir_data_utils.builder(body).name.CopyFrom(type_name) 1086 field.source_location.end.CopyFrom(body.source_location.end) 1087 subtypes = [body] + list(body.subtype) 1088 del body.subtype[:] 1089 return _FieldWithType(field=field_ir, subtypes=subtypes) 1090 1091 1092@_handles('anonymous-bits-field-definition ->' 1093 ' field-location "bits" ":" Comment? eol anonymous-bits-body') 1094def _anonymous_bit_field(location, bits_keyword, colon, comment, newline, 1095 bits_body): 1096 """Constructs an ir_data.Field for an anonymous bit field.""" 1097 del colon, comment, newline # Unused. 1098 name = ir_data.NameDefinition( 1099 name=ir_data.Word( 1100 text=_get_anonymous_field_name(), 1101 source_location=bits_keyword.source_location), 1102 source_location=bits_keyword.source_location, 1103 is_anonymous=True) 1104 return _inline_type_field(location, name, _List([]), bits_body) 1105 1106 1107@_handles('field-body -> Indent doc-line* attribute-line* Dedent') 1108def _field_body(indent, docs, attributes, dedent): 1109 del indent, dedent # Unused. 1110 return ir_data.Field(documentation=docs.list, attribute=attributes.list) 1111 1112 1113# A parenthetically-denoted abbreviation. 1114@_handles('abbreviation -> "(" snake-word ")"') 1115def _abbreviation(open_paren, word, close_paren): 1116 del open_paren, close_paren # Unused. 1117 return word 1118 1119 1120# enum EnumName: 1121# ... values ... 1122@_handles('enum -> "enum" type-name ":" Comment? eol enum-body') 1123def _enum(enum, name, colon, comment, newline, enum_body): 1124 del colon, comment, newline # Unused. 1125 ir_data_utils.builder(enum_body.enumeration).source_location.start.CopyFrom( 1126 enum.source_location.start) 1127 ir_data_utils.builder(enum_body.enumeration).source_location.end.CopyFrom( 1128 enum_body.source_location.end) 1129 ir_data_utils.builder(enum_body).name.CopyFrom(name) 1130 return enum_body 1131 1132 1133# [enum Foo:] 1134# name = value 1135# name = value 1136@_handles('enum-body -> Indent doc-line* attribute-line* enum-value+ Dedent') 1137def _enum_body(indent, docs, attributes, values, dedent): 1138 del indent, dedent # Unused. 1139 return ir_data.TypeDefinition( 1140 enumeration=ir_data.Enum(value=values.list), 1141 documentation=docs.list, 1142 attribute=attributes.list, 1143 addressable_unit=ir_data.AddressableUnit.BIT) 1144 1145 1146# name = value 1147@_handles('enum-value -> ' 1148 ' constant-name "=" expression attribute* doc? Comment? eol enum-value-body?') 1149def _enum_value(name, equals, expression, attribute, documentation, comment, newline, 1150 body): 1151 del equals, comment, newline # Unused. 1152 result = ir_data.EnumValue(name=name, 1153 value=expression, 1154 documentation=documentation.list, 1155 attribute=attribute.list) 1156 if body.list: 1157 result.documentation.extend(body.list[0].documentation) 1158 result.attribute.extend(body.list[0].attribute) 1159 return result 1160 1161 1162@_handles('enum-value-body -> Indent doc-line* attribute-line* Dedent') 1163def _enum_value_body(indent, docs, attributes, dedent): 1164 del indent, dedent # Unused. 1165 return ir_data.EnumValue(documentation=docs.list, attribute=attributes.list) 1166 1167 1168# An external is just a declaration that a type exists and has certain 1169# attributes. 1170@_handles('external -> "external" type-name ":" Comment? eol external-body') 1171def _external(external, name, colon, comment, newline, external_body): 1172 del colon, comment, newline # Unused. 1173 ir_data_utils.builder(external_body.source_location).start.CopyFrom(external.source_location.start) 1174 if external_body.name: 1175 ir_data_utils.update(external_body.name, name) 1176 else: 1177 external_body.name = ir_data_utils.copy(name) 1178 return external_body 1179 1180 1181# This syntax implicitly requires either a documentation line or a attribute 1182# line, or it won't parse (because no Indent/Dedent tokens will be emitted). 1183@_handles('external-body -> Indent doc-line* attribute-line* Dedent') 1184def _external_body(indent, docs, attributes, dedent): 1185 return ir_data.TypeDefinition( 1186 external=ir_data.External( 1187 # Set source_location here, since it won't be set automatically. 1188 source_location=ir_data.Location(start=indent.source_location.start, 1189 end=dedent.source_location.end)), 1190 documentation=docs.list, 1191 attribute=attributes.list) 1192 1193 1194@_handles('field-location -> expression "[" "+" expression "]"') 1195def _field_location(start, open_bracket, plus, size, close_bracket): 1196 del open_bracket, plus, close_bracket # Unused. 1197 return ir_data.FieldLocation(start=start, size=size) 1198 1199 1200@_handles('delimited-argument-list -> "(" argument-list ")"') 1201def _type_argument_list(open_paren, arguments, close_paren): 1202 del open_paren, close_paren # Unused 1203 return arguments 1204 1205 1206# A type is "TypeName" or "TypeName[length]" or "TypeName[length][length]", etc. 1207# An array type may have an empty length ("Type[]"). This is only valid for the 1208# outermost length (the last set of brackets), but that must be checked 1209# elsewhere. 1210@_handles('type -> type-reference delimited-argument-list? type-size-specifier?' 1211 ' array-length-specifier*') 1212def _type(reference, parameters, size, array_spec): 1213 """Builds the IR for a type specifier.""" 1214 base_type_source_location_end = reference.source_location.end 1215 atomic_type_source_location_end = reference.source_location.end 1216 if parameters.list: 1217 base_type_source_location_end = parameters.source_location.end 1218 atomic_type_source_location_end = parameters.source_location.end 1219 if size.list: 1220 base_type_source_location_end = size.source_location.end 1221 base_type_location = parser_types.make_location( 1222 reference.source_location.start, 1223 base_type_source_location_end) 1224 atomic_type_location = parser_types.make_location( 1225 reference.source_location.start, 1226 atomic_type_source_location_end) 1227 t = ir_data.Type( 1228 atomic_type=ir_data.AtomicType( 1229 reference=ir_data_utils.copy(reference), 1230 source_location=atomic_type_location, 1231 runtime_parameter=parameters.list[0].list if parameters.list else []), 1232 size_in_bits=size.list[0] if size.list else None, 1233 source_location=base_type_location) 1234 for length in array_spec.list: 1235 location = parser_types.make_location( 1236 t.source_location.start, length.source_location.end) 1237 if isinstance(length, ir_data.Expression): 1238 t = ir_data.Type( 1239 array_type=ir_data.ArrayType(base_type=t, 1240 element_count=length, 1241 source_location=location), 1242 source_location=location) 1243 elif isinstance(length, ir_data.Empty): 1244 t = ir_data.Type( 1245 array_type=ir_data.ArrayType(base_type=t, 1246 automatic=length, 1247 source_location=location), 1248 source_location=location) 1249 else: 1250 assert False, "Shouldn't be here." 1251 return t 1252 1253 1254# TODO(bolms): Should symbolic names or expressions be allowed? E.g., 1255# UInt:FIELD_SIZE or UInt:(16 + 16)? 1256@_handles('type-size-specifier -> ":" numeric-constant') 1257def _type_size_specifier(colon, numeric_constant): 1258 """handles the ":32" part of a type specifier like "UInt:32".""" 1259 del colon 1260 return ir_data.Expression(constant=numeric_constant) 1261 1262 1263# The distinctions between different formats of NameDefinitions, Words, and 1264# References are enforced during parsing, but not propagated to the IR. 1265@_handles('type-name -> type-word') 1266@_handles('snake-name -> snake-word') 1267@_handles('constant-name -> constant-word') 1268def _name(word): 1269 return ir_data.NameDefinition(name=word) 1270 1271 1272@_handles('type-word -> CamelWord') 1273@_handles('snake-word -> SnakeWord') 1274@_handles('builtin-field-word -> "$size_in_bits"') 1275@_handles('builtin-field-word -> "$size_in_bytes"') 1276@_handles('builtin-field-word -> "$max_size_in_bits"') 1277@_handles('builtin-field-word -> "$max_size_in_bytes"') 1278@_handles('builtin-field-word -> "$min_size_in_bits"') 1279@_handles('builtin-field-word -> "$min_size_in_bytes"') 1280@_handles('builtin-word -> "$is_statically_sized"') 1281@_handles('builtin-word -> "$static_size_in_bits"') 1282@_handles('builtin-word -> "$next"') 1283@_handles('constant-word -> ShoutyWord') 1284@_handles('and-operator -> "&&"') 1285@_handles('or-operator -> "||"') 1286@_handles('less-operator -> "<="') 1287@_handles('less-operator -> "<"') 1288@_handles('greater-operator -> ">="') 1289@_handles('greater-operator -> ">"') 1290@_handles('equality-operator -> "=="') 1291@_handles('inequality-operator -> "!="') 1292@_handles('additive-operator -> "+"') 1293@_handles('additive-operator -> "-"') 1294@_handles('multiplicative-operator -> "*"') 1295@_handles('function-name -> "$max"') 1296@_handles('function-name -> "$present"') 1297@_handles('function-name -> "$upper_bound"') 1298@_handles('function-name -> "$lower_bound"') 1299def _word(word): 1300 return ir_data.Word(text=word.text) 1301 1302 1303@_handles('type-reference -> type-reference-tail') 1304@_handles('constant-reference -> constant-reference-tail') 1305def _un_module_qualified_type_reference(reference): 1306 return reference 1307 1308 1309@_handles('constant-reference-tail -> constant-word') 1310@_handles('type-reference-tail -> type-word') 1311@_handles('snake-reference -> snake-word') 1312@_handles('snake-reference -> builtin-field-word') 1313def _reference(word): 1314 return ir_data.Reference(source_name=[word]) 1315 1316 1317@_handles('builtin-reference -> builtin-word') 1318def _builtin_reference(word): 1319 return ir_data.Reference(source_name=[word], 1320 canonical_name=ir_data.CanonicalName( 1321 object_path=[word.text])) 1322 1323 1324# Because constant-references ("Enum.NAME") are used in the same contexts as 1325# field-references ("field.subfield"), module-qualified constant references 1326# ("module.Enum.VALUE") have to take snake-reference, not snake-word, on the 1327# left side of the dot. Otherwise, when a "snake_word" is followed by a "." in 1328# an expression context, the LR(1) parser cannot determine whether to reduce the 1329# snake-word to snake-reference (to eventually become field-reference), or to 1330# shift the dot onto the stack (to eventually become constant-reference). By 1331# using snake-reference as the head of both, the parser can always reduce, then 1332# shift the dot, then determine whether to proceed with constant-reference if it 1333# sees "snake_name.TypeName" or field-reference if it sees 1334# "snake_name.snake_name". 1335@_handles('constant-reference -> snake-reference "." constant-reference-tail') 1336def _module_qualified_constant_reference(new_head, dot, reference): 1337 del dot # Unused. 1338 new_source_name = list(new_head.source_name) + list(reference.source_name) 1339 del reference.source_name[:] 1340 reference.source_name.extend(new_source_name) 1341 return reference 1342 1343 1344@_handles('constant-reference-tail -> type-word "." constant-reference-tail') 1345# module.Type.SubType.name is a reference to something that *must* be a 1346# constant. 1347@_handles('constant-reference-tail -> type-word "." snake-reference') 1348@_handles('type-reference-tail -> type-word "." type-reference-tail') 1349@_handles('type-reference -> snake-word "." type-reference-tail') 1350def _qualified_reference(word, dot, reference): 1351 """Adds a name. or Type. qualification to the head of a reference.""" 1352 del dot # Unused. 1353 new_source_name = [word] + list(reference.source_name) 1354 del reference.source_name[:] 1355 reference.source_name.extend(new_source_name) 1356 return reference 1357 1358 1359# Arrays are properly translated to IR in _type(). 1360@_handles('array-length-specifier -> "[" expression "]"') 1361def _array_length_specifier(open_bracket, length, close_bracket): 1362 del open_bracket, close_bracket # Unused. 1363 return length 1364 1365 1366# An array specifier can end with empty brackets ("arr[3][]"), in which case the 1367# array's size is inferred from the size of its enclosing field. 1368@_handles('array-length-specifier -> "[" "]"') 1369def _auto_array_length_specifier(open_bracket, close_bracket): 1370 # Note that the Void's source_location is the space between the brackets (if 1371 # any). 1372 return ir_data.Empty( 1373 source_location=ir_data.Location(start=open_bracket.source_location.end, 1374 end=close_bracket.source_location.start)) 1375 1376 1377@_handles('eol -> "\\n" comment-line*') 1378def _eol(eol, comments): 1379 del comments # Unused 1380 return eol 1381 1382 1383@_handles('comment-line -> Comment? "\\n"') 1384def _comment_line(comment, eol): 1385 del comment # Unused 1386 return eol 1387 1388 1389def _finalize_grammar(): 1390 """_Finalize adds productions for foo*, foo+, and foo? symbols.""" 1391 star_symbols = set() 1392 plus_symbols = set() 1393 option_symbols = set() 1394 for production in _handlers: 1395 for symbol in production.rhs: 1396 if symbol[-1] == '*': 1397 star_symbols.add(symbol[:-1]) 1398 elif symbol[-1] == '+': 1399 # symbol+ relies on the rule for symbol* 1400 star_symbols.add(symbol[:-1]) 1401 plus_symbols.add(symbol[:-1]) 1402 elif symbol[-1] == '?': 1403 option_symbols.add(symbol[:-1]) 1404 for symbol in star_symbols: 1405 _handles('{s}* -> {s} {s}*'.format(s=symbol))( 1406 lambda e, r: _List([e] + r.list)) 1407 _handles('{s}* ->'.format(s=symbol))(lambda: _List([])) 1408 for symbol in plus_symbols: 1409 _handles('{s}+ -> {s} {s}*'.format(s=symbol))( 1410 lambda e, r: _List([e] + r.list)) 1411 for symbol in option_symbols: 1412 _handles('{s}? -> {s}'.format(s=symbol))(lambda e: _List([e])) 1413 _handles('{s}? ->'.format(s=symbol))(lambda: _List([])) 1414 1415 1416_finalize_grammar() 1417 1418# End of grammar. 1419################################################################################ 1420 1421# These export the grammar used by module_ir so that parser_generator can build 1422# a parser for the same language. 1423START_SYMBOL = 'module' 1424EXPRESSION_START_SYMBOL = 'expression' 1425PRODUCTIONS = list(_handlers.keys()) 1426