# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""module_ir contains code for generating module-level IRs from parse trees.

The primary export is build_ir(), which takes a parse tree (as returned by a
parser from lr1.py), and returns a module-level intermediate representation
("module IR").

This module also notably exports PRODUCTIONS and START_SYMBOL, which should be
fed to lr1.Grammar in order to create a parser for the Emboss language.
"""

import re
import sys

from compiler.util import ir_data
from compiler.util import ir_data_utils
from compiler.util import name_conversion
from compiler.util import parser_types


# Intermediate types; should not be found in the final IR.
class _List(object):
  """A list with source location information."""
  __slots__ = ('list', 'source_location')

  def __init__(self, l):
    assert isinstance(l, list), "_List object must wrap list, not '%r'" % l
    self.list = l
    self.source_location = ir_data.Location()


class _ExpressionTail(object):
  """A fragment of an expression with an operator and right-hand side.

  _ExpressionTail is the tail of an expression, consisting of an operator and
  the right-hand argument to the operator; for example, in the expression (6+8),
  the _ExpressionTail would be "+8".

  This is used as a temporary object while converting the right-recursive
  "expression" and "times-expression" productions into left-associative
  Expressions.

  Attributes:
    operator: An ir_data.Word of the operator's name.
    expression: The expression on the right side of the operator.
    source_location: The source location of the operation fragment.
  """
  __slots__ = ('operator', 'expression', 'source_location')

  def __init__(self, operator, expression):
    self.operator = operator
    self.expression = expression
    self.source_location = ir_data.Location()


class _FieldWithType(object):
  """A field with zero or more types defined inline with that field."""
  __slots__ = ('field', 'subtypes', 'source_location')

  def __init__(self, field, subtypes=None):
    self.field = field
    self.subtypes = subtypes or []
    self.source_location = ir_data.Location()


def build_ir(parse_tree, used_productions=None):
  r"""Builds a module-level intermediate representation from a valid parse tree.

  The parse tree is precisely dictated by the exact productions in the grammar
  used by the parser, with no semantic information.  _really_build_ir transforms
  this "raw" form into a stable, cooked representation, thereby isolating
  subsequent steps from the exact details of the grammar.

  (Probably incomplete) list of transformations:

  *   ParseResult and Token nodes are replaced with Module, Attribute, Struct,
      Type, etc. objects.

  *   Purely syntactic tokens ('"["', '"struct"', etc.) are discarded.

  *   Repeated elements are transformed from tree form to list form:

          a*
         / \
        b   a*
           / \
          c   a*
             / \
            d   a*

      (where b, c, and d are nodes of type "a") becomes [b, c, d].

  *   The values of numeric constants (Number, etc. tokens) are parsed.

  *   Different classes of names (snake_names, CamelNames, ShoutyNames) are
      folded into a single "Name" type, since they are guaranteed to appear in
      the correct places in the parse tree.


  Arguments:
    parse_tree: A parse tree.  Each leaf node should be a parser_types.Token
      object, and each non-leaf node should have a 'symbol' attribute specifying
      which grammar symbol it represents, and a 'children' attribute containing
      a list of child nodes.  This is the format returned by the parsers
      produced by the lr1 module, when run against tokens from the tokenizer
      module.
    used_productions: If specified, used_productions.add() will be called with
      each production actually used in parsing.  This can be useful when
      developing the grammar and writing tests; in particular, it can be used to
      figure out which productions are *not* used when parsing a particular
      file.

  Returns:
    A module-level intermediate representation (module IR) for an Emboss module
    (source file).  This IR will not have symbols resolved; that must be done on
    a forest of module IRs so that names from other modules can be resolved.
  """

  # TODO(b/140259131): Refactor _really_build_ir to be less recursive/use an
  # explicit stack.
  old_recursion_limit = sys.getrecursionlimit()
  sys.setrecursionlimit(16 * 1024)  # ~8000 top-level entities in one module.
  try:
    result = _really_build_ir(parse_tree, used_productions)
  finally:
    sys.setrecursionlimit(old_recursion_limit)
  return result

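# A minimal usage sketch of the `used_productions` hook described in the
# docstring above, assuming `parse_tree` came from a parser built from
# START_SYMBOL and PRODUCTIONS (exported at the bottom of this file):
#
#     used = set()
#     module_ir = build_ir(parse_tree, used_productions=used)
#     unused = set(PRODUCTIONS) - used   # productions this file never exercised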

def _really_build_ir(parse_tree, used_productions):
  """Real implementation of build_ir()."""
  if used_productions is None:
    used_productions = set()
  if hasattr(parse_tree, 'children'):
    parsed_children = [_really_build_ir(child, used_productions)
                       for child in parse_tree.children]
    used_productions.add(parse_tree.production)
    result = _handlers[parse_tree.production](*parsed_children)
    if parse_tree.source_location is not None:
      if result.source_location:
        ir_data_utils.update(result.source_location, parse_tree.source_location)
      else:
        result.source_location = ir_data_utils.copy(parse_tree.source_location)
    return result
  else:
    # For leaf nodes, the temporary "IR" is just the token.  Higher-level rules
    # will translate it to a real IR.
    assert isinstance(parse_tree, parser_types.Token), str(parse_tree)
    return parse_tree

# Map of productions to their handlers.
_handlers = {}

_anonymous_name_counter = 0


def _get_anonymous_field_name():
  global _anonymous_name_counter
  _anonymous_name_counter += 1
  return 'emboss_reserved_anonymous_field_{}'.format(_anonymous_name_counter)


def _handles(production_text):
  """_handles marks a function as the handler for a particular production."""
  production = parser_types.Production.parse(production_text)

  def handles(f):
    _handlers[production] = f
    return f

  return handles


def _make_prelude_import(position):
  """Helper function to construct a synthetic ir_data.Import for the prelude."""
  location = parser_types.make_location(position, position)
  return ir_data.Import(
      file_name=ir_data.String(text='', source_location=location),
      local_name=ir_data.Word(text='', source_location=location),
      source_location=location)


def _text_to_operator(text):
  """Converts an operator's textual name to its corresponding enum."""
  operations = {
      '+': ir_data.FunctionMapping.ADDITION,
      '-': ir_data.FunctionMapping.SUBTRACTION,
      '*': ir_data.FunctionMapping.MULTIPLICATION,
      '==': ir_data.FunctionMapping.EQUALITY,
      '!=': ir_data.FunctionMapping.INEQUALITY,
      '&&': ir_data.FunctionMapping.AND,
      '||': ir_data.FunctionMapping.OR,
      '>': ir_data.FunctionMapping.GREATER,
      '>=': ir_data.FunctionMapping.GREATER_OR_EQUAL,
      '<': ir_data.FunctionMapping.LESS,
      '<=': ir_data.FunctionMapping.LESS_OR_EQUAL,
  }
  return operations[text]


def _text_to_function(text):
  """Converts a function's textual name to its corresponding enum."""
  functions = {
      '$max': ir_data.FunctionMapping.MAXIMUM,
      '$present': ir_data.FunctionMapping.PRESENCE,
      '$upper_bound': ir_data.FunctionMapping.UPPER_BOUND,
      '$lower_bound': ir_data.FunctionMapping.LOWER_BOUND,
  }
  return functions[text]


################################################################################
# Grammar & parse tree to IR translation.
#
# From here to (almost) the end of the file are functions which recursively
# build an IR.  The @_handles annotations indicate the exact grammar
# production(s) handled by each function.  The handler function should take
# exactly one argument for each symbol in the production's RHS.
#
# The actual Emboss grammar is extracted directly from the @_handles
# annotations, so this is also the grammar definition.  For convenience, the
# grammar can be viewed separately in g3doc/grammar.md.
#
# At the end, symbols whose names end in "*", "+", or "?" are extracted from the
# grammar, and appropriate productions are added for zero-or-more, one-or-more,
# or zero-or-one lists, respectively.  (This is analogous to the *, +, and ?
# operators in regex.)  It is necessary for this to happen here (and not in
# lr1.py) because the generated productions must be associated with
# IR-generation functions.


# A module file is a list of documentation, then imports, then top-level
# attributes, then type definitions.  Any section may be missing.
# TODO(bolms): Should Emboss disallow completely empty files?
@_handles('module -> comment-line* doc-line* import-line* attribute-line*'
          '          type-definition*')
def _file(leading_newlines, docs, imports, attributes, type_definitions):
  """Assembles the top-level IR for a module."""
  del leading_newlines  # Unused.
  # Figure out the best synthetic source_location for the synthesized prelude
  # import.
  if imports.list:
    position = imports.list[0].source_location.start
  elif docs.list:
    position = docs.list[0].source_location.end
  elif attributes.list:
    position = attributes.list[0].source_location.start
  elif type_definitions.list:
    position = type_definitions.list[0].source_location.start
  else:
    position = 1, 1

  # If the source file is completely empty, build_ir won't automatically
  # populate the source_location attribute for the module.
  if (not docs.list and not imports.list and not attributes.list and
      not type_definitions.list):
    module_source_location = parser_types.make_location((1, 1), (1, 1))
  else:
    module_source_location = None

  return ir_data.Module(
      documentation=docs.list,
      foreign_import=[_make_prelude_import(position)] + imports.list,
      attribute=attributes.list,
      type=type_definitions.list,
      source_location=module_source_location)


@_handles('import-line ->'
          '    "import" string-constant "as" snake-word Comment? eol')
def _import(import_, file_name, as_, local_name, comment, eol):
  del import_, as_, comment, eol  # Unused
  return ir_data.Import(file_name=file_name, local_name=local_name)


@_handles('doc-line -> doc Comment? eol')
def _doc_line(doc, comment, eol):
  del comment, eol  # Unused.
  return doc


@_handles('doc -> Documentation')
def _doc(documentation):
  # As a special case, an empty documentation string may omit the trailing
  # space.
  if documentation.text == '--':
    doc_text = '-- '
  else:
    doc_text = documentation.text
  assert doc_text[0:3] == '-- ', (
      "Documentation token '{}' in unknown format.".format(
          documentation.text))
  return ir_data.Documentation(text=doc_text[3:])


# An attribute-line is just an attribute on its own line.
@_handles('attribute-line -> attribute Comment? eol')
def _attribute_line(attr, comment, eol):
  del comment, eol  # Unused.
  return attr


# An attribute is [name = value].
@_handles('attribute -> "[" attribute-context? "$default"?'
          '             snake-word ":" attribute-value "]"')
def _attribute(open_bracket, context_specifier, default_specifier, name, colon,
               attribute_value, close_bracket):
  del open_bracket, colon, close_bracket  # Unused.
  if context_specifier.list:
    return ir_data.Attribute(name=name,
                            value=attribute_value,
                            is_default=bool(default_specifier.list),
                            back_end=context_specifier.list[0])
  else:
    return ir_data.Attribute(name=name,
                            value=attribute_value,
                            is_default=bool(default_specifier.list))


@_handles('attribute-context -> "(" snake-word ")"')
def _attribute_context(open_paren, context_name, close_paren):
  del open_paren, close_paren  # Unused.
  return context_name


@_handles('attribute-value -> expression')
def _attribute_value_expression(expression):
  return ir_data.AttributeValue(expression=expression)


@_handles('attribute-value -> string-constant')
def _attribute_value_string(string):
  return ir_data.AttributeValue(string_constant=string)


@_handles('boolean-constant -> BooleanConstant')
def _boolean_constant(boolean):
  return ir_data.BooleanConstant(value=(boolean.text == 'true'))


@_handles('string-constant -> String')
def _string_constant(string):
  """Turns a String token into an ir_data.String, with proper unescaping.

  Arguments:
    string: A String token.

  Returns:
    An ir_data.String with the "text" field set to the unescaped value of
    string.text.
  """
  # TODO(bolms): If/when this logic becomes more complex (e.g., to handle \NNN
  # or \xNN escapes), extract this into a separate module with separate tests.
  assert string.text[0] == '"'
  assert string.text[-1] == '"'
  assert len(string.text) >= 2
  result = []
  for substring in re.split(r'(\\.)', string.text[1:-1]):
    if substring and substring[0] == '\\':
      assert len(substring) == 2
      result.append({'\\': '\\', '"': '"', 'n': '\n'}[substring[1]])
    else:
      result.append(substring)
  return ir_data.String(text=''.join(result))

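# A worked example of the unescaping above: a String token whose raw text is
#     "line one\nline two"
# (a literal backslash followed by 'n' in the Emboss source) becomes an
# ir_data.String whose text contains a real newline, and a token written as
#     "say \"hi\""
# becomes the six-word-character text: say "hi"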

# In Emboss, '&&' and '||' may not be mixed without parentheses.  These are all
# fine:
#
#     x && y && z
#     x || y || z
#     (x || y) && z
#     x || (y && z)
#
# These are syntax errors:
#
#     x || y && z
#     x && y || z
#
# This is accomplished by making && and || separate-but-equal in the precedence
# hierarchy.  Instead of the more traditional:
#
#     logical-expression   -> or-expression
#     or-expression        -> and-expression or-expression-right*
#     or-expression-right  -> '||' and-expression
#     and-expression       -> equality-expression and-expression-right*
#     and-expression-right -> '&&' equality-expression
#
# Or, using yacc-style precedence specifiers:
#
#     %left "||"
#     %left "&&"
#     expression -> expression
#                 | expression '||' expression
#                 | expression '&&' expression
#
# Emboss uses a slightly more complex grammar, in which '&&' and '||' are
# parallel, but unmixable:
#
#     logical-expression   -> and-expression
#                           | or-expression
#                           | equality-expression
#     or-expression        -> equality-expression or-expression-right+
#     or-expression-right  -> '||' equality-expression
#     and-expression       -> equality-expression and-expression-right+
#     and-expression-right -> '&&' equality-expression
#
# In either case, explicit parenthesization is handled elsewhere in the grammar.
@_handles('logical-expression -> and-expression')
@_handles('logical-expression -> or-expression')
@_handles('logical-expression -> comparison-expression')
@_handles('choice-expression -> logical-expression')
@_handles('expression -> choice-expression')
def _expression(expression):
  return expression


# The `logical-expression`s here mean that ?: can't be chained without
# parentheses.  `x < 0 ? -1 : (x == 0 ? 0 : 1)` is OK, but `x < 0 ? -1 : x == 0
# ? 0 : 1` is not.  Parentheses are also needed in the middle: `x <= 0 ? x < 0 ?
# -1 : 0 : 1` is not syntactically valid.
@_handles('choice-expression -> logical-expression "?" logical-expression'
          '                                        ":" logical-expression')
def _choice_expression(condition, question, if_true, colon, if_false):
  location = parser_types.make_location(
      condition.source_location.start, if_false.source_location.end)
  operator_location = parser_types.make_location(
      question.source_location.start, colon.source_location.end)
  # The function_name is a bit weird, but should suffice for any error messages
  # that might need it.
  return ir_data.Expression(
      function=ir_data.Function(function=ir_data.FunctionMapping.CHOICE,
                               args=[condition, if_true, if_false],
                               function_name=ir_data.Word(
                                   text='?:',
                                   source_location=operator_location),
                               source_location=location))


@_handles('comparison-expression -> additive-expression')
def _no_op_comparative_expression(expression):
  return expression


@_handles('comparison-expression ->'
          '    additive-expression inequality-operator additive-expression')
def _comparative_expression(left, operator, right):
  location = parser_types.make_location(
      left.source_location.start, right.source_location.end)
  return ir_data.Expression(
      function=ir_data.Function(function=_text_to_operator(operator.text),
                               args=[left, right],
                               function_name=operator,
                               source_location=location))


@_handles('additive-expression -> times-expression additive-expression-right*')
@_handles('times-expression -> negation-expression times-expression-right*')
@_handles('and-expression -> comparison-expression and-expression-right+')
@_handles('or-expression -> comparison-expression or-expression-right+')
def _binary_operator_expression(expression, expression_right):
  """Builds the IR for a chain of equal-precedence left-associative operations.

  _binary_operator_expression transforms a right-recursive list of expression
  tails into a left-associative Expression tree.  For example, given the
  arguments:

      6, (Tail("+", 7), Tail("-", 8), Tail("+", 10))

  _binary_operator_expression produces a structure like:

     Expression(Expression(Expression(6, "+", 7), "-", 8), "+", 10)

  This transformation is necessary because strict LR(1) grammars do not allow
  left recursion.

  Note that this method is used for several productions; each of those
  productions handles a different precedence level, but all of them are
  identical in form.

  Arguments:
    expression: An ir_data.Expression which is the head of the (expr, operator,
        expr, operator, expr, ...) list.
    expression_right: A list of _ExpressionTails corresponding to the (operator,
        expr, operator, expr, ...) list that comes after expression.

  Returns:
    An ir_data.Expression with the correct recursive structure to represent a
    list of left-associative operations.
  """
  e = expression
  for right in expression_right.list:
    location = parser_types.make_location(
        e.source_location.start, right.source_location.end)
    e = ir_data.Expression(
        function=ir_data.Function(
            function=_text_to_operator(right.operator.text),
            args=[e, right.expression],
            function_name=right.operator,
            source_location=location),
        source_location=location)
  return e

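# The left fold performed by _binary_operator_expression, sketched with plain
# tuples instead of ir_data objects (illustrative only; not used by the
# compiler):
#
#     def fold_left(head, tails):
#       result = head
#       for operator, rhs in tails:
#         result = (result, operator, rhs)
#       return result
#
#     fold_left(6, [('+', 7), ('-', 8), ('+', 10)])
#         == (((6, '+', 7), '-', 8), '+', 10)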

@_handles('comparison-expression ->'
          '    additive-expression equality-expression-right+')
@_handles('comparison-expression ->'
          '    additive-expression less-expression-right-list')
@_handles('comparison-expression ->'
          '    additive-expression greater-expression-right-list')
def _chained_comparison_expression(expression, expression_right):
  """Builds the IR for a chain of comparisons, like a == b == c.

  Like _binary_operator_expression, _chained_comparison_expression transforms a
  right-recursive list of expression tails into a left-associative Expression
  tree.  Unlike _binary_operator_expression, extra AND nodes are added.  For
  example, the following expression:

      0 <= b <= 64

  must be translated to the conceptually-equivalent expression:

      0 <= b && b <= 64

  (The middle subexpression is duplicated -- this would be a problem in a
  programming language like C where expressions like `x++` have side effects,
  but side effects do not make sense in a data definition language like Emboss.)

  _chained_comparison_expression receives a left-hand head expression and a list
  of tails, like:

      6, (Tail("<=", b), Tail("<=", 64))

  which it translates to a structure like:

      Expression(Expression(6, "<=", b), "&&", Expression(b, "<=", 64))

  The Emboss grammar is constructed such that sequences of "<", "<=", and "=="
  comparisons may be chained, and sequences of ">", ">=", and "==" can be
  chained, but greater and less-than comparisons may not; e.g., "b < 64 > a" is
  not allowed.

  Arguments:
    expression: An ir_data.Expression which is the head of the (expr, operator,
        expr, operator, expr, ...) list.
    expression_right: A list of _ExpressionTails corresponding to the (operator,
        expr, operator, expr, ...) list that comes after expression.

  Returns:
    An ir_data.Expression with the correct recursive structure to represent a
    chain of left-associative comparison operations.
  """
  sequence = [expression]
  for right in expression_right.list:
    sequence.append(right.operator)
    sequence.append(right.expression)
  comparisons = []
  for i in range(0, len(sequence) - 1, 2):
    left, operator, right = sequence[i:i+3]
    location = parser_types.make_location(
        left.source_location.start, right.source_location.end)
    comparisons.append(ir_data.Expression(
        function=ir_data.Function(
            function=_text_to_operator(operator.text),
            args=[left, right],
            function_name=operator,
            source_location=location),
        source_location=location))
  e = comparisons[0]
  for comparison in comparisons[1:]:
    location = parser_types.make_location(
        e.source_location.start, comparison.source_location.end)
    e = ir_data.Expression(
        function=ir_data.Function(
            function=ir_data.FunctionMapping.AND,
            args=[e, comparison],
            function_name=ir_data.Word(
                text='&&',
                source_location=comparison.function.args[0].source_location),
            source_location=location),
        source_location=location)
  return e

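# The chained-comparison expansion above, sketched with plain tuples
# (illustrative only; not used by the compiler):
#
#     def expand_chain(head, tails):
#       terms = [head]
#       for operator, rhs in tails:
#         terms += [operator, rhs]
#       comparisons = [tuple(terms[i:i + 3])
#                      for i in range(0, len(terms) - 1, 2)]
#       result = comparisons[0]
#       for comparison in comparisons[1:]:
#         result = (result, '&&', comparison)
#       return result
#
#     expand_chain(0, [('<=', 'b'), ('<=', 64)])
#         == ((0, '<=', 'b'), '&&', ('b', '<=', 64))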

# _chained_comparison_expression, above, handles three types of chains: `a == b
# == c`, `a < b <= c`, and `a > b >= c`.
#
# This requires a bit of subtlety in the productions for
# `x-expression-right-list`, because the `==` operator may be freely mixed into
# greater-than or less-than chains, like `a < b == c <= d` or `a > b == c >= d`,
# but greater-than and less-than may not be mixed; i.e., `a < b >= c` is
# disallowed.
#
# In order to keep the grammar unambiguous -- that is, in order to ensure that
# every valid input can only be parsed in exactly one way -- the languages
# defined by `equality-expression-right*`, `greater-expression-right-list`, and
# `less-expression-right-list` cannot overlap.
#
# `equality-expression-right*`, by definition, only contains `== n` elements.
# By forcing `greater-expression-right-list` to contain at least one
# `greater-expression-right`, we can ensure that a chain like `== n == m` cannot
# be parsed as a `greater-expression-right-list`.  Similar logic applies in the
# less-than case.
#
# There is another potential source of ambiguity here: if
# `greater-expression-right-list` were
#
#     greater-expression-right-list ->
#         equality-or-greater-expression-right* greater-expression-right
#         equality-or-greater-expression-right*
#
# then a sequence like '> b > c > d' could be parsed as any of:
#
#     () (> b) ((> c) (> d))
#     ((> b)) (> c) ((> d))
#     ((> b) (> c)) (> d) ()
#
# By using `equality-expression-right*` for the first symbol, only the first
# parse is possible.
@_handles('greater-expression-right-list ->'
          '    equality-expression-right* greater-expression-right'
          '    equality-or-greater-expression-right*')
@_handles('less-expression-right-list ->'
          '    equality-expression-right* less-expression-right'
          '    equality-or-less-expression-right*')
def _chained_comparison_tails(start, middle, end):
  return _List(start.list + [middle] + end.list)


@_handles('equality-or-greater-expression-right -> equality-expression-right')
@_handles('equality-or-greater-expression-right -> greater-expression-right')
@_handles('equality-or-less-expression-right -> equality-expression-right')
@_handles('equality-or-less-expression-right -> less-expression-right')
def _equality_or_less_or_greater(right):
  return right


@_handles('and-expression-right -> and-operator comparison-expression')
@_handles('or-expression-right -> or-operator comparison-expression')
@_handles('additive-expression-right -> additive-operator times-expression')
@_handles('equality-expression-right -> equality-operator additive-expression')
@_handles('greater-expression-right -> greater-operator additive-expression')
@_handles('less-expression-right -> less-operator additive-expression')
@_handles('times-expression-right ->'
          '    multiplicative-operator negation-expression')
def _expression_right_production(operator, expression):
  return _ExpressionTail(operator, expression)


# This supports a single layer of unary plus/minus, so "+5" and "-value" are
# allowed, but "+-5" or "-+-something" are not.
@_handles('negation-expression -> additive-operator bottom-expression')
def _negation_expression_with_operator(operator, expression):
  phantom_zero_location = ir_data.Location(start=operator.source_location.start,
                                          end=operator.source_location.start)
  return ir_data.Expression(
      function=ir_data.Function(
          function=_text_to_operator(operator.text),
          args=[ir_data.Expression(
              constant=ir_data.NumericConstant(
                  value='0',
                  source_location=phantom_zero_location),
              source_location=phantom_zero_location), expression],
          function_name=operator,
          source_location=ir_data.Location(
              start=operator.source_location.start,
              end=expression.source_location.end)))


@_handles('negation-expression -> bottom-expression')
def _negation_expression(expression):
  return expression


@_handles('bottom-expression -> "(" expression ")"')
def _bottom_expression_parentheses(open_paren, expression, close_paren):
  del open_paren, close_paren  # Unused.
  return expression


@_handles('bottom-expression -> function-name "(" argument-list ")"')
def _bottom_expression_function(function, open_paren, arguments, close_paren):
  del open_paren  # Unused.
  return ir_data.Expression(
      function=ir_data.Function(
          function=_text_to_function(function.text),
          args=arguments.list,
          function_name=function,
          source_location=ir_data.Location(
              start=function.source_location.start,
              end=close_paren.source_location.end)))


@_handles('comma-then-expression -> "," expression')
def _comma_then_expression(comma, expression):
  del comma  # Unused.
  return expression


@_handles('argument-list -> expression comma-then-expression*')
def _argument_list(head, tail):
  tail.list.insert(0, head)
  return tail


@_handles('argument-list ->')
def _empty_argument_list():
  return _List([])


@_handles('bottom-expression -> numeric-constant')
def _bottom_expression_from_numeric_constant(constant):
  return ir_data.Expression(constant=constant)


@_handles('bottom-expression -> constant-reference')
def _bottom_expression_from_constant_reference(reference):
  return ir_data.Expression(constant_reference=reference)


@_handles('bottom-expression -> builtin-reference')
def _bottom_expression_from_builtin(reference):
  return ir_data.Expression(builtin_reference=reference)


@_handles('bottom-expression -> boolean-constant')
def _bottom_expression_from_boolean_constant(boolean):
  return ir_data.Expression(boolean_constant=boolean)


@_handles('bottom-expression -> field-reference')
def _bottom_expression_from_reference(reference):
  return reference


@_handles('field-reference -> snake-reference field-reference-tail*')
def _indirect_field_reference(field_reference, field_references):
  if field_references.source_location.HasField('end'):
    end_location = field_references.source_location.end
  else:
    end_location = field_reference.source_location.end
  return ir_data.Expression(field_reference=ir_data.FieldReference(
      path=[field_reference] + field_references.list,
      source_location=parser_types.make_location(
          field_reference.source_location.start, end_location)))


# If "Type.field" ever becomes syntactically valid, it will be necessary to
# check that enum values are compile-time constants.
@_handles('field-reference-tail -> "." snake-reference')
def _field_reference_tail(dot, reference):
  del dot  # Unused.
  return reference


@_handles('numeric-constant -> Number')
def _numeric_constant(number):
  # All types of numeric constant tokenize to the same symbol, because they are
  # interchangeable in source code.
  if number.text[0:2] == '0b':
    n = int(number.text.replace('_', '')[2:], 2)
  elif number.text[0:2] == '0x':
    n = int(number.text.replace('_', '')[2:], 16)
  else:
    n = int(number.text.replace('_', ''), 10)
  return ir_data.NumericConstant(value=str(n))

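# For example, given the handling above, the Number tokens '0b1010', '0x1_00',
# and '1_000' become NumericConstants with the decimal values '10', '256', and
# '1000', respectively.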

@_handles('type-definition -> struct')
@_handles('type-definition -> bits')
@_handles('type-definition -> enum')
@_handles('type-definition -> external')
def _type_definition(type_definition):
  return type_definition


# struct StructureName:
#   ... fields ...
# bits BitName:
#   ... fields ...
@_handles('struct -> "struct" type-name delimited-parameter-definition-list?'
          '          ":" Comment? eol struct-body')
@_handles('bits -> "bits" type-name delimited-parameter-definition-list? ":"'
          '        Comment? eol bits-body')
def _structure(struct, name, parameters, colon, comment, newline, struct_body):
  """Composes the top-level IR for an Emboss structure."""
  del colon, comment, newline  # Unused.
  ir_data_utils.builder(struct_body.structure).source_location.start.CopyFrom(
      struct.source_location.start)
  ir_data_utils.builder(struct_body.structure).source_location.end.CopyFrom(
      struct_body.source_location.end)
  if struct_body.name:
    ir_data_utils.update(struct_body.name, name)
  else:
    struct_body.name = ir_data_utils.copy(name)
  if parameters.list:
    struct_body.runtime_parameter.extend(parameters.list[0].list)
  return struct_body


@_handles('delimited-parameter-definition-list ->'
          '    "(" parameter-definition-list ")"')
def _delimited_parameter_definition_list(open_paren, parameters, close_paren):
  del open_paren, close_paren  # Unused
  return parameters


@_handles('parameter-definition -> snake-name ":" type')
def _parameter_definition(name, double_colon, parameter_type):
  del double_colon  # Unused
  return ir_data.RuntimeParameter(name=name, physical_type_alias=parameter_type)


@_handles('parameter-definition-list-tail -> "," parameter-definition')
def _parameter_definition_list_tail(comma, parameter):
  del comma  # Unused.
  return parameter


@_handles('parameter-definition-list -> parameter-definition'
          '                             parameter-definition-list-tail*')
def _parameter_definition_list(head, tail):
  tail.list.insert(0, head)
  return tail


@_handles('parameter-definition-list ->')
def _empty_parameter_definition_list():
  return _List([])


# The body of a struct: basically, the part after the first line.
@_handles('struct-body -> Indent doc-line* attribute-line*'
          '               type-definition* struct-field-block Dedent')
def _struct_body(indent, docs, attributes, types, fields, dedent):
  del indent, dedent  # Unused.
  return _structure_body(docs, attributes, types, fields,
                         ir_data.AddressableUnit.BYTE)


def _structure_body(docs, attributes, types, fields, addressable_unit):
  """Constructs the body of a structure (bits or struct) definition."""
  return ir_data.TypeDefinition(
      structure=ir_data.Structure(field=[field.field for field in fields.list]),
      documentation=docs.list,
      attribute=attributes.list,
      subtype=types.list + [subtype for field in fields.list for subtype in
                            field.subtypes],
      addressable_unit=addressable_unit)


@_handles('struct-field-block ->')
@_handles('bits-field-block ->')
@_handles('anonymous-bits-field-block ->')
def _empty_field_block():
  return _List([])


@_handles('struct-field-block ->'
          '    conditional-struct-field-block struct-field-block')
@_handles('bits-field-block ->'
          '    conditional-bits-field-block bits-field-block')
@_handles('anonymous-bits-field-block -> conditional-anonymous-bits-field-block'
          '                              anonymous-bits-field-block')
def _conditional_block_plus_field_block(conditional_block, block):
  return _List(conditional_block.list + block.list)


@_handles('struct-field-block ->'
          '    unconditional-struct-field struct-field-block')
@_handles('bits-field-block ->'
          '    unconditional-bits-field bits-field-block')
@_handles('anonymous-bits-field-block ->'
          '    unconditional-anonymous-bits-field anonymous-bits-field-block')
def _unconditional_block_plus_field_block(field, block):
  """Prepends an unconditional field to block."""
  ir_data_utils.builder(field.field).existence_condition.source_location.CopyFrom(
      field.source_location)
  ir_data_utils.builder(field.field).existence_condition.boolean_constant.source_location.CopyFrom(
      field.source_location)
  ir_data_utils.builder(field.field).existence_condition.boolean_constant.value = True
  return _List([field] + block.list)


# Struct "fields" are regular fields; inline enums, bits, or structs; anonymous
# inline bits; or virtual fields.
@_handles('unconditional-struct-field -> field')
@_handles('unconditional-struct-field -> inline-enum-field-definition')
@_handles('unconditional-struct-field -> inline-bits-field-definition')
@_handles('unconditional-struct-field -> inline-struct-field-definition')
@_handles('unconditional-struct-field -> anonymous-bits-field-definition')
@_handles('unconditional-struct-field -> virtual-field')
# Bits fields are "regular" fields, inline enums or bits, or virtual fields.
#
# Inline structs and anonymous inline bits are not allowed inside of bits:
# anonymous inline bits are pointless, and inline structs do not make sense,
# since a struct cannot be a part of a bits.
#
# Anonymous inline bits may not include virtual fields; instead, the virtual
# field should be a direct part of the enclosing structure.
@_handles('unconditional-anonymous-bits-field -> field')
@_handles('unconditional-anonymous-bits-field -> inline-enum-field-definition')
@_handles('unconditional-anonymous-bits-field -> inline-bits-field-definition')
@_handles('unconditional-bits-field -> unconditional-anonymous-bits-field')
@_handles('unconditional-bits-field -> virtual-field')
def _unconditional_field(field):
  """Handles the unifying grammar production for a struct or bits field."""
  return field


# TODO(bolms): Add 'elif' and 'else' support.
# TODO(bolms): Should nested 'if' blocks be allowed?
@_handles('conditional-struct-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-struct-field+ Dedent')
@_handles('conditional-bits-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-bits-field+ Dedent')
@_handles('conditional-anonymous-bits-field-block ->'
          '    "if" expression ":" Comment? eol'
          '        Indent unconditional-anonymous-bits-field+ Dedent')
def _conditional_field_block(if_keyword, expression, colon, comment, newline,
                             indent, fields, dedent):
  """Applies an existence_condition to each element of fields."""
  del if_keyword, newline, colon, comment, indent, dedent  # Unused.
  for field in fields.list:
    condition = ir_data_utils.builder(field.field).existence_condition
    condition.CopyFrom(expression)
    condition.source_location.is_disjoint_from_parent = True
  return fields


# The body of a bit field definition: basically, the part after the first line.
@_handles('bits-body -> Indent doc-line* attribute-line*'
          '             type-definition* bits-field-block Dedent')
def _bits_body(indent, docs, attributes, types, fields, dedent):
  del indent, dedent  # Unused.
  return _structure_body(docs, attributes, types, fields,
                         ir_data.AddressableUnit.BIT)


# Inline bits (defined as part of a field) are more restricted than standalone
# bits.
@_handles('anonymous-bits-body ->'
          '    Indent attribute-line* anonymous-bits-field-block Dedent')
def _anonymous_bits_body(indent, attributes, fields, dedent):
  del indent, dedent  # Unused.
  return _structure_body(_List([]), attributes, _List([]), fields,
                         ir_data.AddressableUnit.BIT)


# A field is:
#     range  type  name  (abbr)  [attr: value] [attr2: value] -- doc
#         -- doc
#         -- doc
#         [attr3: value]
#         [attr4: value]
@_handles('field ->'
          '    field-location type snake-name abbreviation? attribute* doc?'
          '    Comment? eol field-body?')
def _field(location, field_type, name, abbreviation, attributes, doc, comment,
           newline, field_body):
  """Constructs an ir_data.Field from the given components."""
  del comment  # Unused
  field_ir = ir_data.Field(location=location,
                       type=field_type,
                       name=name,
                       attribute=attributes.list,
                       documentation=doc.list)
  field = ir_data_utils.builder(field_ir)
  if field_body.list:
    field.attribute.extend(field_body.list[0].attribute)
    field.documentation.extend(field_body.list[0].documentation)
  if abbreviation.list:
    field.abbreviation.CopyFrom(abbreviation.list[0])
  field.source_location.start.CopyFrom(location.source_location.start)
  if field_body.source_location.HasField('end'):
    field.source_location.end.CopyFrom(field_body.source_location.end)
  else:
    field.source_location.end.CopyFrom(newline.source_location.end)
  return _FieldWithType(field=field_ir)


# A "virtual field" is:
#     let name = value
#         -- doc
#         -- doc
#         [attr1: value]
#         [attr2: value]
@_handles('virtual-field ->'
          '    "let" snake-name "=" expression Comment? eol field-body?')
def _virtual_field(let, name, equals, value, comment, newline, field_body):
  """Constructs an ir_data.Field from the given components."""
  del equals, comment  # Unused
  field_ir = ir_data.Field(read_transform=value, name=name)
  field = ir_data_utils.builder(field_ir)
  if field_body.list:
    field.attribute.extend(field_body.list[0].attribute)
    field.documentation.extend(field_body.list[0].documentation)
  field.source_location.start.CopyFrom(let.source_location.start)
  if field_body.source_location.HasField('end'):
    field.source_location.end.CopyFrom(field_body.source_location.end)
  else:
    field.source_location.end.CopyFrom(newline.source_location.end)
  return _FieldWithType(field=field_ir)


# An inline enum is:
#     range  "enum"  name  (abbr):
#         -- doc
#         -- doc
#         [attr3: value]
#         [attr4: value]
#         NAME = 10
#         NAME2 = 20
@_handles('inline-enum-field-definition ->'
          '    field-location "enum" snake-name abbreviation? ":" Comment? eol'
          '    enum-body')
def _inline_enum_field(location, enum, name, abbreviation, colon, comment,
                       newline, enum_body):
  """Constructs an ir_data.Field for an inline enum field."""
  del enum, colon, comment, newline  # Unused.
  return _inline_type_field(location, name, abbreviation, enum_body)


@_handles(
    'inline-struct-field-definition ->'
    '    field-location "struct" snake-name abbreviation? ":" Comment? eol'
    '    struct-body')
def _inline_struct_field(location, struct, name, abbreviation, colon, comment,
                         newline, struct_body):
  del struct, colon, comment, newline  # Unused.
  return _inline_type_field(location, name, abbreviation, struct_body)


@_handles('inline-bits-field-definition ->'
          '    field-location "bits" snake-name abbreviation? ":" Comment? eol'
          '    bits-body')
def _inline_bits_field(location, bits, name, abbreviation, colon, comment,
                       newline, bits_body):
  del bits, colon, comment, newline  # Unused.
  return _inline_type_field(location, name, abbreviation, bits_body)


def _inline_type_field(location, name, abbreviation, body):
1058  """Shared implementation of _inline_enum_field and _anonymous_bit_field."""
  field_ir = ir_data.Field(location=location,
                       name=name,
                       attribute=body.attribute,
                       documentation=body.documentation)
  field = ir_data_utils.builder(field_ir)
  # All attributes should be attached to the field, not the type definition: if
  # the user wants to use type attributes, they should create a separate type
  # definition and reference it.
  del body.attribute[:]
  type_name = ir_data_utils.copy(name)
  ir_data_utils.builder(type_name).name.text = name_conversion.snake_to_camel(type_name.name.text)
  field.type.atomic_type.reference.source_name.extend([type_name.name])
  field.type.atomic_type.reference.source_location.CopyFrom(
      type_name.source_location)
  field.type.atomic_type.reference.is_local_name = True
  field.type.atomic_type.source_location.CopyFrom(type_name.source_location)
  field.type.source_location.CopyFrom(type_name.source_location)
  if abbreviation.list:
    field.abbreviation.CopyFrom(abbreviation.list[0])
  field.source_location.start.CopyFrom(location.source_location.start)
  ir_data_utils.builder(body.source_location).start.CopyFrom(location.source_location.start)
  if body.HasField('enumeration'):
    ir_data_utils.builder(body.enumeration).source_location.CopyFrom(body.source_location)
  else:
    assert body.HasField('structure')
    ir_data_utils.builder(body.structure).source_location.CopyFrom(body.source_location)
  ir_data_utils.builder(body).name.CopyFrom(type_name)
  field.source_location.end.CopyFrom(body.source_location.end)
  subtypes = [body] + list(body.subtype)
  del body.subtype[:]
  return _FieldWithType(field=field_ir, subtypes=subtypes)


@_handles('anonymous-bits-field-definition ->'
          '    field-location "bits" ":" Comment? eol anonymous-bits-body')
def _anonymous_bit_field(location, bits_keyword, colon, comment, newline,
                         bits_body):
  """Constructs an ir_data.Field for an anonymous bit field."""
  del colon, comment, newline  # Unused.
  name = ir_data.NameDefinition(
      name=ir_data.Word(
          text=_get_anonymous_field_name(),
          source_location=bits_keyword.source_location),
      source_location=bits_keyword.source_location,
      is_anonymous=True)
  return _inline_type_field(location, name, _List([]), bits_body)


@_handles('field-body -> Indent doc-line* attribute-line* Dedent')
def _field_body(indent, docs, attributes, dedent):
  del indent, dedent  # Unused.
  return ir_data.Field(documentation=docs.list, attribute=attributes.list)


# A parenthetically-denoted abbreviation.
@_handles('abbreviation -> "(" snake-word ")"')
def _abbreviation(open_paren, word, close_paren):
  del open_paren, close_paren  # Unused.
  return word


# enum EnumName:
#   ... values ...
@_handles('enum -> "enum" type-name ":" Comment? eol enum-body')
def _enum(enum, name, colon, comment, newline, enum_body):
  del colon, comment, newline  # Unused.
  ir_data_utils.builder(enum_body.enumeration).source_location.start.CopyFrom(
      enum.source_location.start)
  ir_data_utils.builder(enum_body.enumeration).source_location.end.CopyFrom(
      enum_body.source_location.end)
  ir_data_utils.builder(enum_body).name.CopyFrom(name)
  return enum_body


# [enum Foo:]
#   name = value
#   name = value
@_handles('enum-body -> Indent doc-line* attribute-line* enum-value+ Dedent')
def _enum_body(indent, docs, attributes, values, dedent):
  del indent, dedent  # Unused.
  return ir_data.TypeDefinition(
      enumeration=ir_data.Enum(value=values.list),
      documentation=docs.list,
      attribute=attributes.list,
      addressable_unit=ir_data.AddressableUnit.BIT)


# name = value
@_handles('enum-value -> '
          '    constant-name "=" expression attribute* doc? Comment? eol enum-value-body?')
def _enum_value(name, equals, expression, attribute, documentation, comment, newline,
                body):
  del equals, comment, newline  # Unused.
  result = ir_data.EnumValue(name=name,
                            value=expression,
                            documentation=documentation.list,
                            attribute=attribute.list)
  if body.list:
    result.documentation.extend(body.list[0].documentation)
    result.attribute.extend(body.list[0].attribute)
  return result


@_handles('enum-value-body -> Indent doc-line* attribute-line* Dedent')
def _enum_value_body(indent, docs, attributes, dedent):
  del indent, dedent  # Unused.
  return ir_data.EnumValue(documentation=docs.list, attribute=attributes.list)


# An external is just a declaration that a type exists and has certain
# attributes.
@_handles('external -> "external" type-name ":" Comment? eol external-body')
def _external(external, name, colon, comment, newline, external_body):
  del colon, comment, newline  # Unused.
  ir_data_utils.builder(external_body.source_location).start.CopyFrom(external.source_location.start)
  if external_body.name:
    ir_data_utils.update(external_body.name, name)
  else:
    external_body.name = ir_data_utils.copy(name)
  return external_body


# This syntax implicitly requires either a documentation line or an attribute
# line, or it won't parse (because no Indent/Dedent tokens will be emitted).
@_handles('external-body -> Indent doc-line* attribute-line* Dedent')
def _external_body(indent, docs, attributes, dedent):
  return ir_data.TypeDefinition(
      external=ir_data.External(
          # Set source_location here, since it won't be set automatically.
          source_location=ir_data.Location(start=indent.source_location.start,
                                          end=dedent.source_location.end)),
      documentation=docs.list,
      attribute=attributes.list)


@_handles('field-location -> expression "[" "+" expression "]"')
def _field_location(start, open_bracket, plus, size, close_bracket):
  del open_bracket, plus, close_bracket  # Unused.
  return ir_data.FieldLocation(start=start, size=size)


@_handles('delimited-argument-list -> "(" argument-list ")"')
def _type_argument_list(open_paren, arguments, close_paren):
  del open_paren, close_paren  # Unused
  return arguments


# A type is "TypeName" or "TypeName[length]" or "TypeName[length][length]", etc.
# An array type may have an empty length ("Type[]").  This is only valid for the
# outermost length (the last set of brackets), but that must be checked
# elsewhere.
@_handles('type -> type-reference delimited-argument-list? type-size-specifier?'
          '        array-length-specifier*')
def _type(reference, parameters, size, array_spec):
  """Builds the IR for a type specifier."""
  base_type_source_location_end = reference.source_location.end
  atomic_type_source_location_end = reference.source_location.end
  if parameters.list:
    base_type_source_location_end = parameters.source_location.end
    atomic_type_source_location_end = parameters.source_location.end
  if size.list:
    base_type_source_location_end = size.source_location.end
  base_type_location = parser_types.make_location(
      reference.source_location.start,
      base_type_source_location_end)
  atomic_type_location = parser_types.make_location(
      reference.source_location.start,
      atomic_type_source_location_end)
  t = ir_data.Type(
      atomic_type=ir_data.AtomicType(
          reference=ir_data_utils.copy(reference),
          source_location=atomic_type_location,
          runtime_parameter=parameters.list[0].list if parameters.list else []),
      size_in_bits=size.list[0] if size.list else None,
      source_location=base_type_location)
  for length in array_spec.list:
    location = parser_types.make_location(
        t.source_location.start, length.source_location.end)
    if isinstance(length, ir_data.Expression):
      t = ir_data.Type(
          array_type=ir_data.ArrayType(base_type=t,
                                      element_count=length,
                                      source_location=location),
          source_location=location)
    elif isinstance(length, ir_data.Empty):
      t = ir_data.Type(
          array_type=ir_data.ArrayType(base_type=t,
                                      automatic=length,
                                      source_location=location),
          source_location=location)
    else:
      assert False, "Shouldn't be here."
  return t

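# A sketch of the nesting produced above: a type like 'UInt:8[4][]' is built
# inside-out, one Type per array-length-specifier:
#
#     Type(atomic_type=..., size_in_bits=8)                    # UInt:8
#     Type(array_type=ArrayType(base_type=^, element_count=4)) # UInt:8[4]
#     Type(array_type=ArrayType(base_type=^, automatic=Empty)) # UInt:8[4][]
#
# where '^' stands for the Type built on the previous line.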

# TODO(bolms): Should symbolic names or expressions be allowed?  E.g.,
# UInt:FIELD_SIZE or UInt:(16 + 16)?
@_handles('type-size-specifier -> ":" numeric-constant')
def _type_size_specifier(colon, numeric_constant):
1258  """handles the ":32" part of a type specifier like "UInt:32"."""
  del colon
  return ir_data.Expression(constant=numeric_constant)


# The distinctions between different formats of NameDefinitions, Words, and
# References are enforced during parsing, but not propagated to the IR.
@_handles('type-name -> type-word')
@_handles('snake-name -> snake-word')
@_handles('constant-name -> constant-word')
def _name(word):
  return ir_data.NameDefinition(name=word)


@_handles('type-word -> CamelWord')
@_handles('snake-word -> SnakeWord')
@_handles('builtin-field-word -> "$size_in_bits"')
@_handles('builtin-field-word -> "$size_in_bytes"')
@_handles('builtin-field-word -> "$max_size_in_bits"')
@_handles('builtin-field-word -> "$max_size_in_bytes"')
@_handles('builtin-field-word -> "$min_size_in_bits"')
@_handles('builtin-field-word -> "$min_size_in_bytes"')
@_handles('builtin-word -> "$is_statically_sized"')
@_handles('builtin-word -> "$static_size_in_bits"')
@_handles('builtin-word -> "$next"')
@_handles('constant-word -> ShoutyWord')
@_handles('and-operator -> "&&"')
@_handles('or-operator -> "||"')
@_handles('less-operator -> "<="')
@_handles('less-operator -> "<"')
@_handles('greater-operator -> ">="')
@_handles('greater-operator -> ">"')
@_handles('equality-operator -> "=="')
@_handles('inequality-operator -> "!="')
@_handles('additive-operator -> "+"')
@_handles('additive-operator -> "-"')
@_handles('multiplicative-operator -> "*"')
@_handles('function-name -> "$max"')
@_handles('function-name -> "$present"')
@_handles('function-name -> "$upper_bound"')
@_handles('function-name -> "$lower_bound"')
def _word(word):
  return ir_data.Word(text=word.text)


@_handles('type-reference -> type-reference-tail')
@_handles('constant-reference -> constant-reference-tail')
def _un_module_qualified_type_reference(reference):
  return reference


@_handles('constant-reference-tail -> constant-word')
@_handles('type-reference-tail -> type-word')
@_handles('snake-reference -> snake-word')
@_handles('snake-reference -> builtin-field-word')
def _reference(word):
  return ir_data.Reference(source_name=[word])


@_handles('builtin-reference -> builtin-word')
def _builtin_reference(word):
  return ir_data.Reference(source_name=[word],
                          canonical_name=ir_data.CanonicalName(
                              object_path=[word.text]))


# Because constant-references ("Enum.NAME") are used in the same contexts as
# field-references ("field.subfield"), module-qualified constant references
# ("module.Enum.VALUE") have to take snake-reference, not snake-word, on the
# left side of the dot.  Otherwise, when a "snake_word" is followed by a "." in
# an expression context, the LR(1) parser cannot determine whether to reduce the
# snake-word to snake-reference (to eventually become field-reference), or to
# shift the dot onto the stack (to eventually become constant-reference).  By
# using snake-reference as the head of both, the parser can always reduce, then
# shift the dot, then determine whether to proceed with constant-reference if it
# sees "snake_name.TypeName" or field-reference if it sees
# "snake_name.snake_name".
@_handles('constant-reference -> snake-reference "." constant-reference-tail')
def _module_qualified_constant_reference(new_head, dot, reference):
  del dot  # Unused.
  new_source_name = list(new_head.source_name) + list(reference.source_name)
  del reference.source_name[:]
  reference.source_name.extend(new_source_name)
  return reference


@_handles('constant-reference-tail -> type-word "." constant-reference-tail')
# module.Type.SubType.name is a reference to something that *must* be a
# constant.
@_handles('constant-reference-tail -> type-word "." snake-reference')
@_handles('type-reference-tail -> type-word "." type-reference-tail')
@_handles('type-reference -> snake-word "." type-reference-tail')
def _qualified_reference(word, dot, reference):
  """Adds a name. or Type. qualification to the head of a reference."""
  del dot  # Unused.
  new_source_name = [word] + list(reference.source_name)
  del reference.source_name[:]
  reference.source_name.extend(new_source_name)
  return reference


# Arrays are properly translated to IR in _type().
@_handles('array-length-specifier -> "[" expression "]"')
def _array_length_specifier(open_bracket, length, close_bracket):
  del open_bracket, close_bracket  # Unused.
  return length


# An array specifier can end with empty brackets ("arr[3][]"), in which case the
# array's size is inferred from the size of its enclosing field.
@_handles('array-length-specifier -> "[" "]"')
def _auto_array_length_specifier(open_bracket, close_bracket):
  # Note that the Empty's source_location is the space between the brackets (if
  # any).
  return ir_data.Empty(
      source_location=ir_data.Location(start=open_bracket.source_location.end,
                                      end=close_bracket.source_location.start))


@_handles('eol -> "\\n" comment-line*')
def _eol(eol, comments):
  del comments  # Unused
  return eol


@_handles('comment-line -> Comment? "\\n"')
def _comment_line(comment, eol):
  del comment  # Unused
  return eol


def _finalize_grammar():
1390  """_Finalize adds productions for foo*, foo+, and foo? symbols."""
  star_symbols = set()
  plus_symbols = set()
  option_symbols = set()
  for production in _handlers:
    for symbol in production.rhs:
      if symbol[-1] == '*':
        star_symbols.add(symbol[:-1])
      elif symbol[-1] == '+':
        # symbol+ relies on the rule for symbol*
        star_symbols.add(symbol[:-1])
        plus_symbols.add(symbol[:-1])
      elif symbol[-1] == '?':
        option_symbols.add(symbol[:-1])
  for symbol in star_symbols:
    _handles('{s}* -> {s} {s}*'.format(s=symbol))(
        lambda e, r: _List([e] + r.list))
    _handles('{s}* ->'.format(s=symbol))(lambda: _List([]))
  for symbol in plus_symbols:
    _handles('{s}+ -> {s} {s}*'.format(s=symbol))(
        lambda e, r: _List([e] + r.list))
  for symbol in option_symbols:
    _handles('{s}? -> {s}'.format(s=symbol))(lambda e: _List([e]))
    _handles('{s}? ->'.format(s=symbol))(lambda: _List([]))

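# As an example of what _finalize_grammar generates: because 'doc-line*',
# 'enum-value+', and 'Comment?' appear on the right-hand sides of productions
# above, roughly the following productions (with list-building handlers) are
# added:
#
#     doc-line* -> doc-line doc-line*
#     doc-line* ->
#     enum-value+ -> enum-value enum-value*
#     Comment? -> Comment
#     Comment? ->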

_finalize_grammar()

# End of grammar.
################################################################################

# These export the grammar used by module_ir so that parser_generator can build
# a parser for the same language.
START_SYMBOL = 'module'
EXPRESSION_START_SYMBOL = 'expression'
PRODUCTIONS = list(_handlers.keys())
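
# A rough sketch of how these exports are meant to be used together with
# build_ir() (the lr1 and tokenizer call shapes below are assumptions, not the
# actual interfaces; see those modules for the real signatures):
#
#     grammar = lr1.Grammar(START_SYMBOL, PRODUCTIONS)
#     parser = grammar.parser()                      # hypothetical helper
#     tokens = tokenizer.tokenize(source_text)       # hypothetical signature
#     parse_tree = parser.parse(tokens).parse_tree   # hypothetical result shape
#     module_ir = build_ir(parse_tree)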
1426