xref: /aosp_15_r20/external/emboss/compiler/front_end/symbol_resolver.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Symbol resolver for Emboss IR.
16
17The resolve_symbols function should be used to generate canonical resolutions
18for all symbol references in an Emboss IR.
19"""
20
21import collections
22
23from compiler.util import error
24from compiler.util import ir_data
25from compiler.util import ir_data_utils
26from compiler.util import ir_util
27from compiler.util import traverse_ir
28
29# TODO(bolms): Symbol resolution raises an exception at the first error, but
30# this is one place where it can make sense to report multiple errors.
31
32FileLocation = collections.namedtuple("FileLocation", ["file", "location"])
33
34
35def ambiguous_name_error(file_name, location, name, candidate_locations):
36  """A name cannot be resolved because there are two or more candidates."""
37  result = [error.error(file_name, location, "Ambiguous name '{}'".format(name))
38           ]
39  for location in sorted(candidate_locations):
40    result.append(error.note(location.file, location.location,
41                             "Possible resolution"))
42  return result
43
44
45def duplicate_name_error(file_name, location, name, original_location):
46  """A name is defined two or more times."""
47  return [error.error(file_name, location, "Duplicate name '{}'".format(name)),
48          error.note(original_location.file, original_location.location,
49                     "Original definition")]
50
51
52def missing_name_error(file_name, location, name):
53  return [error.error(file_name, location, "No candidate for '{}'".format(name))
54         ]
55
56
57def array_subfield_error(file_name, location, name):
58  return [error.error(file_name, location,
59                      "Cannot access member of array '{}'".format(name))]
60
61
62def noncomposite_subfield_error(file_name, location, name):
63  return [error.error(file_name, location,
64                      "Cannot access member of noncomposite field '{}'".format(
65                          name))]
66
67
68def _nested_name(canonical_name, name):
69  """Creates a new CanonicalName with name appended to the object_path."""
70  return ir_data.CanonicalName(
71      module_file=canonical_name.module_file,
72      object_path=list(canonical_name.object_path) + [name])
73
74
75class _Scope(dict):
76  """A _Scope holds data for a symbol.
77
78  A _Scope is a dict with some additional attributes.  Lexically nested names
79  are kept in the dict, and bookkeeping is kept in the additional attributes.
80
81  For example, each module should have a child _Scope for each type contained in
82  the module.  `struct` and `bits` types should have nested _Scopes for each
83  field; `enum` types should have nested scopes for each enumerated name.
84
85  Attributes:
86    canonical_name: The absolute name of this symbol; e.g. ("file.emb",
87      "TypeName", "SubTypeName", "field_name")
88    source_location: The ir_data.SourceLocation where this symbol is defined.
89    visibility: LOCAL, PRIVATE, or SEARCHABLE; see below.
90    alias: If set, this name is merely a pointer to another name.
91  """
92  __slots__ = ("canonical_name", "source_location", "visibility", "alias")
93
94  # A LOCAL name is visible outside of its enclosing scope, but should not be
95  # found when searching for a name.  That is, this name should be matched in
96  # the tail of a qualified reference (the 'bar' in 'foo.bar'), but not when
97  # searching for names (the 'foo' in 'foo.bar' should not match outside of
98  # 'foo's scope).  This applies to public field names.
99  LOCAL = object()
100
101  # A PRIVATE name is similar to LOCAL except that it is never visible outside
102  # its enclosing scope.  This applies to abbreviations of field names: if 'a'
103  # is an abbreviation for field 'apple', then 'foo.a' is not a valid reference;
104  # instead it should be 'foo.apple'.
105  PRIVATE = object()
106
107  # A SEARCHABLE name is visible as long as it is in a scope in the search list.
108  # This applies to type names ('Foo'), which may be found from many scopes.
109  SEARCHABLE = object()
110
111  def __init__(self, canonical_name, source_location, visibility, alias=None):
112    super(_Scope, self).__init__()
113    self.canonical_name = canonical_name
114    self.source_location = source_location
115    self.visibility = visibility
116    self.alias = alias
117
118
119def _add_name_to_scope(name_ir, scope, canonical_name, visibility, errors):
120  """Adds the given name_ir to the given scope."""
121  name = name_ir.text
122  new_scope = _Scope(canonical_name, name_ir.source_location, visibility)
123  if name in scope:
124    errors.append(duplicate_name_error(
125        scope.canonical_name.module_file, name_ir.source_location, name,
126        FileLocation(scope[name].canonical_name.module_file,
127                     scope[name].source_location)))
128  else:
129    scope[name] = new_scope
130  return new_scope
131
132
133def _add_name_to_scope_and_normalize(name_ir, scope, visibility, errors):
134  """Adds the given name_ir to scope and sets its canonical_name."""
135  name = name_ir.name.text
136  canonical_name = _nested_name(scope.canonical_name, name)
137  ir_data_utils.builder(name_ir).canonical_name.CopyFrom(canonical_name)
138  return _add_name_to_scope(name_ir.name, scope, canonical_name, visibility,
139                            errors)
140
141
142def _add_struct_field_to_scope(field, scope, errors):
143  """Adds the name of the given field to the scope."""
144  new_scope = _add_name_to_scope_and_normalize(field.name, scope, _Scope.LOCAL,
145                                               errors)
146  if field.HasField("abbreviation"):
147    _add_name_to_scope(field.abbreviation, scope, new_scope.canonical_name,
148                       _Scope.PRIVATE, errors)
149
150  value_builtin_name = ir_data.Word(
151      text="this",
152      source_location=ir_data.Location(is_synthetic=True),
153  )
154  # In "inside field" scope, the name `this` maps back to the field itself.
155  # This is important for attributes like `[requires]`.
156  _add_name_to_scope(value_builtin_name, new_scope,
157                     field.name.canonical_name, _Scope.PRIVATE, errors)
158
159
160def _add_parameter_name_to_scope(parameter, scope, errors):
161  """Adds the name of the given parameter to the scope."""
162  _add_name_to_scope_and_normalize(parameter.name, scope, _Scope.LOCAL, errors)
163
164
165def _add_enum_value_to_scope(value, scope, errors):
166  """Adds the name of the enum value to scope."""
167  _add_name_to_scope_and_normalize(value.name, scope, _Scope.LOCAL, errors)
168
169
170def _add_type_name_to_scope(type_definition, scope, errors):
171  """Adds the name of type_definition to the given scope."""
172  new_scope = _add_name_to_scope_and_normalize(type_definition.name, scope,
173                                               _Scope.SEARCHABLE, errors)
174  return {"scope": new_scope}
175
176
177def _set_scope_for_type_definition(type_definition, scope):
178  """Sets the current scope for an ir_data.AddressableUnit."""
179  return {"scope": scope[type_definition.name.name.text]}
180
181
182def _add_module_to_scope(module, scope):
183  """Adds the name of the module to the given scope."""
184  module_symbol_table = _Scope(
185      ir_data.CanonicalName(module_file=module.source_file_name,
186                           object_path=[]),
187      None,
188      _Scope.SEARCHABLE)
189  scope[module.source_file_name] = module_symbol_table
190  return {"scope": scope[module.source_file_name]}
191
192
193def _set_scope_for_module(module, scope):
194  """Adds the name of the module to the given scope."""
195  return {"scope": scope[module.source_file_name]}
196
197
198def _add_import_to_scope(foreign_import, table, module, errors):
199  if not foreign_import.local_name.text:
200    # This is the prelude import; ignore it.
201    return
202  _add_alias_to_scope(foreign_import.local_name, table, module.canonical_name,
203                      [foreign_import.file_name.text], _Scope.SEARCHABLE,
204                      errors)
205
206
207def _construct_symbol_tables(ir):
208  """Constructs per-module symbol tables for each module in ir."""
209  symbol_tables = {}
210  errors = []
211  traverse_ir.fast_traverse_ir_top_down(
212      ir, [ir_data.Module], _add_module_to_scope,
213      parameters={"errors": errors, "scope": symbol_tables})
214  traverse_ir.fast_traverse_ir_top_down(
215      ir, [ir_data.TypeDefinition], _add_type_name_to_scope,
216      incidental_actions={ir_data.Module: _set_scope_for_module},
217      parameters={"errors": errors, "scope": symbol_tables})
218  if errors:
219    # Ideally, we would find duplicate field names elsewhere in the module, even
220    # if there are duplicate type names, but field/enum names in the colliding
221    # types also end up colliding, leading to spurious errors.  E.g., if you
222    # have two `struct Foo`s, then the field check will also discover a
223    # collision for `$size_in_bytes`, since there are two `Foo.$size_in_bytes`.
224    return symbol_tables, errors
225
226  traverse_ir.fast_traverse_ir_top_down(
227      ir, [ir_data.EnumValue], _add_enum_value_to_scope,
228      incidental_actions={
229          ir_data.Module: _set_scope_for_module,
230          ir_data.TypeDefinition: _set_scope_for_type_definition,
231      },
232      parameters={"errors": errors, "scope": symbol_tables})
233  traverse_ir.fast_traverse_ir_top_down(
234      ir, [ir_data.Field], _add_struct_field_to_scope,
235      incidental_actions={
236          ir_data.Module: _set_scope_for_module,
237          ir_data.TypeDefinition: _set_scope_for_type_definition,
238      },
239      parameters={"errors": errors, "scope": symbol_tables})
240  traverse_ir.fast_traverse_ir_top_down(
241      ir, [ir_data.RuntimeParameter], _add_parameter_name_to_scope,
242      incidental_actions={
243          ir_data.Module: _set_scope_for_module,
244          ir_data.TypeDefinition: _set_scope_for_type_definition,
245      },
246      parameters={"errors": errors, "scope": symbol_tables})
247  return symbol_tables, errors
248
249
250def _add_alias_to_scope(name_ir, table, scope, alias, visibility, errors):
251  """Adds the given name to the scope as an alias."""
252  name = name_ir.text
253  new_scope = _Scope(_nested_name(scope, name), name_ir.source_location,
254                     visibility, alias)
255  scoped_table = table[scope.module_file]
256  for path_element in scope.object_path:
257    scoped_table = scoped_table[path_element]
258  if name in scoped_table:
259    errors.append(duplicate_name_error(
260        scoped_table.canonical_name.module_file, name_ir.source_location, name,
261        FileLocation(scoped_table[name].canonical_name.module_file,
262                     scoped_table[name].source_location)))
263  else:
264    scoped_table[name] = new_scope
265  return new_scope
266
267
268def _resolve_head_of_field_reference(field_reference, table, current_scope,
269                                     visible_scopes, source_file_name, errors):
270  return _resolve_reference(
271      field_reference.path[0], table, current_scope,
272      visible_scopes, source_file_name, errors)
273
274
275def _resolve_reference(reference, table, current_scope, visible_scopes,
276                       source_file_name, errors):
277  """Sets the canonical name of the given reference."""
278  if reference.HasField("canonical_name"):
279    # This reference has already been resolved by the _resolve_field_reference
280    # pass.
281    return
282  target = _find_target_of_reference(reference, table, current_scope,
283                                     visible_scopes, source_file_name, errors)
284  if target is not None:
285    assert not target.alias
286    ir_data_utils.builder(reference).canonical_name.CopyFrom(target.canonical_name)
287
288
289def _find_target_of_reference(reference, table, current_scope, visible_scopes,
290                              source_file_name, errors):
291  """Returns the resolved name of the given reference."""
292  found_in_table = None
293  name = reference.source_name[0].text
294  for scope in visible_scopes:
295    scoped_table = table[scope.module_file]
296    for path_element in scope.object_path or []:
297      scoped_table = scoped_table[path_element]
298    if (name in scoped_table and
299        (scope == current_scope or
300         scoped_table[name].visibility == _Scope.SEARCHABLE)):
301      # Prelude is "", so explicitly check for None.
302      if found_in_table is not None:
303        # TODO(bolms): Currently, this catches the case where a module tries to
304        # use a name that is defined (at the same scope) in two different
305        # modules.  It may make sense to raise duplicate_name_error whenever two
306        # modules define the same name (whether it is used or not), and reserve
307        # ambiguous_name_error for cases where a name is found in multiple
308        # scopes.
309        errors.append(ambiguous_name_error(
310            source_file_name, reference.source_location, name, [FileLocation(
311                found_in_table[name].canonical_name.module_file,
312                found_in_table[name].source_location), FileLocation(
313                    scoped_table[name].canonical_name.module_file, scoped_table[
314                        name].source_location)]))
315        continue
316      found_in_table = scoped_table
317      if reference.is_local_name:
318        # This is a little hacky.  When "is_local_name" is True, the name refers
319        # to a type that was defined inline.  In many cases, the type should be
320        # found at the same scope as the field; e.g.:
321        #
322        #     struct Foo:
323        #       0 [+1]  enum  bar:
324        #         BAZ = 1
325        #
326        # In this case, `Foo.bar` has type `Foo.Bar`.  Unfortunately, things
327        # break down a little bit when there is an inline type in an anonymous
328        # `bits`:
329        #
330        #     struct Foo:
331        #       0 [+1]  bits:
332        #         0 [+7]  enum  bar:
333        #           BAZ = 1
334        #
335        # Types inside of anonymous `bits` are hoisted into their parent type,
336        # so instead of `Foo.EmbossReservedAnonymous1.Bar`, `bar`'s type is just
337        # `Foo.Bar`.  Unfortunately, the field is still
338        # `Foo.EmbossReservedAnonymous1.bar`, so `bar`'s type won't be found in
339        # `bar`'s `current_scope`.
340        #
341        # (The name `bar` is exposed from `Foo` as an alias virtual field, so
342        # perhaps the correct answer is to allow type aliases, so that `Bar` can
343        # be found in both `Foo` and `Foo.EmbossReservedAnonymous1`.  That would
344        # involve an entirely new feature, though.)
345        #
346        # The workaround here is to search scopes from the innermost outward,
347        # and just stop as soon as a match is found.  This isn't ideal, because
348        # it relies on other bits of the front end having correctly added the
349        # inline type to the correct scope before symbol resolution, but it does
350        # work.  Names with False `is_local_name` will still be checked for
351        # ambiguity.
352        break
353  if found_in_table is None:
354    errors.append(missing_name_error(
355        source_file_name, reference.source_name[0].source_location, name))
356  if not errors:
357    for subname in reference.source_name:
358      if subname.text not in found_in_table:
359        errors.append(missing_name_error(source_file_name,
360                                         subname.source_location, subname.text))
361        return None
362      found_in_table = found_in_table[subname.text]
363      while found_in_table.alias:
364        referenced_table = table
365        for name in found_in_table.alias:
366          referenced_table = referenced_table[name]
367          # TODO(bolms): This section should really be a recursive lookup
368          # function, which would be able to handle arbitrary aliases through
369          # other aliases.
370          #
371          # This should be fine for now, since the only aliases here should be
372          # imports, which can't refer to other imports.
373          assert not referenced_table.alias, "Alias found to contain alias."
374        found_in_table = referenced_table
375    return found_in_table
376  return None
377
378
379def _resolve_field_reference(field_reference, source_file_name, errors, ir):
380  """Resolves the References inside of a FieldReference."""
381  if field_reference.path[-1].HasField("canonical_name"):
382    # Already done.
383    return
384  previous_field = ir_util.find_object_or_none(field_reference.path[0], ir)
385  previous_reference = field_reference.path[0]
386  for ref in field_reference.path[1:]:
387    while ir_util.field_is_virtual(previous_field):
388      if (previous_field.read_transform.WhichOneof("expression") ==
389          "field_reference"):
390        # Pass a separate error list into the recursive _resolve_field_reference
391        # call so that only one copy of the error for a particular reference
392        # will actually surface: in particular, the one that results from a
393        # direct call from traverse_ir_top_down into _resolve_field_reference.
394        new_errors = []
395        _resolve_field_reference(
396            previous_field.read_transform.field_reference,
397            previous_field.name.canonical_name.module_file, new_errors, ir)
398        # If the recursive _resolve_field_reference was unable to resolve the
399        # field, then bail.  Otherwise we get a cascade of errors, where an
400        # error in `x` leads to errors in anything trying to reach a member of
401        # `x`.
402        if not previous_field.read_transform.field_reference.path[-1].HasField(
403            "canonical_name"):
404          return
405        previous_field = ir_util.find_object(
406            previous_field.read_transform.field_reference.path[-1], ir)
407      else:
408        errors.append(
409            noncomposite_subfield_error(source_file_name,
410                                        previous_reference.source_location,
411                                        previous_reference.source_name[0].text))
412        return
413    if previous_field.type.WhichOneof("type") == "array_type":
414      errors.append(
415          array_subfield_error(source_file_name,
416                               previous_reference.source_location,
417                               previous_reference.source_name[0].text))
418      return
419    assert previous_field.type.WhichOneof("type") == "atomic_type"
420    member_name = ir_data_utils.copy(
421        previous_field.type.atomic_type.reference.canonical_name)
422    ir_data_utils.builder(member_name).object_path.extend([ref.source_name[0].text])
423    previous_field = ir_util.find_object_or_none(member_name, ir)
424    if previous_field is None:
425      errors.append(
426          missing_name_error(source_file_name,
427                             ref.source_name[0].source_location,
428                             ref.source_name[0].text))
429      return
430    ir_data_utils.builder(ref).canonical_name.CopyFrom(member_name)
431    previous_reference = ref
432
433
434def _set_visible_scopes_for_type_definition(type_definition, visible_scopes):
435  """Sets current_scope and visible_scopes for the given type_definition."""
436  return {
437      "current_scope": type_definition.name.canonical_name,
438
439      # In order to ensure that the iteration through scopes in
440      # _find_target_of_reference will go from innermost to outermost, it is
441      # important that the current scope (type_definition.name.canonical_name)
442      # precedes the previous visible_scopes here.
443      "visible_scopes": (type_definition.name.canonical_name,) + visible_scopes,
444  }
445
446
447def _set_visible_scopes_for_module(module):
448  """Sets visible_scopes for the given module."""
449  self_scope = ir_data.CanonicalName(module_file=module.source_file_name)
450  extra_visible_scopes = []
451  for foreign_import in module.foreign_import:
452    # Anonymous imports are searched for top-level names; named imports are not.
453    # As of right now, only the prelude should be imported anonymously; other
454    # modules must be imported with names.
455    if not foreign_import.local_name.text:
456      extra_visible_scopes.append(
457          ir_data.CanonicalName(module_file=foreign_import.file_name.text))
458  return {"visible_scopes": (self_scope,) + tuple(extra_visible_scopes)}
459
460
461def _set_visible_scopes_for_attribute(attribute, field, visible_scopes):
462  """Sets current_scope and visible_scopes for the attribute."""
463  del attribute  # Unused
464  if field is None:
465    return
466  return {
467      "current_scope": field.name.canonical_name,
468      "visible_scopes": (field.name.canonical_name,) + visible_scopes,
469  }
470
471def _module_source_from_table_action(m, table):
472  return {"module": table[m.source_file_name]}
473
474def _resolve_symbols_from_table(ir, table):
475  """Resolves all references in the given IR, given the constructed table."""
476  errors = []
477  # Symbol resolution is broken into five passes.  First, this code resolves any
478  # imports, and adds import aliases to modules.
479  traverse_ir.fast_traverse_ir_top_down(
480      ir, [ir_data.Import], _add_import_to_scope,
481      incidental_actions={
482          ir_data.Module: _module_source_from_table_action,
483      },
484      parameters={"errors": errors, "table": table})
485  if errors:
486    return errors
487  # Next, this resolves all absolute references (e.g., it resolves "UInt" in
488  # "0:1  UInt  field" to [prelude]::UInt).
489  traverse_ir.fast_traverse_ir_top_down(
490      ir, [ir_data.Reference], _resolve_reference,
491      skip_descendants_of=(ir_data.FieldReference,),
492      incidental_actions={
493          ir_data.TypeDefinition: _set_visible_scopes_for_type_definition,
494          ir_data.Module: _set_visible_scopes_for_module,
495          ir_data.Attribute: _set_visible_scopes_for_attribute,
496      },
497      parameters={"table": table, "errors": errors, "field": None})
498  # Lastly, head References to fields (e.g., the `a` of `a.b.c`) are resolved.
499  traverse_ir.fast_traverse_ir_top_down(
500      ir, [ir_data.FieldReference], _resolve_head_of_field_reference,
501      incidental_actions={
502          ir_data.TypeDefinition: _set_visible_scopes_for_type_definition,
503          ir_data.Module: _set_visible_scopes_for_module,
504          ir_data.Attribute: _set_visible_scopes_for_attribute,
505      },
506      parameters={"table": table, "errors": errors, "field": None})
507  return errors
508
509
510def resolve_field_references(ir):
511  """Resolves structure member accesses ("field.subfield") in ir."""
512  errors = []
513  traverse_ir.fast_traverse_ir_top_down(
514      ir, [ir_data.FieldReference], _resolve_field_reference,
515      incidental_actions={
516          ir_data.TypeDefinition: _set_visible_scopes_for_type_definition,
517          ir_data.Module: _set_visible_scopes_for_module,
518          ir_data.Attribute: _set_visible_scopes_for_attribute,
519      },
520      parameters={"errors": errors, "field": None})
521  return errors
522
523
524def resolve_symbols(ir):
525  """Resolves the symbols in all modules in ir."""
526  symbol_tables, errors = _construct_symbol_tables(ir)
527  if errors:
528    return errors
529  return _resolve_symbols_from_table(ir, symbol_tables)
530