1# Copyright 2019 Google LLC 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# https://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14 15"""Symbol resolver for Emboss IR. 16 17The resolve_symbols function should be used to generate canonical resolutions 18for all symbol references in an Emboss IR. 19""" 20 21import collections 22 23from compiler.util import error 24from compiler.util import ir_data 25from compiler.util import ir_data_utils 26from compiler.util import ir_util 27from compiler.util import traverse_ir 28 29# TODO(bolms): Symbol resolution raises an exception at the first error, but 30# this is one place where it can make sense to report multiple errors. 31 32FileLocation = collections.namedtuple("FileLocation", ["file", "location"]) 33 34 35def ambiguous_name_error(file_name, location, name, candidate_locations): 36 """A name cannot be resolved because there are two or more candidates.""" 37 result = [error.error(file_name, location, "Ambiguous name '{}'".format(name)) 38 ] 39 for location in sorted(candidate_locations): 40 result.append(error.note(location.file, location.location, 41 "Possible resolution")) 42 return result 43 44 45def duplicate_name_error(file_name, location, name, original_location): 46 """A name is defined two or more times.""" 47 return [error.error(file_name, location, "Duplicate name '{}'".format(name)), 48 error.note(original_location.file, original_location.location, 49 "Original definition")] 50 51 52def missing_name_error(file_name, location, name): 53 return [error.error(file_name, location, "No candidate for '{}'".format(name)) 54 ] 55 56 57def array_subfield_error(file_name, location, name): 58 return [error.error(file_name, location, 59 "Cannot access member of array '{}'".format(name))] 60 61 62def noncomposite_subfield_error(file_name, location, name): 63 return [error.error(file_name, location, 64 "Cannot access member of noncomposite field '{}'".format( 65 name))] 66 67 68def _nested_name(canonical_name, name): 69 """Creates a new CanonicalName with name appended to the object_path.""" 70 return ir_data.CanonicalName( 71 module_file=canonical_name.module_file, 72 object_path=list(canonical_name.object_path) + [name]) 73 74 75class _Scope(dict): 76 """A _Scope holds data for a symbol. 77 78 A _Scope is a dict with some additional attributes. Lexically nested names 79 are kept in the dict, and bookkeeping is kept in the additional attributes. 80 81 For example, each module should have a child _Scope for each type contained in 82 the module. `struct` and `bits` types should have nested _Scopes for each 83 field; `enum` types should have nested scopes for each enumerated name. 84 85 Attributes: 86 canonical_name: The absolute name of this symbol; e.g. ("file.emb", 87 "TypeName", "SubTypeName", "field_name") 88 source_location: The ir_data.SourceLocation where this symbol is defined. 89 visibility: LOCAL, PRIVATE, or SEARCHABLE; see below. 90 alias: If set, this name is merely a pointer to another name. 91 """ 92 __slots__ = ("canonical_name", "source_location", "visibility", "alias") 93 94 # A LOCAL name is visible outside of its enclosing scope, but should not be 95 # found when searching for a name. That is, this name should be matched in 96 # the tail of a qualified reference (the 'bar' in 'foo.bar'), but not when 97 # searching for names (the 'foo' in 'foo.bar' should not match outside of 98 # 'foo's scope). This applies to public field names. 99 LOCAL = object() 100 101 # A PRIVATE name is similar to LOCAL except that it is never visible outside 102 # its enclosing scope. This applies to abbreviations of field names: if 'a' 103 # is an abbreviation for field 'apple', then 'foo.a' is not a valid reference; 104 # instead it should be 'foo.apple'. 105 PRIVATE = object() 106 107 # A SEARCHABLE name is visible as long as it is in a scope in the search list. 108 # This applies to type names ('Foo'), which may be found from many scopes. 109 SEARCHABLE = object() 110 111 def __init__(self, canonical_name, source_location, visibility, alias=None): 112 super(_Scope, self).__init__() 113 self.canonical_name = canonical_name 114 self.source_location = source_location 115 self.visibility = visibility 116 self.alias = alias 117 118 119def _add_name_to_scope(name_ir, scope, canonical_name, visibility, errors): 120 """Adds the given name_ir to the given scope.""" 121 name = name_ir.text 122 new_scope = _Scope(canonical_name, name_ir.source_location, visibility) 123 if name in scope: 124 errors.append(duplicate_name_error( 125 scope.canonical_name.module_file, name_ir.source_location, name, 126 FileLocation(scope[name].canonical_name.module_file, 127 scope[name].source_location))) 128 else: 129 scope[name] = new_scope 130 return new_scope 131 132 133def _add_name_to_scope_and_normalize(name_ir, scope, visibility, errors): 134 """Adds the given name_ir to scope and sets its canonical_name.""" 135 name = name_ir.name.text 136 canonical_name = _nested_name(scope.canonical_name, name) 137 ir_data_utils.builder(name_ir).canonical_name.CopyFrom(canonical_name) 138 return _add_name_to_scope(name_ir.name, scope, canonical_name, visibility, 139 errors) 140 141 142def _add_struct_field_to_scope(field, scope, errors): 143 """Adds the name of the given field to the scope.""" 144 new_scope = _add_name_to_scope_and_normalize(field.name, scope, _Scope.LOCAL, 145 errors) 146 if field.HasField("abbreviation"): 147 _add_name_to_scope(field.abbreviation, scope, new_scope.canonical_name, 148 _Scope.PRIVATE, errors) 149 150 value_builtin_name = ir_data.Word( 151 text="this", 152 source_location=ir_data.Location(is_synthetic=True), 153 ) 154 # In "inside field" scope, the name `this` maps back to the field itself. 155 # This is important for attributes like `[requires]`. 156 _add_name_to_scope(value_builtin_name, new_scope, 157 field.name.canonical_name, _Scope.PRIVATE, errors) 158 159 160def _add_parameter_name_to_scope(parameter, scope, errors): 161 """Adds the name of the given parameter to the scope.""" 162 _add_name_to_scope_and_normalize(parameter.name, scope, _Scope.LOCAL, errors) 163 164 165def _add_enum_value_to_scope(value, scope, errors): 166 """Adds the name of the enum value to scope.""" 167 _add_name_to_scope_and_normalize(value.name, scope, _Scope.LOCAL, errors) 168 169 170def _add_type_name_to_scope(type_definition, scope, errors): 171 """Adds the name of type_definition to the given scope.""" 172 new_scope = _add_name_to_scope_and_normalize(type_definition.name, scope, 173 _Scope.SEARCHABLE, errors) 174 return {"scope": new_scope} 175 176 177def _set_scope_for_type_definition(type_definition, scope): 178 """Sets the current scope for an ir_data.AddressableUnit.""" 179 return {"scope": scope[type_definition.name.name.text]} 180 181 182def _add_module_to_scope(module, scope): 183 """Adds the name of the module to the given scope.""" 184 module_symbol_table = _Scope( 185 ir_data.CanonicalName(module_file=module.source_file_name, 186 object_path=[]), 187 None, 188 _Scope.SEARCHABLE) 189 scope[module.source_file_name] = module_symbol_table 190 return {"scope": scope[module.source_file_name]} 191 192 193def _set_scope_for_module(module, scope): 194 """Adds the name of the module to the given scope.""" 195 return {"scope": scope[module.source_file_name]} 196 197 198def _add_import_to_scope(foreign_import, table, module, errors): 199 if not foreign_import.local_name.text: 200 # This is the prelude import; ignore it. 201 return 202 _add_alias_to_scope(foreign_import.local_name, table, module.canonical_name, 203 [foreign_import.file_name.text], _Scope.SEARCHABLE, 204 errors) 205 206 207def _construct_symbol_tables(ir): 208 """Constructs per-module symbol tables for each module in ir.""" 209 symbol_tables = {} 210 errors = [] 211 traverse_ir.fast_traverse_ir_top_down( 212 ir, [ir_data.Module], _add_module_to_scope, 213 parameters={"errors": errors, "scope": symbol_tables}) 214 traverse_ir.fast_traverse_ir_top_down( 215 ir, [ir_data.TypeDefinition], _add_type_name_to_scope, 216 incidental_actions={ir_data.Module: _set_scope_for_module}, 217 parameters={"errors": errors, "scope": symbol_tables}) 218 if errors: 219 # Ideally, we would find duplicate field names elsewhere in the module, even 220 # if there are duplicate type names, but field/enum names in the colliding 221 # types also end up colliding, leading to spurious errors. E.g., if you 222 # have two `struct Foo`s, then the field check will also discover a 223 # collision for `$size_in_bytes`, since there are two `Foo.$size_in_bytes`. 224 return symbol_tables, errors 225 226 traverse_ir.fast_traverse_ir_top_down( 227 ir, [ir_data.EnumValue], _add_enum_value_to_scope, 228 incidental_actions={ 229 ir_data.Module: _set_scope_for_module, 230 ir_data.TypeDefinition: _set_scope_for_type_definition, 231 }, 232 parameters={"errors": errors, "scope": symbol_tables}) 233 traverse_ir.fast_traverse_ir_top_down( 234 ir, [ir_data.Field], _add_struct_field_to_scope, 235 incidental_actions={ 236 ir_data.Module: _set_scope_for_module, 237 ir_data.TypeDefinition: _set_scope_for_type_definition, 238 }, 239 parameters={"errors": errors, "scope": symbol_tables}) 240 traverse_ir.fast_traverse_ir_top_down( 241 ir, [ir_data.RuntimeParameter], _add_parameter_name_to_scope, 242 incidental_actions={ 243 ir_data.Module: _set_scope_for_module, 244 ir_data.TypeDefinition: _set_scope_for_type_definition, 245 }, 246 parameters={"errors": errors, "scope": symbol_tables}) 247 return symbol_tables, errors 248 249 250def _add_alias_to_scope(name_ir, table, scope, alias, visibility, errors): 251 """Adds the given name to the scope as an alias.""" 252 name = name_ir.text 253 new_scope = _Scope(_nested_name(scope, name), name_ir.source_location, 254 visibility, alias) 255 scoped_table = table[scope.module_file] 256 for path_element in scope.object_path: 257 scoped_table = scoped_table[path_element] 258 if name in scoped_table: 259 errors.append(duplicate_name_error( 260 scoped_table.canonical_name.module_file, name_ir.source_location, name, 261 FileLocation(scoped_table[name].canonical_name.module_file, 262 scoped_table[name].source_location))) 263 else: 264 scoped_table[name] = new_scope 265 return new_scope 266 267 268def _resolve_head_of_field_reference(field_reference, table, current_scope, 269 visible_scopes, source_file_name, errors): 270 return _resolve_reference( 271 field_reference.path[0], table, current_scope, 272 visible_scopes, source_file_name, errors) 273 274 275def _resolve_reference(reference, table, current_scope, visible_scopes, 276 source_file_name, errors): 277 """Sets the canonical name of the given reference.""" 278 if reference.HasField("canonical_name"): 279 # This reference has already been resolved by the _resolve_field_reference 280 # pass. 281 return 282 target = _find_target_of_reference(reference, table, current_scope, 283 visible_scopes, source_file_name, errors) 284 if target is not None: 285 assert not target.alias 286 ir_data_utils.builder(reference).canonical_name.CopyFrom(target.canonical_name) 287 288 289def _find_target_of_reference(reference, table, current_scope, visible_scopes, 290 source_file_name, errors): 291 """Returns the resolved name of the given reference.""" 292 found_in_table = None 293 name = reference.source_name[0].text 294 for scope in visible_scopes: 295 scoped_table = table[scope.module_file] 296 for path_element in scope.object_path or []: 297 scoped_table = scoped_table[path_element] 298 if (name in scoped_table and 299 (scope == current_scope or 300 scoped_table[name].visibility == _Scope.SEARCHABLE)): 301 # Prelude is "", so explicitly check for None. 302 if found_in_table is not None: 303 # TODO(bolms): Currently, this catches the case where a module tries to 304 # use a name that is defined (at the same scope) in two different 305 # modules. It may make sense to raise duplicate_name_error whenever two 306 # modules define the same name (whether it is used or not), and reserve 307 # ambiguous_name_error for cases where a name is found in multiple 308 # scopes. 309 errors.append(ambiguous_name_error( 310 source_file_name, reference.source_location, name, [FileLocation( 311 found_in_table[name].canonical_name.module_file, 312 found_in_table[name].source_location), FileLocation( 313 scoped_table[name].canonical_name.module_file, scoped_table[ 314 name].source_location)])) 315 continue 316 found_in_table = scoped_table 317 if reference.is_local_name: 318 # This is a little hacky. When "is_local_name" is True, the name refers 319 # to a type that was defined inline. In many cases, the type should be 320 # found at the same scope as the field; e.g.: 321 # 322 # struct Foo: 323 # 0 [+1] enum bar: 324 # BAZ = 1 325 # 326 # In this case, `Foo.bar` has type `Foo.Bar`. Unfortunately, things 327 # break down a little bit when there is an inline type in an anonymous 328 # `bits`: 329 # 330 # struct Foo: 331 # 0 [+1] bits: 332 # 0 [+7] enum bar: 333 # BAZ = 1 334 # 335 # Types inside of anonymous `bits` are hoisted into their parent type, 336 # so instead of `Foo.EmbossReservedAnonymous1.Bar`, `bar`'s type is just 337 # `Foo.Bar`. Unfortunately, the field is still 338 # `Foo.EmbossReservedAnonymous1.bar`, so `bar`'s type won't be found in 339 # `bar`'s `current_scope`. 340 # 341 # (The name `bar` is exposed from `Foo` as an alias virtual field, so 342 # perhaps the correct answer is to allow type aliases, so that `Bar` can 343 # be found in both `Foo` and `Foo.EmbossReservedAnonymous1`. That would 344 # involve an entirely new feature, though.) 345 # 346 # The workaround here is to search scopes from the innermost outward, 347 # and just stop as soon as a match is found. This isn't ideal, because 348 # it relies on other bits of the front end having correctly added the 349 # inline type to the correct scope before symbol resolution, but it does 350 # work. Names with False `is_local_name` will still be checked for 351 # ambiguity. 352 break 353 if found_in_table is None: 354 errors.append(missing_name_error( 355 source_file_name, reference.source_name[0].source_location, name)) 356 if not errors: 357 for subname in reference.source_name: 358 if subname.text not in found_in_table: 359 errors.append(missing_name_error(source_file_name, 360 subname.source_location, subname.text)) 361 return None 362 found_in_table = found_in_table[subname.text] 363 while found_in_table.alias: 364 referenced_table = table 365 for name in found_in_table.alias: 366 referenced_table = referenced_table[name] 367 # TODO(bolms): This section should really be a recursive lookup 368 # function, which would be able to handle arbitrary aliases through 369 # other aliases. 370 # 371 # This should be fine for now, since the only aliases here should be 372 # imports, which can't refer to other imports. 373 assert not referenced_table.alias, "Alias found to contain alias." 374 found_in_table = referenced_table 375 return found_in_table 376 return None 377 378 379def _resolve_field_reference(field_reference, source_file_name, errors, ir): 380 """Resolves the References inside of a FieldReference.""" 381 if field_reference.path[-1].HasField("canonical_name"): 382 # Already done. 383 return 384 previous_field = ir_util.find_object_or_none(field_reference.path[0], ir) 385 previous_reference = field_reference.path[0] 386 for ref in field_reference.path[1:]: 387 while ir_util.field_is_virtual(previous_field): 388 if (previous_field.read_transform.WhichOneof("expression") == 389 "field_reference"): 390 # Pass a separate error list into the recursive _resolve_field_reference 391 # call so that only one copy of the error for a particular reference 392 # will actually surface: in particular, the one that results from a 393 # direct call from traverse_ir_top_down into _resolve_field_reference. 394 new_errors = [] 395 _resolve_field_reference( 396 previous_field.read_transform.field_reference, 397 previous_field.name.canonical_name.module_file, new_errors, ir) 398 # If the recursive _resolve_field_reference was unable to resolve the 399 # field, then bail. Otherwise we get a cascade of errors, where an 400 # error in `x` leads to errors in anything trying to reach a member of 401 # `x`. 402 if not previous_field.read_transform.field_reference.path[-1].HasField( 403 "canonical_name"): 404 return 405 previous_field = ir_util.find_object( 406 previous_field.read_transform.field_reference.path[-1], ir) 407 else: 408 errors.append( 409 noncomposite_subfield_error(source_file_name, 410 previous_reference.source_location, 411 previous_reference.source_name[0].text)) 412 return 413 if previous_field.type.WhichOneof("type") == "array_type": 414 errors.append( 415 array_subfield_error(source_file_name, 416 previous_reference.source_location, 417 previous_reference.source_name[0].text)) 418 return 419 assert previous_field.type.WhichOneof("type") == "atomic_type" 420 member_name = ir_data_utils.copy( 421 previous_field.type.atomic_type.reference.canonical_name) 422 ir_data_utils.builder(member_name).object_path.extend([ref.source_name[0].text]) 423 previous_field = ir_util.find_object_or_none(member_name, ir) 424 if previous_field is None: 425 errors.append( 426 missing_name_error(source_file_name, 427 ref.source_name[0].source_location, 428 ref.source_name[0].text)) 429 return 430 ir_data_utils.builder(ref).canonical_name.CopyFrom(member_name) 431 previous_reference = ref 432 433 434def _set_visible_scopes_for_type_definition(type_definition, visible_scopes): 435 """Sets current_scope and visible_scopes for the given type_definition.""" 436 return { 437 "current_scope": type_definition.name.canonical_name, 438 439 # In order to ensure that the iteration through scopes in 440 # _find_target_of_reference will go from innermost to outermost, it is 441 # important that the current scope (type_definition.name.canonical_name) 442 # precedes the previous visible_scopes here. 443 "visible_scopes": (type_definition.name.canonical_name,) + visible_scopes, 444 } 445 446 447def _set_visible_scopes_for_module(module): 448 """Sets visible_scopes for the given module.""" 449 self_scope = ir_data.CanonicalName(module_file=module.source_file_name) 450 extra_visible_scopes = [] 451 for foreign_import in module.foreign_import: 452 # Anonymous imports are searched for top-level names; named imports are not. 453 # As of right now, only the prelude should be imported anonymously; other 454 # modules must be imported with names. 455 if not foreign_import.local_name.text: 456 extra_visible_scopes.append( 457 ir_data.CanonicalName(module_file=foreign_import.file_name.text)) 458 return {"visible_scopes": (self_scope,) + tuple(extra_visible_scopes)} 459 460 461def _set_visible_scopes_for_attribute(attribute, field, visible_scopes): 462 """Sets current_scope and visible_scopes for the attribute.""" 463 del attribute # Unused 464 if field is None: 465 return 466 return { 467 "current_scope": field.name.canonical_name, 468 "visible_scopes": (field.name.canonical_name,) + visible_scopes, 469 } 470 471def _module_source_from_table_action(m, table): 472 return {"module": table[m.source_file_name]} 473 474def _resolve_symbols_from_table(ir, table): 475 """Resolves all references in the given IR, given the constructed table.""" 476 errors = [] 477 # Symbol resolution is broken into five passes. First, this code resolves any 478 # imports, and adds import aliases to modules. 479 traverse_ir.fast_traverse_ir_top_down( 480 ir, [ir_data.Import], _add_import_to_scope, 481 incidental_actions={ 482 ir_data.Module: _module_source_from_table_action, 483 }, 484 parameters={"errors": errors, "table": table}) 485 if errors: 486 return errors 487 # Next, this resolves all absolute references (e.g., it resolves "UInt" in 488 # "0:1 UInt field" to [prelude]::UInt). 489 traverse_ir.fast_traverse_ir_top_down( 490 ir, [ir_data.Reference], _resolve_reference, 491 skip_descendants_of=(ir_data.FieldReference,), 492 incidental_actions={ 493 ir_data.TypeDefinition: _set_visible_scopes_for_type_definition, 494 ir_data.Module: _set_visible_scopes_for_module, 495 ir_data.Attribute: _set_visible_scopes_for_attribute, 496 }, 497 parameters={"table": table, "errors": errors, "field": None}) 498 # Lastly, head References to fields (e.g., the `a` of `a.b.c`) are resolved. 499 traverse_ir.fast_traverse_ir_top_down( 500 ir, [ir_data.FieldReference], _resolve_head_of_field_reference, 501 incidental_actions={ 502 ir_data.TypeDefinition: _set_visible_scopes_for_type_definition, 503 ir_data.Module: _set_visible_scopes_for_module, 504 ir_data.Attribute: _set_visible_scopes_for_attribute, 505 }, 506 parameters={"table": table, "errors": errors, "field": None}) 507 return errors 508 509 510def resolve_field_references(ir): 511 """Resolves structure member accesses ("field.subfield") in ir.""" 512 errors = [] 513 traverse_ir.fast_traverse_ir_top_down( 514 ir, [ir_data.FieldReference], _resolve_field_reference, 515 incidental_actions={ 516 ir_data.TypeDefinition: _set_visible_scopes_for_type_definition, 517 ir_data.Module: _set_visible_scopes_for_module, 518 ir_data.Attribute: _set_visible_scopes_for_attribute, 519 }, 520 parameters={"errors": errors, "field": None}) 521 return errors 522 523 524def resolve_symbols(ir): 525 """Resolves the symbols in all modules in ir.""" 526 symbol_tables, errors = _construct_symbol_tables(ir) 527 if errors: 528 return errors 529 return _resolve_symbols_from_table(ir, symbol_tables) 530