# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Main driver for the Emboss front-end.

The parse_emboss_file function performs a complete parse of the specified file,
and returns an IR or formatted error message.
"""

import collections

from compiler.front_end import attribute_checker
from compiler.front_end import constraints
from compiler.front_end import dependency_checker
from compiler.front_end import expression_bounds
from compiler.front_end import lr1
from compiler.front_end import module_ir
from compiler.front_end import parser
from compiler.front_end import symbol_resolver
from compiler.front_end import synthetics
from compiler.front_end import tokenizer
from compiler.front_end import type_check
from compiler.front_end import write_inference
from compiler.util import error
from compiler.util import ir_data
from compiler.util import ir_data_utils
from compiler.util import parser_types
from compiler.util import resources

_IrDebugInfo = collections.namedtuple("IrDebugInfo", ["ir", "debug_info",
                                                      "errors"])


class DebugInfo(object):
  """Debug information about Emboss parsing."""
  __slots__ = ("modules",)

  def __init__(self):
    self.modules = {}

  def __eq__(self, other):
    return self.modules == other.modules

  def __ne__(self, other):
    return not self == other


class ModuleDebugInfo(object):
  """Debug information about the parse of a single file.

  Attributes:
    file_name: The name of the file from which this module came.
    tokens: The tokenization of this module's source text.
    parse_tree: The raw parse tree for this module.
    ir: The intermediate representation of this module, before additional
        processing such as symbol resolution.
    used_productions: The set of grammar productions used when parsing this
        module.
    source_code: The source text of the module.
  """
  __slots__ = ("file_name", "tokens", "parse_tree", "ir", "used_productions",
               "source_code")

  def __init__(self, file_name):
    self.file_name = file_name
    self.tokens = None
    self.parse_tree = None
    self.ir = None
    self.used_productions = None
    self.source_code = None

  def __eq__(self, other):
    return (self.file_name == other.file_name and self.tokens == other.tokens
            and self.parse_tree == other.parse_tree and self.ir == other.ir and
            self.used_productions == other.used_productions and
            self.source_code == other.source_code)

  def __ne__(self, other):
    return not self == other

  def format_tokenization(self):
    """Renders self.tokens in a human-readable format."""
    return "\n".join([str(token) for token in self.tokens])

  def format_parse_tree(self, parse_tree=None, indent=""):
    """Renders self.parse_tree in a human-readable format."""
    if parse_tree is None:
      parse_tree = self.parse_tree
    result = []
    if isinstance(parse_tree, lr1.Reduction):
      result.append(indent + parse_tree.symbol)
      if parse_tree.children:
        result.append(":\n")
        for child in parse_tree.children:
          result.append(self.format_parse_tree(child, indent + "  "))
      else:
        result.append("\n")
    else:
      result.append("{}{}\n".format(indent, parse_tree))
    return "".join(result)

  def format_module_ir(self):
    """Renders self.ir in a human-readable format."""
    return ir_data_utils.IrDataSerializer(self.ir).to_json(indent=2)


def format_production_set(productions):
  """Renders a set of productions in a human-readable format."""
  return "\n".join([str(production) for production in sorted(productions)])


_cached_modules = {}


def parse_module_text(source_code, file_name):
  """Parses the text of a module, returning a module-level IR.

  Arguments:
    source_code: The text of the module to parse.
    file_name: The name of the module's source file (will be included in the
        resulting IR).

  Returns:
    A module-level intermediate representation (IR), prior to import and symbol
    resolution, and a corresponding ModuleDebugInfo, for debugging the parser.

  Raises:
    FrontEndFailure: An error occurred while parsing the module.  str(error)
        will give a human-readable error message.
  """
  # This is strictly an optimization to speed up tests, mostly by avoiding the
  # need to re-parse the prelude for every test .emb.
  if (source_code, file_name) in _cached_modules:
    debug_info = _cached_modules[source_code, file_name]
    ir = ir_data_utils.copy(debug_info.ir)
  else:
    debug_info = ModuleDebugInfo(file_name)
    debug_info.source_code = source_code
    tokens, errors = tokenizer.tokenize(source_code, file_name)
    if errors:
      return _IrDebugInfo(None, debug_info, errors)
    debug_info.tokens = tokens
    parse_result = parser.parse_module(tokens)
    if parse_result.error:
      return _IrDebugInfo(
          None,
          debug_info,
          [error.make_error_from_parse_error(file_name, parse_result.error)])
    debug_info.parse_tree = parse_result.parse_tree
    used_productions = set()
    ir = module_ir.build_ir(parse_result.parse_tree, used_productions)
    ir.source_text = source_code
    debug_info.used_productions = used_productions
    debug_info.ir = ir_data_utils.copy(ir)
    _cached_modules[source_code, file_name] = debug_info
  ir.source_file_name = file_name
  return _IrDebugInfo(ir, debug_info, [])
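
# A minimal usage sketch for parse_module_text (illustrative only; the Emboss
# source text and file name below are hypothetical, not part of this module):
#
#   source = "struct Foo:\n  0 [+4]  UInt  bar\n"
#   ir, debug_info, errors = parse_module_text(source, "foo.emb")
#   if not errors:
#     print(debug_info.format_tokenization())
#     print(debug_info.format_module_ir())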


def parse_module(file_name, file_reader):
  """Parses a module, returning a module-level IR.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that returns either:
        (file_contents, None) or
        (None, list_of_error_detail_strings)

  Returns:
    (ir, debug_info, errors), where ir is a module-level intermediate
    representation (IR), debug_info is a ModuleDebugInfo containing the
    tokenization, parse tree, and original source text of the module, and
    errors is a list of tokenization or parse errors.  If errors is not an
    empty list, ir will be None.

  Raises:
    FrontEndFailure: An error occurred while reading or parsing the module.
        str(error) will give a human-readable error message.
  """
  source_code, errors = file_reader(file_name)
  if errors:
    location = parser_types.make_location((1, 1), (1, 1))
    return None, None, [
        [error.error(file_name, location, "Unable to read file.")] +
        [error.note(file_name, location, e) for e in errors]
    ]
  return parse_module_text(source_code, file_name)
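
# A hedged sketch of the file_reader contract expected above (the in-memory
# reader and data here are hypothetical examples, not part of this module):
#
#   sources = {"main.emb": "struct Foo:\n  0 [+4]  UInt  bar\n"}
#
#   def file_reader(name):
#     if name in sources:
#       return sources[name], None               # success: (text, None)
#     return None, ["no such file: " + name]     # failure: (None, [details])
#
#   ir, debug_info, errors = parse_module("main.emb", file_reader)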


def get_prelude():
  """Returns the module IR and debug info of the Emboss Prelude."""
  return parse_module_text(
      resources.load("compiler.front_end", "prelude.emb"), "")


def parse_emboss_file(file_name, file_reader, stop_before_step=None):
  """Fully parses an .emb, and returns an IR suitable for passing to a back end.

  parse_emboss_file is a convenience function which calls only_parse_emboss_file
  and process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that returns either (file_contents, None) or
        (None, list_of_error_detail_strings).
    stop_before_step: If set, parse_emboss_file will stop normalizing the IR
        just before the specified step.  This parameter should be None for
        non-test code.

  Returns:
    (ir, debug_info, errors), where ir is a complete IR, ready for consumption
    by an Emboss back end, debug_info is a DebugInfo containing the
    tokenization, parse tree, and original source text of all modules, and
    errors is a list of compilation errors (tokenization, parse, or semantic).
    If errors is not an empty list, ir will be None.
  """
  ir, debug_info, errors = only_parse_emboss_file(file_name, file_reader)
  if errors:
    return _IrDebugInfo(None, debug_info, errors)
  ir, errors = process_ir(ir, stop_before_step)
  if errors:
    return _IrDebugInfo(None, debug_info, errors)
  return _IrDebugInfo(ir, debug_info, errors)
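
# Typical back-end-facing usage, as a hedged sketch (the disk-based reader and
# file name below are hypothetical; real callers may read from other sources):
#
#   def read_file(path):
#     try:
#       with open(path) as f:
#         return f.read(), None
#     except IOError as e:
#       return None, [str(e)]
#
#   ir, debug_info, errors = parse_emboss_file("example.emb", read_file)
#   if errors:
#     ...  # Report errors; otherwise `ir` is ready for an Emboss back end.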


def only_parse_emboss_file(file_name, file_reader):
  """Parses an .emb, and returns an IR suitable for process_ir.

  only_parse_emboss_file parses the given file and all of its transitive
  imports, and returns a first-stage intermediate representation, which can be
  passed to process_ir.

  Arguments:
    file_name: The name of the module's source file.
    file_reader: A callable that returns either (file_contents, None) or
        (None, list_of_error_detail_strings).

  Returns:
    (ir, debug_info, errors), where ir is an intermediate representation (IR),
    debug_info is a DebugInfo containing the tokenization, parse tree, and
    original source text of all modules, and errors is a list of tokenization or
    parse errors.  If errors is not an empty list, ir will be None.
  """
  file_queue = [file_name]
  files = {file_name}
  debug_info = DebugInfo()
  ir = ir_data.EmbossIr(module=[])
  while file_queue:
    file_to_parse = file_queue[0]
    del file_queue[0]
    if file_to_parse:
      module, module_debug_info, errors = parse_module(file_to_parse,
                                                       file_reader)
    else:
      module, module_debug_info, errors = get_prelude()
    if module_debug_info:
      debug_info.modules[file_to_parse] = module_debug_info
    if errors:
      return _IrDebugInfo(None, debug_info, errors)
    ir.module.extend([module])  # Proto supports extend but not append here.
    for import_ in module.foreign_import:
      if import_.file_name.text not in files:
        file_queue.append(import_.file_name.text)
        files.add(import_.file_name.text)
  return _IrDebugInfo(ir, debug_info, [])


def process_ir(ir, stop_before_step):
  """Turns a first-stage IR into a fully-processed IR.

  process_ir performs all of the semantic processing steps on `ir`: resolving
  symbols, checking dependencies, adding type annotations, normalizing
  attributes, etc.  process_ir is generally meant to be called with the result
  of only_parse_emboss_file(), but in theory could be called with a first-stage
  intermediate representation (IR) from another source.

  Arguments:
    ir: The IR to process.  This structure will be modified during processing.
    stop_before_step: If set, process_ir will stop normalizing the IR just
        before the specified step.  This parameter should be None for non-test
        code.

  Returns:
    (ir, errors), where ir is a complete IR, ready for consumption by an Emboss
    back end, and errors is a list of compilation errors.  If errors is not an
    empty list, ir will be None.
  """
  passes = (synthetics.desugar,
            symbol_resolver.resolve_symbols,
            dependency_checker.find_dependency_cycles,
            dependency_checker.set_dependency_order,
            symbol_resolver.resolve_field_references,
            type_check.annotate_types,
            type_check.check_types,
            expression_bounds.compute_constants,
            attribute_checker.normalize_and_verify,
            constraints.check_constraints,
            write_inference.set_write_methods)
  assert stop_before_step in [None] + [f.__name__ for f in passes], (
      "Bad value for stop_before_step.")
  # Some parts of the IR are synthesized from "natural" parts of the IR, before
  # the natural parts have been fully error checked.  Because of this, the
  # synthesized parts can have errors; in a couple of cases, they can have
  # errors that show up in an earlier pass than the errors in the natural parts
  # of the IR.  As an example:
  #
  #     struct Foo:
  #       0 [+1]  bits:
  #         0 [+1]  Flag  flag
  #       1 [+flag]  UInt:8  field
  #
  # In this case, the use of `flag` as the size of `field` is incorrect, because
  # `flag` is a boolean, but the size of a field must be an integer.
  #
  # Type checking occurs in two passes: in the first pass, expressions are
  # checked for internal consistency.  In the second pass, expression types are
  # checked against their location.  The use of `flag` would be caught in the
  # second pass.
  #
  # However, the generated_fields pass will synthesize a $size_in_bytes virtual
  # field that would look like:
  #
  #     struct Foo:
  #       0 [+1]  bits:
  #         0 [+1]  Flag  flag
  #       1 [+flag]  UInt:8  field
  #       let $size_in_bytes = $max(true ? 0 + 1 : 0, true ? 1 + flag : 0)
  #
  # Since `1 + flag` is not internally consistent, this type error would be
  # caught in the first pass, and the user would see a very strange error
  # message that "the right-hand argument of operator `+` must be an integer."
  #
  # In order to avoid showing these kinds of errors to the user, we defer any
  # errors in synthetic parts of the IR.  Unless there is a compiler bug, those
  # errors will show up as errors in the natural parts of the IR, which should
  # be much more comprehensible to end users.
  #
  # If, for some reason, there is an error in the synthetic IR, but no error in
  # the natural IR, the synthetic errors will be shown.  In this case, the
  # formatting for the synthetic errors will show '[compiler bug]' for the
  # error location, which (hopefully) will provide the end user with a cue that
  # the error is a compiler bug.
  deferred_errors = []
  for function in passes:
    if stop_before_step == function.__name__:
      return (ir, [])
    errors, hidden_errors = error.split_errors(function(ir))
    if errors:
      return (None, errors)
    deferred_errors.extend(hidden_errors)

  if deferred_errors:
    return (None, deferred_errors)

  assert stop_before_step is None, "Bad value for stop_before_step."
  return (ir, [])
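
# In tests, the pass pipeline can be halted just before a named pass via
# stop_before_step, e.g. (a hedged sketch; `some_file_reader` is a hypothetical
# stand-in for a reader that returns (text, None) or (None, [details])):
#
#   ir, debug_info, errors = only_parse_emboss_file("m.emb", some_file_reader)
#   assert not errors
#   partial_ir, errors = process_ir(ir, stop_before_step="check_constraints")
#
# The step name must match the __name__ of one of the functions in `passes`.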
368