xref: /aosp_15_r20/external/emboss/compiler/front_end/parser.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Routines to generate a shift-reduce parser from the module_ir module."""
16
17from compiler.front_end import lr1
18from compiler.front_end import module_ir
19from compiler.front_end import tokenizer
20from compiler.util import resources
21from compiler.util import simple_memoizer
22
23
class ParserGenerationError(Exception):
  """Signals that a parser could not be generated.

  Within this module it is raised for malformed error-example text, for
  examples that fail to tokenize, for grammar conflicts, and for error examples
  that cannot be marked on the generated parser.
  """
27
28
def parse_error_examples(error_example_text):
  """Parses error examples from error_example_text.

  The text is divided into sections by lines consisting of 80 "=" characters;
  everything before the first such line is explanatory text and is ignored.
  Within a section, a line consisting of 80 "-" characters separates the error
  message from the examples, and individual examples are separated from each
  other by "---" lines.

  Each example is tokenized.  Every "BadWord" token with the text "$ANY" is
  replaced by lr1.ANY_TOKEN.  The first "BadWord" token with the text "$ERR"
  is removed, and the token that followed it is recorded as the error token.

  Arguments:
    error_example_text: The text of an error example file.

  Returns:
    A list of (tokens, error token, stripped error message, example text)
    tuples, suitable for passing into generate_parser.

  Raises:
    ParserGenerationError: There is a problem parsing the error examples: a
        section does not have exactly one message part and one example part,
        an example fails to tokenize, an example has no "$ERR" marker, or
        the "$ERR" marker is not followed by any token.
  """
  section_separator = "\n" + "=" * 80 + "\n"
  message_separator = "\n" + "-" * 80 + "\n"
  result = []
  # Everything before the first "======" line is explanatory text: ignore it.
  for error_example in error_example_text.split(section_separator)[1:]:
    message_and_examples = error_example.split(message_separator)
    if len(message_and_examples) != 2:
      raise ParserGenerationError(
          "Expected one error message and one example section in:\n" +
          error_example)
    message, example_text = message_and_examples
    for example in example_text.split("\n---\n"):
      # TODO(bolms): feed a line number into tokenize, so that tokenization
      # failures refer to the correct line within error_example_text.
      tokens, errors = tokenizer.tokenize(example, "")
      if errors:
        raise ParserGenerationError(str(errors))

      # Replace wildcard markers in place; the list length does not change,
      # so assigning by index while enumerating is safe.
      for i, token in enumerate(tokens):
        if token.symbol == "BadWord" and token.text == "$ANY":
          tokens[i] = lr1.ANY_TOKEN

      for i, token in enumerate(tokens):
        if token.symbol == "BadWord" and token.text == "$ERR":
          if i + 1 >= len(tokens):
            # Without this check, a trailing marker would escape as a bare
            # IndexError instead of the documented ParserGenerationError.
            raise ParserGenerationError(
                "No token after error token marker '$ERR' in:\n" +
                error_example)
          error_token = tokens[i + 1]
          # Drop the marker itself; stop immediately since the list changed.
          del tokens[i]
          break
      else:
        # The loop finished without finding a marker.
        raise ParserGenerationError(
            "No error token marker '$ERR' in:\n" + error_example)

      result.append((tokens, error_token, message.strip(), example))
  return result
75
76
def generate_parser(start_symbol, productions, error_examples):
  """Generates a parser from grammar, and applies error_examples.

  Arguments:
      start_symbol: the start symbol of the grammar (a string)
      productions: a list of parser_types.Production in the grammar
      error_examples: A list of (source tokens, error token, error message,
          source text) tuples, such as the list returned by
          parse_error_examples.  The source text is only read when reporting
          a failure to mark the example.

  Returns:
      A parser.

  Raises:
      ParserGenerationError: There is a problem generating the parser: the
          grammar has conflicts, or an error example could not be marked.
  """
  parser = lr1.Grammar(start_symbol, productions).parser()
  if parser.conflicts:
    raise ParserGenerationError(
        "\n".join(str(conflict) for conflict in parser.conflicts))
  for example in error_examples:
    tokens, error_token, message = example[0], example[1], example[2]
    mark_result = parser.mark_error(tokens, error_token, message)
    if mark_result:
      # A truthy result from mark_error is treated as a failure description
      # and reported together with the example's original source text.
      raise ParserGenerationError(
          "error marking example: {}\nExample:\n{}".format(
              mark_result, example[3]))
  return parser
102
103
@simple_memoizer.memoize
def _load_module_parser():
  """Builds the parser for full modules, with the error examples applied.

  The error examples are loaded from the "error_examples" resource of
  compiler.front_end.  The function is wrapped in simple_memoizer.memoize,
  presumably so that the parser is only generated once.
  """
  example_text = resources.load("compiler.front_end", "error_examples")
  error_examples = parse_error_examples(example_text)
  return generate_parser(
      module_ir.START_SYMBOL, module_ir.PRODUCTIONS, error_examples)
110
111
@simple_memoizer.memoize
def _load_expression_parser():
  """Builds the parser for standalone expressions.

  Uses the same productions as the module parser, but starts from
  module_ir.EXPRESSION_START_SYMBOL and applies no error examples.
  """
  no_error_examples = []
  return generate_parser(
      module_ir.EXPRESSION_START_SYMBOL,
      module_ir.PRODUCTIONS,
      no_error_examples)
116
117
def parse_module(tokens):
  """Parses an Emboss token list as a module.

  Arguments:
    tokens: The Emboss token list to parse.

  Returns:
    The result of running the module parser's parse() on tokens.
  """
  module_parser = _load_module_parser()
  return module_parser.parse(tokens)
121
122
def parse_expression(tokens):
  """Parses an Emboss token list as a standalone expression.

  Arguments:
    tokens: The Emboss token list to parse.

  Returns:
    The result of running the expression parser's parse() on tokens.
  """
  expression_parser = _load_expression_parser()
  return expression_parser.parse(tokens)
126