# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Routines to generate a shift-reduce parser from the module_ir module."""

from compiler.front_end import lr1
from compiler.front_end import module_ir
from compiler.front_end import tokenizer
from compiler.util import resources
from compiler.util import simple_memoizer


class ParserGenerationError(Exception):
  """An error occurred during parser generation."""
  pass


def parse_error_examples(error_example_text):
  """Parses error examples from error_example_text.

  Arguments:
    error_example_text: The text of an error example file.

  Returns:
    A list of (tokens, error token, error message, example text) tuples,
    suitable for passing into generate_parser.

  Raises:
    ParserGenerationError: There is a problem parsing the error examples.
  """
  error_examples = error_example_text.split("\n" + "=" * 80 + "\n")
  result = []
  # Everything before the first "======" line is explanatory text: ignore it.
  for error_example in error_examples[1:]:
    message_and_examples = error_example.split("\n" + "-" * 80 + "\n")
    if len(message_and_examples) != 2:
      raise ParserGenerationError(
          "Expected one error message and one example section in:\n" +
          error_example)
    message, example_text = message_and_examples
    examples = example_text.split("\n---\n")
    for example in examples:
      # TODO(bolms): feed a line number into tokenize, so that tokenization
      # failures refer to the correct line within error_example_text.
      tokens, errors = tokenizer.tokenize(example, "")
      if errors:
        raise ParserGenerationError(str(errors))

      # Replace "$ANY" markers with the wildcard token, which matches any
      # single token during error marking.
      for i in range(len(tokens)):
        if tokens[i].symbol == "BadWord" and tokens[i].text == "$ANY":
          tokens[i] = lr1.ANY_TOKEN

      # The "$ERR" marker is removed from the token stream; the token that
      # immediately follows it is the one at which the error is expected.
      error_token = None
      for i in range(len(tokens)):
        if tokens[i].symbol == "BadWord" and tokens[i].text == "$ERR":
          error_token = tokens[i + 1]
          del tokens[i]
          break
      else:
        raise ParserGenerationError(
            "No error token marker '$ERR' in:\n" + error_example)

      result.append((tokens, error_token, message.strip(), example))
  return result
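

# A sketch of the error-example format that parse_error_examples expects,
# reconstructed from the parsing logic above (the authoritative examples live
# in the "error_examples" resource of compiler.front_end):
#
#   * Sections are separated by a line of exactly 80 "=" characters; anything
#     before the first separator is explanatory text and is ignored.
#   * Within a section, a line of exactly 80 "-" characters separates the
#     error message (above) from one or more example snippets (below).
#   * Multiple examples for the same message are separated by "---" lines.
#   * In each example, "$ERR" marks the position of the error: the token
#     immediately following it is the one at which the message should be
#     reported, and "$ANY" matches any single token.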


def generate_parser(start_symbol, productions, error_examples):
  """Generates a parser from the given grammar and applies error_examples.

  Arguments:
    start_symbol: the start symbol of the grammar (a string)
    productions: a list of parser_types.Production in the grammar
    error_examples: A list of (source tokens, error token, error message,
      source text) tuples, as returned by parse_error_examples.

  Returns:
    A parser.

  Raises:
    ParserGenerationError: There is a problem generating the parser.
  """
  parser = lr1.Grammar(start_symbol, productions).parser()
  if parser.conflicts:
    raise ParserGenerationError("\n".join([str(c) for c in parser.conflicts]))
  for example in error_examples:
    # example is a (tokens, error token, message, example text) tuple.
    mark_result = parser.mark_error(example[0], example[1], example[2])
    if mark_result:
      raise ParserGenerationError(
          "error marking example: {}\nExample:\n{}".format(
              mark_result, example[3]))
  return parser


@simple_memoizer.memoize
def _load_module_parser():
  error_examples = parse_error_examples(
      resources.load("compiler.front_end", "error_examples"))
  return generate_parser(module_ir.START_SYMBOL, module_ir.PRODUCTIONS,
                         error_examples)


@simple_memoizer.memoize
def _load_expression_parser():
  return generate_parser(module_ir.EXPRESSION_START_SYMBOL,
                         module_ir.PRODUCTIONS, [])


def parse_module(tokens):
  """Parses the provided Emboss token list into an Emboss module parse tree."""
  return _load_module_parser().parse(tokens)


def parse_expression(tokens):
  """Parses the provided Emboss token list into an expression parse tree."""
  return _load_expression_parser().parse(tokens)
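

# Example usage (a sketch; "source_text" and "file_name" are hypothetical
# variables holding the contents and name of an Emboss source file):
#
#   tokens, errors = tokenizer.tokenize(source_text, file_name)
#   if not errors:
#     parse_result = parse_module(tokens)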