# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for parser."""

import unittest
from compiler.front_end import lr1
from compiler.front_end import parser
from compiler.front_end import tokenizer
from compiler.util import parser_types


# TODO(bolms): This is repeated in lr1_test.py; separate into test utils?
def _parse_productions(*productions):
    """Parses text into a grammar by calling Production.parse on each line."""
    return [parser_types.Production.parse(p) for p in productions]


# Dividers used to assemble error-example text for parse_error_examples():
# a line of 80 "=" starts each message/examples group, a line of 80 "-"
# separates a message from its examples, and "---" separates the examples.
_EXAMPLE_DIVIDER = "\n" + "=" * 80 + "\n"
_MESSAGE_ERROR_DIVIDER = "\n" + "-" * 80 + "\n"
_ERROR_DIVIDER = "\n---\n"


class ParserGeneratorTest(unittest.TestCase):
    """Tests parser.parse_error_examples and generate_parser."""

    # NOTE(review): several tests compare complete token lists returned by
    # tokenizer.tokenize().  If token equality takes source positions into
    # account, the exact spacing inside the string literals matters; confirm
    # the literals against the canonical copy of this file.

    def _assert_examples_rejected(self, examples_text):
        """Asserts that parse_error_examples raises ParserGenerationError."""
        self.assertRaises(
            parser.ParserGenerationError,
            parser.parse_error_examples,
            examples_text,
        )

    def test_parse_good_error_examples(self):
        """Two messages with three examples yield three parsed entries."""
        errors = parser.parse_error_examples(
            _EXAMPLE_DIVIDER  # ======...
            + "structure names must be Camel"  # Message.
            + _MESSAGE_ERROR_DIVIDER  # ------...
            + "struct $ERR FOO"  # First example.
            + _ERROR_DIVIDER  # ---
            + "struct $ERR foo"  # Second example.
            + _EXAMPLE_DIVIDER  # ======...
            + ' \n struct must be followed by ":" \n\n'  # Second message.
            + _MESSAGE_ERROR_DIVIDER  # ------...
            + "struct Foo $ERR"  # Example for second message.
        )
        # Element 0 of each entry is the token list with the $ERR marker
        # removed; element 2 is the message with surrounding whitespace
        # stripped.
        self.assertEqual(tokenizer.tokenize("struct FOO", "")[0], errors[0][0])
        self.assertEqual("structure names must be Camel", errors[0][2])
        self.assertEqual(tokenizer.tokenize("struct foo", "")[0], errors[1][0])
        self.assertEqual("structure names must be Camel", errors[1][2])
        self.assertEqual(tokenizer.tokenize("struct Foo ", "")[0], errors[2][0])
        self.assertEqual('struct must be followed by ":"', errors[2][2])

    def test_parse_good_wildcard_example(self):
        """A $ANY marker in an example is parsed as lr1.ANY_TOKEN."""
        errors = parser.parse_error_examples(
            _EXAMPLE_DIVIDER  # ======...
            + ' \n struct must be followed by ":" \n\n'  # Message.
            + _MESSAGE_ERROR_DIVIDER  # ------...
            + "struct Foo $ERR $ANY"
        )
        tokens = tokenizer.tokenize("struct Foo ", "")[0]
        # The $ANY token should come just before the end-of-line token in the
        # parsed result.
        tokens.insert(-1, lr1.ANY_TOKEN)
        self.assertEqual(tokens, errors[0][0])
        self.assertEqual('struct must be followed by ":"', errors[0][2])

    def test_parse_with_no_error_marker(self):
        """An example that contains no $ERR marker is rejected."""
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "-- doc"
        )

    def test_that_no_error_example_fails(self):
        """A message that is not followed by any example is rejected."""
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER
            + "msg"
            + _EXAMPLE_DIVIDER
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
        )

    def test_that_message_example_divider_must_be_on_its_own_line(self):
        """A run of 80 dashes sharing a line with other text is rejected."""
        # Divider glued to both the message and the example.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "example"
        )
        # Divider starts its own line but is glued to the example.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + "example"
        )
        # Divider ends its line but is glued to the message.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "\nexample"
        )
        # Divider is followed by trailing whitespace on the same line.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + " \nexample"
        )

    def test_that_example_divider_must_be_on_its_own_line(self):
        """A run of 80 "=" sharing a line with other text is rejected."""
        # Divider glued to both the previous example and the next message.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
            + "=" * 80
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
        )
        # Divider starts its own line but is glued to the next message.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example\n"
            + "=" * 80
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
        )
        # Divider ends its line but is glued to the previous example.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
            + "=" * 80
            + "\nmsg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
        )
        # Divider is followed by trailing whitespace on the same line.
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER
            + "msg"
            + _MESSAGE_ERROR_DIVIDER
            + "example\n"
            + "=" * 80
            + " \nmsg"
            + _MESSAGE_ERROR_DIVIDER
            + "example"
        )

    def test_that_tokenization_failure_results_in_failure(self):
        """The example "|" is rejected with ParserGenerationError."""
        self._assert_examples_rejected(
            _EXAMPLE_DIVIDER + "message" + _MESSAGE_ERROR_DIVIDER + "|"
        )

    def test_generate_parser(self):
        """generate_parser returns a truthy parser for simple grammars."""
        self.assertTrue(
            parser.generate_parser("C", _parse_productions("C -> s"), [])
        )
        self.assertTrue(
            parser.generate_parser(
                "C", _parse_productions("C -> s", "C -> d"), []
            )
        )

    def test_generated_parser_error(self):
        """A parse failure matching an error example reports its code."""
        test_parser = parser.generate_parser(
            "C",
            _parse_productions("C -> s", "C -> d"),
            [
                (
                    [
                        parser_types.Token("s", "s", None),
                        parser_types.Token("s", "s", None),
                    ],
                    parser_types.Token("s", "s", None),
                    "double s",
                    "ss",
                )
            ],
        )
        parse_result = test_parser.parse(
            [
                parser_types.Token("s", "s", None),
                parser_types.Token("s", "s", None),
            ]
        )
        self.assertIsNone(parse_result.parse_tree)
        self.assertEqual("double s", parse_result.error.code)

    def test_conflict_error(self):
        """A grammar where "a" reduces to either S or D is rejected."""
        self.assertRaises(
            parser.ParserGenerationError,
            parser.generate_parser,
            "C",
            _parse_productions("C -> S", "C -> D", "S -> a", "D -> a"),
            [],
        )

    def test_bad_mark_error(self):
        """generate_parser rejects unusable sets of error examples."""
        # Two examples with identical tokens but different error codes.
        self.assertRaises(
            parser.ParserGenerationError,
            parser.generate_parser,
            "C",
            _parse_productions("C -> s", "C -> d"),
            [
                (
                    [
                        parser_types.Token("s", "s", None),
                        parser_types.Token("s", "s", None),
                    ],
                    parser_types.Token("s", "s", None),
                    "double s",
                    "ss",
                ),
                (
                    [
                        parser_types.Token("s", "s", None),
                        parser_types.Token("s", "s", None),
                    ],
                    parser_types.Token("s", "s", None),
                    "double 's'",
                    "ss",
                ),
            ],
        )
        # An example whose token list is the single token "s", which matches
        # the production "C -> s".
        self.assertRaises(
            parser.ParserGenerationError,
            parser.generate_parser,
            "C",
            _parse_productions("C -> s", "C -> d"),
            [
                (
                    [parser_types.Token("s", "s", None)],
                    parser_types.Token("s", "s", None),
                    "single s",
                    "s",
                )
            ],
        )


class ModuleParserTest(unittest.TestCase):
    """Tests for parser.parse_module().

    Correct parses should mostly be checked in conjunction with
    module_ir.build_ir, as the exact data structure returned by
    parser.parse_module() is determined by the grammar defined in module_ir.
    These tests only need to cover errors and sanity checking.
    """

    def test_error_reporting_by_example(self):
        """A field with no name yields a coded error at the newline token."""
        parse_result = parser.parse_module(
            tokenizer.tokenize(
                "struct LogFileStatus:\n"
                " 0 [+4] UInt\n",
                "",
            )[0]
        )
        self.assertIsNone(parse_result.parse_tree)
        self.assertEqual(
            "A name is required for a struct field.", parse_result.error.code
        )
        self.assertEqual('"\\n"', parse_result.error.token.symbol)
        self.assertEqual(
            {'"["', "SnakeWord", '"."', '":"', '"("'},
            parse_result.error.expected_tokens,
        )

    def test_error_reporting_without_example(self):
        """A stray "+" after a field name yields an error with no code."""
        parse_result = parser.parse_module(
            tokenizer.tokenize(
                "struct LogFileStatus:\n"
                " 0 [+4] UInt foo +\n",
                "",
            )[0]
        )
        self.assertIsNone(parse_result.parse_tree)
        self.assertIsNone(parse_result.error.code)
        self.assertEqual('"+"', parse_result.error.token.symbol)
        self.assertEqual(
            {'"("', '"\\n"', '"["', "Documentation", "Comment"},
            parse_result.error.expected_tokens,
        )

    def test_ok_parse(self):
        """A well-formed struct yields a parse tree and no error."""
        parse_result = parser.parse_module(
            tokenizer.tokenize(
                "struct LogFileStatus:\n"
                " 0 [+4] UInt foo\n",
                "",
            )[0]
        )
        self.assertTrue(parse_result.parse_tree)
        self.assertIsNone(parse_result.error)


if __name__ == "__main__":
    unittest.main()