xref: /aosp_15_r20/external/emboss/compiler/front_end/parser_test.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Tests for parser."""
16
17import unittest
18from compiler.front_end import lr1
19from compiler.front_end import parser
20from compiler.front_end import tokenizer
21from compiler.util import parser_types
22
23
24# TODO(bolms): This is repeated in lr1_test.py; separate into test utils?
25def _parse_productions(*productions):
26  """Parses text into a grammar by calling Production.parse on each line."""
27  return [parser_types.Production.parse(p) for p in productions]
28
29
30_EXAMPLE_DIVIDER = "\n" + "=" * 80 + "\n"
31_MESSAGE_ERROR_DIVIDER = "\n" + "-" * 80 + "\n"
32_ERROR_DIVIDER = "\n---\n"
33
34
35class ParserGeneratorTest(unittest.TestCase):
36  """Tests parser.parse_error_examples and generate_parser."""
37
38  def test_parse_good_error_examples(self):
39    errors = parser.parse_error_examples(
40        _EXAMPLE_DIVIDER +  # ======...
41        "structure names must be Camel" +  # Message.
42        _MESSAGE_ERROR_DIVIDER +  # ------...
43        "struct $ERR FOO" +  # First example.
44        _ERROR_DIVIDER +  # ---
45        "struct $ERR foo" +  # Second example.
46        _EXAMPLE_DIVIDER +  # ======...
47        '   \n   struct must be followed by ":"   \n\n' +  # Second message.
48        _MESSAGE_ERROR_DIVIDER +  # ------...
49        "struct Foo $ERR")  # Example for second message.
50    self.assertEqual(tokenizer.tokenize("struct      FOO", "")[0], errors[0][0])
51    self.assertEqual("structure names must be Camel", errors[0][2])
52    self.assertEqual(tokenizer.tokenize("struct      foo", "")[0], errors[1][0])
53    self.assertEqual("structure names must be Camel", errors[1][2])
54    self.assertEqual(tokenizer.tokenize("struct Foo     ", "")[0], errors[2][0])
55    self.assertEqual('struct must be followed by ":"', errors[2][2])
56
57  def test_parse_good_wildcard_example(self):
58    errors = parser.parse_error_examples(
59        _EXAMPLE_DIVIDER +  # ======...
60        '   \n   struct must be followed by ":"   \n\n' +  # Second message.
61        _MESSAGE_ERROR_DIVIDER +  # ------...
62        "struct Foo $ERR $ANY")
63    tokens = tokenizer.tokenize("struct Foo          ", "")[0]
64    # The $ANY token should come just before the end-of-line token in the parsed
65    # result.
66    tokens.insert(-1, lr1.ANY_TOKEN)
67    self.assertEqual(tokens, errors[0][0])
68    self.assertEqual('struct must be followed by ":"', errors[0][2])
69
70  def test_parse_with_no_error_marker(self):
71    self.assertRaises(
72        parser.ParserGenerationError,
73        parser.parse_error_examples,
74        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "-- doc")
75
76  def test_that_no_error_example_fails(self):
77    self.assertRaises(parser.ParserGenerationError,
78                      parser.parse_error_examples,
79                      _EXAMPLE_DIVIDER + "msg" + _EXAMPLE_DIVIDER + "msg" +
80                      _MESSAGE_ERROR_DIVIDER + "example")
81
82  def test_that_message_example_divider_must_be_on_its_own_line(self):
83    self.assertRaises(parser.ParserGenerationError,
84                      parser.parse_error_examples,
85                      _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "example")
86    self.assertRaises(parser.ParserGenerationError,
87                      parser.parse_error_examples,
88                      _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + "example")
89    self.assertRaises(parser.ParserGenerationError,
90                      parser.parse_error_examples,
91                      _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "\nexample")
92    self.assertRaises(parser.ParserGenerationError,
93                      parser.parse_error_examples,
94                      _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + " \nexample")
95
96  def test_that_example_divider_must_be_on_its_own_line(self):
97    self.assertRaises(
98        parser.ParserGenerationError,
99        parser.parse_error_examples,
100        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example" + "=" * 80
101        + "msg" + _MESSAGE_ERROR_DIVIDER + "example")
102    self.assertRaises(
103        parser.ParserGenerationError,
104        parser.parse_error_examples,
105        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example\n" + "=" *
106        80 + "msg" + _MESSAGE_ERROR_DIVIDER + "example")
107    self.assertRaises(
108        parser.ParserGenerationError,
109        parser.parse_error_examples,
110        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example" + "=" * 80
111        + "\nmsg" + _MESSAGE_ERROR_DIVIDER + "example")
112    self.assertRaises(
113        parser.ParserGenerationError,
114        parser.parse_error_examples,
115        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example\n" + "=" *
116        80 + " \nmsg" + _MESSAGE_ERROR_DIVIDER + "example")
117
118  def test_that_tokenization_failure_results_in_failure(self):
119    self.assertRaises(
120        parser.ParserGenerationError,
121        parser.parse_error_examples,
122        _EXAMPLE_DIVIDER + "message" + _MESSAGE_ERROR_DIVIDER + "|")
123
124  def test_generate_parser(self):
125    self.assertTrue(parser.generate_parser("C", _parse_productions("C -> s"),
126                                           []))
127    self.assertTrue(parser.generate_parser(
128        "C", _parse_productions("C -> s", "C -> d"), []))
129
130  def test_generated_parser_error(self):
131    test_parser = parser.generate_parser(
132        "C", _parse_productions("C -> s", "C -> d"),
133        [([parser_types.Token("s", "s", None),
134           parser_types.Token("s", "s", None)],
135          parser_types.Token("s", "s", None),
136          "double s", "ss")])
137    parse_result = test_parser.parse([parser_types.Token("s", "s", None),
138                                      parser_types.Token("s", "s", None)])
139    self.assertEqual(None, parse_result.parse_tree)
140    self.assertEqual("double s", parse_result.error.code)
141
142  def test_conflict_error(self):
143    self.assertRaises(
144        parser.ParserGenerationError,
145        parser.generate_parser,
146        "C", _parse_productions("C -> S", "C -> D", "S -> a", "D -> a"), [])
147
148  def test_bad_mark_error(self):
149    self.assertRaises(parser.ParserGenerationError,
150                      parser.generate_parser,
151                      "C", _parse_productions("C -> s", "C -> d"),
152                      [([parser_types.Token("s", "s", None),
153                         parser_types.Token("s", "s", None)],
154                        parser_types.Token("s", "s", None),
155                        "double s", "ss"),
156                       ([parser_types.Token("s", "s", None),
157                         parser_types.Token("s", "s", None)],
158                        parser_types.Token("s", "s", None),
159                        "double 's'", "ss")])
160    self.assertRaises(parser.ParserGenerationError,
161                      parser.generate_parser,
162                      "C", _parse_productions("C -> s", "C -> d"),
163                      [([parser_types.Token("s", "s", None)],
164                        parser_types.Token("s", "s", None),
165                        "single s", "s")])
166
167
168class ModuleParserTest(unittest.TestCase):
169  """Tests for parser.parse_module().
170
171  Correct parses should mostly be checked in conjunction with
172  module_ir.build_ir, as the exact data structure returned by
173  parser.parse_module() is determined by the grammar defined in module_ir.
174  These tests only need to cover errors and sanity checking.
175  """
176
177  def test_error_reporting_by_example(self):
178    parse_result = parser.parse_module(
179        tokenizer.tokenize("struct LogFileStatus:\n"
180                           "  0 [+4]    UInt\n", "")[0])
181    self.assertEqual(None, parse_result.parse_tree)
182    self.assertEqual("A name is required for a struct field.",
183                     parse_result.error.code)
184    self.assertEqual('"\\n"', parse_result.error.token.symbol)
185    self.assertEqual(set(['"["', "SnakeWord", '"."', '":"', '"("']),
186                     parse_result.error.expected_tokens)
187
188  def test_error_reporting_without_example(self):
189    parse_result = parser.parse_module(
190        tokenizer.tokenize("struct LogFileStatus:\n"
191                           "  0 [+4]    UInt    foo +\n", "")[0])
192    self.assertEqual(None, parse_result.parse_tree)
193    self.assertEqual(None, parse_result.error.code)
194    self.assertEqual('"+"', parse_result.error.token.symbol)
195    self.assertEqual(set(['"("', '"\\n"', '"["', "Documentation", "Comment"]),
196                     parse_result.error.expected_tokens)
197
198  def test_ok_parse(self):
199    parse_result = parser.parse_module(
200        tokenizer.tokenize("struct LogFileStatus:\n"
201                           "  0 [+4]    UInt    foo\n", "")[0])
202    self.assertTrue(parse_result.parse_tree)
203    self.assertEqual(None, parse_result.error)
204
205
206if __name__ == "__main__":
207  unittest.main()
208