# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for lr1."""

import collections
import unittest

from compiler.front_end import lr1
from compiler.util import parser_types


def _make_items(text):
  """Makes a frozenset of lr1.Items from the lines in text."""
  return frozenset([lr1.Item.parse(line.strip()) for line in text.splitlines()])


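# A minimal stand-in for lexer tokens: the lr1 parser only needs a .symbol to
# match against its tables and a .source_location for the parse tree.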
Token = collections.namedtuple("Token", ["symbol", "source_location"])


def _tokenize(text):
  """"Tokenizes" text by making each character into a token."""
  result = []
  for i, char in enumerate(text):
    result.append(Token(char, parser_types.make_location(
        (1, i + 1), (1, i + 2))))
  return result


def _parse_productions(text):
  """Parses text into a list of Productions, one per line."""
  return [parser_types.Production.parse(line) for line in text.splitlines()]

# Example grammar 4.54 from Aho, Lam, Sethi, Ullman (ALSU), p. 263.
_alsu_grammar = lr1.Grammar("S", _parse_productions("""S -> C C
                                                       C -> c C
                                                       C -> d"""))
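# This grammar generates the language c*dc*d; e.g. "cdd" derives as
# S => C C => c C C => c d C => c d d.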

# Item sets corresponding to the above grammar, ALSU pp. 263-264.
_alsu_items = [
    _make_items("""S' -> . S, $
                   S -> . C C, $
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""S' -> S ., $"""),
    _make_items("""S -> C . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> c . C, c
                   C -> c . C, d
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""C -> d ., c
                   C -> d ., d"""),
    _make_items("""S -> C C ., $"""),
    _make_items("""C -> c . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> d ., $"""),
    _make_items("""C -> c C ., c
                   C -> c C ., d"""),
    _make_items("""C -> c C ., $"""),
]
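# Each set's index in _alsu_items doubles as its parser state number in the
# ACTION and GOTO tables below.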

# ACTION table corresponding to the above grammar, ALSU p. 266.
_alsu_action = {
    (0, "c"): lr1.Shift(3, _alsu_items[3]),
    (0, "d"): lr1.Shift(4, _alsu_items[4]),
    (1, lr1.END_OF_INPUT): lr1.Accept(),
    (2, "c"): lr1.Shift(6, _alsu_items[6]),
    (2, "d"): lr1.Shift(7, _alsu_items[7]),
    (3, "c"): lr1.Shift(3, _alsu_items[3]),
    (3, "d"): lr1.Shift(4, _alsu_items[4]),
    (4, "c"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (4, "d"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (5, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("S", ("C", "C"))),
    (6, "c"): lr1.Shift(6, _alsu_items[6]),
    (6, "d"): lr1.Shift(7, _alsu_items[7]),
    (7, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("d",))),
    (8, "c"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (8, "d"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (9, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
}

# GOTO table corresponding to the above grammar, ALSU p. 266.
_alsu_goto = {(0, "S"): 1, (0, "C"): 2, (2, "C"): 5, (3, "C"): 8, (6, "C"): 9}
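# A worked example, parsing "dd" with the tables above (stacks are lists of
# parser states, leftmost at the bottom):
#   [0]        on d: shift 4         -> [0 4]
#   [0 4]      on d: reduce C -> d   -> [0 2]    (GOTO(0, C) = 2)
#   [0 2]      on d: shift 7         -> [0 2 7]
#   [0 2 7]    on $: reduce C -> d   -> [0 2 5]  (GOTO(2, C) = 5)
#   [0 2 5]    on $: reduce S -> C C -> [0 1]    (GOTO(0, S) = 1)
#   [0 1]      on $: accept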


def _normalize_table(items, table):
  """Returns a canonical-form version of items and table, for comparisons."""
  item_to_original_index = {}
  for i, item in enumerate(items):
    item_to_original_index[item] = i
  sorted_items = items[0:1] + sorted(items[1:], key=sorted)
  original_index_to_index = {}
  for i, item in enumerate(sorted_items):
    original_index_to_index[item_to_original_index[item]] = i
  updated_table = {}
  for k in table:
    new_k = original_index_to_index[k[0]], k[1]
    new_value = table[k]
    if isinstance(new_value, int):
      new_value = original_index_to_index[new_value]
    elif isinstance(new_value, lr1.Shift):
      new_value = lr1.Shift(original_index_to_index[new_value.state],
                            new_value.items)
    updated_table[new_k] = new_value
  return sorted_items, updated_table


class Lr1Test(unittest.TestCase):
  """Tests for lr1."""

  def test_parse_lr1item(self):
    self.assertEqual(lr1.Item.parse("S' -> . S, $"),
                     lr1.Item(parser_types.Production(lr1.START_PRIME, ("S",)),
                              0, lr1.END_OF_INPUT, "S"))

  def test_symbol_extraction(self):
    self.assertEqual(_alsu_grammar.terminals, set(["c", "d", lr1.END_OF_INPUT]))
    self.assertEqual(_alsu_grammar.nonterminals, set(["S", "C",
                                                      lr1.START_PRIME]))
    self.assertEqual(_alsu_grammar.symbols,
                     set(["c", "d", "S", "C", lr1.END_OF_INPUT,
                          lr1.START_PRIME]))

  def test_items(self):
    self.assertEqual(set(_alsu_grammar._items()[0]), frozenset(_alsu_items))

  def test_terminal_nonterminal_production_tables(self):
    parser = _alsu_grammar.parser()
    self.assertEqual(parser.terminals, _alsu_grammar.terminals)
    self.assertEqual(parser.nonterminals, _alsu_grammar.nonterminals)
    self.assertEqual(parser.productions, _alsu_grammar.productions)

  def test_action_table(self):
    parser = _alsu_grammar.parser()
    norm_items, norm_action = _normalize_table(parser.item_sets, parser.action)
    test_items, test_action = _normalize_table(_alsu_items, _alsu_action)
    self.assertEqual(norm_items, test_items)
    self.assertEqual(norm_action, test_action)

  def test_goto_table(self):
    parser = _alsu_grammar.parser()
    norm_items, norm_goto = _normalize_table(parser.item_sets, parser.goto)
    test_items, test_goto = _normalize_table(_alsu_items, _alsu_goto)
    self.assertEqual(norm_items, test_items)
    self.assertEqual(norm_goto, test_goto)

  def test_successful_parse(self):
    parser = _alsu_grammar.parser()
    loc = parser_types.parse_location
    s_to_c_c = parser_types.Production.parse("S -> C C")
    c_to_c_c = parser_types.Production.parse("C -> c C")
    c_to_d = parser_types.Production.parse("C -> d")
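    # Expected tree for "cccdcd" is S(C(c C(c C(c C(d)))) C(c C(d))); for
    # "dd" it is S(C(d) C(d)).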
    self.assertEqual(
        lr1.Reduction(
            "S",
            [lr1.Reduction(
                "C",
                [Token("c", loc("1:1-1:2")),
                 lr1.Reduction(
                     "C",
                     [Token("c", loc("1:2-1:3")),
                      lr1.Reduction(
                          "C",
                          [Token("c", loc("1:3-1:4")),
                           lr1.Reduction("C", [Token("d", loc("1:4-1:5"))],
                                         c_to_d, loc("1:4-1:5"))],
                          c_to_c_c, loc("1:3-1:5"))],
                     c_to_c_c, loc("1:2-1:5"))],
                c_to_c_c, loc("1:1-1:5")),
             lr1.Reduction(
                 "C",
                 [Token("c", loc("1:5-1:6")),
                  lr1.Reduction("C", [Token("d", loc("1:6-1:7"))],
                                c_to_d, loc("1:6-1:7"))],
                 c_to_c_c, loc("1:5-1:7"))],
            s_to_c_c, loc("1:1-1:7")),
        parser.parse(_tokenize("cccdcd")).parse_tree)
    self.assertEqual(
        lr1.Reduction(
            "S",
            [lr1.Reduction("C", [Token("d", loc("1:1-1:2"))],
                           c_to_d, loc("1:1-1:2")),
             lr1.Reduction("C", [Token("d", loc("1:2-1:3"))],
                           c_to_d, loc("1:2-1:3"))],
            s_to_c_c, loc("1:1-1:3")),
        parser.parse(_tokenize("dd")).parse_tree)

  def test_parse_with_no_source_information(self):
    parser = _alsu_grammar.parser()
    s_to_c_c = parser_types.Production.parse("S -> C C")
    c_to_d = parser_types.Production.parse("C -> d")
    self.assertEqual(
        lr1.Reduction("S", [
            lr1.Reduction("C", [Token("d", None)], c_to_d, None),
            lr1.Reduction("C", [Token("d", None)], c_to_d, None)
        ], s_to_c_c, None),
        parser.parse([Token("d", None), Token("d", None)]).parse_tree)

  def test_failed_parses(self):
    parser = _alsu_grammar.parser()
    self.assertIsNone(parser.parse(_tokenize("d")).parse_tree)
    self.assertIsNone(parser.parse(_tokenize("cccd")).parse_tree)
    self.assertIsNone(parser.parse(_tokenize("")).parse_tree)
    self.assertIsNone(parser.parse(_tokenize("cccdc")).parse_tree)

  def test_mark_error(self):
    parser = _alsu_grammar.parser()
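    # As exercised below, mark_error() replays its token argument until the
    # parse fails, then records the error code against the failing
    # (state, terminal) pair, so any input that fails at the same point
    # reports the same code.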
    self.assertIsNone(parser.mark_error(_tokenize("cccdc"), None,
                                        "missing last d"))
    self.assertIsNone(parser.mark_error(_tokenize("d"), None, "missing last C"))
    # Marking an already-marked error with the same error code should succeed.
    self.assertIsNone(parser.mark_error(_tokenize("d"), None, "missing last C"))
    # Marking an already-marked error with a different error code should fail.
    self.assertRegex(
        parser.mark_error(_tokenize("d"), None, "different message"),
        r"^Attempted to overwrite existing error code 'missing last C' with "
        r"new error code 'different message' for state \d+, terminal \$$")
    self.assertEqual(
        "Input successfully parsed.",
        parser.mark_error(_tokenize("dd"), None, "good parse"))
    self.assertEqual(
        parser.mark_error(_tokenize("x"), None, "wrong location"),
        "error occurred on x token, not end of input.")
    self.assertEqual(
        parser.mark_error([], _tokenize("x")[0], "wrong location"),
        "error occurred on $ token, not x token.")
    self.assertIsNone(
        parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error"))
    # Marking an already-marked error with the same error code should succeed.
    self.assertIsNone(
        parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error"))
    # Marking an already-marked error with a different error code should fail.
    self.assertRegex(
        parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error 2"),
        r"^Attempted to overwrite existing default error code 'default error' "
        r"with new error code 'default error 2' for state \d+$")

    self.assertEqual(
        "missing last d", parser.parse(_tokenize("cccdc")).error.code)
    self.assertEqual("missing last d", parser.parse(_tokenize("dc")).error.code)
    self.assertEqual("missing last C", parser.parse(_tokenize("d")).error.code)
    self.assertEqual("default error", parser.parse(_tokenize("z")).error.code)
    self.assertEqual(
        "missing last C", parser.parse(_tokenize("ccccd")).error.code)
    self.assertIsNone(parser.parse(_tokenize("ccc")).error.code)

  def test_grammar_with_empty_rhs(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                     A -> a A
                                                     A ->
                                                     B -> b"""))
    parser = grammar.parser()
    self.assertFalse(parser.conflicts)
    self.assertTrue(parser.parse(_tokenize("ab")).parse_tree)
    self.assertTrue(parser.parse(_tokenize("b")).parse_tree)
    self.assertTrue(parser.parse(_tokenize("aab")).parse_tree)

  def test_grammar_with_reduce_reduce_conflicts(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A c
                                                     S -> B c
                                                     A -> a
                                                     B -> a"""))
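    # After shifting "a", lookahead "c" cannot distinguish reducing A -> a
    # from reducing B -> a, so table construction hits a reduce/reduce
    # conflict.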
    parser = grammar.parser()
    self.assertEqual(len(parser.conflicts), 1)
    # parser.conflicts is a set
    for conflict in parser.conflicts:
      for action in conflict.actions:
        self.assertIsInstance(action, lr1.Reduce)

  def test_grammar_with_shift_reduce_conflicts(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                     A -> a
                                                     A ->
                                                     B -> a
                                                     B ->"""))
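    # On lookahead "a" in the start state, the parser cannot tell whether the
    # "a" belongs to A (shift for A -> a) or A is empty and the "a" starts B
    # (reduce A ->), so table construction hits a shift/reduce conflict.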
    parser = grammar.parser()
    self.assertEqual(len(parser.conflicts), 1)
    # parser.conflicts is a set
    for conflict in parser.conflicts:
      reduces = 0
      shifts = 0
      for action in conflict.actions:
        if isinstance(action, lr1.Reduce):
          reduces += 1
        elif isinstance(action, lr1.Shift):
          shifts += 1
      self.assertEqual(1, reduces)
      self.assertEqual(1, shifts)

  def test_item_str(self):
    self.assertEqual(
        "a -> b c ., d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 2, "d")))
    self.assertEqual(
        "a -> b . c, d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 1, "d")))
    self.assertEqual(
        "a -> . b c, d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 0, "d")))
    self.assertEqual(
        "a -> ., d",
        str(lr1.make_item(parser_types.Production.parse("a ->"), 0, "d")))

  def test_conflict_str(self):
    self.assertEqual("Conflict for 'A' in state 12: R vs S",
                     str(lr1.Conflict(12, "'A'", ["R", "S"])))
    self.assertEqual("Conflict for 'A' in state 12: R vs S vs T",
                     str(lr1.Conflict(12, "'A'", ["R", "S", "T"])))


if __name__ == "__main__":
  unittest.main()