# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for lr1."""

import collections
import unittest

from compiler.front_end import lr1
from compiler.util import parser_types


def _make_items(text):
    """Makes a frozenset of lr1.Items from the lines in text."""
    return frozenset(
        [lr1.Item.parse(line.strip()) for line in text.splitlines()])


Token = collections.namedtuple("Token", ["symbol", "source_location"])


def _tokenize(text):
    """"Tokenizes" text by making each character into a token."""
    result = []
    for i, char in enumerate(text):
        result.append(Token(char, parser_types.make_location(
            (1, i + 1), (1, i + 2))))
    return result


def _parse_productions(text):
    """Parses text into a grammar by calling Production.parse on each line."""
    return [parser_types.Production.parse(line) for line in text.splitlines()]


# Example grammar 4.54 from Aho, Lam, Sethi, Ullman (ALSU) p263.
_alsu_grammar = lr1.Grammar("S", _parse_productions("""S -> C C
                                                       C -> c C
                                                       C -> d"""))

# Item sets corresponding to the above grammar, ALSU pp263-264.
_alsu_items = [
    _make_items("""S' -> . S, $
                   S -> . C C, $
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""S' -> S ., $"""),
    _make_items("""S -> C . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> c . C, c
                   C -> c . C, d
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""C -> d ., c
                   C -> d ., d"""),
    _make_items("""S -> C C ., $"""),
    _make_items("""C -> c . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> d ., $"""),
    _make_items("""C -> c C ., c
                   C -> c C ., d"""),
    _make_items("""C -> c C ., $"""),
]

# ACTION table corresponding to the above grammar, ALSU p266.
_alsu_action = {
    (0, "c"): lr1.Shift(3, _alsu_items[3]),
    (0, "d"): lr1.Shift(4, _alsu_items[4]),
    (1, lr1.END_OF_INPUT): lr1.Accept(),
    (2, "c"): lr1.Shift(6, _alsu_items[6]),
    (2, "d"): lr1.Shift(7, _alsu_items[7]),
    (3, "c"): lr1.Shift(3, _alsu_items[3]),
    (3, "d"): lr1.Shift(4, _alsu_items[4]),
    (4, "c"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (4, "d"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (5, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("S", ("C", "C"))),
    (6, "c"): lr1.Shift(6, _alsu_items[6]),
    (6, "d"): lr1.Shift(7, _alsu_items[7]),
    (7, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("d",))),
    (8, "c"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (8, "d"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (9, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
}
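
# Note: the state numbers in the ACTION and GOTO table keys index into
# _alsu_items above, and each Shift entry also carries the target state's
# item set, so the tables and the item-set list must always be renumbered
# together (see _normalize_table below).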

# GOTO table corresponding to the above grammar, ALSU p266.
_alsu_goto = {(0, "S"): 1, (0, "C"): 2, (2, "C"): 5, (3, "C"): 8, (6, "C"): 9}
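

# The item-set numbering chosen by lr1.Grammar.parser() is an implementation
# detail, so the generated tables generally will not use the same state
# numbers as the hand-written tables above.  Renumbering both into a
# canonical form (start state first, remaining states in sorted order) makes
# them directly comparable.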
def _normalize_table(items, table):
    """Returns a canonical-form version of items and table, for comparisons."""
    item_to_original_index = {}
    for i, item_set in enumerate(items):
        item_to_original_index[item_set] = i
    sorted_items = items[0:1] + sorted(items[1:], key=sorted)
    original_index_to_index = {}
    for i, item_set in enumerate(sorted_items):
        original_index_to_index[item_to_original_index[item_set]] = i
    updated_table = {}
    for k in table:
        new_k = original_index_to_index[k[0]], k[1]
        new_value = table[k]
        if isinstance(new_value, int):
            # GOTO entries are bare state numbers.
            new_value = original_index_to_index[new_value]
        elif isinstance(new_value, lr1.Shift):
            # Shift actions carry the target state number and its item set.
            new_value = lr1.Shift(original_index_to_index[new_value.state],
                                  new_value.items)
        updated_table[new_k] = new_value
    return sorted_items, updated_table


class Lr1Test(unittest.TestCase):
    """Tests for lr1."""

    def test_parse_lr1item(self):
        self.assertEqual(
            lr1.Item.parse("S' -> . S, $"),
            lr1.Item(parser_types.Production(lr1.START_PRIME, ("S",)),
                     0, lr1.END_OF_INPUT, "S"))

    def test_symbol_extraction(self):
        self.assertEqual(_alsu_grammar.terminals,
                         set(["c", "d", lr1.END_OF_INPUT]))
        self.assertEqual(_alsu_grammar.nonterminals,
                         set(["S", "C", lr1.START_PRIME]))
        self.assertEqual(_alsu_grammar.symbols,
                         set(["c", "d", "S", "C", lr1.END_OF_INPUT,
                              lr1.START_PRIME]))

    def test_items(self):
        self.assertEqual(set(_alsu_grammar._items()[0]),
                         frozenset(_alsu_items))

    def test_terminal_nonterminal_production_tables(self):
        parser = _alsu_grammar.parser()
        self.assertEqual(parser.terminals, _alsu_grammar.terminals)
        self.assertEqual(parser.nonterminals, _alsu_grammar.nonterminals)
        self.assertEqual(parser.productions, _alsu_grammar.productions)

    def test_action_table(self):
        parser = _alsu_grammar.parser()
        norm_items, norm_action = _normalize_table(parser.item_sets,
                                                   parser.action)
        test_items, test_action = _normalize_table(_alsu_items, _alsu_action)
        self.assertEqual(norm_items, test_items)
        self.assertEqual(norm_action, test_action)

    def test_goto_table(self):
        parser = _alsu_grammar.parser()
        norm_items, norm_goto = _normalize_table(parser.item_sets, parser.goto)
        test_items, test_goto = _normalize_table(_alsu_items, _alsu_goto)
        self.assertEqual(norm_items, test_items)
        self.assertEqual(norm_goto, test_goto)

    def test_successful_parse(self):
        parser = _alsu_grammar.parser()
        loc = parser_types.parse_location
        s_to_c_c = parser_types.Production.parse("S -> C C")
        c_to_c_c = parser_types.Production.parse("C -> c C")
        c_to_d = parser_types.Production.parse("C -> d")
        self.assertEqual(
            lr1.Reduction("S", [
                lr1.Reduction("C", [
                    Token("c", loc("1:1-1:2")),
                    lr1.Reduction("C", [
                        Token("c", loc("1:2-1:3")),
                        lr1.Reduction("C", [
                            Token("c", loc("1:3-1:4")),
                            lr1.Reduction("C", [Token("d", loc("1:4-1:5"))],
                                          c_to_d, loc("1:4-1:5"))
                        ], c_to_c_c, loc("1:3-1:5"))
                    ], c_to_c_c, loc("1:2-1:5"))
                ], c_to_c_c, loc("1:1-1:5")),
                lr1.Reduction("C", [
                    Token("c", loc("1:5-1:6")),
                    lr1.Reduction("C", [Token("d", loc("1:6-1:7"))],
                                  c_to_d, loc("1:6-1:7"))
                ], c_to_c_c, loc("1:5-1:7"))
            ], s_to_c_c, loc("1:1-1:7")),
            parser.parse(_tokenize("cccdcd")).parse_tree)
        self.assertEqual(
            lr1.Reduction("S", [
                lr1.Reduction("C", [Token("d", loc("1:1-1:2"))],
                              c_to_d, loc("1:1-1:2")),
                lr1.Reduction("C", [Token("d", loc("1:2-1:3"))],
                              c_to_d, loc("1:2-1:3"))
            ], s_to_c_c, loc("1:1-1:3")),
            parser.parse(_tokenize("dd")).parse_tree)

    def test_parse_with_no_source_information(self):
        parser = _alsu_grammar.parser()
        s_to_c_c = parser_types.Production.parse("S -> C C")
        c_to_d = parser_types.Production.parse("C -> d")
        self.assertEqual(
            lr1.Reduction("S", [
                lr1.Reduction("C", [Token("d", None)], c_to_d, None),
                lr1.Reduction("C", [Token("d", None)], c_to_d, None)
            ], s_to_c_c, None),
            parser.parse([Token("d", None), Token("d", None)]).parse_tree)

    def test_failed_parses(self):
        parser = _alsu_grammar.parser()
        self.assertIsNone(parser.parse(_tokenize("d")).parse_tree)
        self.assertIsNone(parser.parse(_tokenize("cccd")).parse_tree)
        self.assertIsNone(parser.parse(_tokenize("")).parse_tree)
        self.assertIsNone(parser.parse(_tokenize("cccdc")).parse_tree)
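
    # mark_error records an error code against the parser state reached by
    # running the given token sequence to its error.  It returns None on
    # success, or a diagnostic string when the example parses cleanly, the
    # error occurs at a different token than expected, or the state already
    # has a different error code.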
    def test_mark_error(self):
        parser = _alsu_grammar.parser()
        self.assertIsNone(parser.mark_error(_tokenize("cccdc"), None,
                                            "missing last d"))
        self.assertIsNone(parser.mark_error(_tokenize("d"), None,
                                            "missing last C"))
        # Marking an already-marked error with the same error code should
        # succeed.
        self.assertIsNone(parser.mark_error(_tokenize("d"), None,
                                            "missing last C"))
        # Marking an already-marked error with a different error code should
        # fail.
        self.assertRegex(
            parser.mark_error(_tokenize("d"), None, "different message"),
            r"^Attempted to overwrite existing error code 'missing last C' "
            r"with new error code 'different message' for state \d+, "
            r"terminal \$$")
        self.assertEqual(
            "Input successfully parsed.",
            parser.mark_error(_tokenize("dd"), None, "good parse"))
        self.assertEqual(
            parser.mark_error(_tokenize("x"), None, "wrong location"),
            "error occurred on x token, not end of input.")
        self.assertEqual(
            parser.mark_error([], _tokenize("x")[0], "wrong location"),
            "error occurred on $ token, not x token.")
        self.assertIsNone(
            parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error"))
        # Marking an already-marked error with the same error code should
        # succeed.
        self.assertIsNone(
            parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error"))
        # Marking an already-marked error with a different error code should
        # fail.
        self.assertRegex(
            parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN,
                              "default error 2"),
            r"^Attempted to overwrite existing default error code "
            r"'default error' with new error code 'default error 2' for "
            r"state \d+$")

        self.assertEqual(
            "missing last d", parser.parse(_tokenize("cccdc")).error.code)
        self.assertEqual(
            "missing last d", parser.parse(_tokenize("dc")).error.code)
        self.assertEqual(
            "missing last C", parser.parse(_tokenize("d")).error.code)
        self.assertEqual(
            "default error", parser.parse(_tokenize("z")).error.code)
        self.assertEqual(
            "missing last C", parser.parse(_tokenize("ccccd")).error.code)
        self.assertIsNone(parser.parse(_tokenize("ccc")).error.code)

    def test_grammar_with_empty_rhs(self):
        grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                         A -> a A
                                                         A ->
                                                         B -> b"""))
        parser = grammar.parser()
        self.assertFalse(parser.conflicts)
        self.assertTrue(parser.parse(_tokenize("ab")).parse_tree)
        self.assertTrue(parser.parse(_tokenize("b")).parse_tree)
        self.assertTrue(parser.parse(_tokenize("aab")).parse_tree)

    def test_grammar_with_reduce_reduce_conflicts(self):
        grammar = lr1.Grammar("S", _parse_productions("""S -> A c
                                                         S -> B c
                                                         A -> a
                                                         B -> a"""))
        parser = grammar.parser()
        self.assertEqual(len(parser.conflicts), 1)
        # parser.conflicts is a set, so iteration is the only way to reach its
        # single element.
        for conflict in parser.conflicts:
            for action in conflict.actions:
                self.assertIsInstance(action, lr1.Reduce)

    def test_grammar_with_shift_reduce_conflicts(self):
        grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                         A -> a
                                                         A ->
                                                         B -> a
                                                         B ->"""))
        parser = grammar.parser()
        self.assertEqual(len(parser.conflicts), 1)
        # parser.conflicts is a set, so iteration is the only way to reach its
        # single element.
        for conflict in parser.conflicts:
            reduces = 0
            shifts = 0
            for action in conflict.actions:
                if isinstance(action, lr1.Reduce):
                    reduces += 1
                elif isinstance(action, lr1.Shift):
                    shifts += 1
            self.assertEqual(1, reduces)
            self.assertEqual(1, shifts)

    def test_item_str(self):
        self.assertEqual(
            "a -> b c ., d",
            str(lr1.make_item(parser_types.Production.parse("a -> b c"),
                              2, "d")))
        self.assertEqual(
            "a -> b . c, d",
            str(lr1.make_item(parser_types.Production.parse("a -> b c"),
                              1, "d")))
        self.assertEqual(
            "a -> . b c, d",
            str(lr1.make_item(parser_types.Production.parse("a -> b c"),
                              0, "d")))
        self.assertEqual(
            "a -> ., d",
            str(lr1.make_item(parser_types.Production.parse("a ->"), 0, "d")))

    def test_conflict_str(self):
        self.assertEqual("Conflict for 'A' in state 12: R vs S",
                         str(lr1.Conflict(12, "'A'", ["R", "S"])))
        self.assertEqual("Conflict for 'A' in state 12: R vs S vs T",
                         str(lr1.Conflict(12, "'A'", ["R", "S", "T"])))


if __name__ == "__main__":
    unittest.main()