# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for lr1."""

import collections
import unittest

from compiler.front_end import lr1
from compiler.util import parser_types


def _make_items(text):
  """Makes a frozenset of lr1.Items from the lines in text."""
  return frozenset(
      [lr1.Item.parse(line.strip()) for line in text.splitlines()])


Token = collections.namedtuple("Token", ["symbol", "source_location"])


def _tokenize(text):
  """"Tokenizes" text by making each character into a token."""
  result = []
  for i in range(len(text)):
    result.append(
        Token(text[i], parser_types.make_location((1, i + 1), (1, i + 2))))
  return result


def _parse_productions(text):
  """Parses text into a grammar by calling Production.parse on each line."""
  return [parser_types.Production.parse(line) for line in text.splitlines()]


# Example grammar 4.54 from Aho, Lam, Sethi & Ullman (ALSU), p. 263.
_alsu_grammar = lr1.Grammar("S", _parse_productions("""S -> C C
                                                       C -> c C
                                                       C -> d"""))

# Item sets corresponding to the above grammar, ALSU pp. 263-264.
_alsu_items = [
    _make_items("""S' -> . S, $
                   S -> . C C, $
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""S' -> S ., $"""),
    _make_items("""S -> C . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> c . C, c
                   C -> c . C, d
                   C -> . c C, c
                   C -> . c C, d
                   C -> . d, c
                   C -> . d, d"""),
    _make_items("""C -> d ., c
                   C -> d ., d"""),
    _make_items("""S -> C C ., $"""),
    _make_items("""C -> c . C, $
                   C -> . c C, $
                   C -> . d, $"""),
    _make_items("""C -> d ., $"""),
    _make_items("""C -> c C ., c
                   C -> c C ., d"""),
    _make_items("""C -> c C ., $"""),
]

# ACTION table corresponding to the above grammar, ALSU p. 266.
_alsu_action = {
    (0, "c"): lr1.Shift(3, _alsu_items[3]),
    (0, "d"): lr1.Shift(4, _alsu_items[4]),
    (1, lr1.END_OF_INPUT): lr1.Accept(),
    (2, "c"): lr1.Shift(6, _alsu_items[6]),
    (2, "d"): lr1.Shift(7, _alsu_items[7]),
    (3, "c"): lr1.Shift(3, _alsu_items[3]),
    (3, "d"): lr1.Shift(4, _alsu_items[4]),
    (4, "c"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (4, "d"): lr1.Reduce(parser_types.Production("C", ("d",))),
    (5, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("S", ("C", "C"))),
    (6, "c"): lr1.Shift(6, _alsu_items[6]),
    (6, "d"): lr1.Shift(7, _alsu_items[7]),
    (7, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("d",))),
    (8, "c"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (8, "d"): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
    (9, lr1.END_OF_INPUT): lr1.Reduce(parser_types.Production("C", ("c", "C"))),
}
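
# Reading the ACTION table: ACTION[(state, terminal)] is a Shift (push the
# target state, paired with that state's item set), a Reduce (pop len(rhs)
# states and consult GOTO for the production's left-hand side), or Accept.
# For example, state 0 on "c" shifts to state 3, while state 4 on lookahead
# "c" or "d" reduces by C -> d.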
_alsu_goto = {(0, "S"): 1, (0, "C"): 2, (2, "C"): 5, (3, "C"): 8, (6, "C"): 9,} def _normalize_table(items, table): """Returns a canonical-form version of items and table, for comparisons.""" item_to_original_index = {} for i in range(len(items)): item_to_original_index[items[i]] = i sorted_items = items[0:1] + sorted(items[1:], key=sorted) original_index_to_index = {} for i in range(len(sorted_items)): original_index_to_index[item_to_original_index[sorted_items[i]]] = i updated_table = {} for k in table: new_k = original_index_to_index[k[0]], k[1] new_value = table[k] if isinstance(new_value, int): new_value = original_index_to_index[new_value] elif isinstance(new_value, lr1.Shift): new_value = lr1.Shift(original_index_to_index[new_value.state], new_value.items) updated_table[new_k] = new_value return sorted_items, updated_table class Lr1Test(unittest.TestCase): """Tests for lr1.""" def test_parse_lr1item(self): self.assertEqual(lr1.Item.parse("S' -> . S, $"), lr1.Item(parser_types.Production(lr1.START_PRIME, ("S",)), 0, lr1.END_OF_INPUT, "S")) def test_symbol_extraction(self): self.assertEqual(_alsu_grammar.terminals, set(["c", "d", lr1.END_OF_INPUT])) self.assertEqual(_alsu_grammar.nonterminals, set(["S", "C", lr1.START_PRIME])) self.assertEqual(_alsu_grammar.symbols, set(["c", "d", "S", "C", lr1.END_OF_INPUT, lr1.START_PRIME])) def test_items(self): self.assertEqual(set(_alsu_grammar._items()[0]), frozenset(_alsu_items)) def test_terminal_nonterminal_production_tables(self): parser = _alsu_grammar.parser() self.assertEqual(parser.terminals, _alsu_grammar.terminals) self.assertEqual(parser.nonterminals, _alsu_grammar.nonterminals) self.assertEqual(parser.productions, _alsu_grammar.productions) def test_action_table(self): parser = _alsu_grammar.parser() norm_items, norm_action = _normalize_table(parser.item_sets, parser.action) test_items, test_action = _normalize_table(_alsu_items, _alsu_action) self.assertEqual(norm_items, test_items) self.assertEqual(norm_action, test_action) def test_goto_table(self): parser = _alsu_grammar.parser() norm_items, norm_goto = _normalize_table(parser.item_sets, parser.goto) test_items, test_goto = _normalize_table(_alsu_items, _alsu_goto) self.assertEqual(norm_items, test_items) self.assertEqual(norm_goto, test_goto) def test_successful_parse(self): parser = _alsu_grammar.parser() loc = parser_types.parse_location s_to_c_c = parser_types.Production.parse("S -> C C") c_to_c_c = parser_types.Production.parse("C -> c C") c_to_d = parser_types.Production.parse("C -> d") self.assertEqual( lr1.Reduction("S", [lr1.Reduction("C", [ Token("c", loc("1:1-1:2")), lr1.Reduction( "C", [Token("c", loc("1:2-1:3")), lr1.Reduction("C", [Token("c", loc("1:3-1:4")), lr1.Reduction( "C", [Token("d", loc("1:4-1:5"))], c_to_d, loc("1:4-1:5"))], c_to_c_c, loc("1:3-1:5"))], c_to_c_c, loc("1:2-1:5")) ], c_to_c_c, loc("1:1-1:5")), lr1.Reduction( "C", [Token("c", loc("1:5-1:6")), lr1.Reduction("C", [Token("d", loc("1:6-1:7"))], c_to_d, loc("1:6-1:7"))], c_to_c_c, loc("1:5-1:7"))], s_to_c_c, loc("1:1-1:7")), parser.parse(_tokenize("cccdcd")).parse_tree) self.assertEqual( lr1.Reduction("S", [ lr1.Reduction("C", [Token("d", loc("1:1-1:2"))], c_to_d, loc( "1:1-1:2")), lr1.Reduction("C", [Token("d", loc("1:2-1:3"))], c_to_d, loc("1:2-1:3")) ], s_to_c_c, loc("1:1-1:3")), parser.parse(_tokenize("dd")).parse_tree) def test_parse_with_no_source_information(self): parser = _alsu_grammar.parser() s_to_c_c = parser_types.Production.parse("S -> C C") c_to_d = parser_types.Production.parse("C -> 
d") self.assertEqual( lr1.Reduction("S", [ lr1.Reduction("C", [Token("d", None)], c_to_d, None), lr1.Reduction("C", [Token("d", None)], c_to_d, None) ], s_to_c_c, None), parser.parse([Token("d", None), Token("d", None)]).parse_tree) def test_failed_parses(self): parser = _alsu_grammar.parser() self.assertEqual(None, parser.parse(_tokenize("d")).parse_tree) self.assertEqual(None, parser.parse(_tokenize("cccd")).parse_tree) self.assertEqual(None, parser.parse(_tokenize("")).parse_tree) self.assertEqual(None, parser.parse(_tokenize("cccdc")).parse_tree) def test_mark_error(self): parser = _alsu_grammar.parser() self.assertIsNone(parser.mark_error(_tokenize("cccdc"), None, "missing last d")) self.assertIsNone(parser.mark_error(_tokenize("d"), None, "missing last C")) # Marking an already-marked error with the same error code should succeed. self.assertIsNone(parser.mark_error(_tokenize("d"), None, "missing last C")) # Marking an already-marked error with a different error code should fail. self.assertRegexpMatches( parser.mark_error(_tokenize("d"), None, "different message"), r"^Attempted to overwrite existing error code 'missing last C' with " r"new error code 'different message' for state \d+, terminal \$$") self.assertEqual( "Input successfully parsed.", parser.mark_error(_tokenize("dd"), None, "good parse")) self.assertEqual( parser.mark_error(_tokenize("x"), None, "wrong location"), "error occurred on x token, not end of input.") self.assertEqual( parser.mark_error([], _tokenize("x")[0], "wrong location"), "error occurred on $ token, not x token.") self.assertIsNone( parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error")) # Marking an already-marked error with the same error code should succeed. self.assertIsNone( parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error")) # Marking an already-marked error with a different error code should fail. 
    # Marking an already-marked error with a different error code should fail.
    self.assertRegex(
        parser.mark_error([lr1.ANY_TOKEN], lr1.ANY_TOKEN, "default error 2"),
        r"^Attempted to overwrite existing default error code 'default error' "
        r"with new error code 'default error 2' for state \d+$")
    self.assertEqual(
        "missing last d", parser.parse(_tokenize("cccdc")).error.code)
    self.assertEqual(
        "missing last d", parser.parse(_tokenize("dc")).error.code)
    self.assertEqual(
        "missing last C", parser.parse(_tokenize("d")).error.code)
    self.assertEqual(
        "default error", parser.parse(_tokenize("z")).error.code)
    self.assertEqual(
        "missing last C", parser.parse(_tokenize("ccccd")).error.code)
    self.assertEqual(None, parser.parse(_tokenize("ccc")).error.code)

  def test_grammar_with_empty_rhs(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                     A -> a A
                                                     A ->
                                                     B -> b"""))
    parser = grammar.parser()
    self.assertFalse(parser.conflicts)
    self.assertTrue(parser.parse(_tokenize("ab")).parse_tree)
    self.assertTrue(parser.parse(_tokenize("b")).parse_tree)
    self.assertTrue(parser.parse(_tokenize("aab")).parse_tree)

  def test_grammar_with_reduce_reduce_conflicts(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A c
                                                     S -> B c
                                                     A -> a
                                                     B -> a"""))
    parser = grammar.parser()
    self.assertEqual(len(parser.conflicts), 1)  # parser.conflicts is a set
    for conflict in parser.conflicts:
      for action in conflict.actions:
        self.assertTrue(isinstance(action, lr1.Reduce))

  def test_grammar_with_shift_reduce_conflicts(self):
    grammar = lr1.Grammar("S", _parse_productions("""S -> A B
                                                     A -> a
                                                     A ->
                                                     B -> a
                                                     B ->"""))
    parser = grammar.parser()
    self.assertEqual(len(parser.conflicts), 1)  # parser.conflicts is a set
    for conflict in parser.conflicts:
      reduces = 0
      shifts = 0
      for action in conflict.actions:
        if isinstance(action, lr1.Reduce):
          reduces += 1
        elif isinstance(action, lr1.Shift):
          shifts += 1
      self.assertEqual(1, reduces)
      self.assertEqual(1, shifts)

  def test_item_str(self):
    self.assertEqual(
        "a -> b c ., d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 2, "d")))
    self.assertEqual(
        "a -> b . c, d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 1, "d")))
    self.assertEqual(
        "a -> . b c, d",
        str(lr1.make_item(parser_types.Production.parse("a -> b c"), 0, "d")))
    self.assertEqual(
        "a -> ., d",
        str(lr1.make_item(parser_types.Production.parse("a ->"), 0, "d")))

  def test_conflict_str(self):
    self.assertEqual("Conflict for 'A' in state 12: R vs S",
                     str(lr1.Conflict(12, "'A'", ["R", "S"])))
    self.assertEqual("Conflict for 'A' in state 12: R vs S vs T",
                     str(lr1.Conflict(12, "'A'", ["R", "S", "T"])))


if __name__ == "__main__":
  unittest.main()