xref: /aosp_15_r20/external/antlr/runtime/Python3/tests/t012lexerXML.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robotimport antlr3
2*16467b97STreehugger Robotimport testbase
3*16467b97STreehugger Robotimport unittest
4*16467b97STreehugger Robotimport os
5*16467b97STreehugger Robotimport sys
6*16467b97STreehugger Robotfrom io import StringIO
7*16467b97STreehugger Robotimport textwrap
8*16467b97STreehugger Robot
class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests for the ``t012lexerXMLLexer.g`` grammar.

    ``testValid`` lexes a fixture file and compares the text collected in
    ``lexer.outbuf`` with a recorded expectation.  The ``testMalformedInput*``
    cases check that malformed documents raise the expected recognition
    exception with the expected offending character and position.
    """

    def setUp(self):
        """Compile the grammar under test before each test method."""
        self.compileGrammar('t012lexerXMLLexer.g')

    def lexerClass(self, base):
        """Return a subclass of *base* that raises instead of recovering.

        Error messages are discarded and ``reportError`` re-raises the
        exception it is given, so the tests can observe it directly.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def _assertLexerRaises(self, text, excClass):
        """Lex *text* until it raises *excClass* and return that exception.

        The test fails if the lexer reaches EOF without raising.  Any other
        exception type propagates unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        with self.assertRaises(excClass) as caught:
            while True:
                token = lexer.nextToken()
                if token.type == antlr3.EOF:
                    self.fail(
                        'lexer reached EOF without raising %s'
                        % excClass.__name__)

        return caught.exception

    def testValid(self):
        """Lex the ``.input`` fixture and compare with the ``.output`` one."""
        basePath = os.path.splitext(__file__)[0]

        # Decode explicitly so the comparison does not depend on the
        # platform's locale encoding.
        # NOTE(review): assumes the fixtures are stored as UTF-8 - confirm.
        with open(basePath + '.input', encoding='utf-8') as f:
            data = f.read()
        lexer = self.getLexer(antlr3.StringStream(data))

        # Drain the token stream; only the side effect on lexer.outbuf
        # is checked below.
        while True:
            token = lexer.nextToken()
            if token.type == self.lexerModule.EOF:
                break

        output = lexer.outbuf.getvalue()

        with open(basePath + '.output', encoding='utf-8') as f:
            testOutput = f.read()

        self.assertEqual(output, testOutput)

    def testMalformedInput1(self):
        """``<document d>`` must be rejected at the ``>`` on line 2."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        exc = self._assertLexerRaises(text, antlr3.NoViableAltException)
        self.assertEqual(exc.unexpectedType, '>')
        self.assertEqual(exc.charPositionInLine, 11)
        self.assertEqual(exc.line, 2)

    def testMalformedInput2(self):
        """``<?tml ...?>`` must be rejected at the ``t`` on line 1."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        exc = self._assertLexerRaises(text, antlr3.MismatchedSetException)
        self.assertEqual(exc.unexpectedType, 't')
        self.assertEqual(exc.charPositionInLine, 2)
        self.assertEqual(exc.line, 1)

    def testMalformedInput3(self):
        """``<docu ment attr="foo">`` must be rejected at ``a`` on line 2."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        exc = self._assertLexerRaises(text, antlr3.NoViableAltException)
        self.assertEqual(exc.unexpectedType, 'a')
        self.assertEqual(exc.charPositionInLine, 11)
        self.assertEqual(exc.line, 2)
117*16467b97STreehugger Robot
118*16467b97STreehugger Robot
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
121