"""Tests for the lexer generated from the ``t012lexerXMLLexer.g`` grammar."""

import os
import sys
import textwrap
import unittest
from io import StringIO

import antlr3
import testbase


class t012lexerXML(testbase.ANTLRTest):
    """Exercise the XML lexer on one valid fixture and three malformed inputs."""

    def setUp(self):
        """Compile the grammar under test before each test method runs."""
        self.compileGrammar('t012lexerXMLLexer.g')

    def lexerClass(self, base):
        """Return a subclass of ``base`` that is silent and fails fast.

        The returned class discards error messages and re-raises every
        recognition error, so the tests below can catch the exception
        instead of letting the lexer continue.
        (Presumably used by ``testbase`` when ``getLexer`` builds the
        lexer -- confirm against ``testbase.ANTLRTest``.)
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer

    def testValid(self):
        """Lex the ``.input`` fixture and compare the result to ``.output``.

        Both fixture files live next to this module and share its base
        name. The text compared is whatever the lexer collected in its
        ``outbuf`` attribute while tokenizing.
        """
        fixtureBase = os.path.splitext(__file__)[0]

        # Explicit encoding keeps the test independent of the platform
        # locale. NOTE(review): assumes both fixtures are UTF-8 -- confirm.
        with open(fixtureBase + '.input', encoding='utf-8') as f:
            data = f.read()

        lexer = self.getLexer(antlr3.StringStream(data))

        # Drain the lexer; only the side effects on ``outbuf`` matter here.
        while lexer.nextToken().type != self.lexerModule.EOF:
            pass

        output = lexer.outbuf.getvalue()

        with open(fixtureBase + '.output', encoding='utf-8') as f:
            testOutput = f.read()

        self.assertEqual(output, testOutput)

    def _assertLexerError(self, text, excClass, unexpectedType,
                          charPositionInLine, line):
        """Assert that lexing ``text`` raises ``excClass`` at a given spot.

        Tokens are pulled until the lexer raises. Reaching EOF without an
        exception fails the test, with ``assertRaises`` naming the
        exception that was expected.

        Args:
            text: the source text to feed to the lexer.
            excClass: the ``antlr3`` exception class that must be raised.
            unexpectedType: expected ``unexpectedType`` of the exception.
            charPositionInLine: expected ``charPositionInLine`` of the
                exception.
            line: expected ``line`` of the exception.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        with self.assertRaises(excClass) as caught:
            while lexer.nextToken().type != antlr3.EOF:
                pass

        exc = caught.exception
        self.assertEqual(exc.unexpectedType, unexpectedType)
        self.assertEqual(exc.charPositionInLine, charPositionInLine)
        self.assertEqual(exc.line, line)

    def testMalformedInput1(self):
        """A lone ``d`` inside the start tag fails at the closing ``>``."""
        source = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexerError(
            source, antlr3.NoViableAltException,
            unexpectedType='>', charPositionInLine=11, line=2)

    def testMalformedInput2(self):
        """A ``<?tml`` declaration fails at the ``t`` on the first line."""
        source = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexerError(
            source, antlr3.MismatchedSetException,
            unexpectedType='t', charPositionInLine=2, line=1)

    def testMalformedInput3(self):
        """A tag name split by a space fails at the ``a`` of ``attr``."""
        source = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexerError(
            source, antlr3.NoViableAltException,
            unexpectedType='a', charPositionInLine=11, line=2)


if __name__ == '__main__':
    unittest.main()