"""Tests for the ``main()`` entry point of ANTLR3-generated Python3 modules.

Each test compiles an inline grammar through ``testbase.ANTLRTest`` and then
calls the generated module's ``main()`` with explicit ``stdin``/``stdout``
streams so the output can be inspected.
"""
# NOTE(review): this file was recovered from a listing in which every line
# carried a blame/line-number prefix and all indentation was collapsed.  The
# whitespace inside the grammar literals below is therefore reconstructed,
# not copied -- confirm against the upstream file if exact layout matters.

import sys
import textwrap
import unittest
from io import StringIO

import antlr3
import antlr3.tree

import testbase


class T(testbase.ANTLRTest):
    """Exercise ``main()`` of generated lexers, parsers and tree walkers."""

    def setUp(self):
        """Save ``sys.path`` and put ``self.baseDir`` at its front."""
        self.oldPath = sys.path[:]
        # presumably lets modules generated into baseDir be imported by
        # name (see the --parser/--lexer options used below) -- confirm
        sys.path.insert(0, self.baseDir)

    def tearDown(self):
        """Restore the ``sys.path`` saved by ``setUp``."""
        sys.path = self.oldPath

    def testOverrideMain(self):
        """A grammar-supplied ``@main`` action replaces the default main()."""
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python3;
              }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        # The overriding main() raises unconditionally; seeing that
        # exception shows the custom main was the one invoked.
        self.assertRaises(RuntimeError, lexerMod.main, ['lexer.py'])

    def testLexerFromFile(self):
        """Lexer main() reads its input from a file named on the command line."""
        text = "foo bar"
        inputPath = self.writeFile("input.txt", text)

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python3;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', inputPath],
            stdout=stdout
            )

        # Three output lines are expected for "foo bar" (presumably one per
        # token, the hidden-channel whitespace included -- confirm).
        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerFromStdIO(self):
        """Lexer main() reads from the ``stdin`` stream when no file is given."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python3;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerEncoding(self):
        """Lexer main() copes with non-ASCII characters in its input."""
        text = "föö bär"

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python3;
              }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testCombined(self):
        """Parser main() of a combined grammar runs the rule given by --rule."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python3;
              }

            r returns [res]: (ID)+ EOF { $res = $text };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue()
        # Pass the captured text as the failure message to ease debugging.
        self.assertEqual(len(output.splitlines()), 1, output)

    def testCombinedOutputAST(self):
        """Parser main() of an ``output = AST`` grammar prints the tree."""
        text = "foo + bar"

        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python3;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ foo bar)")

    def testTreeParser(self):
        """Tree-walker main() runs the named lexer and parser, then its own rule."""
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python3;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python3;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "{} {} {}".format($a.text, $OP.text, $b.text) }
              ;
            ''')

        # The returned modules are not used directly: the walker's main()
        # is handed the lexer and parser by name via --lexer/--parser.
        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r',
             '--parser', 'T6Parser', '--parser-rule', 'r',
             '--lexer', 'T6Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "'a + b'")

    def testTreeParserRewrite(self):
        """Tree-walker main() of a rewriting tree grammar prints the new tree."""
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python3;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python3;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        # As in testTreeParser, the lexer/parser are located by name.
        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r',
             '--parser', 'T7Parser', '--parser-rule', 'r',
             '--lexer', 'T7Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ (ARG a) (ARG b))")

    def testGrammarImport(self):
        """Parser main() works for a grammar that imports a slave grammar."""
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python3;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        if parserName + 'Parser' in sys.modules:
            del sys.modules[parserName + 'Parser']

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python3;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(master, returnModule=True)
        parserMod.main(
            ['import.py', '--rule', 's'],
            stdin=StringIO("b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "'b'")


if __name__ == '__main__':
    unittest.main()