// Lexer-only grammar that recognises a small XML subset and, through the
// embedded Python actions, writes one line of text per recognised construct
// into the in-memory buffer `self.outbuf`.
lexer grammar t012lexerXMLLexer;
options {
  language = Python;
}

@header {
from cStringIO import StringIO
}

@lexer::init {
self.outbuf = StringIO()
}

// `output` appends `line`, encoded as UTF-8 and followed by a newline, to
// the buffer created in @lexer::init.
@lexer::members {
def output(self, line):
    self.outbuf.write(line.encode('utf-8') + "\n")
}

// The only non-fragment rule in this grammar: optional XML declaration,
// optional DOCTYPE, then exactly one root ELEMENT, with optional whitespace
// in between and after.
DOCUMENT
    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
    ;

// '<!DOCTYPE' WS root-name WS [SYSTEM id | PUBLIC id id] [internal DTD] '>'.
// Note that the WS after the root element name is mandatory here.
// Reports the root element name, the SYSTEM/PUBLIC identifiers (quotes
// included, since VALUE matches them) and the raw internal DTD text.
fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {self.output("ROOTELEMENT: "+rootElementName.text)}
        WS
        (
            ( 'SYSTEM' WS sys1=VALUE
                {self.output("SYSTEM: "+sys1.text)}

            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {self.output("PUBLIC: "+pub.text)}
                {self.output("SYSTEM: "+sys2.text)}
            )
            ( WS )?
        )?
        ( dtd=INTERNAL_DTD
            {self.output("INTERNAL DTD: "+dtd.text)}
        )?
        '>'
    ;

// '[' ... ']' where the non-greedy loop stops at the first ']'.
fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

// Processing instruction: '<?' target, optional attributes, '?>'.
// Only the target name is reported by this rule; attributes are reported
// by ATTRIBUTE.
fragment PI :
        '<?' target=GENERIC_ID WS?
          {self.output("PI: "+target.text)}
        ( ATTRIBUTE WS? )*  '?>'
    ;

// XML declaration: '<?xml' (any letter case), optional attributes, '?>'.
fragment XMLDECL :
        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
          {self.output("XML declaration")}
        ( ATTRIBUTE WS? )*  '?>'
    ;


// Either START_TAG content* END_TAG or a self-closing EMPTY_ELEMENT.
// Content may be nested elements, PCDATA, CDATA sections, comments or
// processing instructions. Start and end tag names are not compared.
fragment ELEMENT
    : ( START_TAG
            (ELEMENT
            | t=PCDATA
                {self.output("PCDATA: \""+$t.text+"\"")}
            | t=CDATA
                {self.output("CDATA: \""+$t.text+"\"")}
            | t=COMMENT
                {self.output("Comment: \""+$t.text+"\"")}
            | pi=PI
            )*
            END_TAG
        | EMPTY_ELEMENT
        )
    ;

// '<' name attributes* '>'.
fragment START_TAG
    : '<' WS? name=GENERIC_ID WS?
          {self.output("Start Tag: "+name.text)}
        ( ATTRIBUTE WS? )* '>'
    ;

// '<' name attributes* '/>'. Shares its whole prefix with START_TAG; the
// two differ only in the closing '>' versus '/>'.
fragment EMPTY_ELEMENT
    : '<' WS? name=GENERIC_ID WS?
          {self.output("Empty Element: "+name.text)}
        ( ATTRIBUTE WS? )* '/>'
    ;

// name '=' quoted-value, with optional whitespace around '='.
// The reported value still carries its surrounding quotes.
fragment ATTRIBUTE
    : name=GENERIC_ID WS? '=' WS? value=VALUE
        {self.output("Attr: "+name.text+"="+value.text)}
    ;

// '</' name '>'.
fragment END_TAG
    : '</' WS? name=GENERIC_ID WS? '>'
        {self.output("End Tag: "+name.text)}
    ;

// '<!--' ... '-->' where the non-greedy loop stops at the first '-->'.
fragment COMMENT
    :   '<!--' (options {greedy=false;} : .)* '-->'
    ;

// '<![CDATA[' ... ']]>' where the non-greedy loop stops at the first ']]>'.
fragment CDATA
    :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
    ;

// One or more characters other than '<'.
fragment PCDATA : (~'<')+ ;

// A double- or single-quoted string; the quote characters are part of the
// matched text and the body may not contain the delimiting quote.
fragment VALUE :
        ( '\"' (~'\"')* '\"'
        | '\'' (~'\'')* '\''
        )
    ;

// Name: starts with a letter, '_' or ':'; continues greedily with letters,
// digits, '.', '-', '_' or ':'.
fragment GENERIC_ID
    : ( LETTER | '_' | ':')
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    ;

// ASCII letters only.
fragment LETTER
    : 'a'..'z'
    | 'A'..'Z'
    ;

// One or more spaces, tabs or line terminators (LF, CRLF or CR).
fragment WS  :
        (   ' '
        |   '\t'
        |  ( '\n'
            |   '\r\n'
            |   '\r'
            )
        )+
    ;