/** XML parser by Oliver Zeigermann October 10, 2005 */
lexer grammar t012lexerXML;
options {
    language = JavaScript;
}

// Members added to the generated lexer: every embedded action below reports
// what it recognized by calling this.output(), which appends the line to the
// this.lout array so a caller can inspect the collected lines afterwards.
@lexer::members {
this.lout = [];
this.output = function(line) {
    this.lout.push(line);
};
}

// DOCUMENT is the only non-fragment rule in this grammar; every other rule is
// a fragment reached through it.
DOCUMENT
    :   XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
    ;

// Document type declaration. Whitespace after the root element name is
// optional (so that a bare <!DOCTYPE name> is accepted), and whitespace is
// also allowed between the closing bracket of the internal subset and the
// final '>'.
fragment DOCTYPE
    :   '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {this.output("ROOTELEMENT: "+$rootElementName.text);}
        WS?
        (
            (   'SYSTEM' WS sys1=VALUE
                {this.output("SYSTEM: "+$sys1.text);}

            |   'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {this.output("PUBLIC: "+$pub.text);}
                {this.output("SYSTEM: "+$sys2.text);}
            )
            ( WS )?
        )?
        (   dtd=INTERNAL_DTD
            {this.output("INTERNAL DTD: "+$dtd.text);}
            WS?
        )?
        '>'
    ;

// Bracketed internal subset; the non-greedy loop stops at the first ']'.
fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

// Processing instruction: target name followed by zero or more attributes.
fragment PI
    :   '<?' target=GENERIC_ID WS?
        {this.output("PI: "+$target.text);}
        ( ATTRIBUTE WS? )* '?>'
    ;

// XML declaration: like PI, but the target is the letters x, m, l in either
// case.
fragment XMLDECL
    :   '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
        {this.output("XML declaration");}
        ( ATTRIBUTE WS? )* '?>'
    ;

// An element is either an empty-element tag, or a start tag, any mix of
// nested elements / character data / CDATA sections / comments / processing
// instructions, and an end tag. The rule is recursive through ELEMENT.
fragment ELEMENT
    :   (   START_TAG
            (   ELEMENT
            |   t=PCDATA
                {this.output("PCDATA: \""+$t.text+"\"");}
            |   t=CDATA
                {this.output("CDATA: \""+$t.text+"\"");}
            |   t=COMMENT
                {this.output("Comment: \""+$t.text+"\"");}
            |   PI
            )*
            END_TAG
        |   EMPTY_ELEMENT
        )
    ;

// START_TAG and EMPTY_ELEMENT differ only in the closing delimiter ('>' versus
// '/>') and in the message they report.
fragment START_TAG
    :   '<' WS? name=GENERIC_ID WS?
        {this.output("Start Tag: "+$name.text);}
        ( ATTRIBUTE WS? )* '>'
    ;

fragment EMPTY_ELEMENT
    :   '<' WS? name=GENERIC_ID WS?
        {this.output("Empty Element: "+$name.text);}
        ( ATTRIBUTE WS? )* '/>'
    ;

// name = quoted-value pair; the reported value still includes its quotes
// because VALUE matches them as part of its text.
fragment ATTRIBUTE
    :   name=GENERIC_ID WS? '=' WS? value=VALUE
        {this.output("Attr: "+$name.text+"="+$value.text);}
    ;

fragment END_TAG
    :   '</' WS? name=GENERIC_ID WS? '>'
        {this.output("End Tag: "+$name.text);}
    ;

// Non-greedy loops: each of these ends at the first occurrence of its
// terminator.
fragment COMMENT
    :   '<!--' (options {greedy=false;} : .)* '-->'
    ;

fragment CDATA
    :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
    ;

// Character data: one or more characters other than '<'.
fragment PCDATA : (~'<')+ ;

// Double- or single-quoted string; the quote character that opened the value
// may not appear inside it.
fragment VALUE
    :   (   '\"' (~'\"')* '\"'
        |   '\'' (~'\'')* '\''
        )
    ;

// Name: starts with a letter, '_' or ':'; continues with letters, digits,
// '.', '-', '_' or ':'.
fragment GENERIC_ID
    :   ( LETTER | '_' | ':' )
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    ;

// ASCII letters only.
fragment LETTER
    :   'a'..'z'
    |   'A'..'Z'
    ;

// One or more spaces, tabs or line breaks (LF, CRLF or CR).
fragment WS
    :   (   ' '
        |   '\t'
        |   (   '\n'
            |   '\r\n'
            |   '\r'
            )
        )+
    ;