xref: /aosp_15_r20/external/antlr/runtime/JavaScript/tests/functional/t012lexerXML.g (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot/** XML parser by Oliver Zeigermann October 10, 2005 */
2*16467b97STreehugger Robotlexer grammar t012lexerXML;
3*16467b97STreehugger Robotoptions {
4*16467b97STreehugger Robot  language = JavaScript;
5*16467b97STreehugger Robot}
6*16467b97STreehugger Robot
// Lexer members: an output buffer plus the helper that rule actions call to
// append to it.
@lexer::members {
// Lines reported by the rule actions, in the order they were pushed.
this.lout = [];

// Append one line of output to the buffer.
this.output = function(entry) {
    var sink = this.lout;
    sink.push(entry);
};
}
13*16467b97STreehugger Robot
// Whole-document token: an optional XML declaration and an optional DOCTYPE,
// then exactly one root element. Whitespace is optional between the parts
// and after the root element.
DOCUMENT
    : XMLDECL?
      WS?
      DOCTYPE?
      WS?
      ELEMENT
      WS?
    ;
17*16467b97STreehugger Robot
// Document type declaration. Reports the root element name, the SYSTEM or
// PUBLIC/SYSTEM literals of an external identifier, and the internal DTD
// subset (each only when present) through this.output().
// Whitespace after the root element name is mandatory in this rule, so a
// declaration such as <!DOCTYPE root> is not matched.
fragment DOCTYPE
    : '<!DOCTYPE' WS rootName=GENERIC_ID
        { this.output("ROOTELEMENT: "+$rootName.text); }
      WS
      (
        ( 'SYSTEM' WS systemId=VALUE
            { this.output("SYSTEM: "+$systemId.text); }
        | 'PUBLIC' WS publicId=VALUE WS publicSystemId=VALUE
            {
              this.output("PUBLIC: "+$publicId.text);
              this.output("SYSTEM: "+$publicSystemId.text);
            }
        )
        WS?
      )?
      ( subset=INTERNAL_DTD
          { this.output("INTERNAL DTD: "+$subset.text); }
      )?
      '>'
    ;
38*16467b97STreehugger Robot
// Internal DTD subset: a '[' followed by any characters up to the first ']'
// (the wildcard loop is non-greedy).
fragment INTERNAL_DTD
    : '['
      ( options { greedy = false; } : . )*
      ']'
    ;
40*16467b97STreehugger Robot
// Processing instruction: '<?' target, optional attributes, '?>'.
// The target name is reported before any attributes are matched.
fragment PI
    : '<?' piTarget=GENERIC_ID WS?
        { this.output("PI: "+$piTarget.text); }
      ( ATTRIBUTE WS? )*
      '?>'
    ;
46*16467b97STreehugger Robot
// XML declaration: '<?' followed by "xml" in any letter case, optional
// attributes and the closing '?>'. Reports a fixed marker line before the
// attributes are matched.
fragment XMLDECL
    : '<?' ( 'x' | 'X' ) ( 'm' | 'M' ) ( 'l' | 'L' ) WS?
        { this.output("XML declaration"); }
      ( ATTRIBUTE WS? )*
      '?>'
    ;
52*16467b97STreehugger Robot
53*16467b97STreehugger Robot
// An element is either an empty-element tag, or a start tag and end tag
// wrapping any mix of nested elements, character data, CDATA sections,
// comments and processing instructions. Character data, CDATA and comments
// are reported here with their full matched text in double quotes; nested
// elements and processing instructions do their reporting in their own rules.
fragment ELEMENT
    : START_TAG
      ( ELEMENT
      | chars=PCDATA
          { this.output("PCDATA: \""+$chars.text+"\""); }
      | raw=CDATA
          { this.output("CDATA: \""+$raw.text+"\""); }
      | remark=COMMENT
          { this.output("Comment: \""+$remark.text+"\""); }
      | proc=PI
      )*
      END_TAG
    | EMPTY_ELEMENT
    ;
69*16467b97STreehugger Robot
// Opening tag: '<' name, optional attributes, '>'.
// The tag name is reported before its attributes are matched.
fragment START_TAG
    : '<' WS? tagName=GENERIC_ID WS?
        { this.output("Start Tag: "+$tagName.text); }
      ( ATTRIBUTE WS? )*
      '>'
    ;
75*16467b97STreehugger Robot
// Self-closing tag: '<' name, optional attributes, '/>'.
// The tag name is reported before its attributes are matched.
fragment EMPTY_ELEMENT
    : '<' WS? tagName=GENERIC_ID WS?
        { this.output("Empty Element: "+$tagName.text); }
      ( ATTRIBUTE WS? )*
      '/>'
    ;
81*16467b97STreehugger Robot
// A single name = value attribute; whitespace around '=' is optional.
// The value is reported exactly as matched, including its quotes.
fragment ATTRIBUTE
    : attrName=GENERIC_ID WS? '=' WS? attrValue=VALUE
        { this.output("Attr: "+$attrName.text+"="+$attrValue.text); }
    ;
86*16467b97STreehugger Robot
// Closing tag: '</' name '>' with optional whitespace around the name.
// The name is reported once the whole tag has been matched.
fragment END_TAG
    : '</' WS? tagName=GENERIC_ID WS? '>'
        { this.output("End Tag: "+$tagName.text); }
    ;
91*16467b97STreehugger Robot
// Comment: '<!--', then any characters up to the first '-->'
// (the wildcard loop is non-greedy).
fragment COMMENT
    : '<!--'
      ( options { greedy = false; } : . )*
      '-->'
    ;
95*16467b97STreehugger Robot
// CDATA section: '<![CDATA[', then any characters up to the first ']]>'
// (the wildcard loop is non-greedy).
fragment CDATA
    : '<![CDATA['
      ( options { greedy = false; } : . )*
      ']]>'
    ;
99*16467b97STreehugger Robot
// Character data: one or more characters, none of which is '<'.
fragment PCDATA
    : ( ~'<' )+
    ;
101*16467b97STreehugger Robot
// Quoted literal, delimited by a matching pair of double or single quotes.
// The content may be empty and may not contain the delimiting quote.
fragment VALUE
    : '\"' ( ~'\"' )* '\"'
    | '\'' ( ~'\'' )* '\''
    ;
107*16467b97STreehugger Robot
// Name used for tags, attributes, PI targets and the DOCTYPE root element:
// starts with a letter, '_' or ':', and continues with letters, digits,
// '.', '-', '_' or ':'. The continuation loop is explicitly greedy.
fragment GENERIC_ID
    : ( LETTER | '_' | ':' )
      ( options { greedy = true; }
      : LETTER
      | '0'..'9'
      | '.'
      | '-'
      | '_'
      | ':'
      )*
    ;
112*16467b97STreehugger Robot
// A single ASCII letter, lower or upper case.
fragment LETTER
    : 'a'..'z' | 'A'..'Z'
    ;
117*16467b97STreehugger Robot
// One or more whitespace characters: space, tab, or a line break written
// as '\n', '\r\n' or '\r'.
fragment WS
    : ( ' '
      | '\t'
      | ( '\n' | '\r\n' | '\r' )
      )+
    ;
127*16467b97STreehugger Robot
128