// xref: /aosp_15_r20/external/antlr/runtime/Python/tests/t012lexerXMLLexer.g (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
// Lexer-only grammar that recognizes a small XML subset.  The actions
// embedded in the rules call self.output(...) to record what was matched.
lexer grammar t012lexerXMLLexer;
options {
  // Generate the lexer with ANTLR's Python target.
  language = Python;
}

// Import needed by the embedded actions: StringIO backs the buffer that
// self.output(...) writes to.  cStringIO is a Python 2 module.
@header {
from cStringIO import StringIO
}

// Lexer initialisation: create the in-memory buffer that collects every
// line passed to self.output(...).
@lexer::init {
self.outbuf = StringIO()
}

// Helper method added to the generated lexer class.
@lexer::members {
# Append one newline-terminated line of trace text to self.outbuf.
# Unicode text is encoded as UTF-8.  Byte strings are written unchanged,
# because calling encode on a Python 2 byte string first decodes it as
# ASCII and fails on any non-ASCII byte.
def output(self, line):
    if isinstance(line, unicode):
        line = line.encode('utf-8')
    self.outbuf.write(line + "\n")
}

// The only rule in this grammar that is not a fragment: a whole document is
// matched as one token.  The XML declaration and the DOCTYPE are optional,
// whitespace may separate the parts, and exactly one root ELEMENT is required.
DOCUMENT
    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
    ;

// Document type declaration:
//   '<!DOCTYPE' root-name [SYSTEM sys | PUBLIC pub sys] [internal DTD] '>'
// Reports the root element name, then whichever identifiers are present,
// then the internal DTD subset if there is one.
// Labels are read through the $label.text form, as the ELEMENT rule does.
fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {self.output("ROOTELEMENT: "+$rootElementName.text)}
        WS
        (
            // Optional external identifier.
            ( 'SYSTEM' WS sys1=VALUE
                {self.output("SYSTEM: "+$sys1.text)}

            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {self.output("PUBLIC: "+$pub.text)}
                {self.output("SYSTEM: "+$sys2.text)}
            )
            ( WS )?
        )?
        // Optional internal subset in square brackets.
        ( dtd=INTERNAL_DTD
            {self.output("INTERNAL DTD: "+$dtd.text)}
        )?
        '>'
    ;

// Internal DTD subset: everything from '[' up to the first ']'.  The
// non-greedy loop stops at the first closing bracket it can match.
fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

// Processing instruction: '<?' target, optional attributes, '?>'.
// Reports the target name; each attribute is reported by ATTRIBUTE itself.
fragment PI :
        '<?' target=GENERIC_ID WS?
          {self.output("PI: "+$target.text)}
        ( ATTRIBUTE WS? )*  '?>'
    ;

// XML declaration: '<?xml' with each letter in either case, optional
// attributes, then '?>'.  Reports a fixed "XML declaration" line.
fragment XMLDECL :
        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
          {self.output("XML declaration")}
        ( ATTRIBUTE WS? )*  '?>'
    ;


// An element is either an empty element, or a start tag followed by any mix
// of nested elements, character data, CDATA sections, comments and
// processing instructions, closed by an end tag.  Character data, CDATA
// sections and comments are reported here with their full matched text.
// The grammar does not check that the end tag name matches the start tag.
fragment ELEMENT
    : ( START_TAG
            (ELEMENT
            | t=PCDATA
                {self.output("PCDATA: \""+$t.text+"\"")}
            | t=CDATA
                {self.output("CDATA: \""+$t.text+"\"")}
            | t=COMMENT
                {self.output("Comment: \""+$t.text+"\"")}
            // The pi label is not referenced by any action; PI reports itself.
            | pi=PI
            )*
            END_TAG
        | EMPTY_ELEMENT
        )
    ;

// Opening tag: '<' name, optional attributes, '>'.  Reports the tag name
// before its attributes are matched.
fragment START_TAG
    : '<' WS? name=GENERIC_ID WS?
          {self.output("Start Tag: "+$name.text)}
        ( ATTRIBUTE WS? )* '>'
    ;

// Self-closing tag: '<' name, optional attributes, '/>'.  It shares its
// whole prefix with START_TAG and differs only in the closing '/>'.
fragment EMPTY_ELEMENT
    : '<' WS? name=GENERIC_ID WS?
          {self.output("Empty Element: "+$name.text)}
        ( ATTRIBUTE WS? )* '/>'
    ;

// Attribute: name '=' quoted value, with optional whitespace around '='.
// The reported value still includes its surrounding quote characters,
// because VALUE matches them as part of its text.
fragment ATTRIBUTE
    : name=GENERIC_ID WS? '=' WS? value=VALUE
        {self.output("Attr: "+$name.text+"="+$value.text)}
    ;

// Closing tag: '</' name '>'.  Reports the tag name once the whole tag has
// been matched.
fragment END_TAG
    : '</' WS? name=GENERIC_ID WS? '>'
        {self.output("End Tag: "+$name.text)}
    ;

// XML comment: '<!--' up to the first '-->' (non-greedy loop).
fragment COMMENT
    :   '<!--' (options {greedy=false;} : .)* '-->'
    ;

// CDATA section: '<![CDATA[' up to the first ']]>' (non-greedy loop).
fragment CDATA
    :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
    ;

// Character data: one or more characters other than '<'.
fragment PCDATA : (~'<')+ ;

// Quoted value: double-quoted or single-quoted.  The content may not contain
// the quote character that delimits it; the quotes are part of the match.
fragment VALUE :
        ( '\"' (~'\"')* '\"'
        | '\'' (~'\'')* '\''
        )
    ;

// Name used for tags, attributes and PI targets: starts with a letter, '_'
// or ':', followed by any number of letters, digits, '.', '-', '_' or ':'.
// The loop is explicitly greedy, so it consumes as many name characters as
// it can.
fragment GENERIC_ID
    : ( LETTER | '_' | ':')
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
    ;

// A single ASCII letter, lower or upper case.
fragment LETTER
    : 'a'..'z'
    | 'A'..'Z'
    ;

// Whitespace run: one or more spaces, tabs or line breaks, where a line
// break is LF, CR LF or a lone CR.
fragment WS  :
        (   ' '
        |   '\t'
        |  ( '\n'
            |   '\r\n'
            |   '\r'
            )
        )+
    ;
