xref: /aosp_15_r20/external/antlr/runtime/Cpp/tests/t012lexerXMLLexer.g (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
// Lexer-only grammar for a small XML subset. The options block selects the
// Cpp code-generation target.
1*16467b97STreehugger Robotlexer grammar t012lexerXMLLexer;
2*16467b97STreehugger Robotoptions {
3*16467b97STreehugger Robot  language =Cpp;
4*16467b97STreehugger Robot}
5*16467b97STreehugger Robot
// Include lines and namespace name supplied for the lexer sections
// @lexer::includes and @lexer::namespace.
// NOTE(review): ImplTraits, used by the helpers in this grammar, is presumably
// declared by UserTestTraits.hpp - confirm against that header.
6*16467b97STreehugger Robot@lexer::includes
7*16467b97STreehugger Robot{
8*16467b97STreehugger Robot#include "UserTestTraits.hpp"
9*16467b97STreehugger Robot#include <iostream>
10*16467b97STreehugger Robot}
11*16467b97STreehugger Robot@lexer::namespace
12*16467b97STreehugger Robot{ Antlr3Test }
13*16467b97STreehugger Robot
// Code supplied for the @lexer::context section: an output buffer plus small
// helpers that the rule actions in this grammar call to record what was
// matched. Every helper ends its record with CR LF.
// NOTE(review): presumably this section is emitted into the generated lexer
// class, making these members of the lexer - confirm against the Cpp target.
14*16467b97STreehugger Robot@lexer::context {
// Stream that accumulates everything written by the helpers below.
15*16467b97STreehugger RobotImplTraits::StringStreamType outbuf;
16*16467b97STreehugger Robot
// Appends one line of fixed text to outbuf.
17*16467b97STreehugger Robotvoid output(const char* line)
18*16467b97STreehugger Robot{
19*16467b97STreehugger Robot    outbuf << line << "\r\n";
20*16467b97STreehugger Robot}
21*16467b97STreehugger Robot
// Appends line1 immediately followed by line2 (both C strings) as one record.
22*16467b97STreehugger Robotvoid output(const char* line1, const char *line2)
23*16467b97STreehugger Robot{
24*16467b97STreehugger Robot    outbuf << line1 << line2 << "\r\n";
25*16467b97STreehugger Robot}
26*16467b97STreehugger Robot
// Same as the two-argument overload above, but line2 is a StringType. This is
// the overload the rule actions use when they pass matched text.
27*16467b97STreehugger Robotvoid output(const char* line1, ImplTraits::StringType const& line2)
28*16467b97STreehugger Robot{
29*16467b97STreehugger Robot    outbuf << line1 << line2 << "\r\n";
30*16467b97STreehugger Robot}
31*16467b97STreehugger Robot
// Appends a record of the form prefix, name, an equals sign, value.
// The identifier is misspelled (Arribute); it is left as is because the
// ATTRIBUTE rule calls it by exactly this name.
32*16467b97STreehugger Robotvoid appendArribute(const char* prefix, ImplTraits::StringType const& name, ImplTraits::StringType const& value)
33*16467b97STreehugger Robot{
34*16467b97STreehugger Robot    outbuf << prefix << name << '=' << value << "\r\n";
35*16467b97STreehugger Robot}
36*16467b97STreehugger Robot
// Appends name followed by value wrapped in double quote characters.
37*16467b97STreehugger Robotvoid appendString(const char* name, ImplTraits::StringType const& value)
38*16467b97STreehugger Robot{
39*16467b97STreehugger Robot    outbuf << name << '"' << value << '"' << "\r\n";
40*16467b97STreehugger Robot}
41*16467b97STreehugger Robot
42*16467b97STreehugger Robot}
// DOCUMENT is the only rule in this grammar not marked fragment; every other
// rule is reached only through it. It matches an optional XML declaration, an
// optional DOCTYPE, exactly one root ELEMENT, and optional whitespace between
// and after those parts.
43*16467b97STreehugger RobotDOCUMENT
44*16467b97STreehugger Robot    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
45*16467b97STreehugger Robot    ;
46*16467b97STreehugger Robot
// Matches a document type declaration and reports its parts through output():
//   - the root element name as ROOTELEMENT,
//   - an optional external id: either SYSTEM with one quoted VALUE, or PUBLIC
//     with two quoted VALUEs (reported as PUBLIC then SYSTEM),
//   - an optional bracketed internal DTD subset as INTERNAL DTD.
// The WS that follows the root element name is mandatory in this rule, so a
// declaration with no whitespace between the name and the closing angle
// bracket does not match.
47*16467b97STreehugger Robotfragment DOCTYPE
48*16467b97STreehugger Robot    :
49*16467b97STreehugger Robot        '<!DOCTYPE' WS rootElementName=GENERIC_ID
50*16467b97STreehugger Robot        { output("ROOTELEMENT: ", $rootElementName.text);}
51*16467b97STreehugger Robot        WS
52*16467b97STreehugger Robot        (
53*16467b97STreehugger Robot            ( 'SYSTEM' WS sys1=VALUE
54*16467b97STreehugger Robot                {output("SYSTEM: ", $sys1.text);}
55*16467b97STreehugger Robot
56*16467b97STreehugger Robot            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
57*16467b97STreehugger Robot                {output("PUBLIC: ", $pub.text);}
58*16467b97STreehugger Robot                {output("SYSTEM: ", $sys2.text);}
59*16467b97STreehugger Robot            )
60*16467b97STreehugger Robot            ( WS )?
61*16467b97STreehugger Robot        )?
62*16467b97STreehugger Robot        ( dtd=INTERNAL_DTD
63*16467b97STreehugger Robot            {output("INTERNAL DTD: ", $dtd.text);}
64*16467b97STreehugger Robot        )?
65*16467b97STreehugger Robot		'>'
66*16467b97STreehugger Robot	;
67*16467b97STreehugger Robot
// Internal DTD subset: an opening square bracket, then any characters matched
// non-greedily, then a closing square bracket. Because the loop is non-greedy
// the match stops at the first closing bracket.
68*16467b97STreehugger Robotfragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
69*16467b97STreehugger Robot
// Processing instruction: the opening marker, a target name, optional
// whitespace, zero or more attributes, and the closing marker. The target is
// reported with the PI: prefix as soon as it has been matched, before any
// attributes are consumed.
70*16467b97STreehugger Robotfragment PI :
71*16467b97STreehugger Robot        '<?' target=GENERIC_ID WS?
72*16467b97STreehugger Robot          {output("PI: ", $target.text);}
73*16467b97STreehugger Robot        ( ATTRIBUTE WS? )*  '?>'
74*16467b97STreehugger Robot	;
75*16467b97STreehugger Robot
// XML declaration: the same delimiters as PI, but the target must be the
// letters x, m, l in either case. Emits the fixed line XML declaration and
// then consumes zero or more attributes, each of which is reported by the
// ATTRIBUTE rule.
76*16467b97STreehugger Robotfragment XMLDECL :
77*16467b97STreehugger Robot        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
78*16467b97STreehugger Robot          {output("XML declaration");}
79*16467b97STreehugger Robot        ( ATTRIBUTE WS? )*  '?>'
80*16467b97STreehugger Robot	;
81*16467b97STreehugger Robot
82*16467b97STreehugger Robot
// An element is either an EMPTY_ELEMENT, or a START_TAG followed by any mix
// of nested elements, character data, CDATA sections, comments and processing
// instructions, closed by an END_TAG. The rule is recursive through the
// nested ELEMENT alternative.
// PCDATA, CDATA and COMMENT text is reported here through appendString().
// The pi label is not referenced by any action; the PI rule reports itself.
// Nothing in this rule checks that the END_TAG name equals the START_TAG name.
83*16467b97STreehugger Robotfragment ELEMENT
84*16467b97STreehugger Robot    : ( START_TAG
85*16467b97STreehugger Robot            (ELEMENT
86*16467b97STreehugger Robot            | t=PCDATA
87*16467b97STreehugger Robot                {appendString("PCDATA: ", $t.text);}
88*16467b97STreehugger Robot            | t=CDATA
89*16467b97STreehugger Robot                {appendString("CDATA: ", $t.text);}
90*16467b97STreehugger Robot            | t=COMMENT
91*16467b97STreehugger Robot                {appendString("Comment: ", $t.text);}
92*16467b97STreehugger Robot            | pi=PI
93*16467b97STreehugger Robot            )*
94*16467b97STreehugger Robot            END_TAG
95*16467b97STreehugger Robot        | EMPTY_ELEMENT
96*16467b97STreehugger Robot        )
97*16467b97STreehugger Robot    ;
98*16467b97STreehugger Robot
// Opening tag: an opening angle bracket, optional whitespace, the element
// name, optional whitespace, zero or more attributes, and a closing angle
// bracket. The name is reported with the Start Tag: prefix before the
// attributes are matched, so it precedes their Attr: lines in the output.
99*16467b97STreehugger Robotfragment START_TAG
100*16467b97STreehugger Robot    : '<' WS? name=GENERIC_ID WS?
101*16467b97STreehugger Robot          {output("Start Tag: ", $name.text);}
102*16467b97STreehugger Robot        ( ATTRIBUTE WS? )* '>'
103*16467b97STreehugger Robot    ;
104*16467b97STreehugger Robot
// Self-closing element. Identical in shape to START_TAG except that it ends
// with a slash followed by the closing angle bracket, and the name is
// reported with the Empty Element: prefix.
105*16467b97STreehugger Robotfragment EMPTY_ELEMENT
106*16467b97STreehugger Robot    : '<' WS? name=GENERIC_ID WS?
107*16467b97STreehugger Robot          {output("Empty Element: ", $name.text);}
108*16467b97STreehugger Robot        ( ATTRIBUTE WS? )* '/>'
109*16467b97STreehugger Robot    ;
110*16467b97STreehugger Robot
// A single name = value pair, with optional whitespace on either side of the
// equals sign. Reported through appendArribute() with the Attr: prefix. The
// value text passed on is whatever VALUE matched, which includes the
// surrounding quote characters.
111*16467b97STreehugger Robotfragment ATTRIBUTE
112*16467b97STreehugger Robot    : name=GENERIC_ID WS? '=' WS? value=VALUE
113*16467b97STreehugger Robot        {appendArribute("Attr: ", $name.text, $value.text);}
114*16467b97STreehugger Robot    ;
115*16467b97STreehugger Robot
// Closing tag: the two-character opener, optional whitespace, the element
// name, optional whitespace, and a closing angle bracket. The name is
// reported with the End Tag: prefix once the whole tag has been matched.
116*16467b97STreehugger Robotfragment END_TAG
117*16467b97STreehugger Robot    : '</' WS? name=GENERIC_ID WS? '>'
118*16467b97STreehugger Robot        {output("End Tag: ", $name.text);}
119*16467b97STreehugger Robot    ;
120*16467b97STreehugger Robot
// XML comment. The non-greedy wildcard loop stops at the first occurrence of
// the closing marker. The matched text includes both delimiters.
121*16467b97STreehugger Robotfragment COMMENT
122*16467b97STreehugger Robot	:	'<!--' (options {greedy=false;} : .)* '-->'
123*16467b97STreehugger Robot	;
124*16467b97STreehugger Robot
// CDATA section. The non-greedy wildcard loop stops at the first occurrence
// of the closing marker. The matched text includes both delimiters.
125*16467b97STreehugger Robotfragment CDATA
126*16467b97STreehugger Robot	:	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
127*16467b97STreehugger Robot	;
128*16467b97STreehugger Robot
// Character data: one or more characters, none of which is an opening angle
// bracket. Whitespace between tags is matched by this rule as well.
129*16467b97STreehugger Robotfragment PCDATA : (~'<')+ ;
130*16467b97STreehugger Robot
// Quoted value, delimited by a pair of double quotes or a pair of single
// quotes. The body may contain any character except the delimiter in use;
// no escape sequences are recognised by this rule.
131*16467b97STreehugger Robotfragment VALUE :
132*16467b97STreehugger Robot        ( '\"' (~'\"')* '\"'
133*16467b97STreehugger Robot        | '\'' (~'\'')* '\''
134*16467b97STreehugger Robot        )
135*16467b97STreehugger Robot	;
136*16467b97STreehugger Robot
// Name used for elements, attributes and PI targets. The first character is
// a LETTER, an underscore or a colon; the following characters may also be
// digits, a dot or a hyphen. The loop is explicitly greedy, so the name
// extends as far as possible.
137*16467b97STreehugger Robotfragment GENERIC_ID
138*16467b97STreehugger Robot    : ( LETTER | '_' | ':')
139*16467b97STreehugger Robot        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
140*16467b97STreehugger Robot	;
141*16467b97STreehugger Robot
// A single unaccented Latin letter, lower or upper case. No other letters
// are accepted by this rule.
142*16467b97STreehugger Robotfragment LETTER
143*16467b97STreehugger Robot	: 'a'..'z'
144*16467b97STreehugger Robot	| 'A'..'Z'
145*16467b97STreehugger Robot	;
146*16467b97STreehugger Robot
// Whitespace run: one or more spaces, tabs or line breaks. A line break may
// be a line feed, a carriage return followed by a line feed, or a lone
// carriage return.
147*16467b97STreehugger Robotfragment WS  :
148*16467b97STreehugger Robot        (   ' '
149*16467b97STreehugger Robot        |   '\t'
150*16467b97STreehugger Robot        |  ( '\n'
151*16467b97STreehugger Robot            |	'\r\n'
152*16467b97STreehugger Robot            |	'\r'
153*16467b97STreehugger Robot            )
154*16467b97STreehugger Robot        )+
155*16467b97STreehugger Robot    ;
156*16467b97STreehugger Robot
157