xref: /aosp_15_r20/external/antlr/tool/src/main/antlr3/org/antlr/grammar/v3/AssignTokenTypesWalker.g (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1/*
2 [The "BSD license"]
3 Copyright (c) 2005-2011 Terence Parr
4 All rights reserved.
5
6 Grammar conversion to ANTLR v3:
7 Copyright (c) 2011 Sam Harwell
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13 1. Redistributions of source code must retain the above copyright
14	notice, this list of conditions and the following disclaimer.
15 2. Redistributions in binary form must reproduce the above copyright
16	notice, this list of conditions and the following disclaimer in the
17	documentation and/or other materials provided with the distribution.
18 3. The name of the author may not be used to endorse or promote products
19	derived from this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/** [Warning: TJP says that this is probably out of date as of 11/19/2005,
34 *   but since it's probably still useful, I'll leave in.  Don't have energy
35 *   to update at the moment.]
36 *
37 *  Compute the token types for all literals and rules etc..  There are
38 *  a few different cases to consider for grammar types and a few situations
39 *  within.
40 *
41 *  CASE 1 : pure parser grammar
42 *	a) Any reference to a token gets a token type.
43 *  b) The tokens section may alias a token name to a string or char
44 *
45 *  CASE 2 : pure lexer grammar
46 *  a) Import token vocabulary if available. Set token types for any new tokens
47 *     to values above last imported token type
48 *  b) token rule definitions get token types if not already defined
49 *  c) literals do NOT get token types
50 *
51 *  CASE 3 : merged parser / lexer grammar
52 *	a) Any char or string literal gets a token type in a parser rule
53 *  b) Any reference to a token gets a token type if not referencing
54 *     a fragment lexer rule
55 *  c) The tokens section may alias a token name to a string or char
56 *     which must add a rule to the lexer
57 *  d) token rule definitions get token types if not already defined
58 *  e) token rule definitions may also alias a token name to a literal.
59 *     E.g., Rule 'FOR : "for";' will alias FOR to "for" in the sense that
60 *     references to either in the parser grammar will yield the token type
61 *
62 *  What this pass does:
63 *
64 *  0. Collects basic info about the grammar like grammar name and type;
65 *     Oh, I have to get the options in case they affect the token types.
66 *     E.g., tokenVocab option.
67 *     Imports any token vocab name/type pairs into a local hashtable.
68 *  1. Finds a list of all literals and token names.
69 *  2. Finds a list of all token name rule definitions;
70 *     no token rules implies pure parser.
71 *  3. Finds a list of all simple token rule defs of form "<NAME> : <literal>;"
72 *     and aliases them.
73 *  4. Walks token names table and assign types to any unassigned
74 *  5. Walks aliases and assign types to referenced literals
75 *  6. Walks literals, assigning types if untyped
76 *  7. Informs the Grammar object of the type definitions such as:
77 *     g.defineToken(<charliteral>, ttype);
78 *     g.defineToken(<stringliteral>, ttype);
79 *     g.defineToken(<tokenID>, ttype);
80 *     where some of the ttype values will be the same for aliased tokens.
81 */
82tree grammar AssignTokenTypesWalker;
83
84options
85{
86	language=Java;
87	tokenVocab = ANTLR;
88	ASTLabelType = GrammarAST;
89}
90
91@header {
92package org.antlr.grammar.v3;
93
94import java.util.*;
95import org.antlr.analysis.*;
96import org.antlr.misc.*;
97import org.antlr.tool.*;
98
99import org.antlr.runtime.BitSet;
100}
101
102@members {
103protected Grammar grammar;
104protected String currentRuleName;
105
106protected static GrammarAST stringAlias;
107protected static GrammarAST charAlias;
108protected static GrammarAST stringAlias2;
109protected static GrammarAST charAlias2;
110
111@Override
112public void reportError(RecognitionException ex)
113{
114    Token token = null;
115    if (ex instanceof MismatchedTokenException) {
116        token = ((MismatchedTokenException)ex).token;
117    } else if (ex instanceof NoViableAltException) {
118        token = ((NoViableAltException)ex).token;
119    }
120
121    ErrorManager.syntaxError(
122        ErrorManager.MSG_SYNTAX_ERROR,
123        grammar,
124        token,
125        "assign.types: " + ex.toString(),
126        ex);
127}
128
129protected void initASTPatterns()
130{
131    TreeAdaptor adaptor = new ANTLRParser.grammar_Adaptor(null);
132
133    /*
134     * stringAlias = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] EOA[]) EOB[])
135     */
136    stringAlias = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
137    {
138        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
139        adaptor.addChild( alt, adaptor.create( STRING_LITERAL, "STRING_LITERAL" ) );
140        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
141        adaptor.addChild( stringAlias, alt );
142    }
143    adaptor.addChild( stringAlias, adaptor.create( EOB, "EOB" ) );
144
145    /*
146     * charAlias = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] EOA[]) EOB[])
147     */
148    charAlias = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
149    {
150        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
151        adaptor.addChild( alt, adaptor.create( CHAR_LITERAL, "CHAR_LITERAL" ) );
152        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
153        adaptor.addChild( charAlias, alt );
154    }
155    adaptor.addChild( charAlias, adaptor.create( EOB, "EOB" ) );
156
157    /*
158     * stringAlias2 = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] ACTION[] EOA[]) EOB[])
159     */
160    stringAlias2 = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
161    {
162        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
163        adaptor.addChild( alt, adaptor.create( STRING_LITERAL, "STRING_LITERAL" ) );
164        adaptor.addChild( alt, adaptor.create( ACTION, "ACTION" ) );
165        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
166        adaptor.addChild( stringAlias2, alt );
167    }
168    adaptor.addChild( stringAlias2, adaptor.create( EOB, "EOB" ) );
169
170    /*
171     * charAlias = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] ACTION[] EOA[]) EOB[])
172     */
173    charAlias2 = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );
174    {
175        GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
176        adaptor.addChild( alt, adaptor.create( CHAR_LITERAL, "CHAR_LITERAL" ) );
177        adaptor.addChild( alt, adaptor.create( ACTION, "ACTION" ) );
178        adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );
179        adaptor.addChild( charAlias2, alt );
180    }
181    adaptor.addChild( charAlias2, adaptor.create( EOB, "EOB" ) );
182}
183
184// Behavior moved to AssignTokenTypesBehavior
185protected void trackString(GrammarAST t) {}
186protected void trackToken( GrammarAST t ) {}
187protected void trackTokenRule( GrammarAST t, GrammarAST modifier, GrammarAST block ) {}
188protected void alias( GrammarAST t, GrammarAST s ) {}
189public void defineTokens( Grammar root ) {}
190protected void defineStringLiteralsFromDelegates() {}
191protected void assignStringTypes( Grammar root ) {}
192protected void aliasTokenIDsAndLiterals( Grammar root ) {}
193protected void assignTokenIDTypes( Grammar root ) {}
194protected void defineTokenNamesAndLiteralsInGrammar( Grammar root ) {}
195protected void init( Grammar root ) {}
196}
197
198public
199grammar_[Grammar g]
200@init
201{
202	if ( state.backtracking == 0 )
203		init($g);
204}
205	:	(	^( LEXER_GRAMMAR 	  grammarSpec )
206		|	^( PARSER_GRAMMAR   grammarSpec )
207		|	^( TREE_GRAMMAR     grammarSpec )
208		|	^( COMBINED_GRAMMAR grammarSpec )
209		)
210	;
211
212grammarSpec
213	:	id=ID
214		(cmt=DOC_COMMENT)?
215		(optionsSpec)?
216		(delegateGrammars)?
217		(tokensSpec)?
218		(attrScope)*
219		( ^(AMPERSAND .*) )* // skip actions
220		rules
221	;
222
223attrScope
224	:	^( 'scope' ID ( ^(AMPERSAND .*) )* ACTION )
225	;
226
227optionsSpec returns [Map<Object, Object> opts = new HashMap<Object, Object>()]
228	:	^( OPTIONS (option[$opts])+ )
229	;
230
231option[Map<Object, Object> opts]
232	:	^( ASSIGN ID optionValue )
233		{
234			String key = $ID.text;
235			$opts.put(key, $optionValue.value);
236			// check for grammar-level option to import vocabulary
237			if ( currentRuleName==null && key.equals("tokenVocab") )
238			{
239				grammar.importTokenVocabulary($ID,(String)$optionValue.value);
240			}
241		}
242	;
243
244optionValue returns [Object value=null]
245@init
246{
247	if ( state.backtracking == 0 )
248		$value = $start.getText();
249}
250	:	ID
251	|	STRING_LITERAL
252	|	CHAR_LITERAL
253	|	INT
254		{$value = Integer.parseInt($INT.text);}
255//  |   cs=charSet       {$value = $cs;} // return set AST in this case
256	;
257
258charSet
259	:	^( CHARSET charSetElement )
260	;
261
262charSetElement
263	:	CHAR_LITERAL
264	|	^( OR CHAR_LITERAL CHAR_LITERAL )
265	|	^( RANGE CHAR_LITERAL CHAR_LITERAL )
266	;
267
268delegateGrammars
269	:	^(	'import'
270			(	^(ASSIGN ID ID)
271			|	ID
272			)+
273		)
274	;
275
276tokensSpec
277	:	^(TOKENS tokenSpec*)
278	;
279
280tokenSpec
281	:	t=TOKEN_REF            {trackToken($t);}
282	|	^(	ASSIGN
283			t2=TOKEN_REF       {trackToken($t2);}
284			( s=STRING_LITERAL {trackString($s); alias($t2,$s);}
285			| c=CHAR_LITERAL   {trackString($c); alias($t2,$c);}
286			)
287		)
288	;
289
290rules
291	:	rule+
292	;
293
294rule
295	:	^(RULE ruleBody)
296	|	^(PREC_RULE ruleBody)
297	;
298
299ruleBody
300	:	id=ID {currentRuleName=$id.text;}
301		(m=modifier)?
302		^(ARG (ARG_ACTION)?)
303		^(RET (ARG_ACTION)?)
304		(throwsSpec)?
305		(optionsSpec)?
306		(ruleScopeSpec)?
307		( ^(AMPERSAND .*) )*
308		b=block
309		(exceptionGroup)?
310		EOR
311		{trackTokenRule($id,$m.start,$b.start);}
312	;
313
314modifier
315	:	'protected'
316	|	'public'
317	|	'private'
318	|	'fragment'
319	;
320
321throwsSpec
322	:	^('throws' ID+)
323	;
324
325ruleScopeSpec
326	:	^( 'scope' ( ^(AMPERSAND .*) )* (ACTION)? ( ID )* )
327	;
328
329block
330	:	^(	BLOCK
331			(optionsSpec)?
332			( alternative rewrite )+
333			EOB
334		)
335	;
336
337alternative
338	:	^( ALT (element)+ EOA )
339	;
340
341exceptionGroup
342	:	( exceptionHandler )+ (finallyClause)?
343	|	finallyClause
344	;
345
346exceptionHandler
347	:	^('catch' ARG_ACTION ACTION)
348	;
349
350finallyClause
351	:	^('finally' ACTION)
352	;
353
354rewrite
355	:	^(REWRITES ( ^(REWRITE .*) )* )
356	|
357	;
358
359element
360	:	^(ROOT element)
361	|	^(BANG element)
362	|	atom
363	|	^(NOT element)
364	|	^(RANGE atom atom)
365	|	^(CHAR_RANGE atom atom)
366	|	^(ASSIGN ID element)
367	|	^(PLUS_ASSIGN ID element)
368	|	ebnf
369	|	tree_
370	|	^( SYNPRED block )
371	|	FORCED_ACTION
372	|	ACTION
373	|	SEMPRED
374	|	SYN_SEMPRED
375	|	^(BACKTRACK_SEMPRED .*)
376	|	GATED_SEMPRED
377	|	EPSILON
378	;
379
380ebnf
381	:	block
382	|	^( OPTIONAL block )
383	|	^( CLOSURE block )
384	|	^( POSITIVE_CLOSURE block )
385	;
386
387tree_
388	:	^(TREE_BEGIN element+)
389	;
390
391atom
392	:	^( RULE_REF (ARG_ACTION)? )
393	|	^( t=TOKEN_REF (ARG_ACTION )? ) {trackToken($t);}
394	|	c=CHAR_LITERAL   {trackString($c);}
395	|	s=STRING_LITERAL {trackString($s);}
396	|	WILDCARD
397	|	^(DOT ID atom) // scope override on rule
398	;
399
400ast_suffix
401	:	ROOT
402	|	BANG
403	;
404