/*
 [The "BSD license"]
 Copyright (c) 2005-2011 Terence Parr
 All rights reserved.

 Grammar conversion to ANTLR v3:
 Copyright (c) 2011 Sam Harwell
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** [Warning: TJP says that this is probably out of date as of 11/19/2005,
 *   but since it's probably still useful, I'll leave in.  Don't have energy
 *   to update at the moment.]
 *
 *  Compute the token types for all literals and rules etc.  There are
 *  a few different cases to consider for grammar types and a few situations
 *  within.
 *
 *  CASE 1 : pure parser grammar
 *    a) Any reference to a token gets a token type.
 *    b) The tokens section may alias a token name to a string or char
 *
 *  CASE 2 : pure lexer grammar
 *    a) Import token vocabulary if available. Set token types for any new tokens
 *       to values above last imported token type
 *    b) token rule definitions get token types if not already defined
 *    c) literals do NOT get token types
 *
 *  CASE 3 : merged parser / lexer grammar
 *    a) Any char or string literal gets a token type in a parser rule
 *    b) Any reference to a token gets a token type if not referencing
 *       a fragment lexer rule
 *    c) The tokens section may alias a token name to a string or char
 *       which must add a rule to the lexer
 *    d) token rule definitions get token types if not already defined
 *    e) token rule definitions may also alias a token name to a literal.
 *       E.g., Rule 'FOR : "for";' will alias FOR to "for" in the sense that
 *       references to either in the parser grammar will yield the token type
 *
 *  What this pass does:
 *
 *  0. Collects basic info about the grammar like grammar name and type;
 *     Oh, I have to get the options in case they affect the token types.
 *     E.g., tokenVocab option.
 *     Imports any token vocab name/type pairs into a local hashtable.
 *  1. Finds a list of all literals and token names.
 *  2. Finds a list of all token name rule definitions;
 *     no token rules implies pure parser.
 *  3. Finds a list of all simple token rule defs of form "<NAME> : <literal>;"
 *     and aliases them.
 *  4. Walks token names table and assign types to any unassigned
 *  5. Walks aliases and assign types to referenced literals
 *  6. Walks literals, assigning types if untyped
 *  7. Informs the Grammar object of the type definitions such as:
 *       g.defineToken(<charliteral>, ttype);
 *       g.defineToken(<stringliteral>, ttype);
 *       g.defineToken(<tokenID>, ttype);
 *     where some of the ttype values will be the same for aliased tokens.
 */
tree grammar AssignTokenTypesWalker;

options
{
    language=Java;
    tokenVocab = ANTLR;
    ASTLabelType = GrammarAST;
}

@header {
package org.antlr.grammar.v3;

import java.util.*;
import org.antlr.analysis.*;
import org.antlr.misc.*;
import org.antlr.tool.*;

import org.antlr.runtime.BitSet;
}

@members {
/** Grammar handed to reportError and used by the tokenVocab option handling. */
protected Grammar grammar;
/** Name of the rule being walked; assigned in ruleBody and null before the first rule. */
protected String currentRuleName;

/* Tree patterns assigned by initASTPatterns(). */
protected static GrammarAST stringAlias;
protected static GrammarAST charAlias;
protected static GrammarAST stringAlias2;
protected static GrammarAST charAlias2;

/**
 * Forwards a recognition error to ErrorManager.syntaxError with the
 * message prefix assign.types.
 *
 * The offending token is read from the exception only when it is a
 * MismatchedTokenException or a NoViableAltException; for every other
 * exception type a null token is passed along.
 *
 * @param ex the recognition error raised while walking the tree
 */
@Override
public void reportError(RecognitionException ex)
{
    Token token = null;
    if (ex instanceof MismatchedTokenException) {
        token = ((MismatchedTokenException)ex).token;
    } else if (ex instanceof NoViableAltException) {
        token = ((NoViableAltException)ex).token;
    }

    ErrorManager.syntaxError(
        ErrorManager.MSG_SYNTAX_ERROR,
        grammar,
        token,
        "assign.types: " + ex.toString(),
        ex);
}

/**
 * Assigns the four static alias patterns. Each call builds fresh trees:
 *
 *   stringAlias  = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] EOA[]) EOB[])
 *   charAlias    = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] EOA[]) EOB[])
 *   stringAlias2 = ^(BLOCK[] ^(ALT[] STRING_LITERAL[] ACTION[] EOA[]) EOB[])
 *   charAlias2   = ^(BLOCK[] ^(ALT[] CHAR_LITERAL[] ACTION[] EOA[]) EOB[])
 */
protected void initASTPatterns()
{
    TreeAdaptor adaptor = new ANTLRParser.grammar_Adaptor(null);

    stringAlias = createAliasPattern( adaptor, STRING_LITERAL, "STRING_LITERAL", false );
    charAlias = createAliasPattern( adaptor, CHAR_LITERAL, "CHAR_LITERAL", false );
    stringAlias2 = createAliasPattern( adaptor, STRING_LITERAL, "STRING_LITERAL", true );
    charAlias2 = createAliasPattern( adaptor, CHAR_LITERAL, "CHAR_LITERAL", true );
}

/**
 * Builds one pattern of the shape ^(BLOCK ^(ALT literal [ACTION] EOA) EOB).
 *
 * @param adaptor     adaptor used to create and link the nodes
 * @param literalType token type of the literal node (STRING_LITERAL or CHAR_LITERAL)
 * @param literalText text given to the literal node
 * @param withAction  when true an ACTION node is placed between the literal and EOA
 * @return the BLOCK root of the new pattern
 */
private static GrammarAST createAliasPattern( TreeAdaptor adaptor, int literalType, String literalText, boolean withAction )
{
    GrammarAST blockNode = (GrammarAST)adaptor.create( BLOCK, "BLOCK" );

    GrammarAST alt = (GrammarAST)adaptor.create( ALT, "ALT" );
    adaptor.addChild( alt, adaptor.create( literalType, literalText ) );
    if ( withAction ) {
        adaptor.addChild( alt, adaptor.create( ACTION, "ACTION" ) );
    }
    adaptor.addChild( alt, adaptor.create( EOA, "EOA" ) );

    adaptor.addChild( blockNode, alt );
    adaptor.addChild( blockNode, adaptor.create( EOB, "EOB" ) );
    return blockNode;
}

// Behavior moved to AssignTokenTypesBehavior.
// The methods below are intentionally empty here; the rule actions in this
// grammar call them as hooks.
protected void trackString(GrammarAST t) {}
protected void trackToken( GrammarAST t ) {}
protected void trackTokenRule( GrammarAST t, GrammarAST modifier, GrammarAST block ) {}
protected void alias( GrammarAST t, GrammarAST s ) {}
public void defineTokens( Grammar root ) {}
protected void defineStringLiteralsFromDelegates() {}
protected void assignStringTypes( Grammar root ) {}
protected void aliasTokenIDsAndLiterals( Grammar root ) {}
protected void assignTokenIDTypes( Grammar root ) {}
protected void defineTokenNamesAndLiteralsInGrammar( Grammar root ) {}
protected void init( Grammar root ) {}
}

// Entry rule: calls init(g) once (skipped while backtracking) and then walks
// the grammar tree, whichever of the four grammar-type roots it has.
public
grammar_[Grammar g]
@init
{
    if ( state.backtracking == 0 ) {
        init($g);
    }
}
    :   (   ^( LEXER_GRAMMAR    grammarSpec )
        |   ^( PARSER_GRAMMAR   grammarSpec )
        |   ^( TREE_GRAMMAR     grammarSpec )
        |   ^( COMBINED_GRAMMAR grammarSpec )
        )
    ;

// Grammar body: name, optional doc comment, options, imports, tokens section,
// attribute scopes and named actions (skipped), then the rules.
grammarSpec
    :   id=ID
        (cmt=DOC_COMMENT)?
        (optionsSpec)?
        (delegateGrammars)?
        (tokensSpec)?
        (attrScope)*
        ( ^(AMPERSAND .*) )* // skip actions
        rules
    ;

attrScope
    :   ^( 'scope' ID ( ^(AMPERSAND .*) )* ACTION )
    ;

// Collects every option of an OPTIONS subtree into the returned map.
optionsSpec returns [Map<Object, Object> opts = new HashMap<Object, Object>()]
    :   ^( OPTIONS (option[$opts])+ )
    ;

// Records one option in opts; a tokenVocab option seen before any rule
// triggers the vocabulary import.
option[Map<Object, Object> opts]
    :   ^( ASSIGN ID optionValue )
        {
            String key = $ID.text;
            $opts.put(key, $optionValue.value);
            // check for grammar-level option to import vocabulary
            // (currentRuleName is only assigned in ruleBody, so null means
            // no rule has been entered yet)
            if ( currentRuleName==null && key.equals("tokenVocab") )
            {
                grammar.importTokenVocabulary($ID,(String)$optionValue.value);
            }
        }
    ;

// Option value: the node text by default, replaced by an Integer for INT.
optionValue returns [Object value=null]
@init
{
    if ( state.backtracking == 0 ) {
        $value = $start.getText();
    }
}
    :   ID
    |   STRING_LITERAL
    |   CHAR_LITERAL
    |   INT
        {$value = Integer.parseInt($INT.text);}
//  |   cs=charSet {$value = $cs;} // return set AST in this case
    ;

charSet
    :   ^( CHARSET charSetElement )
    ;

charSetElement
    :   CHAR_LITERAL
    |   ^( OR CHAR_LITERAL CHAR_LITERAL )
    |   ^( RANGE CHAR_LITERAL CHAR_LITERAL )
    ;

delegateGrammars
    :   ^( 'import'
            (   ^(ASSIGN ID ID)
            |   ID
            )+
        )
    ;

tokensSpec
    :   ^(TOKENS tokenSpec*)
    ;

// A tokens-section entry: a bare token name, or a token name assigned to a
// string/char literal (which is tracked and aliased to the name).
tokenSpec
    :   t=TOKEN_REF {trackToken($t);}
    |   ^(  ASSIGN
            t2=TOKEN_REF {trackToken($t2);}
            (   s=STRING_LITERAL {trackString($s); alias($t2,$s);}
            |   c=CHAR_LITERAL {trackString($c); alias($t2,$c);}
            )
        )
    ;

rules
    :   rule+
    ;

rule
    :   ^(RULE ruleBody)
    |   ^(PREC_RULE ruleBody)
    ;

// Remembers the rule name on entry and, once the whole rule has been walked,
// passes its name, optional modifier and block to trackTokenRule.
ruleBody
    :   id=ID {currentRuleName=$id.text;}
        (m=modifier)?
        ^(ARG (ARG_ACTION)?)
        ^(RET (ARG_ACTION)?)
        (throwsSpec)?
        (optionsSpec)?
        (ruleScopeSpec)?
        ( ^(AMPERSAND .*) )*
        b=block
        (exceptionGroup)?
        EOR
        {trackTokenRule($id,$m.start,$b.start);}
    ;

modifier
    :   'protected'
    |   'public'
    |   'private'
    |   'fragment'
    ;

throwsSpec
    :   ^('throws' ID+)
    ;

ruleScopeSpec
    :   ^( 'scope' ( ^(AMPERSAND .*) )* (ACTION)? ( ID )* )
    ;

block
    :   ^(  BLOCK
            (optionsSpec)?
            ( alternative rewrite )+
            EOB
        )
    ;

alternative
    :   ^( ALT (element)+ EOA )
    ;

exceptionGroup
    :   ( exceptionHandler )+ (finallyClause)?
    |   finallyClause
    ;

exceptionHandler
    :   ^('catch' ARG_ACTION ACTION)
    ;

finallyClause
    :   ^('finally' ACTION)
    ;

// Rewrites are matched structurally only; their contents are skipped.
rewrite
    :   ^(REWRITES ( ^(REWRITE .*) )* )
    |
    ;

element
    :   ^(ROOT element)
    |   ^(BANG element)
    |   atom
    |   ^(NOT element)
    |   ^(RANGE atom atom)
    |   ^(CHAR_RANGE atom atom)
    |   ^(ASSIGN ID element)
    |   ^(PLUS_ASSIGN ID element)
    |   ebnf
    |   tree_
    |   ^( SYNPRED block )
    |   FORCED_ACTION
    |   ACTION
    |   SEMPRED
    |   SYN_SEMPRED
    |   ^(BACKTRACK_SEMPRED .*)
    |   GATED_SEMPRED
    |   EPSILON
    ;

ebnf
    :   block
    |   ^( OPTIONAL block )
    |   ^( CLOSURE block )
    |   ^( POSITIVE_CLOSURE block )
    ;

tree_
    :   ^(TREE_BEGIN element+)
    ;

// Token references and char/string literals found in rule bodies are reported
// through trackToken / trackString; rule references and wildcards are not.
atom
    :   ^( RULE_REF (ARG_ACTION)? )
    |   ^( t=TOKEN_REF (ARG_ACTION )? ) {trackToken($t);}
    |   c=CHAR_LITERAL   {trackString($c);}
    |   s=STRING_LITERAL {trackString($s);}
    |   WILDCARD
    |   ^(DOT ID atom) // scope override on rule
    ;

ast_suffix
    :   ROOT
    |   BANG
    ;