/*
 [The "BSD license"]
 Copyright (c) 2005-2011 Terence Parr
 All rights reserved.

 Grammar conversion to ANTLR v3:
 Copyright (c) 2011 Sam Harwell
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Tree grammar that walks a grammar AST (token vocabulary ANTLR, nodes of type
// GrammarAST) and records what it finds on the Grammar object handed to
// grammar_: global and rule scopes, named actions, rules, labels, rule/token
// references per outer alternative, inline actions and rewrite references.
tree grammar DefineGrammarItemsWalker;

options {
    language=Java;
    tokenVocab = ANTLR;
    ASTLabelType = GrammarAST;
}

// Collects the named actions (name node -> action node) seen inside a
// 'scope' subtree so the enclosing rule can attach them once the scope
// itself has been created.
scope AttributeScopeActions {
    HashMap<GrammarAST, GrammarAST> actions;
}

@header {
package org.antlr.grammar.v3;
import org.antlr.tool.*;
// ArrayList, HashMap and Map are used by the actions below; import them
// explicitly rather than depending on the code generator to supply them.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
}

@members {
/** Grammar being populated; assigned from the argument of grammar_. */
protected Grammar grammar;
/** Root of the tree being walked; assigned at the start of grammar_. */
protected GrammarAST root;
/** Name of the rule currently being walked; assigned in rule. */
protected String currentRuleName;
/** Innermost rewrite BLOCK being walked; saved and restored by rewrite_block. */
protected GrammarAST currentRewriteBlock;
/** Start node of the rewrite currently being walked; assigned in rewrite. */
protected GrammarAST currentRewriteRule;
/** Number of the outermost alternative being walked (1 on entering a rule block). */
protected int outerAltNum = 0;
/** Nesting depth of block; 1 means the outermost block of a rule. */
protected int blockLevel = 0;

/**
 * Counts the ALT children of the first BLOCK child of a rule node.
 *
 * @param t rule subtree; it must contain a BLOCK child
 * @return number of direct ALT children of that BLOCK
 */
public final int countAltsForRule( CommonTree t ) {
    CommonTree block = (CommonTree)t.getFirstChildWithType(BLOCK);
    int altCount = 0;
    for (int i = 0; i < block.getChildCount(); i++) {
        if (block.getChild(i).getType() == ALT)
            altCount++;
    }
    return altCount;
}

/** Post-walk hook; invoked from the after-action of grammar_. */
protected final void finish() {
    trimGrammar();
}

/** Remove any lexer rules from a COMBINED; already passed to lexer */
protected final void trimGrammar() {
    if ( grammar.type != Grammar.COMBINED ) {
        return;
    }
    // form is (header ... ) ( grammar ID (scope ...) ... ( rule ... ) ( rule ... ) ... )
    GrammarAST p = root;
    // find the grammar spec; stop at the end of the sibling chain instead of
    // dereferencing null when no such node exists
    while ( p != null && !"grammar".equals( p.getText() ) ) {
        p = p.getNextSibling();
    }
    if ( p == null ) {
        // no grammar spec node among root and its siblings: nothing to trim
        return;
    }
    for ( int i = 0; i < p.getChildCount(); i++ ) {
        if ( p.getChild( i ).getType() != RULE )
            continue;

        String ruleName = p.getChild(i).getChild(0).getText();
        //Console.Out.WriteLine( "rule " + ruleName + " prev=" + prev.getText() );
        if (Rule.getRuleType(ruleName) == Grammar.LEXER) {
            // remove lexer rule
            p.deleteChild( i );
            // the following children shifted down by one; revisit this index
            i--;
        }
    }
    //Console.Out.WriteLine( "root after removal is: " + root.ToStringList() );
}

/**
 * Registers an inline action with the rule named by currentRuleName.
 * Does nothing when the grammar has no rule of that name.
 *
 * @param actionAST the action or predicate node to track
 */
protected final void trackInlineAction( GrammarAST actionAST ) {
    Rule r = grammar.getRule( currentRuleName );
    if ( r != null ) {
        r.trackInlineAction( actionAST );
    }
}
}

// Entry point: stores the target grammar and the tree root, sets the grammar
// type from the root token, walks the spec, then calls finish().
public
grammar_[Grammar g]
@init
{
grammar = $g;
root = $start;
}
@after
{
finish();
}
    :   ^( LEXER_GRAMMAR {grammar.type = Grammar.LEXER;} grammarSpec )
    |   ^( PARSER_GRAMMAR {grammar.type = Grammar.PARSER;} grammarSpec )
    |   ^( TREE_GRAMMAR {grammar.type = Grammar.TREE_PARSER;} grammarSpec )
    |   ^( COMBINED_GRAMMAR {grammar.type = Grammar.COMBINED;} grammarSpec )
    ;

// Global 'scope' declaration: defines a dynamic global scope on the grammar,
// fills in its attributes and attaches the named actions collected by
// attrScopeAction.
attrScope
scope AttributeScopeActions;
@init
{
    $AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
    :   ^( 'scope' name=ID attrScopeAction* attrs=ACTION )
        {
            AttributeScope scope = grammar.defineGlobalScope($name.text,$attrs.getToken());
            scope.isDynamicGlobalScope = true;
            scope.addAttributes($attrs.text, ';');
            for (Map.Entry<GrammarAST, GrammarAST> entry : $AttributeScopeActions::actions.entrySet())
                scope.defineNamedAction(entry.getKey(), entry.getValue());
        }
    ;

// Named action inside a scope declaration; recorded in the enclosing
// AttributeScopeActions map for the parent rule to consume.
attrScopeAction
    :   ^(AMPERSAND ID ACTION)
        {
            $AttributeScopeActions::actions.put( $ID, $ACTION );
        }
    ;

grammarSpec
    :   id=ID
        (cmt=DOC_COMMENT)?
        ( optionsSpec )?
        (delegateGrammars)?
        (tokensSpec)?
        (attrScope)*
        (actions)?
        rules
    ;

actions
    :   ( action )+
    ;

// Grammar-level named action, with or without an explicit scope name
// (two IDs versus one); defined on the grammar.
action
@init
{
    String scope=null;
    GrammarAST nameAST=null, actionAST=null;
}
    :   ^(amp=AMPERSAND id1=ID
            (   id2=ID a1=ACTION
                {scope=$id1.text; nameAST=$id2; actionAST=$a1;}
            |   a2=ACTION
                {scope=null; nameAST=$id1; actionAST=$a2;}
            )
        )
        {
            grammar.defineNamedAction($amp,scope,nameAST,actionAST);
        }
    ;

optionsSpec
    :   ^(OPTIONS .*)
    ;

delegateGrammars
    :   ^( 'import' ( ^(ASSIGN ID ID) | ID )+ )
    ;

tokensSpec
    :   ^(TOKENS tokenSpec*)
    ;

tokenSpec
    :   t=TOKEN_REF
    |   ^(  ASSIGN
            TOKEN_REF
            (   STRING_LITERAL
            |   CHAR_LITERAL
            )
        )
    ;

rules
    :   (rule | ^(PREC_RULE .*))+
    ;

// One rule definition. Lexer rules found in a COMBINED grammar are handed to
// defineLexerRuleFoundInParser and leave r null; every other rule is defined
// on the grammar together with its parameter scope, return scope and throws
// list.
rule
@init
{
    String name=null;
    Map<String, Object> opts=null;
    Rule r = null;
}
    :   ^(  RULE id=ID {opts = $RULE.getBlockOptions();}
            (modifier)?
            ^( ARG (args=ARG_ACTION)? )
            ^( RET (ret=ARG_ACTION)? )
            (throwsSpec)?
            (optionsSpec)?
            {
                name = $id.text;
                currentRuleName = name;
                if ( Rule.getRuleType(name) == Grammar.LEXER && grammar.type==Grammar.COMBINED )
                {
                    // a merged grammar spec, track lexer rules and send to another grammar
                    grammar.defineLexerRuleFoundInParser($id.getToken(), $start);
                }
                else
                {
                    int numAlts = countAltsForRule($start);
                    grammar.defineRule($id.getToken(), $modifier.mod, opts, $start, $args, numAlts);
                    r = grammar.getRule(name);
                    if ( $args!=null )
                    {
                        r.parameterScope = grammar.createParameterScope(name,$args.getToken());
                        r.parameterScope.addAttributes($args.text, ',');
                    }
                    if ( $ret!=null )
                    {
                        r.returnScope = grammar.createReturnScope(name,$ret.getToken());
                        r.returnScope.addAttributes($ret.text, ',');
                    }
                    if ( $throwsSpec.exceptions != null )
                    {
                        for (String exception : $throwsSpec.exceptions)
                            r.throwsSpec.add( exception );
                    }
                }
            }
            (ruleScopeSpec[r])?
            (ruleAction[r])*
            { this.blockLevel=0; }
            b=block
            (exceptionGroup)?
            EOR
            {
                // copy rule options into the block AST, which is where
                // the analysis will look for k option etc...
                $b.start.setBlockOptions(opts);
            }
        )
    ;

// Rule-level named action; ignored when r is null.
ruleAction[Rule r]
    :   ^(amp=AMPERSAND id=ID a=ACTION ) {if (r!=null) r.defineNamedAction($amp,$id,$a);}
    ;

// Returns the text of the modifier token.
modifier returns [String mod]
@init
{
    $mod = $start.getToken().getText();
}
    :   'protected'
    |   'public'
    |   'private'
    |   'fragment'
    ;

// Returns the set of exception names listed after 'throws'.
throwsSpec returns [HashSet<String> exceptions]
@init
{
    $exceptions = new HashSet<String>();
}
    :   ^('throws' (ID {$exceptions.add($ID.text);})+ )
    ;

// Rule-level 'scope' spec: an optional inline dynamic scope followed by the
// names of global scopes the rule uses. r is null for lexer rules found in a
// COMBINED grammar (see rule), so every dereference of r is guarded, matching
// ruleAction. Unknown global scopes are reported whether or not r is null.
ruleScopeSpec[Rule r]
scope AttributeScopeActions;
@init
{
    $AttributeScopeActions::actions = new HashMap<GrammarAST, GrammarAST>();
}
    :   ^(  'scope'
            (   attrScopeAction* attrs=ACTION
                {
                    if ( r != null )
                    {
                        r.ruleScope = grammar.createRuleScope(r.name,$attrs.getToken());
                        r.ruleScope.isDynamicRuleScope = true;
                        r.ruleScope.addAttributes($attrs.text, ';');
                        for (Map.Entry<GrammarAST, GrammarAST> entry : $AttributeScopeActions::actions.entrySet())
                            r.ruleScope.defineNamedAction(entry.getKey(), entry.getValue());
                    }
                }
            )?
            (   uses=ID
                {
                    if ( grammar.getGlobalScope($uses.text)==null ) {
                        ErrorManager.grammarError(ErrorManager.MSG_UNKNOWN_DYNAMIC_SCOPE,
                                                  grammar,
                                                  $uses.getToken(),
                                                  $uses.text);
                    }
                    else if ( r != null ) {
                        if ( r.useScopes==null ) {r.useScopes=new ArrayList<String>();}
                        r.useScopes.add($uses.text);
                    }
                }
            )*
        )
    ;

// A (sub)rule block. blockLevel tracks nesting depth; outerAltNum is reset to
// 1 on entering the outermost block and advanced after each of its
// alternatives.
block
@init
{
    // must run during backtracking
    this.blockLevel++;
    if ( blockLevel == 1 )
        this.outerAltNum=1;
}
    :   ^(  BLOCK
            (optionsSpec)?
            (blockAction)*
            (   alternative rewrite
                {{
                    if ( this.blockLevel == 1 )
                        this.outerAltNum++;
                }}
            )+
            EOB
         )
    ;
finally { blockLevel--; }

// TODO: this does nothing now! subrules cannot have init actions. :(
blockAction
    :   ^(amp=AMPERSAND id=ID a=ACTION ) // {r.defineAction(#amp,#id,#a);}
    ;

alternative
//@init
//{
//    if ( state.backtracking == 0 )
//    {
//        if ( grammar.type!=Grammar.LEXER && grammar.GetOption("output")!=null && blockLevel==1 )
//        {
//            GrammarAST aRewriteNode = $start.FindFirstType(REWRITE); // alt itself has rewrite?
//            GrammarAST rewriteAST = (GrammarAST)$start.Parent.getChild($start.ChildIndex + 1);
//            // we have a rewrite if alt uses it inside subrule or this alt has one
//            // but don't count -> ... rewrites, which mean "do default auto construction"
//            if ( aRewriteNode!=null||
//                 (firstRewriteAST!=null &&
//                  firstRewriteAST.getType()==REWRITE &&
//                  firstRewriteAST.getChild(0)!=null &&
//                  firstRewriteAST.getChild(0).getType()!=ETC) )
//            {
//                Rule r = grammar.getRule(currentRuleName);
//                r.TrackAltsWithRewrites($start,this.outerAltNum);
//            }
//        }
//    }
//}
    :   ^( ALT (element)+ EOA )
    ;

exceptionGroup
    :   ( exceptionHandler )+ (finallyClause)?
    |   finallyClause
    ;

exceptionHandler
    :   ^('catch' ARG_ACTION ACTION) {trackInlineAction($ACTION);}
    ;

finallyClause
    :   ^('finally' ACTION) {trackInlineAction($ACTION);}
    ;

// One element of an alternative. Labels (ASSIGN and PLUS_ASSIGN) are defined
// on the grammar according to the type of the labeled node, looking through a
// ROOT or BANG wrapper; actions and predicates are stamped with the current
// outer alternative number and tracked on the current rule.
element
    :   ^(ROOT element)
    |   ^(BANG element)
    |   atom[null]
    |   ^(NOT element)
    |   ^(RANGE atom[null] atom[null])
    |   ^(CHAR_RANGE atom[null] atom[null])
    |   ^(  ASSIGN id=ID el=element)
            {
                GrammarAST e = $el.start;
                if ( e.getType()==ANTLRParser.ROOT || e.getType()==ANTLRParser.BANG )
                {
                    e = (GrammarAST)e.getChild(0);
                }
                if ( e.getType()==RULE_REF)
                {
                    grammar.defineRuleRefLabel(currentRuleName,$id.getToken(),e);
                }
                else if ( e.getType()==WILDCARD && grammar.type==Grammar.TREE_PARSER )
                {
                    grammar.defineWildcardTreeLabel(currentRuleName,$id.getToken(),e);
                }
                else
                {
                    grammar.defineTokenRefLabel(currentRuleName,$id.getToken(),e);
                }
            }
    |   ^(  PLUS_ASSIGN id2=ID a2=element
            {
                GrammarAST a = $a2.start;
                if ( a.getType()==ANTLRParser.ROOT || a.getType()==ANTLRParser.BANG )
                {
                    a = (GrammarAST)a.getChild(0);
                }
                if ( a.getType()==RULE_REF )
                {
                    grammar.defineRuleListLabel(currentRuleName,$id2.getToken(),a);
                }
                else if ( a.getType() == WILDCARD && grammar.type == Grammar.TREE_PARSER )
                {
                    grammar.defineWildcardTreeListLabel( currentRuleName, $id2.getToken(), a );
                }
                else
                {
                    grammar.defineTokenListLabel(currentRuleName,$id2.getToken(),a);
                }
            }
         )
    |   ebnf
    |   tree_
    |   ^( SYNPRED block )
    |   act=ACTION
        {
            $act.outerAltNum = this.outerAltNum;
            trackInlineAction($act);
        }
    |   act2=FORCED_ACTION
        {
            $act2.outerAltNum = this.outerAltNum;
            trackInlineAction($act2);
        }
    |   SEMPRED
        {
            $SEMPRED.outerAltNum = this.outerAltNum;
            trackInlineAction($SEMPRED);
        }
    |   SYN_SEMPRED
    |   ^(BACKTRACK_SEMPRED .*)
    |   GATED_SEMPRED
        {
            $GATED_SEMPRED.outerAltNum = this.outerAltNum;
            trackInlineAction($GATED_SEMPRED);
        }
    |   EPSILON
    ;

ebnf
    :   (dotLoop) => dotLoop // .* or .+
    |   block
    |   ^( OPTIONAL block )
    |   ^( CLOSURE block )
    |   ^( POSITIVE_CLOSURE block )
    ;

/** Track the .* and .+ idioms and make them nongreedy by default.
 */
dotLoop
    :   (   ^( CLOSURE dotBlock )
        |   ^( POSITIVE_CLOSURE dotBlock )
        )
        {
            GrammarAST block = (GrammarAST)$start.getChild(0);
            Map<String, Object> opts = new HashMap<String, Object>();
            opts.put("greedy", "false");
            if ( grammar.type!=Grammar.LEXER )
            {
                // parser grammars assume k=1 for .* loops
                // otherwise they (analysis?) look til EOF!
                opts.put("k", 1);
            }
            block.setOptions(grammar,opts);
        }
    ;

dotBlock
    :   ^( BLOCK ^( ALT WILDCARD EOA ) EOB )
    ;

tree_
    :   ^(TREE_BEGIN element+)
    ;

// A rule, token or literal reference. scope_ is the ID of an enclosing
// DOT node (scope override) or null. References are recorded against the
// current rule and outer alternative; argument actions are tracked as inline
// actions.
atom[GrammarAST scope_]
    :   ^( rr=RULE_REF (rarg=ARG_ACTION)? )
        {
            grammar.altReferencesRule( currentRuleName, $scope_, $rr, this.outerAltNum );
            if ( $rarg != null )
            {
                $rarg.outerAltNum = this.outerAltNum;
                trackInlineAction($rarg);
            }
        }
    |   ^( t=TOKEN_REF (targ=ARG_ACTION )? )
        {
            if ( $targ != null )
            {
                $targ.outerAltNum = this.outerAltNum;
                trackInlineAction($targ);
            }
            if ( grammar.type == Grammar.LEXER )
            {
                grammar.altReferencesRule( currentRuleName, $scope_, $t, this.outerAltNum );
            }
            else
            {
                grammar.altReferencesTokenID( currentRuleName, $t, this.outerAltNum );
            }
        }
    |   c=CHAR_LITERAL
        {
            if ( grammar.type != Grammar.LEXER )
            {
                Rule rule = grammar.getRule(currentRuleName);
                if ( rule != null )
                    rule.trackTokenReferenceInAlt($c, outerAltNum);
            }
        }
    |   s=STRING_LITERAL
        {
            if ( grammar.type != Grammar.LEXER )
            {
                Rule rule = grammar.getRule(currentRuleName);
                if ( rule!=null )
                    rule.trackTokenReferenceInAlt($s, outerAltNum);
            }
        }
    |   WILDCARD
    |   ^(DOT ID atom[$ID]) // scope override on rule
    ;

ast_suffix
    :   ROOT
    |   BANG
    ;

// The rewrites attached to an alternative (possibly none). Remembers the
// start node in currentRewriteRule and, when the grammar builds ASTs, gives it
// a fresh rewriteRefsDeep set.
rewrite
@init
{
    // track top level REWRITES node, store stuff there
    currentRewriteRule = $start; // has to execute during backtracking
    if ( state.backtracking == 0 )
    {
        if ( grammar.buildAST() )
            currentRewriteRule.rewriteRefsDeep = new HashSet<GrammarAST>();
    }
}
    :   ^(  REWRITES
            (   ^( REWRITE (pred=SEMPRED)? rewrite_alternative )
                {
                    if ( $pred != null )
                    {
                        $pred.outerAltNum = this.outerAltNum;
                        trackInlineAction($pred);
                    }
                }
            )*
        )
        //{System.out.println("-> refs = "+currentRewriteRule.rewriteRefsDeep);}
    |
    ;

// A block inside a rewrite. Makes itself the current rewrite block while its
// contents are walked, then copies its shallow references into the enclosing
// block's deep set; the previous block is restored in the finally clause.
rewrite_block
@init
{
    GrammarAST enclosingBlock = currentRewriteBlock;
    if ( state.backtracking == 0 )
    {
        // don't do if guessing
        currentRewriteBlock=$start; // pts to BLOCK node
        currentRewriteBlock.rewriteRefsShallow = new HashSet<GrammarAST>();
        currentRewriteBlock.rewriteRefsDeep = new HashSet<GrammarAST>();
    }
}
    :   ^( BLOCK rewrite_alternative EOB )
        //{System.out.println("atoms="+currentRewriteBlock.rewriteRefs);}
        {
            // copy the element refs in this block to the surrounding block
            if ( enclosingBlock != null )
            {
                for (GrammarAST item : currentRewriteBlock.rewriteRefsShallow)
                    enclosingBlock.rewriteRefsDeep.add( item );
            }
            //currentRewriteBlock = enclosingBlock; // restore old BLOCK ptr
        }
    ;
finally { currentRewriteBlock = enclosingBlock; }

rewrite_alternative
    :   {grammar.buildAST()}? => ^( a=ALT ( ( rewrite_element )+ | EPSILON ) EOA )
    |   {grammar.buildTemplate()}? => rewrite_template
    |   ETC {this.blockLevel==1}? // only valid as outermost rewrite
    ;

rewrite_element
    :   rewrite_atom
    |   rewrite_ebnf
    |   rewrite_tree
    ;

rewrite_ebnf
    :   ^( OPTIONAL rewrite_block )
    |   ^( CLOSURE rewrite_block )
    |   ^( POSITIVE_CLOSURE rewrite_block )
    ;

rewrite_tree
    :   ^( TREE_BEGIN rewrite_atom ( rewrite_element )* )
    ;

// An atom on the right-hand side of a rewrite. When building ASTs, rule,
// label, token and literal references are added to the current rewrite block
// (if any) and to the current rewrite rule. A TOKEN_REF whose text is not
// among the token refs of the current outer alternative is treated as
// imaginary and is not recorded.
rewrite_atom
@init
{
    if ( state.backtracking == 0 )
    {
        Rule r = grammar.getRule(currentRuleName);
        Set<String> tokenRefsInAlt = r.getTokenRefsInAlt(outerAltNum);
        boolean imaginary =
            $start.getType()==TOKEN_REF &&
            !tokenRefsInAlt.contains($start.getText());
        if ( !imaginary && grammar.buildAST() &&
             ($start.getType()==RULE_REF ||
              $start.getType()==LABEL ||
              $start.getType()==TOKEN_REF ||
              $start.getType()==CHAR_LITERAL ||
              $start.getType()==STRING_LITERAL) )
        {
            // track per block and for entire rewrite rule
            if ( currentRewriteBlock!=null )
            {
                currentRewriteBlock.rewriteRefsShallow.add($start);
                currentRewriteBlock.rewriteRefsDeep.add($start);
            }

            //System.out.println("adding "+$start.getText()+" to "+currentRewriteRule.getText());
            currentRewriteRule.rewriteRefsDeep.add($start);
        }
    }
}
    :   RULE_REF
    |   (   ^(  TOKEN_REF
                (   ARG_ACTION
                    {
                        $ARG_ACTION.outerAltNum = this.outerAltNum;
                        trackInlineAction($ARG_ACTION);
                    }
                )?
            )
        |   CHAR_LITERAL
        |   STRING_LITERAL
        )
    |   LABEL
    |   ACTION
        {
            $ACTION.outerAltNum = this.outerAltNum;
            trackInlineAction($ACTION);
        }
    ;

// Template rewrite: an empty alternative, a template reference with its
// argument list, or a bare action. Every action encountered is stamped with
// the current outer alternative number and tracked on the current rule.
rewrite_template
    :   ^( ALT EPSILON EOA )
    |   ^(  TEMPLATE (id=ID|ind=ACTION)
            ^(  ARGLIST
                (   ^( ARG arg=ID a=ACTION )
                    {
                        $a.outerAltNum = this.outerAltNum;
                        trackInlineAction($a);
                    }
                )*
            )
            {
                if ( $ind!=null )
                {
                    $ind.outerAltNum = this.outerAltNum;
                    trackInlineAction($ind);
                }
            }
            (   DOUBLE_QUOTE_STRING_LITERAL
            |   DOUBLE_ANGLE_STRING_LITERAL
            )?
        )
    |   act=ACTION
        {
            $act.outerAltNum = this.outerAltNum;
            trackInlineAction($act);
        }
    ;