1# ----------------------------------------------------------------------------- 2# ply: yacc.py 3# 4# Copyright (C) 2001-2011, 5# David M. Beazley (Dabeaz LLC) 6# All rights reserved. 7# 8# Redistribution and use in source and binary forms, with or without 9# modification, are permitted provided that the following conditions are 10# met: 11# 12# * Redistributions of source code must retain the above copyright notice, 13# this list of conditions and the following disclaimer. 14# * Redistributions in binary form must reproduce the above copyright notice, 15# this list of conditions and the following disclaimer in the documentation 16# and/or other materials provided with the distribution. 17# * Neither the name of the David Beazley or Dabeaz LLC may be used to 18# endorse or promote products derived from this software without 19# specific prior written permission. 20# 21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32# ----------------------------------------------------------------------------- 33# 34# This implements an LR parser that is constructed from grammar rules defined 35# as Python functions. The grammer is specified by supplying the BNF inside 36# Python documentation strings. 
The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system. PLY might be viewed as cross between
# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist). However, most of the variables used during table
# construction are defined in terms of global variables. Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
# support was originally implemented by Elias Ioup ([email protected]),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style." Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

__version__ = "3.4"        # PLY release
__tabversion__ = "3.2"     # Table version (format of generated parser tables)

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = 1              # Debugging mode. If set, yacc generates a
                           # 'parser.out' file in the current directory

debug_file = 'parser.out'  # Default name of the debugging file
tab_module = 'parsetab'    # Default name of the table module
default_lr = 'LALR'        # Default LR table generation method ('LALR' or 'SLR')

error_count = 3            # Number of symbols that must be shifted to leave recovery mode

yaccdevel = 0              # Set to True if developing yacc. This turns off optimized
                           # implementations of certain functions.

resultlimit = 40           # Size limit of results when running in debug mode.

pickle_protocol = 0        # Protocol to use when writing pickle files

import re, types, sys, os.path

# Compatibility function for python 2.6/3.0: returns the code object of a
# function regardless of interpreter version.
if sys.version_info[0] < 3:
    def func_code(f):
        # Python 2: code object lives on f.func_code
        return f.func_code
else:
    def func_code(f):
        # Python 3: code object lives on f.__code__
        return f.__code__

# Compatibility: largest platform integer. sys.maxint was removed in Python 3.
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
# Import the companion lex module under either packaging layout
# (bare module on py2, ply package on py3).
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex

# This object is a stand-in for a logging object created by the
# logging module. PLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.

class PlyLogger(object):
    # f: any object with a write() method (open file, sys.stderr, ...).
    def __init__(self,f):
        self.f = f
    # Messages use lazy %-style formatting: msg % args.
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: "+ (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug

# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger that discards everything.  Any attribute access and any call
    simply return the logger itself, so arbitrarily chained use is a no-op."""
    def __getattribute__(self, name):
        return self
    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass

def format_result(r):
    """Render a reduce-action result for debug output: the value's type and
    id plus a repr truncated to resultlimit characters."""
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + " ..."
    return "<%s @ 0x%x> (%s)" % (type(r).__name__, id(r), text)

def format_stack_entry(r):
    """Render a parser-stack entry for debug output.  Short reprs appear
    verbatim; anything longer collapses to a type/id placeholder."""
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) >= 16:
        return "<%s @ 0x%x>" % (type(r).__name__, id(r))
    return text

#-----------------------------------------------------------------------------
#                         === LR Parsing Engine ===
#
# The classes below implement the runtime LR parser.  They are not used
# during table construction and are independent of the actual LR table
# generation algorithm.
#-----------------------------------------------------------------------------

class YaccSymbol:
    """Non-terminal grammar symbol held on the parsing stack.

    Attributes are assigned dynamically by the parser:
      .type       - grammar symbol type
      .value      - symbol value
      .lineno     - starting line number
      .endlineno  - ending line number (optional, set automatically)
      .lexpos     - starting lex position
      .endlexpos  - ending lex position (optional, set automatically)
    """
    def __str__(self):
        return self.type
    def __repr__(self):
        return str(self)

class YaccProduction:
    """Wrapper handed to grammar-rule functions as ``p``.

    Index lookup and assignment read/write the .value attribute of the
    underlying YaccSymbol objects; negative indices reach back into the
    parser's symbol stack.  lineno() returns a given item's line number
    (0 if not defined); linespan() returns (startline, endline);
    lexspan() returns (lexpos, endlexpos).
    """
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [item.value for item in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 only; Python 3 routes slices through __getitem__.
        return [item.value for item in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        return getattr(self.slice[n], "lineno", 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        item = self.slice[n]
        first = getattr(item, "lineno", 0)
        last = getattr(item, "endlineno", first)
        return first, last

    def lexpos(self, n):
        return getattr(self.slice[n], "lexpos", 0)

    def lexspan(self, n):
        item = self.slice[n]
        first = getattr(item, "lexpos", 0)
        last = getattr(item, "endlexpos", first)
        return first, last

    def error(self):
        # Grammar rules call this to force the parser into error recovery.
        raise SyntaxError


# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    """The LR parsing engine.

    Wraps the parsing tables (action, goto, productions) from an lrtab
    object and drives the shift/reduce loop.  parse() dispatches to one of
    three hand-duplicated implementations: parsedebug() (instrumented),
    parseopt() (optimized, with position tracking), or parseopt_notrack()
    (optimized, no tracking).  Edit parsedebug() first and copy changes to
    the other two -- see the banner comments below.
    """
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions   # Production list (index 0 unused; entries have .name/.len/.callable/.str)
        self.action = lrtab.lr_action             # action[state][tokentype] -> shift(+n)/reduce(-n)/accept(0)
        self.goto = lrtab.lr_goto                 # goto[state][nonterminal] -> next state
        self.errorfunc = errorf                   # User-supplied p_error() function (may be None)

    def errok(self):
        """Signal that error recovery is complete (callable from p_error())."""
        self.errorok = 1

    def restart(self):
        """Reset the parser stacks to the start state (callable from p_error())."""
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Parse input and return the value of the start symbol.

        input     - string passed to lexer.input() (optional)
        lexer     - lexer object; defaults to the ply.lex module's lexer
        debug     - true/logger: use the instrumented engine (an int value
                    is replaced by a PlyLogger writing to stderr)
        tracking  - propagate line/position info to non-terminals
        tokenfunc - alternative token source replacing lexer.token
        """
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse(). All changes made to the
    # parsing engine should be made here. For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in:
    #
    # #--! DEBUG
    # statements
    # #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks
        # (also published on self so restart() can reach them)

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('')
            debug.debug('State : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # Lexer exhausted: synthesize the end-of-input marker
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)

                    # --! DEBUG

                    if plen:
                        # Non-empty production: slice off the handle plus the
                        # slot for the result symbol
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: nothing is popped off the stacks

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: parse is complete
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result

            if t == None:

                # --! DEBUG
                debug.error('Error : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        # NOTE(review): errok/token/restart are injected as
                        # module globals for the duration of p_error() --
                        # legacy API, not thread-safe.
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # errtoken is lookahead here, so the two attribute
                            # reads refer to the same object
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                # NOTE(review): no trailing newline here, unlike
                                # the branch above -- matches upstream behavior
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    # Wrap the offending token in an 'error' symbol and push
                    # the real token back for possible re-use after recovery
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)

                        # --! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: parse is complete
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input. If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization. Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: parse is complete
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here. To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error. This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own. The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count

                # case 1: the statestack only has 1 entry on it. If we're in this state, the
                # entire parse has been rolled back and we're completely hosed. The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")

# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------

import re

# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')

# -----------------------------------------------------------------------------
# class Production:
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production. For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are defined or optional.
1150# 1151# len - Length of the production (number of symbols on right hand side) 1152# usyms - Set of unique symbols found in the production 1153# ----------------------------------------------------------------------------- 1154 1155class Production(object): 1156 reduced = 0 1157 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): 1158 self.name = name 1159 self.prod = tuple(prod) 1160 self.number = number 1161 self.func = func 1162 self.callable = None 1163 self.file = file 1164 self.line = line 1165 self.prec = precedence 1166 1167 # Internal settings used during table construction 1168 1169 self.len = len(self.prod) # Length of the production 1170 1171 # Create a list of unique production symbols used in the production 1172 self.usyms = [ ] 1173 for s in self.prod: 1174 if s not in self.usyms: 1175 self.usyms.append(s) 1176 1177 # List of all LR items for the production 1178 self.lr_items = [] 1179 self.lr_next = None 1180 1181 # Create a string representation 1182 if self.prod: 1183 self.str = "%s -> %s" % (self.name," ".join(self.prod)) 1184 else: 1185 self.str = "%s -> <empty>" % self.name 1186 1187 def __str__(self): 1188 return self.str 1189 1190 def __repr__(self): 1191 return "Production("+str(self)+")" 1192 1193 def __len__(self): 1194 return len(self.prod) 1195 1196 def __nonzero__(self): 1197 return 1 1198 1199 def __getitem__(self,index): 1200 return self.prod[index] 1201 1202 # Return the nth lr_item from the production (or None if at the end) 1203 def lr_item(self,n): 1204 if n > len(self.prod): return None 1205 p = LRItem(self,n) 1206 1207 # Precompute the list of productions immediately following. Hack. 
Remove later 1208 try: 1209 p.lr_after = Prodnames[p.prod[n+1]] 1210 except (IndexError,KeyError): 1211 p.lr_after = [] 1212 try: 1213 p.lr_before = p.prod[n-1] 1214 except IndexError: 1215 p.lr_before = None 1216 1217 return p 1218 1219 # Bind the production function name to a callable 1220 def bind(self,pdict): 1221 if self.func: 1222 self.callable = pdict[self.func] 1223 1224# This class serves as a minimal standin for Production objects when 1225# reading table data from files. It only contains information 1226# actually used by the LR parsing engine, plus some additional 1227# debugging information. 1228class MiniProduction(object): 1229 def __init__(self,str,name,len,func,file,line): 1230 self.name = name 1231 self.len = len 1232 self.func = func 1233 self.callable = None 1234 self.file = file 1235 self.line = line 1236 self.str = str 1237 def __str__(self): 1238 return self.str 1239 def __repr__(self): 1240 return "MiniProduction(%s)" % self.str 1241 1242 # Bind the production function name to a callable 1243 def bind(self,pdict): 1244 if self.func: 1245 self.callable = pdict[self.func] 1246 1247 1248# ----------------------------------------------------------------------------- 1249# class LRItem 1250# 1251# This class represents a specific stage of parsing a production rule. For 1252# example: 1253# 1254# expr : expr . PLUS term 1255# 1256# In the above, the "." represents the current location of the parse. Here 1257# basic attributes: 1258# 1259# name - Name of the production. For example 'expr' 1260# prod - A list of symbols on the right side ['expr','.', 'PLUS','term'] 1261# number - Production number. 1262# 1263# lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term' 1264# then lr_next refers to 'expr -> expr PLUS . term' 1265# lr_index - LR item index (location of the ".") in the prod list. 
1266# lookaheads - LALR lookahead symbols for this item 1267# len - Length of the production (number of symbols on right hand side) 1268# lr_after - List of all productions that immediately follow 1269# lr_before - Grammar symbol immediately before 1270# ----------------------------------------------------------------------------- 1271 1272class LRItem(object): 1273 def __init__(self,p,n): 1274 self.name = p.name 1275 self.prod = list(p.prod) 1276 self.number = p.number 1277 self.lr_index = n 1278 self.lookaheads = { } 1279 self.prod.insert(n,".") 1280 self.prod = tuple(self.prod) 1281 self.len = len(self.prod) 1282 self.usyms = p.usyms 1283 1284 def __str__(self): 1285 if self.prod: 1286 s = "%s -> %s" % (self.name," ".join(self.prod)) 1287 else: 1288 s = "%s -> <empty>" % self.name 1289 return s 1290 1291 def __repr__(self): 1292 return "LRItem("+str(self)+")" 1293 1294# ----------------------------------------------------------------------------- 1295# rightmost_terminal() 1296# 1297# Return the rightmost terminal from a list of symbols. Used in add_production() 1298# ----------------------------------------------------------------------------- 1299def rightmost_terminal(symbols, terminals): 1300 i = len(symbols) - 1 1301 while i >= 0: 1302 if symbols[i] in terminals: 1303 return symbols[i] 1304 i -= 1 1305 return None 1306 1307# ----------------------------------------------------------------------------- 1308# === GRAMMAR CLASS === 1309# 1310# The following class represents the contents of the specified grammar along 1311# with various computed properties such as first sets, follow sets, LR items, etc. 1312# This data is used for critical parts of the table generation process later. 1313# ----------------------------------------------------------------------------- 1314 1315class GrammarError(YaccError): pass 1316 1317class Grammar(object): 1318 def __init__(self,terminals): 1319 self.Productions = [None] # A list of all of the productions. 
The first 1320 # entry is always reserved for the purpose of 1321 # building an augmented grammar 1322 1323 self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all 1324 # productions of that nonterminal. 1325 1326 self.Prodmap = { } # A dictionary that is only used to detect duplicate 1327 # productions. 1328 1329 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a 1330 # list of the rules where they are used. 1331 1332 for term in terminals: 1333 self.Terminals[term] = [] 1334 1335 self.Terminals['error'] = [] 1336 1337 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list 1338 # of rule numbers where they are used. 1339 1340 self.First = { } # A dictionary of precomputed FIRST(x) symbols 1341 1342 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols 1343 1344 self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the 1345 # form ('right',level) or ('nonassoc', level) or ('left',level) 1346 1347 self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. 1348 # This is only used to provide error checking and to generate 1349 # a warning about unused precedence rules. 1350 1351 self.Start = None # Starting symbol for the grammar 1352 1353 1354 def __len__(self): 1355 return len(self.Productions) 1356 1357 def __getitem__(self,index): 1358 return self.Productions[index] 1359 1360 # ----------------------------------------------------------------------------- 1361 # set_precedence() 1362 # 1363 # Sets the precedence for a given terminal. assoc is the associativity such as 1364 # 'left','right', or 'nonassoc'. level is a numeric level. 
1365 # 1366 # ----------------------------------------------------------------------------- 1367 1368 def set_precedence(self,term,assoc,level): 1369 assert self.Productions == [None],"Must call set_precedence() before add_production()" 1370 if term in self.Precedence: 1371 raise GrammarError("Precedence already specified for terminal '%s'" % term) 1372 if assoc not in ['left','right','nonassoc']: 1373 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 1374 self.Precedence[term] = (assoc,level) 1375 1376 # ----------------------------------------------------------------------------- 1377 # add_production() 1378 # 1379 # Given an action function, this function assembles a production rule and 1380 # computes its precedence level. 1381 # 1382 # The production rule is supplied as a list of symbols. For example, 1383 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 1384 # symbols ['expr','PLUS','term']. 1385 # 1386 # Precedence is determined by the precedence of the right-most non-terminal 1387 # or the precedence of a terminal specified by %prec. 1388 # 1389 # A variety of error checks are performed to make sure production symbols 1390 # are valid and that %prec is used correctly. 1391 # ----------------------------------------------------------------------------- 1392 1393 def add_production(self,prodname,syms,func=None,file='',line=0): 1394 1395 if prodname in self.Terminals: 1396 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) 1397 if prodname == 'error': 1398 raise GrammarError("%s:%d: Illegal rule name '%s'. 
error is a reserved word" % (file,line,prodname)) 1399 if not _is_identifier.match(prodname): 1400 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) 1401 1402 # Look for literal tokens 1403 for n,s in enumerate(syms): 1404 if s[0] in "'\"": 1405 try: 1406 c = eval(s) 1407 if (len(c) > 1): 1408 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) 1409 if not c in self.Terminals: 1410 self.Terminals[c] = [] 1411 syms[n] = c 1412 continue 1413 except SyntaxError: 1414 pass 1415 if not _is_identifier.match(s) and s != '%prec': 1416 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) 1417 1418 # Determine the precedence level 1419 if '%prec' in syms: 1420 if syms[-1] == '%prec': 1421 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) 1422 if syms[-2] != '%prec': 1423 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) 1424 precname = syms[-1] 1425 prodprec = self.Precedence.get(precname,None) 1426 if not prodprec: 1427 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) 1428 else: 1429 self.UsedPrecedence[precname] = 1 1430 del syms[-2:] # Drop %prec from the rule 1431 else: 1432 # If no %prec, precedence is determined by the rightmost terminal symbol 1433 precname = rightmost_terminal(syms,self.Terminals) 1434 prodprec = self.Precedence.get(precname,('right',0)) 1435 1436 # See if the rule is already in the rulemap 1437 map = "%s -> %s" % (prodname,syms) 1438 if map in self.Prodmap: 1439 m = self.Prodmap[map] 1440 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + 1441 "Previous definition at %s:%d" % (m.file, m.line)) 1442 1443 # From this point on, everything is valid. 
Create a new Production instance 1444 pnumber = len(self.Productions) 1445 if not prodname in self.Nonterminals: 1446 self.Nonterminals[prodname] = [ ] 1447 1448 # Add the production number to Terminals and Nonterminals 1449 for t in syms: 1450 if t in self.Terminals: 1451 self.Terminals[t].append(pnumber) 1452 else: 1453 if not t in self.Nonterminals: 1454 self.Nonterminals[t] = [ ] 1455 self.Nonterminals[t].append(pnumber) 1456 1457 # Create a production and add it to the list of productions 1458 p = Production(pnumber,prodname,syms,prodprec,func,file,line) 1459 self.Productions.append(p) 1460 self.Prodmap[map] = p 1461 1462 # Add to the global productions list 1463 try: 1464 self.Prodnames[prodname].append(p) 1465 except KeyError: 1466 self.Prodnames[prodname] = [ p ] 1467 return 0 1468 1469 # ----------------------------------------------------------------------------- 1470 # set_start() 1471 # 1472 # Sets the starting symbol and creates the augmented grammar. Production 1473 # rule 0 is S' -> start where start is the start symbol. 1474 # ----------------------------------------------------------------------------- 1475 1476 def set_start(self,start=None): 1477 if not start: 1478 start = self.Productions[1].name 1479 if start not in self.Nonterminals: 1480 raise GrammarError("start symbol %s undefined" % start) 1481 self.Productions[0] = Production(0,"S'",[start]) 1482 self.Nonterminals[start].append(0) 1483 self.Start = start 1484 1485 # ----------------------------------------------------------------------------- 1486 # find_unreachable() 1487 # 1488 # Find all of the nonterminal symbols that can't be reached from the starting 1489 # symbol. Returns a list of nonterminals that can't be reached. 
1490 # ----------------------------------------------------------------------------- 1491 1492 def find_unreachable(self): 1493 1494 # Mark all symbols that are reachable from a symbol s 1495 def mark_reachable_from(s): 1496 if reachable[s]: 1497 # We've already reached symbol s. 1498 return 1499 reachable[s] = 1 1500 for p in self.Prodnames.get(s,[]): 1501 for r in p.prod: 1502 mark_reachable_from(r) 1503 1504 reachable = { } 1505 for s in list(self.Terminals) + list(self.Nonterminals): 1506 reachable[s] = 0 1507 1508 mark_reachable_from( self.Productions[0].prod[0] ) 1509 1510 return [s for s in list(self.Nonterminals) 1511 if not reachable[s]] 1512 1513 # ----------------------------------------------------------------------------- 1514 # infinite_cycles() 1515 # 1516 # This function looks at the various parsing rules and tries to detect 1517 # infinite recursion cycles (grammar rules where there is no possible way 1518 # to derive a string of only terminals). 1519 # ----------------------------------------------------------------------------- 1520 1521 def infinite_cycles(self): 1522 terminates = {} 1523 1524 # Terminals: 1525 for t in self.Terminals: 1526 terminates[t] = 1 1527 1528 terminates['$end'] = 1 1529 1530 # Nonterminals: 1531 1532 # Initialize to false: 1533 for n in self.Nonterminals: 1534 terminates[n] = 0 1535 1536 # Then propagate termination until no change: 1537 while 1: 1538 some_change = 0 1539 for (n,pl) in self.Prodnames.items(): 1540 # Nonterminal n terminates iff any of its productions terminates. 1541 for p in pl: 1542 # Production p terminates iff all of its rhs symbols terminate. 1543 for s in p.prod: 1544 if not terminates[s]: 1545 # The symbol s does not terminate, 1546 # so production p does not terminate. 1547 p_terminates = 0 1548 break 1549 else: 1550 # didn't break from the loop, 1551 # so every symbol s terminates 1552 # so production p terminates. 1553 p_terminates = 1 1554 1555 if p_terminates: 1556 # symbol n terminates! 
1557 if not terminates[n]: 1558 terminates[n] = 1 1559 some_change = 1 1560 # Don't need to consider any more productions for this n. 1561 break 1562 1563 if not some_change: 1564 break 1565 1566 infinite = [] 1567 for (s,term) in terminates.items(): 1568 if not term: 1569 if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1570 # s is used-but-not-defined, and we've already warned of that, 1571 # so it would be overkill to say that it's also non-terminating. 1572 pass 1573 else: 1574 infinite.append(s) 1575 1576 return infinite 1577 1578 1579 # ----------------------------------------------------------------------------- 1580 # undefined_symbols() 1581 # 1582 # Find all symbols that were used the grammar, but not defined as tokens or 1583 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1584 # and prod is the production where the symbol was used. 1585 # ----------------------------------------------------------------------------- 1586 def undefined_symbols(self): 1587 result = [] 1588 for p in self.Productions: 1589 if not p: continue 1590 1591 for s in p.prod: 1592 if not s in self.Prodnames and not s in self.Terminals and s != 'error': 1593 result.append((s,p)) 1594 return result 1595 1596 # ----------------------------------------------------------------------------- 1597 # unused_terminals() 1598 # 1599 # Find all terminals that were defined, but not used by the grammar. Returns 1600 # a list of all symbols. 1601 # ----------------------------------------------------------------------------- 1602 def unused_terminals(self): 1603 unused_tok = [] 1604 for s,v in self.Terminals.items(): 1605 if s != 'error' and not v: 1606 unused_tok.append(s) 1607 1608 return unused_tok 1609 1610 # ------------------------------------------------------------------------------ 1611 # unused_rules() 1612 # 1613 # Find all grammar rules that were defined, but not used (maybe not reachable) 1614 # Returns a list of productions. 
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        """Return the first Production of every nonterminal that is never
        used on the right-hand side of any rule."""
        unused_prod = []
        for s,v in self.Nonterminals.items():
            if not v:
                # No production number ever referenced this nonterminal
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        """Return (term, assoc) tuples for precedence declarations that were
        never consulted, either as a terminal or through %prec."""
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self,beta):
        """Return FIRST(beta) as a list of terminals; the pseudo-symbol
        '<empty>' is appended when every symbol in beta can derive empty.

        NOTE(review): relies on self.First[x] existing for every symbol in
        beta -- callers must have run compute_first() (or be inside it).
        """
        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        """Compute and cache the FIRST set of every grammar symbol.

        Idempotent: returns the cached self.First on repeat calls.
        """
        if self.First:
            return self.First

        # Terminals: FIRST(t) is simply {t}
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change (fixed-point iteration):
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        """Compute and cache the FOLLOW set of every nonterminal.

        Computes FIRST sets first if needed.  Idempotent: returns the
        cached self.Follow on repeat calls.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        # Fixed-point iteration over every occurrence of a nonterminal B
        # in a production p: FOLLOW(B) absorbs FIRST(rest-of-p), and also
        # FOLLOW(p.name) when the rest of p can derive empty
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        """Build the LRItem chain for every production.

        NOTE(review): assumes set_start() has already replaced the None
        placeholder in Productions[0]; len(p) would fail on None otherwise.
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                # Thread the items together: p.lr_next points at the first
                # item and each item's lr_next points at the next one
                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This basic class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
1813# ----------------------------------------------------------------------------- 1814 1815class VersionError(YaccError): pass 1816 1817class LRTable(object): 1818 def __init__(self): 1819 self.lr_action = None 1820 self.lr_goto = None 1821 self.lr_productions = None 1822 self.lr_method = None 1823 1824 def read_table(self,module): 1825 if isinstance(module,types.ModuleType): 1826 parsetab = module 1827 else: 1828 if sys.version_info[0] < 3: 1829 exec("import %s as parsetab" % module) 1830 else: 1831 env = { } 1832 exec("import %s as parsetab" % module, env, env) 1833 parsetab = env['parsetab'] 1834 1835 if parsetab._tabversion != __tabversion__: 1836 raise VersionError("yacc table file version is out of date") 1837 1838 self.lr_action = parsetab._lr_action 1839 self.lr_goto = parsetab._lr_goto 1840 1841 self.lr_productions = [] 1842 for p in parsetab._lr_productions: 1843 self.lr_productions.append(MiniProduction(*p)) 1844 1845 self.lr_method = parsetab._lr_method 1846 return parsetab._lr_signature 1847 1848 def read_pickle(self,filename): 1849 try: 1850 import cPickle as pickle 1851 except ImportError: 1852 import pickle 1853 1854 in_f = open(filename,"rb") 1855 1856 tabversion = pickle.load(in_f) 1857 if tabversion != __tabversion__: 1858 raise VersionError("yacc table file version is out of date") 1859 self.lr_method = pickle.load(in_f) 1860 signature = pickle.load(in_f) 1861 self.lr_action = pickle.load(in_f) 1862 self.lr_goto = pickle.load(in_f) 1863 productions = pickle.load(in_f) 1864 1865 self.lr_productions = [] 1866 for p in productions: 1867 self.lr_productions.append(MiniProduction(*p)) 1868 1869 in_f.close() 1870 return signature 1871 1872 # Bind all production function names to callable objects in pdict 1873 def bind_callables(self,pdict): 1874 for p in self.lr_productions: 1875 p.bind(pdict) 1876 1877# ----------------------------------------------------------------------------- 1878# === LR Generator === 1879# 1880# The following classes and 
functions are used to generate LR parsing tables on 1881# a grammar. 1882# ----------------------------------------------------------------------------- 1883 1884# ----------------------------------------------------------------------------- 1885# digraph() 1886# traverse() 1887# 1888# The following two functions are used to compute set valued functions 1889# of the form: 1890# 1891# F(x) = F'(x) U U{F(y) | x R y} 1892# 1893# This is used to compute the values of Read() sets as well as FOLLOW sets 1894# in LALR(1) generation. 1895# 1896# Inputs: X - An input set 1897# R - A relation 1898# FP - Set-valued function 1899# ------------------------------------------------------------------------------ 1900 1901def digraph(X,R,FP): 1902 N = { } 1903 for x in X: 1904 N[x] = 0 1905 stack = [] 1906 F = { } 1907 for x in X: 1908 if N[x] == 0: traverse(x,N,stack,F,X,R,FP) 1909 return F 1910 1911def traverse(x,N,stack,F,X,R,FP): 1912 stack.append(x) 1913 d = len(stack) 1914 N[x] = d 1915 F[x] = FP(x) # F(X) <- F'(x) 1916 1917 rel = R(x) # Get y's related to x 1918 for y in rel: 1919 if N[y] == 0: 1920 traverse(y,N,stack,F,X,R,FP) 1921 N[x] = min(N[x],N[y]) 1922 for a in F.get(y,[]): 1923 if a not in F[x]: F[x].append(a) 1924 if N[x] == d: 1925 N[stack[-1]] = MAXINT 1926 F[stack[-1]] = F[x] 1927 element = stack.pop() 1928 while element != x: 1929 N[stack[-1]] = MAXINT 1930 F[stack[-1]] = F[x] 1931 element = stack.pop() 1932 1933class LALRError(YaccError): pass 1934 1935# ----------------------------------------------------------------------------- 1936# == LRGeneratedTable == 1937# 1938# This class implements the LR table generation algorithm. 
There are no 1939# public methods except for write() 1940# ----------------------------------------------------------------------------- 1941 1942class LRGeneratedTable(LRTable): 1943 def __init__(self,grammar,method='LALR',log=None): 1944 if method not in ['SLR','LALR']: 1945 raise LALRError("Unsupported method %s" % method) 1946 1947 self.grammar = grammar 1948 self.lr_method = method 1949 1950 # Set up the logger 1951 if not log: 1952 log = NullLogger() 1953 self.log = log 1954 1955 # Internal attributes 1956 self.lr_action = {} # Action table 1957 self.lr_goto = {} # Goto table 1958 self.lr_productions = grammar.Productions # Copy of grammar Production array 1959 self.lr_goto_cache = {} # Cache of computed gotos 1960 self.lr0_cidhash = {} # Cache of closures 1961 1962 self._add_count = 0 # Internal counter used to detect cycles 1963 1964 # Diagonistic information filled in by the table generator 1965 self.sr_conflict = 0 1966 self.rr_conflict = 0 1967 self.conflicts = [] # List of conflicts 1968 1969 self.sr_conflicts = [] 1970 self.rr_conflicts = [] 1971 1972 # Build the tables 1973 self.grammar.build_lritems() 1974 self.grammar.compute_first() 1975 self.grammar.compute_follow() 1976 self.lr_parse_table() 1977 1978 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. 1979 1980 def lr0_closure(self,I): 1981 self._add_count += 1 1982 1983 # Add everything in I to J 1984 J = I[:] 1985 didadd = 1 1986 while didadd: 1987 didadd = 0 1988 for j in J: 1989 for x in j.lr_after: 1990 if getattr(x,"lr0_added",0) == self._add_count: continue 1991 # Add B --> .G to J 1992 J.append(x.lr_next) 1993 x.lr0_added = self._add_count 1994 didadd = 1 1995 1996 return J 1997 1998 # Compute the LR(0) goto function goto(I,X) where I is a set 1999 # of LR(0) items and X is a grammar symbol. This function is written 2000 # in a way that guarantees uniqueness of the generated goto sets 2001 # (i.e. 
the same goto set will never be returned as two different Python 2002 # objects). With uniqueness, we can later do fast set comparisons using 2003 # id(obj) instead of element-wise comparison. 2004 2005 def lr0_goto(self,I,x): 2006 # First we look for a previously cached entry 2007 g = self.lr_goto_cache.get((id(I),x),None) 2008 if g: return g 2009 2010 # Now we generate the goto set in a way that guarantees uniqueness 2011 # of the result 2012 2013 s = self.lr_goto_cache.get(x,None) 2014 if not s: 2015 s = { } 2016 self.lr_goto_cache[x] = s 2017 2018 gs = [ ] 2019 for p in I: 2020 n = p.lr_next 2021 if n and n.lr_before == x: 2022 s1 = s.get(id(n),None) 2023 if not s1: 2024 s1 = { } 2025 s[id(n)] = s1 2026 gs.append(n) 2027 s = s1 2028 g = s.get('$end',None) 2029 if not g: 2030 if gs: 2031 g = self.lr0_closure(gs) 2032 s['$end'] = g 2033 else: 2034 s['$end'] = gs 2035 self.lr_goto_cache[(id(I),x)] = g 2036 return g 2037 2038 # Compute the LR(0) sets of item function 2039 def lr0_items(self): 2040 2041 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] 2042 i = 0 2043 for I in C: 2044 self.lr0_cidhash[id(I)] = i 2045 i += 1 2046 2047 # Loop over the items in C and each grammar symbols 2048 i = 0 2049 while i < len(C): 2050 I = C[i] 2051 i += 1 2052 2053 # Collect all of the symbols that could possibly be in the goto(I,X) sets 2054 asyms = { } 2055 for ii in I: 2056 for s in ii.usyms: 2057 asyms[s] = None 2058 2059 for x in asyms: 2060 g = self.lr0_goto(I,x) 2061 if not g: continue 2062 if id(g) in self.lr0_cidhash: continue 2063 self.lr0_cidhash[id(g)] = len(C) 2064 C.append(g) 2065 2066 return C 2067 2068 # ----------------------------------------------------------------------------- 2069 # ==== LALR(1) Parsing ==== 2070 # 2071 # LALR(1) parsing is almost exactly the same as SLR except that instead of 2072 # relying upon Follow() sets when performing reductions, a more selective 2073 # lookahead set that incorporates the state of the LR(0) machine is 
utilized. 2074 # Thus, we mainly just have to focus on calculating the lookahead sets. 2075 # 2076 # The method used here is due to DeRemer and Pennelo (1982). 2077 # 2078 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 2079 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2080 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2081 # 2082 # Further details can also be found in: 2083 # 2084 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2085 # McGraw-Hill Book Company, (1985). 2086 # 2087 # ----------------------------------------------------------------------------- 2088 2089 # ----------------------------------------------------------------------------- 2090 # compute_nullable_nonterminals() 2091 # 2092 # Creates a dictionary containing all of the non-terminals that might produce 2093 # an empty production. 2094 # ----------------------------------------------------------------------------- 2095 2096 def compute_nullable_nonterminals(self): 2097 nullable = {} 2098 num_nullable = 0 2099 while 1: 2100 for p in self.grammar.Productions[1:]: 2101 if p.len == 0: 2102 nullable[p.name] = 1 2103 continue 2104 for t in p.prod: 2105 if not t in nullable: break 2106 else: 2107 nullable[p.name] = 1 2108 if len(nullable) == num_nullable: break 2109 num_nullable = len(nullable) 2110 return nullable 2111 2112 # ----------------------------------------------------------------------------- 2113 # find_nonterminal_trans(C) 2114 # 2115 # Given a set of LR(0) items, this functions finds all of the non-terminal 2116 # transitions. These are transitions in which a dot appears immediately before 2117 # a non-terminal. Returns a list of tuples of the form (state,N) where state 2118 # is the state number and N is the nonterminal symbol. 2119 # 2120 # The input C is the set of LR(0) items. 
2121 # ----------------------------------------------------------------------------- 2122 2123 def find_nonterminal_transitions(self,C): 2124 trans = [] 2125 for state in range(len(C)): 2126 for p in C[state]: 2127 if p.lr_index < p.len - 1: 2128 t = (state,p.prod[p.lr_index+1]) 2129 if t[1] in self.grammar.Nonterminals: 2130 if t not in trans: trans.append(t) 2131 state = state + 1 2132 return trans 2133 2134 # ----------------------------------------------------------------------------- 2135 # dr_relation() 2136 # 2137 # Computes the DR(p,A) relationships for non-terminal transitions. The input 2138 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2139 # 2140 # Returns a list of terminals. 2141 # ----------------------------------------------------------------------------- 2142 2143 def dr_relation(self,C,trans,nullable): 2144 dr_set = { } 2145 state,N = trans 2146 terms = [] 2147 2148 g = self.lr0_goto(C[state],N) 2149 for p in g: 2150 if p.lr_index < p.len - 1: 2151 a = p.prod[p.lr_index+1] 2152 if a in self.grammar.Terminals: 2153 if a not in terms: terms.append(a) 2154 2155 # This extra bit is to handle the start state 2156 if state == 0 and N == self.grammar.Productions[0].prod[0]: 2157 terms.append('$end') 2158 2159 return terms 2160 2161 # ----------------------------------------------------------------------------- 2162 # reads_relation() 2163 # 2164 # Computes the READS() relation (p,A) READS (t,C). 
2165 # ----------------------------------------------------------------------------- 2166 2167 def reads_relation(self,C, trans, empty): 2168 # Look for empty transitions 2169 rel = [] 2170 state, N = trans 2171 2172 g = self.lr0_goto(C[state],N) 2173 j = self.lr0_cidhash.get(id(g),-1) 2174 for p in g: 2175 if p.lr_index < p.len - 1: 2176 a = p.prod[p.lr_index + 1] 2177 if a in empty: 2178 rel.append((j,a)) 2179 2180 return rel 2181 2182 # ----------------------------------------------------------------------------- 2183 # compute_lookback_includes() 2184 # 2185 # Determines the lookback and includes relations 2186 # 2187 # LOOKBACK: 2188 # 2189 # This relation is determined by running the LR(0) state machine forward. 2190 # For example, starting with a production "N : . A B C", we run it forward 2191 # to obtain "N : A B C ." We then build a relationship between this final 2192 # state and the starting state. These relationships are stored in a dictionary 2193 # lookdict. 2194 # 2195 # INCLUDES: 2196 # 2197 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2198 # 2199 # This relation is used to determine non-terminal transitions that occur 2200 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 2201 # if the following holds: 2202 # 2203 # B -> LAT, where T -> epsilon and p' -L-> p 2204 # 2205 # L is essentially a prefix (which may be empty), T is a suffix that must be 2206 # able to derive an empty string. State p' must lead to state p with the string L. 
2207 # 2208 # ----------------------------------------------------------------------------- 2209 2210 def compute_lookback_includes(self,C,trans,nullable): 2211 2212 lookdict = {} # Dictionary of lookback relations 2213 includedict = {} # Dictionary of include relations 2214 2215 # Make a dictionary of non-terminal transitions 2216 dtrans = {} 2217 for t in trans: 2218 dtrans[t] = 1 2219 2220 # Loop over all transitions and compute lookbacks and includes 2221 for state,N in trans: 2222 lookb = [] 2223 includes = [] 2224 for p in C[state]: 2225 if p.name != N: continue 2226 2227 # Okay, we have a name match. We now follow the production all the way 2228 # through the state machine until we get the . on the right hand side 2229 2230 lr_index = p.lr_index 2231 j = state 2232 while lr_index < p.len - 1: 2233 lr_index = lr_index + 1 2234 t = p.prod[lr_index] 2235 2236 # Check to see if this symbol and state are a non-terminal transition 2237 if (j,t) in dtrans: 2238 # Yes. Okay, there is some chance that this is an includes relation 2239 # the only way to know for certain is whether the rest of the 2240 # production derives empty 2241 2242 li = lr_index + 1 2243 while li < p.len: 2244 if p.prod[li] in self.grammar.Terminals: break # No forget it 2245 if not p.prod[li] in nullable: break 2246 li = li + 1 2247 else: 2248 # Appears to be a relation between (j,t) and (state,N) 2249 includes.append((j,t)) 2250 2251 g = self.lr0_goto(C[j],t) # Go to next set 2252 j = self.lr0_cidhash.get(id(g),-1) # Go to next state 2253 2254 # When we get here, j is the final state, now we have to locate the production 2255 for r in C[j]: 2256 if r.name != p.name: continue 2257 if r.len != p.len: continue 2258 i = 0 2259 # This look is comparing a production ". A B C" with "A B C ." 
2260 while i < r.lr_index: 2261 if r.prod[i] != p.prod[i+1]: break 2262 i = i + 1 2263 else: 2264 lookb.append((j,r)) 2265 for i in includes: 2266 if not i in includedict: includedict[i] = [] 2267 includedict[i].append((state,N)) 2268 lookdict[(state,N)] = lookb 2269 2270 return lookdict,includedict 2271 2272 # ----------------------------------------------------------------------------- 2273 # compute_read_sets() 2274 # 2275 # Given a set of LR(0) items, this function computes the read sets. 2276 # 2277 # Inputs: C = Set of LR(0) items 2278 # ntrans = Set of nonterminal transitions 2279 # nullable = Set of empty transitions 2280 # 2281 # Returns a set containing the read sets 2282 # ----------------------------------------------------------------------------- 2283 2284 def compute_read_sets(self,C, ntrans, nullable): 2285 FP = lambda x: self.dr_relation(C,x,nullable) 2286 R = lambda x: self.reads_relation(C,x,nullable) 2287 F = digraph(ntrans,R,FP) 2288 return F 2289 2290 # ----------------------------------------------------------------------------- 2291 # compute_follow_sets() 2292 # 2293 # Given a set of LR(0) items, a set of non-terminal transitions, a readset, 2294 # and an include set, this function computes the follow sets 2295 # 2296 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)} 2297 # 2298 # Inputs: 2299 # ntrans = Set of nonterminal transitions 2300 # readsets = Readset (previously computed) 2301 # inclsets = Include sets (previously computed) 2302 # 2303 # Returns a set containing the follow sets 2304 # ----------------------------------------------------------------------------- 2305 2306 def compute_follow_sets(self,ntrans,readsets,inclsets): 2307 FP = lambda x: readsets[x] 2308 R = lambda x: inclsets.get(x,[]) 2309 F = digraph(ntrans,R,FP) 2310 return F 2311 2312 # ----------------------------------------------------------------------------- 2313 # add_lookaheads() 2314 # 2315 # Attaches the lookahead symbols to grammar rules. 
2316 # 2317 # Inputs: lookbacks - Set of lookback relations 2318 # followset - Computed follow set 2319 # 2320 # This function directly attaches the lookaheads to productions contained 2321 # in the lookbacks set 2322 # ----------------------------------------------------------------------------- 2323 2324 def add_lookaheads(self,lookbacks,followset): 2325 for trans,lb in lookbacks.items(): 2326 # Loop over productions in lookback 2327 for state,p in lb: 2328 if not state in p.lookaheads: 2329 p.lookaheads[state] = [] 2330 f = followset.get(trans,[]) 2331 for a in f: 2332 if a not in p.lookaheads[state]: p.lookaheads[state].append(a) 2333 2334 # ----------------------------------------------------------------------------- 2335 # add_lalr_lookaheads() 2336 # 2337 # This function does all of the work of adding lookahead information for use 2338 # with LALR parsing 2339 # ----------------------------------------------------------------------------- 2340 2341 def add_lalr_lookaheads(self,C): 2342 # Determine all of the nullable nonterminals 2343 nullable = self.compute_nullable_nonterminals() 2344 2345 # Find all non-terminal transitions 2346 trans = self.find_nonterminal_transitions(C) 2347 2348 # Compute read sets 2349 readsets = self.compute_read_sets(C,trans,nullable) 2350 2351 # Compute lookback/includes relations 2352 lookd, included = self.compute_lookback_includes(C,trans,nullable) 2353 2354 # Compute LALR FOLLOW sets 2355 followsets = self.compute_follow_sets(trans,readsets,included) 2356 2357 # Add all of the lookaheads 2358 self.add_lookaheads(lookd,followsets) 2359 2360 # ----------------------------------------------------------------------------- 2361 # lr_parse_table() 2362 # 2363 # This function constructs the parse tables for SLR or LALR 2364 # ----------------------------------------------------------------------------- 2365 def lr_parse_table(self): 2366 Productions = self.grammar.Productions 2367 Precedence = self.grammar.Precedence 2368 goto = 
self.lr_goto # Goto array 2369 action = self.lr_action # Action array 2370 log = self.log # Logger for output 2371 2372 actionp = { } # Action production array (temporary) 2373 2374 log.info("Parsing method: %s", self.lr_method) 2375 2376 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 2377 # This determines the number of states 2378 2379 C = self.lr0_items() 2380 2381 if self.lr_method == 'LALR': 2382 self.add_lalr_lookaheads(C) 2383 2384 # Build the parser table, state by state 2385 st = 0 2386 for I in C: 2387 # Loop over each production in I 2388 actlist = [ ] # List of actions 2389 st_action = { } 2390 st_actionp = { } 2391 st_goto = { } 2392 log.info("") 2393 log.info("state %d", st) 2394 log.info("") 2395 for p in I: 2396 log.info(" (%d) %s", p.number, str(p)) 2397 log.info("") 2398 2399 for p in I: 2400 if p.len == p.lr_index + 1: 2401 if p.name == "S'": 2402 # Start symbol. Accept! 2403 st_action["$end"] = 0 2404 st_actionp["$end"] = p 2405 else: 2406 # We are at the end of a production. Reduce! 2407 if self.lr_method == 'LALR': 2408 laheads = p.lookaheads[st] 2409 else: 2410 laheads = self.grammar.Follow[p.name] 2411 for a in laheads: 2412 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) 2413 r = st_action.get(a,None) 2414 if r is not None: 2415 # Whoa. Have a shift/reduce or reduce/reduce conflict 2416 if r > 0: 2417 # Need to decide on shift or reduce here 2418 # By default we favor shifting. Need to add 2419 # some precedence rules here. 2420 sprec,slevel = Productions[st_actionp[a].number].prec 2421 rprec,rlevel = Precedence.get(a,('right',0)) 2422 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 2423 # We really need to reduce here. 2424 st_action[a] = -p.number 2425 st_actionp[a] = p 2426 if not slevel and not rlevel: 2427 log.info(" ! 
shift/reduce conflict for %s resolved as reduce",a) 2428 self.sr_conflicts.append((st,a,'reduce')) 2429 Productions[p.number].reduced += 1 2430 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2431 st_action[a] = None 2432 else: 2433 # Hmmm. Guess we'll keep the shift 2434 if not rlevel: 2435 log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2436 self.sr_conflicts.append((st,a,'shift')) 2437 elif r < 0: 2438 # Reduce/reduce conflict. In this case, we favor the rule 2439 # that was defined first in the grammar file 2440 oldp = Productions[-r] 2441 pp = Productions[p.number] 2442 if oldp.line > pp.line: 2443 st_action[a] = -p.number 2444 st_actionp[a] = p 2445 chosenp,rejectp = pp,oldp 2446 Productions[p.number].reduced += 1 2447 Productions[oldp.number].reduced -= 1 2448 else: 2449 chosenp,rejectp = oldp,pp 2450 self.rr_conflicts.append((st,chosenp,rejectp)) 2451 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) 2452 else: 2453 raise LALRError("Unknown conflict in state %d" % st) 2454 else: 2455 st_action[a] = -p.number 2456 st_actionp[a] = p 2457 Productions[p.number].reduced += 1 2458 else: 2459 i = p.lr_index 2460 a = p.prod[i+1] # Get symbol right after the "." 2461 if a in self.grammar.Terminals: 2462 g = self.lr0_goto(I,a) 2463 j = self.lr0_cidhash.get(id(g),-1) 2464 if j >= 0: 2465 # We are in a shift state 2466 actlist.append((a,p,"shift and go to state %d" % j)) 2467 r = st_action.get(a,None) 2468 if r is not None: 2469 # Whoa have a shift/reduce or shift/shift conflict 2470 if r > 0: 2471 if r != j: 2472 raise LALRError("Shift/shift conflict in state %d" % st) 2473 elif r < 0: 2474 # Do a precedence check. 2475 # - if precedence of reduce rule is higher, we reduce. 2476 # - if precedence of reduce is same and left assoc, we reduce. 
2477 # - otherwise we shift 2478 rprec,rlevel = Productions[st_actionp[a].number].prec 2479 sprec,slevel = Precedence.get(a,('right',0)) 2480 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 2481 # We decide to shift here... highest precedence to shift 2482 Productions[st_actionp[a].number].reduced -= 1 2483 st_action[a] = j 2484 st_actionp[a] = p 2485 if not rlevel: 2486 log.info(" ! shift/reduce conflict for %s resolved as shift",a) 2487 self.sr_conflicts.append((st,a,'shift')) 2488 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2489 st_action[a] = None 2490 else: 2491 # Hmmm. Guess we'll keep the reduce 2492 if not slevel and not rlevel: 2493 log.info(" ! shift/reduce conflict for %s resolved as reduce",a) 2494 self.sr_conflicts.append((st,a,'reduce')) 2495 2496 else: 2497 raise LALRError("Unknown conflict in state %d" % st) 2498 else: 2499 st_action[a] = j 2500 st_actionp[a] = p 2501 2502 # Print the actions associated with each terminal 2503 _actprint = { } 2504 for a,p,m in actlist: 2505 if a in st_action: 2506 if p is st_actionp[a]: 2507 log.info(" %-15s %s",a,m) 2508 _actprint[(a,m)] = 1 2509 log.info("") 2510 # Print the actions that were not used. (debugging) 2511 not_used = 0 2512 for a,p,m in actlist: 2513 if a in st_action: 2514 if p is not st_actionp[a]: 2515 if not (a,m) in _actprint: 2516 log.debug(" ! 
%-15s [ %s ]",a,m) 2517 not_used = 1 2518 _actprint[(a,m)] = 1 2519 if not_used: 2520 log.debug("") 2521 2522 # Construct the goto table for this state 2523 2524 nkeys = { } 2525 for ii in I: 2526 for s in ii.usyms: 2527 if s in self.grammar.Nonterminals: 2528 nkeys[s] = None 2529 for n in nkeys: 2530 g = self.lr0_goto(I,n) 2531 j = self.lr0_cidhash.get(id(g),-1) 2532 if j >= 0: 2533 st_goto[n] = j 2534 log.info(" %-30s shift and go to state %d",n,j) 2535 2536 action[st] = st_action 2537 actionp[st] = st_actionp 2538 goto[st] = st_goto 2539 st += 1 2540 2541 2542 # ----------------------------------------------------------------------------- 2543 # write() 2544 # 2545 # This function writes the LR parsing tables to a file 2546 # ----------------------------------------------------------------------------- 2547 2548 def write_table(self,modulename,outputdir='',signature=""): 2549 basemodulename = modulename.split(".")[-1] 2550 filename = os.path.join(outputdir,basemodulename) + ".py" 2551 try: 2552 f = open(filename,"w") 2553 2554 f.write(""" 2555# %s 2556# This file is automatically generated. Do not edit. 
2557_tabversion = %r 2558 2559_lr_method = %r 2560 2561_lr_signature = %r 2562 """ % (filename, __tabversion__, self.lr_method, signature)) 2563 2564 # Change smaller to 0 to go back to original tables 2565 smaller = 1 2566 2567 # Factor out names to try and make smaller 2568 if smaller: 2569 items = { } 2570 2571 for s,nd in self.lr_action.items(): 2572 for name,v in nd.items(): 2573 i = items.get(name) 2574 if not i: 2575 i = ([],[]) 2576 items[name] = i 2577 i[0].append(s) 2578 i[1].append(v) 2579 2580 f.write("\n_lr_action_items = {") 2581 for k,v in items.items(): 2582 f.write("%r:([" % k) 2583 for i in v[0]: 2584 f.write("%r," % i) 2585 f.write("],[") 2586 for i in v[1]: 2587 f.write("%r," % i) 2588 2589 f.write("]),") 2590 f.write("}\n") 2591 2592 f.write(""" 2593_lr_action = { } 2594for _k, _v in _lr_action_items.items(): 2595 for _x,_y in zip(_v[0],_v[1]): 2596 if not _x in _lr_action: _lr_action[_x] = { } 2597 _lr_action[_x][_k] = _y 2598del _lr_action_items 2599""") 2600 2601 else: 2602 f.write("\n_lr_action = { "); 2603 for k,v in self.lr_action.items(): 2604 f.write("(%r,%r):%r," % (k[0],k[1],v)) 2605 f.write("}\n"); 2606 2607 if smaller: 2608 # Factor out names to try and make smaller 2609 items = { } 2610 2611 for s,nd in self.lr_goto.items(): 2612 for name,v in nd.items(): 2613 i = items.get(name) 2614 if not i: 2615 i = ([],[]) 2616 items[name] = i 2617 i[0].append(s) 2618 i[1].append(v) 2619 2620 f.write("\n_lr_goto_items = {") 2621 for k,v in items.items(): 2622 f.write("%r:([" % k) 2623 for i in v[0]: 2624 f.write("%r," % i) 2625 f.write("],[") 2626 for i in v[1]: 2627 f.write("%r," % i) 2628 2629 f.write("]),") 2630 f.write("}\n") 2631 2632 f.write(""" 2633_lr_goto = { } 2634for _k, _v in _lr_goto_items.items(): 2635 for _x,_y in zip(_v[0],_v[1]): 2636 if not _x in _lr_goto: _lr_goto[_x] = { } 2637 _lr_goto[_x][_k] = _y 2638del _lr_goto_items 2639""") 2640 else: 2641 f.write("\n_lr_goto = { "); 2642 for k,v in self.lr_goto.items(): 2643 
f.write("(%r,%r):%r," % (k[0],k[1],v)) 2644 f.write("}\n"); 2645 2646 # Write production table 2647 f.write("_lr_productions = [\n") 2648 for p in self.lr_productions: 2649 if p.func: 2650 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) 2651 else: 2652 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) 2653 f.write("]\n") 2654 f.close() 2655 2656 except IOError: 2657 e = sys.exc_info()[1] 2658 sys.stderr.write("Unable to create '%s'\n" % filename) 2659 sys.stderr.write(str(e)+"\n") 2660 return 2661 2662 2663 # ----------------------------------------------------------------------------- 2664 # pickle_table() 2665 # 2666 # This function pickles the LR parsing tables to a supplied file object 2667 # ----------------------------------------------------------------------------- 2668 2669 def pickle_table(self,filename,signature=""): 2670 try: 2671 import cPickle as pickle 2672 except ImportError: 2673 import pickle 2674 outf = open(filename,"wb") 2675 pickle.dump(__tabversion__,outf,pickle_protocol) 2676 pickle.dump(self.lr_method,outf,pickle_protocol) 2677 pickle.dump(signature,outf,pickle_protocol) 2678 pickle.dump(self.lr_action,outf,pickle_protocol) 2679 pickle.dump(self.lr_goto,outf,pickle_protocol) 2680 2681 outp = [] 2682 for p in self.lr_productions: 2683 if p.func: 2684 outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) 2685 else: 2686 outp.append((str(p),p.name,p.len,None,None,None)) 2687 pickle.dump(outp,outf,pickle_protocol) 2688 outf.close() 2689 2690# ----------------------------------------------------------------------------- 2691# === INTROSPECTION === 2692# 2693# The following functions and classes are used to implement the PLY 2694# introspection features followed by the yacc() function itself. 
2695# ----------------------------------------------------------------------------- 2696 2697# ----------------------------------------------------------------------------- 2698# get_caller_module_dict() 2699# 2700# This function returns a dictionary containing all of the symbols defined within 2701# a caller further down the call stack. This is used to get the environment 2702# associated with the yacc() call if none was provided. 2703# ----------------------------------------------------------------------------- 2704 2705def get_caller_module_dict(levels): 2706 try: 2707 raise RuntimeError 2708 except RuntimeError: 2709 e,b,t = sys.exc_info() 2710 f = t.tb_frame 2711 while levels > 0: 2712 f = f.f_back 2713 levels -= 1 2714 ldict = f.f_globals.copy() 2715 if f.f_globals != f.f_locals: 2716 ldict.update(f.f_locals) 2717 2718 return ldict 2719 2720# ----------------------------------------------------------------------------- 2721# parse_grammar() 2722# 2723# This takes a raw grammar rule string and parses it into production data 2724# ----------------------------------------------------------------------------- 2725def parse_grammar(doc,file,line): 2726 grammar = [] 2727 # Split the doc string into lines 2728 pstrings = doc.splitlines() 2729 lastp = None 2730 dline = line 2731 for ps in pstrings: 2732 dline += 1 2733 p = ps.split() 2734 if not p: continue 2735 try: 2736 if p[0] == '|': 2737 # This is a continuation of a previous rule 2738 if not lastp: 2739 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) 2740 prodname = lastp 2741 syms = p[1:] 2742 else: 2743 prodname = p[0] 2744 lastp = prodname 2745 syms = p[2:] 2746 assign = p[1] 2747 if assign != ':' and assign != '::=': 2748 raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file,dline)) 2749 2750 grammar.append((file,dline,prodname,syms)) 2751 except SyntaxError: 2752 raise 2753 except Exception: 2754 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) 2755 2756 return grammar 2757 2758# ----------------------------------------------------------------------------- 2759# ParserReflect() 2760# 2761# This class represents information extracted for building a parser including 2762# start symbol, error function, tokens, precedence list, action functions, 2763# etc. 2764# ----------------------------------------------------------------------------- 2765class ParserReflect(object): 2766 def __init__(self,pdict,log=None): 2767 self.pdict = pdict 2768 self.start = None 2769 self.error_func = None 2770 self.tokens = None 2771 self.files = {} 2772 self.grammar = [] 2773 self.error = 0 2774 2775 if log is None: 2776 self.log = PlyLogger(sys.stderr) 2777 else: 2778 self.log = log 2779 2780 # Get all of the basic information 2781 def get_all(self): 2782 self.get_start() 2783 self.get_error_func() 2784 self.get_tokens() 2785 self.get_precedence() 2786 self.get_pfunctions() 2787 2788 # Validate all of the information 2789 def validate_all(self): 2790 self.validate_start() 2791 self.validate_error_func() 2792 self.validate_tokens() 2793 self.validate_precedence() 2794 self.validate_pfunctions() 2795 self.validate_files() 2796 return self.error 2797 2798 # Compute a signature over the grammar 2799 def signature(self): 2800 try: 2801 from hashlib import md5 2802 except ImportError: 2803 from md5 import md5 2804 try: 2805 sig = md5() 2806 if self.start: 2807 sig.update(self.start.encode('latin-1')) 2808 if self.prec: 2809 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) 2810 if self.tokens: 2811 sig.update(" ".join(self.tokens).encode('latin-1')) 2812 for f in self.pfuncs: 2813 if f[3]: 2814 sig.update(f[3].encode('latin-1')) 2815 except (TypeError,ValueError): 2816 pass 2817 return 
sig.digest() 2818 2819 # ----------------------------------------------------------------------------- 2820 # validate_file() 2821 # 2822 # This method checks to see if there are duplicated p_rulename() functions 2823 # in the parser module file. Without this function, it is really easy for 2824 # users to make mistakes by cutting and pasting code fragments (and it's a real 2825 # bugger to try and figure out why the resulting parser doesn't work). Therefore, 2826 # we just do a little regular expression pattern matching of def statements 2827 # to try and detect duplicates. 2828 # ----------------------------------------------------------------------------- 2829 2830 def validate_files(self): 2831 # Match def p_funcname( 2832 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') 2833 2834 for filename in self.files.keys(): 2835 base,ext = os.path.splitext(filename) 2836 if ext != '.py': return 1 # No idea. Assume it's okay. 2837 2838 try: 2839 f = open(filename) 2840 lines = f.readlines() 2841 f.close() 2842 except IOError: 2843 continue 2844 2845 counthash = { } 2846 for linen,l in enumerate(lines): 2847 linen += 1 2848 m = fre.match(l) 2849 if m: 2850 name = m.group(1) 2851 prev = counthash.get(name) 2852 if not prev: 2853 counthash[name] = linen 2854 else: 2855 self.log.warning("%s:%d: Function %s redefined. 
Previously defined on line %d", filename,linen,name,prev) 2856 2857 # Get the start symbol 2858 def get_start(self): 2859 self.start = self.pdict.get('start') 2860 2861 # Validate the start symbol 2862 def validate_start(self): 2863 if self.start is not None: 2864 if not isinstance(self.start,str): 2865 self.log.error("'start' must be a string") 2866 2867 # Look for error handler 2868 def get_error_func(self): 2869 self.error_func = self.pdict.get('p_error') 2870 2871 # Validate the error function 2872 def validate_error_func(self): 2873 if self.error_func: 2874 if isinstance(self.error_func,types.FunctionType): 2875 ismethod = 0 2876 elif isinstance(self.error_func, types.MethodType): 2877 ismethod = 1 2878 else: 2879 self.log.error("'p_error' defined, but is not a function or method") 2880 self.error = 1 2881 return 2882 2883 eline = func_code(self.error_func).co_firstlineno 2884 efile = func_code(self.error_func).co_filename 2885 self.files[efile] = 1 2886 2887 if (func_code(self.error_func).co_argcount != 1+ismethod): 2888 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) 2889 self.error = 1 2890 2891 # Get the tokens map 2892 def get_tokens(self): 2893 tokens = self.pdict.get("tokens",None) 2894 if not tokens: 2895 self.log.error("No token list is defined") 2896 self.error = 1 2897 return 2898 2899 if not isinstance(tokens,(list, tuple)): 2900 self.log.error("tokens must be a list or tuple") 2901 self.error = 1 2902 return 2903 2904 if not tokens: 2905 self.log.error("tokens is empty") 2906 self.error = 1 2907 return 2908 2909 self.tokens = tokens 2910 2911 # Validate the tokens 2912 def validate_tokens(self): 2913 # Validate the tokens. 2914 if 'error' in self.tokens: 2915 self.log.error("Illegal token name 'error'. 
Is a reserved word") 2916 self.error = 1 2917 return 2918 2919 terminals = {} 2920 for n in self.tokens: 2921 if n in terminals: 2922 self.log.warning("Token '%s' multiply defined", n) 2923 terminals[n] = 1 2924 2925 # Get the precedence map (if any) 2926 def get_precedence(self): 2927 self.prec = self.pdict.get("precedence",None) 2928 2929 # Validate and parse the precedence map 2930 def validate_precedence(self): 2931 preclist = [] 2932 if self.prec: 2933 if not isinstance(self.prec,(list,tuple)): 2934 self.log.error("precedence must be a list or tuple") 2935 self.error = 1 2936 return 2937 for level,p in enumerate(self.prec): 2938 if not isinstance(p,(list,tuple)): 2939 self.log.error("Bad precedence table") 2940 self.error = 1 2941 return 2942 2943 if len(p) < 2: 2944 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) 2945 self.error = 1 2946 return 2947 assoc = p[0] 2948 if not isinstance(assoc,str): 2949 self.log.error("precedence associativity must be a string") 2950 self.error = 1 2951 return 2952 for term in p[1:]: 2953 if not isinstance(term,str): 2954 self.log.error("precedence items must be strings") 2955 self.error = 1 2956 return 2957 preclist.append((term,assoc,level+1)) 2958 self.preclist = preclist 2959 2960 # Get all p_functions from the grammar 2961 def get_pfunctions(self): 2962 p_functions = [] 2963 for name, item in self.pdict.items(): 2964 if name[:2] != 'p_': continue 2965 if name == 'p_error': continue 2966 if isinstance(item,(types.FunctionType,types.MethodType)): 2967 line = func_code(item).co_firstlineno 2968 file = func_code(item).co_filename 2969 p_functions.append((line,file,name,item.__doc__)) 2970 2971 # Sort all of the actions by line number 2972 p_functions.sort() 2973 self.pfuncs = p_functions 2974 2975 2976 # Validate all of the p_functions 2977 def validate_pfunctions(self): 2978 grammar = [] 2979 # Check for non-empty symbols 2980 if len(self.pfuncs) == 0: 2981 self.log.error("no rules of the 
form p_rulename are defined") 2982 self.error = 1 2983 return 2984 2985 for line, file, name, doc in self.pfuncs: 2986 func = self.pdict[name] 2987 if isinstance(func, types.MethodType): 2988 reqargs = 2 2989 else: 2990 reqargs = 1 2991 if func_code(func).co_argcount > reqargs: 2992 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__) 2993 self.error = 1 2994 elif func_code(func).co_argcount < reqargs: 2995 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) 2996 self.error = 1 2997 elif not func.__doc__: 2998 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) 2999 else: 3000 try: 3001 parsed_g = parse_grammar(doc,file,line) 3002 for g in parsed_g: 3003 grammar.append((name, g)) 3004 except SyntaxError: 3005 e = sys.exc_info()[1] 3006 self.log.error(str(e)) 3007 self.error = 1 3008 3009 # Looks like a valid grammar rule 3010 # Mark the file in which defined. 3011 self.files[file] = 1 3012 3013 # Secondary validation step that looks for p_ definitions that are not functions 3014 # or functions that look like they might be grammar rules. 
3015 3016 for n,v in self.pdict.items(): 3017 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue 3018 if n[0:2] == 't_': continue 3019 if n[0:2] == 'p_' and n != 'p_error': 3020 self.log.warning("'%s' not defined as a function", n) 3021 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or 3022 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): 3023 try: 3024 doc = v.__doc__.split(" ") 3025 if doc[1] == ':': 3026 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", 3027 func_code(v).co_filename, func_code(v).co_firstlineno,n) 3028 except Exception: 3029 pass 3030 3031 self.grammar = grammar 3032 3033# ----------------------------------------------------------------------------- 3034# yacc(module) 3035# 3036# Build a parser 3037# ----------------------------------------------------------------------------- 3038 3039def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, 3040 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', 3041 debuglog=None, errorlog = None, picklefile=None): 3042 3043 global parse # Reference to the parsing method of the last built parser 3044 3045 # If pickling is enabled, table files are not created 3046 3047 if picklefile: 3048 write_tables = 0 3049 3050 if errorlog is None: 3051 errorlog = PlyLogger(sys.stderr) 3052 3053 # Get the module dictionary used for the parser 3054 if module: 3055 _items = [(k,getattr(module,k)) for k in dir(module)] 3056 pdict = dict(_items) 3057 else: 3058 pdict = get_caller_module_dict(2) 3059 3060 # Collect parser information from the dictionary 3061 pinfo = ParserReflect(pdict,log=errorlog) 3062 pinfo.get_all() 3063 3064 if pinfo.error: 3065 raise YaccError("Unable to build parser") 3066 3067 # Check signature against table files (if any) 3068 signature = pinfo.signature() 3069 3070 # Read the tables 3071 try: 3072 lr = LRTable() 3073 if picklefile: 
3074 read_signature = lr.read_pickle(picklefile) 3075 else: 3076 read_signature = lr.read_table(tabmodule) 3077 if optimize or (read_signature == signature): 3078 try: 3079 lr.bind_callables(pinfo.pdict) 3080 parser = LRParser(lr,pinfo.error_func) 3081 parse = parser.parse 3082 return parser 3083 except Exception: 3084 e = sys.exc_info()[1] 3085 errorlog.warning("There was a problem loading the table file: %s", repr(e)) 3086 except VersionError: 3087 e = sys.exc_info() 3088 errorlog.warning(str(e)) 3089 except Exception: 3090 pass 3091 3092 if debuglog is None: 3093 if debug: 3094 debuglog = PlyLogger(open(debugfile,"w")) 3095 else: 3096 debuglog = NullLogger() 3097 3098 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) 3099 3100 3101 errors = 0 3102 3103 # Validate the parser information 3104 if pinfo.validate_all(): 3105 raise YaccError("Unable to build parser") 3106 3107 if not pinfo.error_func: 3108 errorlog.warning("no p_error() function is defined") 3109 3110 # Create a grammar object 3111 grammar = Grammar(pinfo.tokens) 3112 3113 # Set precedence level for terminals 3114 for term, assoc, level in pinfo.preclist: 3115 try: 3116 grammar.set_precedence(term,assoc,level) 3117 except GrammarError: 3118 e = sys.exc_info()[1] 3119 errorlog.warning("%s",str(e)) 3120 3121 # Add productions to the grammar 3122 for funcname, gram in pinfo.grammar: 3123 file, line, prodname, syms = gram 3124 try: 3125 grammar.add_production(prodname,syms,funcname,file,line) 3126 except GrammarError: 3127 e = sys.exc_info()[1] 3128 errorlog.error("%s",str(e)) 3129 errors = 1 3130 3131 # Set the grammar start symbols 3132 try: 3133 if start is None: 3134 grammar.set_start(pinfo.start) 3135 else: 3136 grammar.set_start(start) 3137 except GrammarError: 3138 e = sys.exc_info()[1] 3139 errorlog.error(str(e)) 3140 errors = 1 3141 3142 if errors: 3143 raise YaccError("Unable to build parser") 3144 3145 # Verify the grammar structure 3146 undefined_symbols = 
grammar.undefined_symbols() 3147 for sym, prod in undefined_symbols: 3148 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) 3149 errors = 1 3150 3151 unused_terminals = grammar.unused_terminals() 3152 if unused_terminals: 3153 debuglog.info("") 3154 debuglog.info("Unused terminals:") 3155 debuglog.info("") 3156 for term in unused_terminals: 3157 errorlog.warning("Token '%s' defined, but not used", term) 3158 debuglog.info(" %s", term) 3159 3160 # Print out all productions to the debug log 3161 if debug: 3162 debuglog.info("") 3163 debuglog.info("Grammar") 3164 debuglog.info("") 3165 for n,p in enumerate(grammar.Productions): 3166 debuglog.info("Rule %-5d %s", n, p) 3167 3168 # Find unused non-terminals 3169 unused_rules = grammar.unused_rules() 3170 for prod in unused_rules: 3171 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) 3172 3173 if len(unused_terminals) == 1: 3174 errorlog.warning("There is 1 unused token") 3175 if len(unused_terminals) > 1: 3176 errorlog.warning("There are %d unused tokens", len(unused_terminals)) 3177 3178 if len(unused_rules) == 1: 3179 errorlog.warning("There is 1 unused rule") 3180 if len(unused_rules) > 1: 3181 errorlog.warning("There are %d unused rules", len(unused_rules)) 3182 3183 if debug: 3184 debuglog.info("") 3185 debuglog.info("Terminals, with rules where they appear") 3186 debuglog.info("") 3187 terms = list(grammar.Terminals) 3188 terms.sort() 3189 for term in terms: 3190 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) 3191 3192 debuglog.info("") 3193 debuglog.info("Nonterminals, with rules where they appear") 3194 debuglog.info("") 3195 nonterms = list(grammar.Nonterminals) 3196 nonterms.sort() 3197 for nonterm in nonterms: 3198 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) 3199 debuglog.info("") 3200 3201 if check_recursion: 3202 
unreachable = grammar.find_unreachable() 3203 for u in unreachable: 3204 errorlog.warning("Symbol '%s' is unreachable",u) 3205 3206 infinite = grammar.infinite_cycles() 3207 for inf in infinite: 3208 errorlog.error("Infinite recursion detected for symbol '%s'", inf) 3209 errors = 1 3210 3211 unused_prec = grammar.unused_precedence() 3212 for term, assoc in unused_prec: 3213 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) 3214 errors = 1 3215 3216 if errors: 3217 raise YaccError("Unable to build parser") 3218 3219 # Run the LRGeneratedTable on the grammar 3220 if debug: 3221 errorlog.debug("Generating %s tables", method) 3222 3223 lr = LRGeneratedTable(grammar,method,debuglog) 3224 3225 if debug: 3226 num_sr = len(lr.sr_conflicts) 3227 3228 # Report shift/reduce and reduce/reduce conflicts 3229 if num_sr == 1: 3230 errorlog.warning("1 shift/reduce conflict") 3231 elif num_sr > 1: 3232 errorlog.warning("%d shift/reduce conflicts", num_sr) 3233 3234 num_rr = len(lr.rr_conflicts) 3235 if num_rr == 1: 3236 errorlog.warning("1 reduce/reduce conflict") 3237 elif num_rr > 1: 3238 errorlog.warning("%d reduce/reduce conflicts", num_rr) 3239 3240 # Write out conflicts to the output file 3241 if debug and (lr.sr_conflicts or lr.rr_conflicts): 3242 debuglog.warning("") 3243 debuglog.warning("Conflicts:") 3244 debuglog.warning("") 3245 3246 for state, tok, resolution in lr.sr_conflicts: 3247 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) 3248 3249 already_reported = {} 3250 for state, rule, rejected in lr.rr_conflicts: 3251 if (state,id(rule),id(rejected)) in already_reported: 3252 continue 3253 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3254 debuglog.warning("rejected rule (%s) in state %d", rejected,state) 3255 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) 3256 errorlog.warning("rejected rule 
(%s) in state %d", rejected, state) 3257 already_reported[state,id(rule),id(rejected)] = 1 3258 3259 warned_never = [] 3260 for state, rule, rejected in lr.rr_conflicts: 3261 if not rejected.reduced and (rejected not in warned_never): 3262 debuglog.warning("Rule (%s) is never reduced", rejected) 3263 errorlog.warning("Rule (%s) is never reduced", rejected) 3264 warned_never.append(rejected) 3265 3266 # Write the table file if requested 3267 if write_tables: 3268 lr.write_table(tabmodule,outputdir,signature) 3269 3270 # Write a pickled version of the tables 3271 if picklefile: 3272 lr.pickle_table(picklefile,signature) 3273 3274 # Build the parser 3275 lr.bind_callables(pinfo.pdict) 3276 parser = LRParser(lr,pinfo.error_func) 3277 3278 parse = parser.parse 3279 return parser 3280