xref: /aosp_15_r20/external/libchrome/third_party/ply/yacc.py (revision 635a864187cb8b6c713ff48b7e790a6b21769273)
1# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2011,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13#   this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15#   this list of conditions and the following disclaimer in the documentation
16#   and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18#   endorse or promote products derived from this software without
19#  specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33#
34# This implements an LR parser that is constructed from grammar rules defined
35# as Python functions. The grammar is specified by supplying the BNF inside
36# Python documentation strings.  The inspiration for this technique was borrowed
37# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
38# Spark and the GNU bison utility.
39#
40# The current implementation is only somewhat object-oriented. The
41# LR parser itself is defined in terms of an object (which allows multiple
42# parsers to co-exist).  However, most of the variables used during table
43# construction are defined in terms of global variables.  Users shouldn't
44# notice unless they are trying to define multiple parsers at the same
45# time using threads (in which case they should have their head examined).
46#
47# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
48# support was originally implemented by Elias Ioup ([email protected]),
49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
51# by the more efficient DeRemer and Pennello algorithm.
52#
53# :::::::: WARNING :::::::
54#
55# Construction of LR parsing tables is fairly complicated and expensive.
56# To make this module run fast, a *LOT* of work has been put into
57# optimization---often at the expense of readability and what some might
58# consider to be good Python "coding style."   Modify the code at your
59# own risk!
60# ----------------------------------------------------------------------------
61
62__version__    = "3.4"
63__tabversion__ = "3.2"       # Table version
64
65#-----------------------------------------------------------------------------
66#                     === User configurable parameters ===
67#
68# Change these to modify the default behavior of yacc (if you wish)
69#-----------------------------------------------------------------------------
70
71yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
72                               # a 'parser.out' file in the current directory
73
74debug_file  = 'parser.out'     # Default name of the debugging file
75tab_module  = 'parsetab'       # Default name of the table module
76default_lr  = 'LALR'           # Default LR table generation method
77
78error_count = 3                # Number of symbols that must be shifted to leave recovery mode
79
80yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
81                               # implementations of certain functions.
82
83resultlimit = 40               # Size limit of results when running in debug mode.
84
85pickle_protocol = 0            # Protocol to use when writing pickle files
86
87import re, types, sys, os.path
88
89# Compatibility function for python 2.6/3.0
90if sys.version_info[0] < 3:
91    def func_code(f):
92        return f.func_code
93else:
94    def func_code(f):
95        return f.__code__
96
97# Compatibility
98try:
99    MAXINT = sys.maxint
100except AttributeError:
101    MAXINT = sys.maxsize
102
103# Python 2.x/3.0 compatibility.
104def load_ply_lex():
105    if sys.version_info[0] < 3:
106        import lex
107    else:
108        import ply.lex as lex
109    return lex
110
111# This object is a stand-in for a logging object created by the
112# logging module.   PLY will use this by default to create things
113# such as the parser.out file.  If a user wants more detailed
114# information, they can create their own logging object and pass
115# it into PLY.
116
117class PlyLogger(object):
118    def __init__(self,f):
119        self.f = f
120    def debug(self,msg,*args,**kwargs):
121        self.f.write((msg % args) + "\n")
122    info     = debug
123
124    def warning(self,msg,*args,**kwargs):
125        self.f.write("WARNING: "+ (msg % args) + "\n")
126
127    def error(self,msg,*args,**kwargs):
128        self.f.write("ERROR: " + (msg % args) + "\n")
129
130    critical = debug
131
132# Null logger is used when no output is generated. Does nothing.
133class NullLogger(object):
134    def __getattribute__(self,name):
135        return self
136    def __call__(self,*args,**kwargs):
137        return self
138
139# Exception raised for yacc-related errors
140class YaccError(Exception):   pass
141
142# Format the result message that the parser produces when running in debug mode.
143def format_result(r):
144    repr_str = repr(r)
145    if '\n' in repr_str: repr_str = repr(repr_str)
146    if len(repr_str) > resultlimit:
147        repr_str = repr_str[:resultlimit]+" ..."
148    result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
149    return result
150
151
152# Format stack entries when the parser is running in debug mode
153def format_stack_entry(r):
154    repr_str = repr(r)
155    if '\n' in repr_str: repr_str = repr(repr_str)
156    if len(repr_str) < 16:
157        return repr_str
158    else:
159        return "<%s @ 0x%x>" % (type(r).__name__,id(r))
160
161#-----------------------------------------------------------------------------
162#                        ===  LR Parsing Engine ===
163#
164# The following classes are used for the LR parser itself.  These are not
165# used during table construction and are independent of the actual LR
166# table generation algorithm
167#-----------------------------------------------------------------------------
168
169# This class is used to hold non-terminal grammar symbols during parsing.
170# It normally has the following attributes set:
171#        .type       = Grammar symbol type
172#        .value      = Symbol value
173#        .lineno     = Starting line number
174#        .endlineno  = Ending line number (optional, set automatically)
175#        .lexpos     = Starting lex position
176#        .endlexpos  = Ending lex position (optional, set automatically)
177
178class YaccSymbol:
179    def __str__(self):    return self.type
180    def __repr__(self):   return str(self)
181
182# This class is a wrapper around the objects actually passed to each
183# grammar rule.   Index lookup and assignment actually assign the
184# .value attribute of the underlying YaccSymbol object.
185# The lineno() method returns the line number of a given
186# item (or 0 if not defined).   The linespan() method returns
187# a tuple of (startline,endline) representing the range of lines
188# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
189# representing the range of positional information for a symbol.
190
191class YaccProduction:
192    def __init__(self,s,stack=None):
193        self.slice = s
194        self.stack = stack
195        self.lexer = None
196        self.parser= None
197    def __getitem__(self,n):
198        if type(n) is slice:
199            return [s.value for s in self.slice[n]]
200        else:
201            if n >= 0: return self.slice[n].value
202            else: return self.stack[n].value
203
204    def __setitem__(self,n,v):
205        self.slice[n].value = v
206
207    def __getslice__(self,i,j):
208        return [s.value for s in self.slice[i:j]]
209
210    def __len__(self):
211        return len(self.slice)
212
213    def lineno(self,n):
214        return getattr(self.slice[n],"lineno",0)
215
216    def set_lineno(self,n,lineno):
217        self.slice[n].lineno = lineno
218
219    def linespan(self,n):
220        startline = getattr(self.slice[n],"lineno",0)
221        endline = getattr(self.slice[n],"endlineno",startline)
222        return startline,endline
223
224    def lexpos(self,n):
225        return getattr(self.slice[n],"lexpos",0)
226
227    def lexspan(self,n):
228        startpos = getattr(self.slice[n],"lexpos",0)
229        endpos = getattr(self.slice[n],"endlexpos",startpos)
230        return startpos,endpos
231
232    def error(self):
233       raise SyntaxError
234
235
236# -----------------------------------------------------------------------------
237#                               == LRParser ==
238#
239# The LR Parsing engine.
240# -----------------------------------------------------------------------------
241
242class LRParser:
243    def __init__(self,lrtab,errorf):
244        self.productions = lrtab.lr_productions
245        self.action      = lrtab.lr_action
246        self.goto        = lrtab.lr_goto
247        self.errorfunc   = errorf
248
249    def errok(self):
250        self.errorok     = 1
251
252    def restart(self):
253        del self.statestack[:]
254        del self.symstack[:]
255        sym = YaccSymbol()
256        sym.type = '$end'
257        self.symstack.append(sym)
258        self.statestack.append(0)
259
260    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
261        if debug or yaccdevel:
262            if isinstance(debug,int):
263                debug = PlyLogger(sys.stderr)
264            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
265        elif tracking:
266            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
267        else:
268            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
269
270
271    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
272    # parsedebug().
273    #
274    # This is the debugging enabled version of parse().  All changes made to the
275    # parsing engine should be made here.   For the non-debugging version,
276    # copy this code to a method parseopt() and delete all of the sections
277    # enclosed in:
278    #
279    #      #--! DEBUG
280    #      statements
281    #      #--! DEBUG
282    #
283    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
284
285    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
286        lookahead = None                 # Current lookahead symbol
287        lookaheadstack = [ ]             # Stack of lookahead symbols
288        actions = self.action            # Local reference to action table (to avoid lookup on self.)
289        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
290        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
291        pslice  = YaccProduction(None)   # Production object passed to grammar rules
292        errorcount = 0                   # Used during error recovery
293
294        # --! DEBUG
295        debug.info("PLY: PARSE DEBUG START")
296        # --! DEBUG
297
298        # If no lexer was given, we will try to use the lex module
299        if not lexer:
300            lex = load_ply_lex()
301            lexer = lex.lexer
302
303        # Set up the lexer and parser objects on pslice
304        pslice.lexer = lexer
305        pslice.parser = self
306
307        # If input was supplied, pass to lexer
308        if input is not None:
309            lexer.input(input)
310
311        if tokenfunc is None:
312           # Tokenize function
313           get_token = lexer.token
314        else:
315           get_token = tokenfunc
316
317        # Set up the state and symbol stacks
318
319        statestack = [ ]                # Stack of parsing states
320        self.statestack = statestack
321        symstack   = [ ]                # Stack of grammar symbols
322        self.symstack = symstack
323
324        pslice.stack = symstack         # Put in the production
325        errtoken   = None               # Err token
326
327        # The start state is assumed to be (0,$end)
328
329        statestack.append(0)
330        sym = YaccSymbol()
331        sym.type = "$end"
332        symstack.append(sym)
333        state = 0
334        while 1:
335            # Get the next symbol on the input.  If a lookahead symbol
336            # is already set, we just use that. Otherwise, we'll pull
337            # the next token off of the lookaheadstack or from the lexer
338
339            # --! DEBUG
340            debug.debug('')
341            debug.debug('State  : %s', state)
342            # --! DEBUG
343
344            if not lookahead:
345                if not lookaheadstack:
346                    lookahead = get_token()     # Get the next token
347                else:
348                    lookahead = lookaheadstack.pop()
349                if not lookahead:
350                    lookahead = YaccSymbol()
351                    lookahead.type = "$end"
352
353            # --! DEBUG
354            debug.debug('Stack  : %s',
355                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
356            # --! DEBUG
357
358            # Check the action table
359            ltype = lookahead.type
360            t = actions[state].get(ltype)
361
362            if t is not None:
363                if t > 0:
364                    # shift a symbol on the stack
365                    statestack.append(t)
366                    state = t
367
368                    # --! DEBUG
369                    debug.debug("Action : Shift and goto state %s", t)
370                    # --! DEBUG
371
372                    symstack.append(lookahead)
373                    lookahead = None
374
375                    # Decrease error count on successful shift
376                    if errorcount: errorcount -=1
377                    continue
378
379                if t < 0:
380                    # reduce a symbol on the stack, emit a production
381                    p = prod[-t]
382                    pname = p.name
383                    plen  = p.len
384
385                    # Get production function
386                    sym = YaccSymbol()
387                    sym.type = pname       # Production name
388                    sym.value = None
389
390                    # --! DEBUG
391                    if plen:
392                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
393                    else:
394                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
395
396                    # --! DEBUG
397
398                    if plen:
399                        targ = symstack[-plen-1:]
400                        targ[0] = sym
401
402                        # --! TRACKING
403                        if tracking:
404                           t1 = targ[1]
405                           sym.lineno = t1.lineno
406                           sym.lexpos = t1.lexpos
407                           t1 = targ[-1]
408                           sym.endlineno = getattr(t1,"endlineno",t1.lineno)
409                           sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
410
411                        # --! TRACKING
412
413                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
414                        # The code enclosed in this section is duplicated
415                        # below as a performance optimization.  Make sure
416                        # changes get made in both locations.
417
418                        pslice.slice = targ
419
420                        try:
421                            # Call the grammar rule with our special slice object
422                            del symstack[-plen:]
423                            del statestack[-plen:]
424                            p.callable(pslice)
425                            # --! DEBUG
426                            debug.info("Result : %s", format_result(pslice[0]))
427                            # --! DEBUG
428                            symstack.append(sym)
429                            state = goto[statestack[-1]][pname]
430                            statestack.append(state)
431                        except SyntaxError:
432                            # If an error was set. Enter error recovery state
433                            lookaheadstack.append(lookahead)
434                            symstack.pop()
435                            statestack.pop()
436                            state = statestack[-1]
437                            sym.type = 'error'
438                            lookahead = sym
439                            errorcount = error_count
440                            self.errorok = 0
441                        continue
442                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
443
444                    else:
445
446                        # --! TRACKING
447                        if tracking:
448                           sym.lineno = lexer.lineno
449                           sym.lexpos = lexer.lexpos
450                        # --! TRACKING
451
452                        targ = [ sym ]
453
454                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
455                        # The code enclosed in this section is duplicated
456                        # above as a performance optimization.  Make sure
457                        # changes get made in both locations.
458
459                        pslice.slice = targ
460
461                        try:
462                            # Call the grammar rule with our special slice object
463                            p.callable(pslice)
464                            # --! DEBUG
465                            debug.info("Result : %s", format_result(pslice[0]))
466                            # --! DEBUG
467                            symstack.append(sym)
468                            state = goto[statestack[-1]][pname]
469                            statestack.append(state)
470                        except SyntaxError:
471                            # If an error was set. Enter error recovery state
472                            lookaheadstack.append(lookahead)
473                            symstack.pop()
474                            statestack.pop()
475                            state = statestack[-1]
476                            sym.type = 'error'
477                            lookahead = sym
478                            errorcount = error_count
479                            self.errorok = 0
480                        continue
481                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
482
483                if t == 0:
484                    n = symstack[-1]
485                    result = getattr(n,"value",None)
486                    # --! DEBUG
487                    debug.info("Done   : Returning %s", format_result(result))
488                    debug.info("PLY: PARSE DEBUG END")
489                    # --! DEBUG
490                    return result
491
492            if t == None:
493
494                # --! DEBUG
495                debug.error('Error  : %s',
496                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
497                # --! DEBUG
498
499                # We have some kind of parsing error here.  To handle
500                # this, we are going to push the current token onto
501                # the tokenstack and replace it with an 'error' token.
502                # If there are any synchronization rules, they may
503                # catch it.
504                #
505                # In addition to pushing the error token, we call call
506                # the user defined p_error() function if this is the
507                # first syntax error.  This function is only called if
508                # errorcount == 0.
509                if errorcount == 0 or self.errorok:
510                    errorcount = error_count
511                    self.errorok = 0
512                    errtoken = lookahead
513                    if errtoken.type == "$end":
514                        errtoken = None               # End of file!
515                    if self.errorfunc:
516                        global errok,token,restart
517                        errok = self.errok        # Set some special functions available in error recovery
518                        token = get_token
519                        restart = self.restart
520                        if errtoken and not hasattr(errtoken,'lexer'):
521                            errtoken.lexer = lexer
522                        tok = self.errorfunc(errtoken)
523                        del errok, token, restart   # Delete special functions
524
525                        if self.errorok:
526                            # User must have done some kind of panic
527                            # mode recovery on their own.  The
528                            # returned token is the next lookahead
529                            lookahead = tok
530                            errtoken = None
531                            continue
532                    else:
533                        if errtoken:
534                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
535                            else: lineno = 0
536                            if lineno:
537                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
538                            else:
539                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
540                        else:
541                            sys.stderr.write("yacc: Parse error in input. EOF\n")
542                            return
543
544                else:
545                    errorcount = error_count
546
547                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
548                # entire parse has been rolled back and we're completely hosed.   The token is
549                # discarded and we just keep going.
550
551                if len(statestack) <= 1 and lookahead.type != "$end":
552                    lookahead = None
553                    errtoken = None
554                    state = 0
555                    # Nuke the pushback stack
556                    del lookaheadstack[:]
557                    continue
558
559                # case 2: the statestack has a couple of entries on it, but we're
560                # at the end of the file. nuke the top entry and generate an error token
561
562                # Start nuking entries on the stack
563                if lookahead.type == "$end":
564                    # Whoa. We're really hosed here. Bail out
565                    return
566
567                if lookahead.type != 'error':
568                    sym = symstack[-1]
569                    if sym.type == 'error':
570                        # Hmmm. Error is on top of stack, we'll just nuke input
571                        # symbol and continue
572                        lookahead = None
573                        continue
574                    t = YaccSymbol()
575                    t.type = 'error'
576                    if hasattr(lookahead,"lineno"):
577                        t.lineno = lookahead.lineno
578                    t.value = lookahead
579                    lookaheadstack.append(lookahead)
580                    lookahead = t
581                else:
582                    symstack.pop()
583                    statestack.pop()
584                    state = statestack[-1]       # Potential bug fix
585
586                continue
587
588            # Call an error function here
589            raise RuntimeError("yacc: internal parser error!!!\n")
590
591    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
592    # parseopt().
593    #
594    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
595    # Edit the debug version above, then copy any modifications to the method
596    # below while removing #--! DEBUG sections.
597    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
598
599
600    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
601        lookahead = None                 # Current lookahead symbol
602        lookaheadstack = [ ]             # Stack of lookahead symbols
603        actions = self.action            # Local reference to action table (to avoid lookup on self.)
604        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
605        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
606        pslice  = YaccProduction(None)   # Production object passed to grammar rules
607        errorcount = 0                   # Used during error recovery
608
609        # If no lexer was given, we will try to use the lex module
610        if not lexer:
611            lex = load_ply_lex()
612            lexer = lex.lexer
613
614        # Set up the lexer and parser objects on pslice
615        pslice.lexer = lexer
616        pslice.parser = self
617
618        # If input was supplied, pass to lexer
619        if input is not None:
620            lexer.input(input)
621
622        if tokenfunc is None:
623           # Tokenize function
624           get_token = lexer.token
625        else:
626           get_token = tokenfunc
627
628        # Set up the state and symbol stacks
629
630        statestack = [ ]                # Stack of parsing states
631        self.statestack = statestack
632        symstack   = [ ]                # Stack of grammar symbols
633        self.symstack = symstack
634
635        pslice.stack = symstack         # Put in the production
636        errtoken   = None               # Err token
637
638        # The start state is assumed to be (0,$end)
639
640        statestack.append(0)
641        sym = YaccSymbol()
642        sym.type = '$end'
643        symstack.append(sym)
644        state = 0
645        while 1:
646            # Get the next symbol on the input.  If a lookahead symbol
647            # is already set, we just use that. Otherwise, we'll pull
648            # the next token off of the lookaheadstack or from the lexer
649
650            if not lookahead:
651                if not lookaheadstack:
652                    lookahead = get_token()     # Get the next token
653                else:
654                    lookahead = lookaheadstack.pop()
655                if not lookahead:
656                    lookahead = YaccSymbol()
657                    lookahead.type = '$end'
658
659            # Check the action table
660            ltype = lookahead.type
661            t = actions[state].get(ltype)
662
663            if t is not None:
664                if t > 0:
665                    # shift a symbol on the stack
666                    statestack.append(t)
667                    state = t
668
669                    symstack.append(lookahead)
670                    lookahead = None
671
672                    # Decrease error count on successful shift
673                    if errorcount: errorcount -=1
674                    continue
675
676                if t < 0:
677                    # reduce a symbol on the stack, emit a production
678                    p = prod[-t]
679                    pname = p.name
680                    plen  = p.len
681
682                    # Get production function
683                    sym = YaccSymbol()
684                    sym.type = pname       # Production name
685                    sym.value = None
686
687                    if plen:
688                        targ = symstack[-plen-1:]
689                        targ[0] = sym
690
691                        # --! TRACKING
692                        if tracking:
693                           t1 = targ[1]
694                           sym.lineno = t1.lineno
695                           sym.lexpos = t1.lexpos
696                           t1 = targ[-1]
697                           sym.endlineno = getattr(t1,"endlineno",t1.lineno)
698                           sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
699
700                        # --! TRACKING
701
702                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
703                        # The code enclosed in this section is duplicated
704                        # below as a performance optimization.  Make sure
705                        # changes get made in both locations.
706
707                        pslice.slice = targ
708
709                        try:
710                            # Call the grammar rule with our special slice object
711                            del symstack[-plen:]
712                            del statestack[-plen:]
713                            p.callable(pslice)
714                            symstack.append(sym)
715                            state = goto[statestack[-1]][pname]
716                            statestack.append(state)
717                        except SyntaxError:
718                            # If an error was set. Enter error recovery state
719                            lookaheadstack.append(lookahead)
720                            symstack.pop()
721                            statestack.pop()
722                            state = statestack[-1]
723                            sym.type = 'error'
724                            lookahead = sym
725                            errorcount = error_count
726                            self.errorok = 0
727                        continue
728                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
729
730                    else:
731
732                        # --! TRACKING
733                        if tracking:
734                           sym.lineno = lexer.lineno
735                           sym.lexpos = lexer.lexpos
736                        # --! TRACKING
737
738                        targ = [ sym ]
739
740                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
741                        # The code enclosed in this section is duplicated
742                        # above as a performance optimization.  Make sure
743                        # changes get made in both locations.
744
745                        pslice.slice = targ
746
747                        try:
748                            # Call the grammar rule with our special slice object
749                            p.callable(pslice)
750                            symstack.append(sym)
751                            state = goto[statestack[-1]][pname]
752                            statestack.append(state)
753                        except SyntaxError:
754                            # If an error was set. Enter error recovery state
755                            lookaheadstack.append(lookahead)
756                            symstack.pop()
757                            statestack.pop()
758                            state = statestack[-1]
759                            sym.type = 'error'
760                            lookahead = sym
761                            errorcount = error_count
762                            self.errorok = 0
763                        continue
764                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
765
766                if t == 0:
767                    n = symstack[-1]
768                    return getattr(n,"value",None)
769
770            if t == None:
771
772                # We have some kind of parsing error here.  To handle
773                # this, we are going to push the current token onto
774                # the tokenstack and replace it with an 'error' token.
775                # If there are any synchronization rules, they may
776                # catch it.
777                #
                # In addition to pushing the error token, we call
779                # the user defined p_error() function if this is the
780                # first syntax error.  This function is only called if
781                # errorcount == 0.
782                if errorcount == 0 or self.errorok:
783                    errorcount = error_count
784                    self.errorok = 0
785                    errtoken = lookahead
786                    if errtoken.type == '$end':
787                        errtoken = None               # End of file!
788                    if self.errorfunc:
789                        global errok,token,restart
790                        errok = self.errok        # Set some special functions available in error recovery
791                        token = get_token
792                        restart = self.restart
793                        if errtoken and not hasattr(errtoken,'lexer'):
794                            errtoken.lexer = lexer
795                        tok = self.errorfunc(errtoken)
796                        del errok, token, restart   # Delete special functions
797
798                        if self.errorok:
799                            # User must have done some kind of panic
800                            # mode recovery on their own.  The
801                            # returned token is the next lookahead
802                            lookahead = tok
803                            errtoken = None
804                            continue
805                    else:
806                        if errtoken:
807                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
808                            else: lineno = 0
809                            if lineno:
810                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
811                            else:
812                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
813                        else:
814                            sys.stderr.write("yacc: Parse error in input. EOF\n")
815                            return
816
817                else:
818                    errorcount = error_count
819
820                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
821                # entire parse has been rolled back and we're completely hosed.   The token is
822                # discarded and we just keep going.
823
824                if len(statestack) <= 1 and lookahead.type != '$end':
825                    lookahead = None
826                    errtoken = None
827                    state = 0
828                    # Nuke the pushback stack
829                    del lookaheadstack[:]
830                    continue
831
832                # case 2: the statestack has a couple of entries on it, but we're
833                # at the end of the file. nuke the top entry and generate an error token
834
835                # Start nuking entries on the stack
836                if lookahead.type == '$end':
837                    # Whoa. We're really hosed here. Bail out
838                    return
839
840                if lookahead.type != 'error':
841                    sym = symstack[-1]
842                    if sym.type == 'error':
843                        # Hmmm. Error is on top of stack, we'll just nuke input
844                        # symbol and continue
845                        lookahead = None
846                        continue
847                    t = YaccSymbol()
848                    t.type = 'error'
849                    if hasattr(lookahead,"lineno"):
850                        t.lineno = lookahead.lineno
851                    t.value = lookahead
852                    lookaheadstack.append(lookahead)
853                    lookahead = t
854                else:
855                    symstack.pop()
856                    statestack.pop()
857                    state = statestack[-1]       # Potential bug fix
858
859                continue
860
861            # Call an error function here
862            raise RuntimeError("yacc: internal parser error!!!\n")
863
864    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
865    # parseopt_notrack().
866    #
867    # Optimized version of parseopt() with line number tracking removed.
868    # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
869    # code in the #--! TRACKING sections
870    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
871
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        """Optimized LR parsing engine with all line-number tracking removed.

        This is a machine-derived copy of parseopt(): identical except that
        the --! TRACKING sections have been deleted for speed.  Do not edit
        this code directly (see the banner comment above); make changes in
        the optimized version and re-derive this one.

        input     -- optional input string passed to lexer.input()
        lexer     -- token source; defaults to the module-level lex lexer
        debug     -- accepted for interface compatibility; unused here
        tracking  -- accepted for interface compatibility; unused here
        tokenfunc -- optional callable used in place of lexer.token

        Returns the semantic value of the start symbol on success, or None
        if the parse fails unrecoverably.
        """
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
           # Tokenize function
           get_token = lexer.token
        else:
           get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    # A false/None token marks the end of the input stream
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'

            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -=1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Non-empty production: grab the handle (plus the
                        # symbol it replaces) off the top of the stack
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty production: nothing is popped off the stacks
                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the start symbol's value is on top of the stack
                    n = symstack[-1]
                    return getattr(n,"value",None)

            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

            # Call an error function here
            raise RuntimeError("yacc: internal parser error!!!\n")
1118
1119# -----------------------------------------------------------------------------
1120#                          === Grammar Representation ===
1121#
1122# The following functions, classes, and variables are used to represent and
1123# manipulate the rules that make up a grammar.
1124# -----------------------------------------------------------------------------
1125
1126import re
1127
# Regular expression matching valid grammar-symbol names (letters, digits, '_' and '-')
1129_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1130
1131# -----------------------------------------------------------------------------
1132# class Production:
1133#
1134# This class stores the raw information about a single production or grammar rule.
1135# A grammar rule refers to a specification such as this:
1136#
1137#       expr : expr PLUS term
1138#
1139# Here are the basic attributes defined on all productions
1140#
1141#       name     - Name of the production.  For example 'expr'
1142#       prod     - A list of symbols on the right side ['expr','PLUS','term']
1143#       prec     - Production precedence level
1144#       number   - Production number.
1145#       func     - Function that executes on reduce
1146#       file     - File where production function is defined
1147#       lineno   - Line number where production function is defined
1148#
1149# The following attributes are defined or optional.
1150#
1151#       len       - Length of the production (number of symbols on right hand side)
1152#       usyms     - Set of unique symbols found in the production
1153# -----------------------------------------------------------------------------
1154
class Production(object):
    """A single grammar rule, such as  expr : expr PLUS term.

    Attributes:
        name     -- left-hand-side nonterminal, e.g. 'expr'
        prod     -- tuple of right-hand-side symbols, e.g. ('expr','PLUS','term')
        number   -- production number
        func     -- name of the Python function run on reduction (or None)
        callable -- the bound function itself, filled in by bind()
        file     -- file where the rule function is defined
        line     -- line number where the rule function is defined
        prec     -- precedence as an (assoc, level) tuple
        len      -- number of symbols on the right-hand side
        usyms    -- unique right-hand-side symbols, in first-seen order
    """
    reduced = 0

    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None          # Resolved later by bind()
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Cached length of the right-hand side
        self.len = len(self.prod)

        # Unique symbols on the right-hand side, preserving first-seen order
        seen = set()
        self.usyms = []
        for symbol in self.prod:
            if symbol not in seen:
                seen.add(symbol)
                self.usyms.append(symbol)

        # LR items for this production (built during table construction)
        self.lr_items = []
        self.lr_next = None

        # Human-readable form, e.g. "expr -> expr PLUS term"
        body = " ".join(self.prod) if self.prod else "<empty>"
        self.str = "%s -> %s" % (self.name, body)

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production(%s)" % (self.str,)

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # A Production is always truthy, even with an empty right-hand side
        # (Python 2 truth protocol).
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    def lr_item(self,n):
        """Return the LR item with the dot at position n, or None when n is
        past the end of the production."""
        if n > len(self.prod):
            return None
        item = LRItem(self,n)
        # Precompute the productions that can appear right after the dot.
        # NOTE: relies on a module-level Prodnames mapping (pre-existing hack).
        try:
            item.lr_after = Prodnames[item.prod[n+1]]
        except (IndexError,KeyError):
            item.lr_after = []
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    def bind(self,pdict):
        """Resolve self.func (a function name) to a callable found in pdict."""
        if self.func:
            self.callable = pdict[self.func]
1223
1224# This class serves as a minimal standin for Production objects when
1225# reading table data from files.   It only contains information
1226# actually used by the LR parsing engine, plus some additional
1227# debugging information.
class MiniProduction(object):
    """Minimal stand-in for Production used when parser tables are read
    back from files.  Carries only what the LR engine needs, plus a
    printable form for debugging."""

    def __init__(self,str,name,len,func,file,line):
        # Note: parameter names deliberately shadow builtins to match the
        # field order used by the table writer.
        self.str      = str        # Printable form, e.g. "expr -> expr PLUS term"
        self.name     = name       # Left-hand-side nonterminal
        self.len      = len        # Number of right-hand-side symbols
        self.func     = func       # Name of the reduction function (or None)
        self.callable = None       # Bound function, filled in by bind()
        self.file     = file       # Defining file (debug info)
        self.line     = line       # Defining line (debug info)

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % (self.str,)

    def bind(self,pdict):
        """Resolve self.func (a function name) to a callable found in pdict."""
        if self.func:
            self.callable = pdict[self.func]
1246
1247
1248# -----------------------------------------------------------------------------
1249# class LRItem
1250#
1251# This class represents a specific stage of parsing a production rule.  For
1252# example:
1253#
1254#       expr : expr . PLUS term
1255#
1256# In the above, the "." represents the current location of the parse.  Here
1257# basic attributes:
1258#
1259#       name       - Name of the production.  For example 'expr'
1260#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1261#       number     - Production number.
1262#
1263#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
1264#                    then lr_next refers to 'expr -> expr PLUS . term'
1265#       lr_index   - LR item index (location of the ".") in the prod list.
1266#       lookaheads - LALR lookahead symbols for this item
1267#       len        - Length of the production (number of symbols on right hand side)
1268#       lr_after    - List of all productions that immediately follow
1269#       lr_before   - Grammar symbol immediately before
1270# -----------------------------------------------------------------------------
1271
class LRItem(object):
    """A production with a dot marking the current parse position, e.g.

        expr -> expr . PLUS term

    Built from a production p and a dot position n; the dot is inserted
    into the symbol tuple as the literal string "."."""

    def __init__(self,p,n):
        self.name       = p.name
        self.number     = p.number
        self.lr_index   = n          # Position of the dot in prod
        self.lookaheads = {}         # LALR lookaheads, filled in later
        dotted = list(p.prod)
        dotted.insert(n,".")
        self.prod  = tuple(dotted)
        self.len   = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return "%s -> <empty>" % self.name
        return "%s -> %s" % (self.name," ".join(self.prod))

    def __repr__(self):
        return "LRItem(%s)" % (self,)
1292        return "LRItem("+str(self)+")"
1293
1294# -----------------------------------------------------------------------------
1295# rightmost_terminal()
1296#
1297# Return the rightmost terminal from a list of symbols.  Used in add_production()
1298# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the rightmost symbol in `symbols` that is a terminal (i.e. a
    key of `terminals`), or None when no terminal is present.  Used by
    add_production() to derive a rule's default precedence."""
    for symbol in reversed(symbols):
        if symbol in terminals:
            return symbol
    return None
1306
1307# -----------------------------------------------------------------------------
1308#                           === GRAMMAR CLASS ===
1309#
1310# The following class represents the contents of the specified grammar along
1311# with various computed properties such as first sets, follow sets, LR items, etc.
1312# This data is used for critical parts of the table generation process later.
1313# -----------------------------------------------------------------------------
1314
class GrammarError(YaccError):
    """Raised when an error is detected in the grammar specification."""
    pass
1316
1317class Grammar(object):
1318    def __init__(self,terminals):
1319        self.Productions  = [None]  # A list of all of the productions.  The first
1320                                    # entry is always reserved for the purpose of
1321                                    # building an augmented grammar
1322
1323        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
1324                                    # productions of that nonterminal.
1325
1326        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
1327                                    # productions.
1328
1329        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
1330                                    # list of the rules where they are used.
1331
1332        for term in terminals:
1333            self.Terminals[term] = []
1334
1335        self.Terminals['error'] = []
1336
1337        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
1338                                    # of rule numbers where they are used.
1339
1340        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols
1341
1342        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols
1343
1344        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
1345                                    # form ('right',level) or ('nonassoc', level) or ('left',level)
1346
1347        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammer.
1348                                    # This is only used to provide error checking and to generate
1349                                    # a warning about unused precedence rules.
1350
1351        self.Start = None           # Starting symbol for the grammar
1352
1353
    def __len__(self):
        # Number of productions, including the reserved slot 0 (augmented rule).
        return len(self.Productions)

    def __getitem__(self,index):
        # Index directly into the production list; grammar[0] is the S' rule.
        return self.Productions[index]
1359
1360    # -----------------------------------------------------------------------------
1361    # set_precedence()
1362    #
1363    # Sets the precedence for a given terminal. assoc is the associativity such as
1364    # 'left','right', or 'nonassoc'.  level is a numeric level.
1365    #
1366    # -----------------------------------------------------------------------------
1367
1368    def set_precedence(self,term,assoc,level):
1369        assert self.Productions == [None],"Must call set_precedence() before add_production()"
1370        if term in self.Precedence:
1371            raise GrammarError("Precedence already specified for terminal '%s'" % term)
1372        if assoc not in ['left','right','nonassoc']:
1373            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1374        self.Precedence[term] = (assoc,level)
1375
1376    # -----------------------------------------------------------------------------
1377    # add_production()
1378    #
1379    # Given an action function, this function assembles a production rule and
1380    # computes its precedence level.
1381    #
1382    # The production rule is supplied as a list of symbols.   For example,
1383    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1384    # symbols ['expr','PLUS','term'].
1385    #
1386    # Precedence is determined by the precedence of the right-most non-terminal
1387    # or the precedence of a terminal specified by %prec.
1388    #
1389    # A variety of error checks are performed to make sure production symbols
1390    # are valid and that %prec is used correctly.
1391    # -----------------------------------------------------------------------------
1392
    def add_production(self,prodname,syms,func=None,file='',line=0):
        """Add one production rule to the grammar.

        prodname -- left-hand-side nonterminal name
        syms     -- list of right-hand-side symbols; may end with
                    ['%prec', TERMINAL] and may contain quoted one-character
                    literal tokens.  NOTE: mutated in place (literals are
                    unquoted, any trailing %prec pair is removed).
        func     -- name of the reduction function (or None)
        file     -- file where the rule is defined (for error messages)
        line     -- line where the rule is defined (for error messages)

        Raises GrammarError on any malformed rule; returns 0 on success.
        """

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                 try:
                     # NOTE(review): eval() is used to unquote the literal.
                     # Input here is the grammar author's rule text, not
                     # end-user data, but it is still arbitrary evaluation.
                     c = eval(s)
                     if (len(c) > 1):
                          raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                     if not c in self.Terminals:
                          self.Terminals[c] = []
                     syms[n] = c
                     continue
                 except SyntaxError:
                     pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname,None)
            if not prodprec:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
        return 0
1468
1469    # -----------------------------------------------------------------------------
1470    # set_start()
1471    #
1472    # Sets the starting symbol and creates the augmented grammar.  Production
1473    # rule 0 is S' -> start where start is the start symbol.
1474    # -----------------------------------------------------------------------------
1475
1476    def set_start(self,start=None):
1477        if not start:
1478            start = self.Productions[1].name
1479        if start not in self.Nonterminals:
1480            raise GrammarError("start symbol %s undefined" % start)
1481        self.Productions[0] = Production(0,"S'",[start])
1482        self.Nonterminals[start].append(0)
1483        self.Start = start
1484
1485    # -----------------------------------------------------------------------------
1486    # find_unreachable()
1487    #
1488    # Find all of the nonterminal symbols that can't be reached from the starting
1489    # symbol.  Returns a list of nonterminals that can't be reached.
1490    # -----------------------------------------------------------------------------
1491
1492    def find_unreachable(self):
1493
1494        # Mark all symbols that are reachable from a symbol s
1495        def mark_reachable_from(s):
1496            if reachable[s]:
1497                # We've already reached symbol s.
1498                return
1499            reachable[s] = 1
1500            for p in self.Prodnames.get(s,[]):
1501                for r in p.prod:
1502                    mark_reachable_from(r)
1503
1504        reachable   = { }
1505        for s in list(self.Terminals) + list(self.Nonterminals):
1506            reachable[s] = 0
1507
1508        mark_reachable_from( self.Productions[0].prod[0] )
1509
1510        return [s for s in list(self.Nonterminals)
1511                        if not reachable[s]]
1512
1513    # -----------------------------------------------------------------------------
1514    # infinite_cycles()
1515    #
1516    # This function looks at the various parsing rules and tries to detect
1517    # infinite recursion cycles (grammar rules where there is no possible way
1518    # to derive a string of only terminals).
1519    # -----------------------------------------------------------------------------
1520
    def infinite_cycles(self):
        """Detect grammar symbols involved in infinite recursion cycles.

        A symbol "terminates" if it can derive a string consisting only of
        terminals.  This runs a fixed-point computation: terminals terminate
        trivially; a nonterminal terminates once at least one of its
        productions consists entirely of terminating symbols.  Returns the
        list of symbols that never terminate, excluding used-but-undefined
        symbols (those are reported separately by undefined_symbols()).
        """
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1577
1578
1579    # -----------------------------------------------------------------------------
1580    # undefined_symbols()
1581    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
1584    # and prod is the production where the symbol was used.
1585    # -----------------------------------------------------------------------------
1586    def undefined_symbols(self):
1587        result = []
1588        for p in self.Productions:
1589            if not p: continue
1590
1591            for s in p.prod:
1592                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1593                    result.append((s,p))
1594        return result
1595
1596    # -----------------------------------------------------------------------------
1597    # unused_terminals()
1598    #
1599    # Find all terminals that were defined, but not used by the grammar.  Returns
1600    # a list of all symbols.
1601    # -----------------------------------------------------------------------------
1602    def unused_terminals(self):
1603        unused_tok = []
1604        for s,v in self.Terminals.items():
1605            if s != 'error' and not v:
1606                unused_tok.append(s)
1607
1608        return unused_tok
1609
1610    # ------------------------------------------------------------------------------
1611    # unused_rules()
1612    #
1613    # Find all grammar rules that were defined,  but not used (maybe not reachable)
1614    # Returns a list of productions.
1615    # ------------------------------------------------------------------------------
1616
1617    def unused_rules(self):
1618        unused_prod = []
1619        for s,v in self.Nonterminals.items():
1620            if not v:
1621                p = self.Prodnames[s][0]
1622                unused_prod.append(p)
1623        return unused_prod
1624
1625    # -----------------------------------------------------------------------------
1626    # unused_precedence()
1627    #
1628    # Returns a list of tuples (term,precedence) corresponding to precedence
1629    # rules that were never used by the grammar.  term is the name of the terminal
1630    # on which precedence was applied and precedence is a string such as 'left' or
1631    # 'right' corresponding to the type of precedence.
1632    # -----------------------------------------------------------------------------
1633
1634    def unused_precedence(self):
1635        unused = []
1636        for termname in self.Precedence:
1637            if not (termname in self.Terminals or termname in self.UsedPrecedence):
1638                unused.append((termname,self.Precedence[termname][0]))
1639
1640        return unused
1641
1642    # -------------------------------------------------------------------------
1643    # _first()
1644    #
1645    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1646    #
1647    # During execution of compute_first1, the result may be incomplete.
1648    # Afterward (e.g., when called from compute_follow()), it will be complete.
1649    # -------------------------------------------------------------------------
1650    def _first(self,beta):
1651
1652        # We are computing First(x1,x2,x3,...,xn)
1653        result = [ ]
1654        for x in beta:
1655            x_produces_empty = 0
1656
1657            # Add all the non-<empty> symbols of First[x] to the result.
1658            for f in self.First[x]:
1659                if f == '<empty>':
1660                    x_produces_empty = 1
1661                else:
1662                    if f not in result: result.append(f)
1663
1664            if x_produces_empty:
1665                # We have to consider the next x in beta,
1666                # i.e. stay in the loop.
1667                pass
1668            else:
1669                # We don't have to consider any further symbols in beta.
1670                break
1671        else:
1672            # There was no 'break' from the loop,
1673            # so x_produces_empty was true for all x in beta,
1674            # so beta produces empty as well.
1675            result.append('<empty>')
1676
1677        return result
1678
1679    # -------------------------------------------------------------------------
1680    # compute_first()
1681    #
1682    # Compute the value of FIRST1(X) for all symbols
1683    # -------------------------------------------------------------------------
1684    def compute_first(self):
1685        if self.First:
1686            return self.First
1687
1688        # Terminals:
1689        for t in self.Terminals:
1690            self.First[t] = [t]
1691
1692        self.First['$end'] = ['$end']
1693
1694        # Nonterminals:
1695
1696        # Initialize to the empty set:
1697        for n in self.Nonterminals:
1698            self.First[n] = []
1699
1700        # Then propagate symbols until no change:
1701        while 1:
1702            some_change = 0
1703            for n in self.Nonterminals:
1704                for p in self.Prodnames[n]:
1705                    for f in self._first(p.prod):
1706                        if f not in self.First[n]:
1707                            self.First[n].append( f )
1708                            some_change = 1
1709            if not some_change:
1710                break
1711
1712        return self.First
1713
1714    # ---------------------------------------------------------------------
1715    # compute_follow()
1716    #
1717    # Computes all of the follow sets for every non-terminal symbol.  The
1718    # follow set is the set of all symbols that might follow a given
1719    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
1720    # ---------------------------------------------------------------------
    def compute_follow(self,start=None):
        """Compute and cache FOLLOW(B) for every nonterminal B.

        FOLLOW(B) is the set of terminals that can appear immediately after B
        in some sentential form.  Uses the standard fixed-point algorithm
        (Dragon book, 2nd Ed. p. 189): for each production A -> alpha B beta,
        FIRST(beta) - {<empty>} goes into FOLLOW(B); if beta can derive empty
        (or is absent), FOLLOW(A) also goes into FOLLOW(B).
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize every nonterminal's follow set to empty.
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        # The start symbol is followed by end-of-input.
        self.Follow[start] = [ '$end' ]

        # Iterate to a fixed point, scanning every user production each pass.
        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Examine each right-hand-side position of the production
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # B is a nonterminal: add FIRST(beta) - {<empty>} to FOLLOW(B),
                        # where beta is everything after B in this production.
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # beta can derive empty (or B is rightmost):
                            # everything in FOLLOW(p.name) also follows B.
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
1763
1764
1765    # -----------------------------------------------------------------------------
1766    # build_lritems()
1767    #
1768    # This function walks the list of productions and builds a complete set of the
1769    # LR items.  The LR items are stored in two ways:  First, they are uniquely
1770    # numbered and placed in the list _lritems.  Second, a linked list of LR items
1771    # is built for each production.  For example:
1772    #
1773    #   E -> E PLUS E
1774    #
1775    # Creates the list
1776    #
1777    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1778    # -----------------------------------------------------------------------------
1779
    def build_lritems(self):
        """Build the complete set of LR items for every production.

        For each production p, creates one LRItem per dot position and links
        them through .lr_next (the production itself heads the chain), e.g.
        E -> E PLUS E yields
        [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E .].
        The full list is also stored on p.lr_items.  Each item precomputes
        lr_after (productions of the symbol right after the dot, [] if none)
        and lr_before (the symbol right before the dot, None at the start).
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                # One past the final dot position yields None, terminating the chain.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1806
1807# -----------------------------------------------------------------------------
1808#                            == Class LRTable ==
1809#
1810# This basic class represents a basic table of LR parsing information.
1811# Methods for generating the tables are not defined here.  They are defined
1812# in the derived class LRGeneratedTable.
1813# -----------------------------------------------------------------------------
1814
# Raised when a previously generated parser table (module or pickle file)
# was produced by an incompatible version of ply.
class VersionError(YaccError): pass
1816
class LRTable(object):
    """Basic container for LR parsing tables (action, goto, productions).

    Table construction lives in the derived class LRGeneratedTable; this base
    class only knows how to load previously generated tables, either from a
    parsetab module (read_table) or a pickle file (read_pickle).
    """
    def __init__(self):
        self.lr_action = None          # Action table
        self.lr_goto = None            # Goto table
        self.lr_productions = None     # List of MiniProduction objects
        self.lr_method = None          # Table construction method ('LALR'/'SLR')

    def read_table(self, module):
        """Load tables from a parsetab module object or importable module name.

        Raises VersionError if the tables were generated by an incompatible
        ply version.  Returns the table signature string.
        """
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                # Python 3's exec() cannot rebind a local variable, so import
                # into a scratch namespace and pull the module out of it.
                env = {}
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load tables from a pickle file written by the table generator.

        Raises VersionError on a table-version mismatch.  Returns the table
        signature.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        # Use a context manager so the file is closed even when VersionError
        # (or a pickle error) is raised mid-read; the previous code leaked
        # the handle on those paths.
        with open(filename, "rb") as in_f:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError("yacc table file version is out of date")
            self.lr_method = pickle.load(in_f)
            signature      = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto   = pickle.load(in_f)
            productions    = pickle.load(in_f)

        self.lr_productions = [MiniProduction(*p) for p in productions]
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
1876
1877# -----------------------------------------------------------------------------
1878#                           === LR Generator ===
1879#
1880# The following classes and functions are used to generate LR parsing tables on
1881# a grammar.
1882# -----------------------------------------------------------------------------
1883
1884# -----------------------------------------------------------------------------
1885# digraph()
1886# traverse()
1887#
1888# The following two functions are used to compute set valued functions
1889# of the form:
1890#
1891#     F(x) = F'(x) U U{F(y) | x R y}
1892#
1893# This is used to compute the values of Read() sets as well as FOLLOW sets
1894# in LALR(1) generation.
1895#
1896# Inputs:  X    - An input set
1897#          R    - A relation
1898#          FP   - Set-valued function
1899# ------------------------------------------------------------------------------
1900
def digraph(X,R,FP):
    """Compute the set-valued function F(x) = FP(x) U union{ F(y) | x R y }
    for every x in X, using the DeRemer/Pennello digraph algorithm.

    X is the input set, R maps x to its related elements, and FP gives the
    base set F'(x).  Returns the dict F.  The actual work is done by
    traverse(), which shares one traversal stack across all roots.
    """
    marks = {}
    for node in X:
        marks[node] = 0
    F = {}
    pending = []
    for node in X:
        if marks[node] == 0:
            traverse(node, marks, pending, F, X, R, FP)
    return F
1910
def traverse(x,N,stack,F,X,R,FP):
    """Depth-first traversal step of digraph().

    N holds traversal numbers (0 = unvisited), stack is the shared traversal
    stack, and F accumulates the result sets.  When x is found to be the root
    of a strongly connected component, every member of the component is
    popped, marked finished (MAXINT), and given x's final F set.
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
             traverse(y,N,stack,F,X,R,FP)
        # Propagate the lowest traversal number reachable from x.
        N[x] = min(N[x],N[y])
        # Merge F(y) into F(x).
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
       # x is the root of a strongly connected component: pop the whole
       # component, sharing x's F set among all of its members.
       N[stack[-1]] = MAXINT
       F[stack[-1]] = F[x]
       element = stack.pop()
       while element != x:
           N[stack[-1]] = MAXINT
           F[stack[-1]] = F[x]
           element = stack.pop()
1931           element = stack.pop()
1932
# Raised for errors encountered during LR table generation
# (e.g. an unsupported table-construction method).
class LALRError(YaccError): pass
1934
1935# -----------------------------------------------------------------------------
1936#                             == LRGeneratedTable ==
1937#
1938# This class implements the LR table generation algorithm.  There are no
1939# public methods except for write()
1940# -----------------------------------------------------------------------------
1941
1942class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        """Build the LR parsing tables for grammar.

        method is 'LALR' (default) or 'SLR'; anything else raises LALRError.
        log, if given, receives diagnostic output (defaults to a NullLogger).
        Construction runs immediately: LR items, FIRST/FOLLOW sets, and then
        the parse table itself.
        """
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Cache of closures

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0
        self.rr_conflict   = 0
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
1977
1978    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1979
    def lr0_closure(self,I):
        """Compute the LR(0) closure of I, a list of LR(0) items.

        Repeatedly adds the initial item of every production that can appear
        after a dot, until no more items can be added.  Uses a per-call
        generation counter (_add_count) stamped onto productions (lr0_added)
        to avoid adding the same production twice without clearing marks
        between calls.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            # Note: J grows while being iterated; newly appended items are
            # picked up on the next pass of the while loop.
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J
1997
1998    # Compute the LR(0) goto function goto(I,X) where I is a set
1999    # of LR(0) items and X is a grammar symbol.   This function is written
2000    # in a way that guarantees uniqueness of the generated goto sets
2001    # (i.e. the same goto set will never be returned as two different Python
2002    # objects).  With uniqueness, we can later do fast set comparisons using
2003    # id(obj) instead of element-wise comparison.
2004
    def lr0_goto(self,I,x):
        """Compute goto(I,x): the LR(0) item set reached from item set I on
        grammar symbol x.

        Guarantees uniqueness of the returned object: the same goto set is
        always returned as the same Python list, so later set comparisons can
        use id(obj) instead of element-wise comparison.  Results are cached
        both by (id(I), x) and in a trie keyed by the item objects traversed.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # s walks a trie rooted at symbol x; each level is keyed by the id of
        # the next item in the goto set.
        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        # '$end' marks the trie node holding the finished (closed) goto set.
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
2037
2038    # Compute the LR(0) sets of item function
    # Compute the LR(0) sets of item function
    def lr0_items(self):
        """Compute the canonical collection C of LR(0) item sets.

        Starts from the closure of the augmented start production and keeps
        applying lr0_goto() on every usable symbol until no new item sets
        appear.  Each set's index in C is recorded in lr0_cidhash keyed by
        the set's id().  Returns C.
        """
        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows while the while loop runs; new sets are processed in turn)
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g:  continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
2067
2068    # -----------------------------------------------------------------------------
2069    #                       ==== LALR(1) Parsing ====
2070    #
2071    # LALR(1) parsing is almost exactly the same as SLR except that instead of
2072    # relying upon Follow() sets when performing reductions, a more selective
2073    # lookahead set that incorporates the state of the LR(0) machine is utilized.
2074    # Thus, we mainly just have to focus on calculating the lookahead sets.
2075    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2079    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2080    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2081    #
2082    # Further details can also be found in:
2083    #
2084    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2085    #      McGraw-Hill Book Company, (1985).
2086    #
2087    # -----------------------------------------------------------------------------
2088
2089    # -----------------------------------------------------------------------------
2090    # compute_nullable_nonterminals()
2091    #
2092    # Creates a dictionary containing all of the non-terminals that might produce
2093    # an empty production.
2094    # -----------------------------------------------------------------------------
2095
2096    def compute_nullable_nonterminals(self):
2097        nullable = {}
2098        num_nullable = 0
2099        while 1:
2100           for p in self.grammar.Productions[1:]:
2101               if p.len == 0:
2102                    nullable[p.name] = 1
2103                    continue
2104               for t in p.prod:
2105                    if not t in nullable: break
2106               else:
2107                    nullable[p.name] = 1
2108           if len(nullable) == num_nullable: break
2109           num_nullable = len(nullable)
2110        return nullable
2111
2112    # -----------------------------------------------------------------------------
2113    # find_nonterminal_trans(C)
2114    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
2116    # transitions.    These are transitions in which a dot appears immediately before
2117    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
2118    # is the state number and N is the nonterminal symbol.
2119    #
2120    # The input C is the set of LR(0) items.
2121    # -----------------------------------------------------------------------------
2122
2123    def find_nonterminal_transitions(self,C):
2124         trans = []
2125         for state in range(len(C)):
2126             for p in C[state]:
2127                 if p.lr_index < p.len - 1:
2128                      t = (state,p.prod[p.lr_index+1])
2129                      if t[1] in self.grammar.Nonterminals:
2130                            if t not in trans: trans.append(t)
2131             state = state + 1
2132         return trans
2133
2134    # -----------------------------------------------------------------------------
2135    # dr_relation()
2136    #
2137    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2138    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2139    #
2140    # Returns a list of terminals.
2141    # -----------------------------------------------------------------------------
2142
2143    def dr_relation(self,C,trans,nullable):
2144        dr_set = { }
2145        state,N = trans
2146        terms = []
2147
2148        g = self.lr0_goto(C[state],N)
2149        for p in g:
2150           if p.lr_index < p.len - 1:
2151               a = p.prod[p.lr_index+1]
2152               if a in self.grammar.Terminals:
2153                   if a not in terms: terms.append(a)
2154
2155        # This extra bit is to handle the start state
2156        if state == 0 and N == self.grammar.Productions[0].prod[0]:
2157           terms.append('$end')
2158
2159        return terms
2160
2161    # -----------------------------------------------------------------------------
2162    # reads_relation()
2163    #
2164    # Computes the READS() relation (p,A) READS (t,C).
2165    # -----------------------------------------------------------------------------
2166
2167    def reads_relation(self,C, trans, empty):
2168        # Look for empty transitions
2169        rel = []
2170        state, N = trans
2171
2172        g = self.lr0_goto(C[state],N)
2173        j = self.lr0_cidhash.get(id(g),-1)
2174        for p in g:
2175            if p.lr_index < p.len - 1:
2176                 a = p.prod[p.lr_index + 1]
2177                 if a in empty:
2178                      rel.append((j,a))
2179
2180        return rel
2181
2182    # -----------------------------------------------------------------------------
2183    # compute_lookback_includes()
2184    #
2185    # Determines the lookback and includes relations
2186    #
2187    # LOOKBACK:
2188    #
2189    # This relation is determined by running the LR(0) state machine forward.
2190    # For example, starting with a production "N : . A B C", we run it forward
2191    # to obtain "N : A B C ."   We then build a relationship between this final
2192    # state and the starting state.   These relationships are stored in a dictionary
2193    # lookdict.
2194    #
2195    # INCLUDES:
2196    #
2197    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2198    #
2199    # This relation is used to determine non-terminal transitions that occur
2200    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2201    # if the following holds:
2202    #
2203    #       B -> LAT, where T -> epsilon and p' -L-> p
2204    #
2205    # L is essentially a prefix (which may be empty), T is a suffix that must be
2206    # able to derive an empty string.  State p' must lead to state p with the string L.
2207    #
2208    # -----------------------------------------------------------------------------
2209
    def compute_lookback_includes(self,C,trans,nullable):
        """Compute the LOOKBACK and INCLUDES relations for all nonterminal
        transitions.

        For each transition (state, N) in trans, runs every matching item of
        C[state] forward through the LR(0) machine to its final state to
        build LOOKBACK entries, recording INCLUDES entries along the way
        whenever the remainder of a production can derive empty.  Returns the
        pair (lookdict, includedict).
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                     lr_index = lr_index + 1
                     t = p.prod[lr_index]

                     # Check to see if this symbol and state are a non-terminal transition
                     if (j,t) in dtrans:
                           # Yes.  Okay, there is some chance that this is an includes relation
                           # the only way to know for certain is whether the rest of the
                           # production derives empty

                           li = lr_index + 1
                           while li < p.len:
                                if p.prod[li] in self.grammar.Terminals: break      # No forget it
                                if not p.prod[li] in nullable: break
                                li = li + 1
                           else:
                                # Appears to be a relation between (j,t) and (state,N)
                                includes.append((j,t))

                     g = self.lr0_goto(C[j],t)               # Go to next set
                     j = self.lr0_cidhash.get(id(g),-1)     # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                     if r.name != p.name: continue
                     if r.len != p.len:   continue
                     i = 0
                     # This loop is comparing a production ". A B C" with "A B C ."
                     while i < r.lr_index:
                          if r.prod[i] != p.prod[i+1]: break
                          i = i + 1
                     else:
                          lookb.append((j,r))
            for i in includes:
                 if not i in includedict: includedict[i] = []
                 includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
2271
2272    # -----------------------------------------------------------------------------
2273    # compute_read_sets()
2274    #
2275    # Given a set of LR(0) items, this function computes the read sets.
2276    #
2277    # Inputs:  C        =  Set of LR(0) items
2278    #          ntrans   = Set of nonterminal transitions
2279    #          nullable = Set of empty transitions
2280    #
2281    # Returns a set containing the read sets
2282    # -----------------------------------------------------------------------------
2283
2284    def compute_read_sets(self,C, ntrans, nullable):
2285        FP = lambda x: self.dr_relation(C,x,nullable)
2286        R =  lambda x: self.reads_relation(C,x,nullable)
2287        F = digraph(ntrans,R,FP)
2288        return F
2289
2290    # -----------------------------------------------------------------------------
2291    # compute_follow_sets()
2292    #
2293    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2294    # and an include set, this function computes the follow sets
2295    #
2296    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2297    #
2298    # Inputs:
2299    #            ntrans     = Set of nonterminal transitions
2300    #            readsets   = Readset (previously computed)
2301    #            inclsets   = Include sets (previously computed)
2302    #
2303    # Returns a set containing the follow sets
2304    # -----------------------------------------------------------------------------
2305
2306    def compute_follow_sets(self,ntrans,readsets,inclsets):
2307         FP = lambda x: readsets[x]
2308         R  = lambda x: inclsets.get(x,[])
2309         F = digraph(ntrans,R,FP)
2310         return F
2311
2312    # -----------------------------------------------------------------------------
2313    # add_lookaheads()
2314    #
2315    # Attaches the lookahead symbols to grammar rules.
2316    #
2317    # Inputs:    lookbacks         -  Set of lookback relations
2318    #            followset         -  Computed follow set
2319    #
2320    # This function directly attaches the lookaheads to productions contained
2321    # in the lookbacks set
2322    # -----------------------------------------------------------------------------
2323
2324    def add_lookaheads(self,lookbacks,followset):
2325        for trans,lb in lookbacks.items():
2326            # Loop over productions in lookback
2327            for state,p in lb:
2328                 if not state in p.lookaheads:
2329                      p.lookaheads[state] = []
2330                 f = followset.get(trans,[])
2331                 for a in f:
2332                      if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2333
2334    # -----------------------------------------------------------------------------
2335    # add_lalr_lookaheads()
2336    #
2337    # This function does all of the work of adding lookahead information for use
2338    # with LALR parsing
2339    # -----------------------------------------------------------------------------
2340
2341    def add_lalr_lookaheads(self,C):
2342        # Determine all of the nullable nonterminals
2343        nullable = self.compute_nullable_nonterminals()
2344
2345        # Find all non-terminal transitions
2346        trans = self.find_nonterminal_transitions(C)
2347
2348        # Compute read sets
2349        readsets = self.compute_read_sets(C,trans,nullable)
2350
2351        # Compute lookback/includes relations
2352        lookd, included = self.compute_lookback_includes(C,trans,nullable)
2353
2354        # Compute LALR FOLLOW sets
2355        followsets = self.compute_follow_sets(trans,readsets,included)
2356
2357        # Add all of the lookaheads
2358        self.add_lookaheads(lookd,followsets)
2359
2360    # -----------------------------------------------------------------------------
2361    # lr_parse_table()
2362    #
2363    # This function constructs the parse tables for SLR or LALR
2364    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Construct the SLR or LALR(1) parse tables for the grammar.

        Results are stored in self.lr_action and self.lr_goto, keyed by
        state number.  Action entries are encoded as:
            positive n -> shift and go to state n
            negative n -> reduce using rule -n
            0          -> accept
            None       -> explicit error (nonassoc precedence conflict)
        Shift/reduce and reduce/reduce conflicts are resolved using the
        precedence table and recorded in self.sr_conflicts and
        self.rr_conflicts; states and decisions are written to self.log.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = { }                 # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("")
            log.info("state %d", st)
            log.info("")
            for p in I:
                log.info("    (%d) %s", p.number, str(p))
            log.info("")

            for p in I:
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action["$end"] = 0
                            st_actionp["$end"] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.
                                        sprec,slevel = Productions[st_actionp[a].number].prec
                                        rprec,rlevel = Precedence.get(a,('right',0))
                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp,rejectp = pp,oldp
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp,rejectp = oldp,pp
                                        self.rr_conflicts.append((st,chosenp,rejectp))
                                        log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I,a)
                            j = self.lr0_cidhash.get(id(g),-1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a,p,"shift and go to state %d" % j))
                                r = st_action.get(a,None)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        if r != j:
                                            raise LALRError("Shift/shift conflict in state %d" % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift
                                        rprec,rlevel = Productions[st_actionp[a].number].prec
                                        sprec,slevel = Precedence.get(a,('right',0))
                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                                self.sr_conflicts.append((st,a,'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                                self.sr_conflicts.append((st,a,'reduce'))

                                    else:
                                        raise LALRError("Unknown conflict in state %d" % st)
                                else:
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1
            log.info("")
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            not_used = 1
                            _actprint[(a,m)] = 1
            if not_used:
                log.debug("")

            # Construct the goto table for this state

            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2540
2541
2542    # -----------------------------------------------------------------------------
    # write_table()
2544    #
2545    # This function writes the LR parsing tables to a file
2546    # -----------------------------------------------------------------------------
2547
    def write_table(self,modulename,outputdir='',signature=""):
        """Write the LR parsing tables to a generated Python module.

        modulename -- dotted module name; only the last component is used
                      as the output file name (with ".py" appended)
        outputdir  -- directory in which to create the file
        signature  -- grammar digest embedded as _lr_signature so stale
                      tables can be detected later

        IOErrors are reported to stderr rather than raised.
        """
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            # (invert the action table: terminal -> parallel lists of states
            # and action values, reconstructed at import time)
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action:  _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")

            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            if smaller:
                # Factor out names to try and make smaller
                # (same inversion trick as the action table, for goto)
                items = { }

                for s,nd in self.lr_goto.items():
                   for name,v in nd.items():
                      i = items.get(name)
                      if not i:
                         i = ([],[])
                         items[name] = i
                      i[0].append(s)
                      i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)

                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n");

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
            return
2661
2662
2663    # -----------------------------------------------------------------------------
2664    # pickle_table()
2665    #
2666    # This function pickles the LR parsing tables to a supplied file object
2667    # -----------------------------------------------------------------------------
2668
2669    def pickle_table(self,filename,signature=""):
2670        try:
2671            import cPickle as pickle
2672        except ImportError:
2673            import pickle
2674        outf = open(filename,"wb")
2675        pickle.dump(__tabversion__,outf,pickle_protocol)
2676        pickle.dump(self.lr_method,outf,pickle_protocol)
2677        pickle.dump(signature,outf,pickle_protocol)
2678        pickle.dump(self.lr_action,outf,pickle_protocol)
2679        pickle.dump(self.lr_goto,outf,pickle_protocol)
2680
2681        outp = []
2682        for p in self.lr_productions:
2683            if p.func:
2684                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2685            else:
2686                outp.append((str(p),p.name,p.len,None,None,None))
2687        pickle.dump(outp,outf,pickle_protocol)
2688        outf.close()
2689
2690# -----------------------------------------------------------------------------
2691#                            === INTROSPECTION ===
2692#
2693# The following functions and classes are used to implement the PLY
2694# introspection features followed by the yacc() function itself.
2695# -----------------------------------------------------------------------------
2696
2697# -----------------------------------------------------------------------------
2698# get_caller_module_dict()
2699#
2700# This function returns a dictionary containing all of the symbols defined within
2701# a caller further down the call stack.  This is used to get the environment
2702# associated with the yacc() call if none was provided.
2703# -----------------------------------------------------------------------------
2704
def get_caller_module_dict(levels):
    """Return the symbol table of a caller *levels* frames down the stack.

    The result is a copy of that frame's globals, overlaid with its locals
    when the two differ (i.e. when the frame is not module level).  Used to
    recover the environment of the yacc() caller when none is supplied.
    """
    try:
        raise RuntimeError
    except RuntimeError:
        exc_type, exc_value, tb = sys.exc_info()
        # The traceback's frame is this function itself; walk outward.
        frame = tb.tb_frame
        for _ in range(levels):
            frame = frame.f_back
        symbols = frame.f_globals.copy()
        if frame.f_globals != frame.f_locals:
            symbols.update(frame.f_locals)
        return symbols
2719
2720# -----------------------------------------------------------------------------
2721# parse_grammar()
2722#
2723# This takes a raw grammar rule string and parses it into production data
2724# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    """Parse a raw grammar rule string (typically a p_* docstring) into a
    list of (file, lineno, prodname, syms) tuples.

    A leading '|' continues the most recent production; otherwise a line
    has the form ``name : sym sym ...`` (``::=`` is also accepted).
    Raises SyntaxError with file/line context on any malformed rule.
    """
    productions = []
    current_name = None          # Name of the most recent production (for '|')
    lineno = line
    for raw in doc.splitlines():
        lineno += 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if current_name is None:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,lineno))
                rule_name = current_name
                rule_syms = tokens[1:]
            else:
                rule_name = tokens[0]
                current_name = rule_name
                separator = tokens[1]
                rule_syms = tokens[2:]
                if separator != ':' and separator != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,lineno))

            productions.append((file,lineno,rule_name,rule_syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a one-token line) is a malformed rule
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,lineno,raw.strip()))

    return productions
2757
2758# -----------------------------------------------------------------------------
2759# ParserReflect()
2760#
2761# This class represents information extracted for building a parser including
2762# start symbol, error function, tokens, precedence list, action functions,
2763# etc.
2764# -----------------------------------------------------------------------------
2765class ParserReflect(object):
2766    def __init__(self,pdict,log=None):
2767        self.pdict      = pdict
2768        self.start      = None
2769        self.error_func = None
2770        self.tokens     = None
2771        self.files      = {}
2772        self.grammar    = []
2773        self.error      = 0
2774
2775        if log is None:
2776            self.log = PlyLogger(sys.stderr)
2777        else:
2778            self.log = log
2779
2780    # Get all of the basic information
2781    def get_all(self):
2782        self.get_start()
2783        self.get_error_func()
2784        self.get_tokens()
2785        self.get_precedence()
2786        self.get_pfunctions()
2787
2788    # Validate all of the information
2789    def validate_all(self):
2790        self.validate_start()
2791        self.validate_error_func()
2792        self.validate_tokens()
2793        self.validate_precedence()
2794        self.validate_pfunctions()
2795        self.validate_files()
2796        return self.error
2797
2798    # Compute a signature over the grammar
2799    def signature(self):
2800        try:
2801            from hashlib import md5
2802        except ImportError:
2803            from md5 import md5
2804        try:
2805            sig = md5()
2806            if self.start:
2807                sig.update(self.start.encode('latin-1'))
2808            if self.prec:
2809                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2810            if self.tokens:
2811                sig.update(" ".join(self.tokens).encode('latin-1'))
2812            for f in self.pfuncs:
2813                if f[3]:
2814                    sig.update(f[3].encode('latin-1'))
2815        except (TypeError,ValueError):
2816            pass
2817        return sig.digest()
2818
2819    # -----------------------------------------------------------------------------
    # validate_files()
2821    #
2822    # This method checks to see if there are duplicated p_rulename() functions
2823    # in the parser module file.  Without this function, it is really easy for
2824    # users to make mistakes by cutting and pasting code fragments (and it's a real
2825    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2826    # we just do a little regular expression pattern matching of def statements
2827    # to try and detect duplicates.
2828    # -----------------------------------------------------------------------------
2829
2830    def validate_files(self):
2831        # Match def p_funcname(
2832        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2833
2834        for filename in self.files.keys():
2835            base,ext = os.path.splitext(filename)
2836            if ext != '.py': return 1          # No idea. Assume it's okay.
2837
2838            try:
2839                f = open(filename)
2840                lines = f.readlines()
2841                f.close()
2842            except IOError:
2843                continue
2844
2845            counthash = { }
2846            for linen,l in enumerate(lines):
2847                linen += 1
2848                m = fre.match(l)
2849                if m:
2850                    name = m.group(1)
2851                    prev = counthash.get(name)
2852                    if not prev:
2853                        counthash[name] = linen
2854                    else:
2855                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2856
2857    # Get the start symbol
2858    def get_start(self):
2859        self.start = self.pdict.get('start')
2860
2861    # Validate the start symbol
2862    def validate_start(self):
2863        if self.start is not None:
2864            if not isinstance(self.start,str):
2865                self.log.error("'start' must be a string")
2866
2867    # Look for error handler
2868    def get_error_func(self):
2869        self.error_func = self.pdict.get('p_error')
2870
2871    # Validate the error function
2872    def validate_error_func(self):
2873        if self.error_func:
2874            if isinstance(self.error_func,types.FunctionType):
2875                ismethod = 0
2876            elif isinstance(self.error_func, types.MethodType):
2877                ismethod = 1
2878            else:
2879                self.log.error("'p_error' defined, but is not a function or method")
2880                self.error = 1
2881                return
2882
2883            eline = func_code(self.error_func).co_firstlineno
2884            efile = func_code(self.error_func).co_filename
2885            self.files[efile] = 1
2886
2887            if (func_code(self.error_func).co_argcount != 1+ismethod):
2888                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2889                self.error = 1
2890
2891    # Get the tokens map
2892    def get_tokens(self):
2893        tokens = self.pdict.get("tokens",None)
2894        if not tokens:
2895            self.log.error("No token list is defined")
2896            self.error = 1
2897            return
2898
2899        if not isinstance(tokens,(list, tuple)):
2900            self.log.error("tokens must be a list or tuple")
2901            self.error = 1
2902            return
2903
2904        if not tokens:
2905            self.log.error("tokens is empty")
2906            self.error = 1
2907            return
2908
2909        self.tokens = tokens
2910
2911    # Validate the tokens
2912    def validate_tokens(self):
2913        # Validate the tokens.
2914        if 'error' in self.tokens:
2915            self.log.error("Illegal token name 'error'. Is a reserved word")
2916            self.error = 1
2917            return
2918
2919        terminals = {}
2920        for n in self.tokens:
2921            if n in terminals:
2922                self.log.warning("Token '%s' multiply defined", n)
2923            terminals[n] = 1
2924
2925    # Get the precedence map (if any)
2926    def get_precedence(self):
2927        self.prec = self.pdict.get("precedence",None)
2928
2929    # Validate and parse the precedence map
2930    def validate_precedence(self):
2931        preclist = []
2932        if self.prec:
2933            if not isinstance(self.prec,(list,tuple)):
2934                self.log.error("precedence must be a list or tuple")
2935                self.error = 1
2936                return
2937            for level,p in enumerate(self.prec):
2938                if not isinstance(p,(list,tuple)):
2939                    self.log.error("Bad precedence table")
2940                    self.error = 1
2941                    return
2942
2943                if len(p) < 2:
2944                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2945                    self.error = 1
2946                    return
2947                assoc = p[0]
2948                if not isinstance(assoc,str):
2949                    self.log.error("precedence associativity must be a string")
2950                    self.error = 1
2951                    return
2952                for term in p[1:]:
2953                    if not isinstance(term,str):
2954                        self.log.error("precedence items must be strings")
2955                        self.error = 1
2956                        return
2957                    preclist.append((term,assoc,level+1))
2958        self.preclist = preclist
2959
2960    # Get all p_functions from the grammar
2961    def get_pfunctions(self):
2962        p_functions = []
2963        for name, item in self.pdict.items():
2964            if name[:2] != 'p_': continue
2965            if name == 'p_error': continue
2966            if isinstance(item,(types.FunctionType,types.MethodType)):
2967                line = func_code(item).co_firstlineno
2968                file = func_code(item).co_filename
2969                p_functions.append((line,file,name,item.__doc__))
2970
2971        # Sort all of the actions by line number
2972        p_functions.sort()
2973        self.pfuncs = p_functions
2974
2975
    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check every collected p_* rule function for a valid signature and
        a parsable docstring, accumulating (funcname, parsed_production)
        pairs into self.grammar.  Fatal problems set self.error; cosmetic
        problems are only warned about."""
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            # Bound methods carry an implicit 'self', so they need one more
            # positional argument than a plain function.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                # A rule function without a docstring carries no grammar text;
                # it is skipped with a warning rather than treated as fatal.
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    # parse_grammar() turns the docstring into one or more
                    # (file, line, prodname, syms) production descriptions.
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    # sys.exc_info()[1] form keeps Python 2/3 compatibility.
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                # NOTE(review): this runs even when parse_grammar() raised
                # above (it sits after, not inside, the try block), so the
                # file is recorded for signature checking either way.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            # Heuristic: a one-argument function (two for methods) whose
            # docstring's second whitespace-separated token is ':' looks like
            # a grammar rule that is missing the p_ prefix.
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    # __doc__ may be None or too short for doc[1]; such
                    # objects are simply not grammar-rule candidates.
                    pass

        self.grammar = grammar
3032
3033# -----------------------------------------------------------------------------
3034# yacc(module)
3035#
3036# Build a parser
3037# -----------------------------------------------------------------------------
3038
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
         debuglog=None, errorlog = None, picklefile=None):
    """Build and return an LR parser.

    Grammar rules are collected from p_* functions found in *module* (or,
    when module is None, in the caller's namespace).  Previously generated
    tables are reused when their recorded signature matches the current
    grammar; otherwise new tables are generated and, if requested, written
    back out.

    method          -- table construction algorithm ('LALR' or 'SLR')
    debug           -- write a debugging file and extra log output
    module          -- module object containing the grammar, or None
    tabmodule       -- module name used to cache the parse tables
    start           -- start symbol (overrides the module's 'start' variable)
    check_recursion -- check for unreachable symbols and infinite recursion
    optimize        -- reuse cached tables without a signature check
    write_tables    -- write generated tables to tabmodule
    debugfile       -- filename of the debug log when debug is true
    outputdir       -- directory in which table files are written
    debuglog        -- logger for debug output (created here if None)
    errorlog        -- logger for errors/warnings (stderr logger if None)
    picklefile      -- pickle the tables to this file (disables write_tables)

    Raises YaccError if the grammar cannot be turned into a parser.
    """

    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created

    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        # Bug fix: previously this logged str(sys.exc_info()) -- the whole
        # (type, value, traceback) tuple -- instead of the exception value.
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        # Missing or unreadable table files are expected on a first run;
        # fall through and regenerate the tables from scratch below.
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)


    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("")
        debuglog.info("Unused terminals:")
        debuglog.info("")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("")
        debuglog.info("Grammar")
        debuglog.info("")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("")
        debuglog.info("Terminals, with rules where they appear")
        debuglog.info("")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("")
        debuglog.info("Nonterminals, with rules where they appear")
        debuglog.info("")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info("")

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s",  tok, state, resolution)

        # The same (state, rule, rejected) triple can show up multiple times;
        # report each distinct conflict only once.
        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
3280