xref: /aosp_15_r20/external/antlr/runtime/Python/antlr3/dfa.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot"""ANTLR3 runtime package"""
2*16467b97STreehugger Robot
3*16467b97STreehugger Robot# begin[licence]
4*16467b97STreehugger Robot#
5*16467b97STreehugger Robot# [The "BSD licence"]
6*16467b97STreehugger Robot# Copyright (c) 2005-2008 Terence Parr
7*16467b97STreehugger Robot# All rights reserved.
8*16467b97STreehugger Robot#
9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
11*16467b97STreehugger Robot# are met:
12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
13*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
15*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
16*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
18*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
19*16467b97STreehugger Robot#
20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*16467b97STreehugger Robot#
31*16467b97STreehugger Robot# end[licensc]
32*16467b97STreehugger Robot
33*16467b97STreehugger Robotfrom antlr3.constants import EOF
34*16467b97STreehugger Robotfrom antlr3.exceptions import NoViableAltException, BacktrackingFailed
35*16467b97STreehugger Robot
36*16467b97STreehugger Robot
37*16467b97STreehugger Robotclass DFA(object):
38*16467b97STreehugger Robot    """@brief A DFA implemented as a set of transition tables.
39*16467b97STreehugger Robot
40*16467b97STreehugger Robot    Any state that has a semantic predicate edge is special; those states
41*16467b97STreehugger Robot    are generated with if-then-else structures in a specialStateTransition()
42*16467b97STreehugger Robot    which is generated by cyclicDFA template.
43*16467b97STreehugger Robot
44*16467b97STreehugger Robot    """
45*16467b97STreehugger Robot
46*16467b97STreehugger Robot    def __init__(
47*16467b97STreehugger Robot        self,
48*16467b97STreehugger Robot        recognizer, decisionNumber,
49*16467b97STreehugger Robot        eot, eof, min, max, accept, special, transition
50*16467b97STreehugger Robot        ):
51*16467b97STreehugger Robot        ## Which recognizer encloses this DFA?  Needed to check backtracking
52*16467b97STreehugger Robot        self.recognizer = recognizer
53*16467b97STreehugger Robot
54*16467b97STreehugger Robot        self.decisionNumber = decisionNumber
55*16467b97STreehugger Robot        self.eot = eot
56*16467b97STreehugger Robot        self.eof = eof
57*16467b97STreehugger Robot        self.min = min
58*16467b97STreehugger Robot        self.max = max
59*16467b97STreehugger Robot        self.accept = accept
60*16467b97STreehugger Robot        self.special = special
61*16467b97STreehugger Robot        self.transition = transition
62*16467b97STreehugger Robot
63*16467b97STreehugger Robot
64*16467b97STreehugger Robot    def predict(self, input):
65*16467b97STreehugger Robot        """
66*16467b97STreehugger Robot        From the input stream, predict what alternative will succeed
67*16467b97STreehugger Robot	using this DFA (representing the covering regular approximation
68*16467b97STreehugger Robot	to the underlying CFL).  Return an alternative number 1..n.  Throw
69*16467b97STreehugger Robot	 an exception upon error.
70*16467b97STreehugger Robot	"""
71*16467b97STreehugger Robot        mark = input.mark()
72*16467b97STreehugger Robot        s = 0 # we always start at s0
73*16467b97STreehugger Robot        try:
74*16467b97STreehugger Robot            for _ in xrange(50000):
75*16467b97STreehugger Robot                #print "***Current state = %d" % s
76*16467b97STreehugger Robot
77*16467b97STreehugger Robot                specialState = self.special[s]
78*16467b97STreehugger Robot                if specialState >= 0:
79*16467b97STreehugger Robot                    #print "is special"
80*16467b97STreehugger Robot                    s = self.specialStateTransition(specialState, input)
81*16467b97STreehugger Robot                    if s == -1:
82*16467b97STreehugger Robot                        self.noViableAlt(s, input)
83*16467b97STreehugger Robot                        return 0
84*16467b97STreehugger Robot                    input.consume()
85*16467b97STreehugger Robot                    continue
86*16467b97STreehugger Robot
87*16467b97STreehugger Robot                if self.accept[s] >= 1:
88*16467b97STreehugger Robot                    #print "accept state for alt %d" % self.accept[s]
89*16467b97STreehugger Robot                    return self.accept[s]
90*16467b97STreehugger Robot
91*16467b97STreehugger Robot                # look for a normal char transition
92*16467b97STreehugger Robot                c = input.LA(1)
93*16467b97STreehugger Robot
94*16467b97STreehugger Robot                #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF')
95*16467b97STreehugger Robot                #print "range = %d..%d" % (self.min[s], self.max[s])
96*16467b97STreehugger Robot
97*16467b97STreehugger Robot                if c >= self.min[s] and c <= self.max[s]:
98*16467b97STreehugger Robot                    # move to next state
99*16467b97STreehugger Robot                    snext = self.transition[s][c-self.min[s]]
100*16467b97STreehugger Robot                    #print "in range, next state = %d" % snext
101*16467b97STreehugger Robot
102*16467b97STreehugger Robot                    if snext < 0:
103*16467b97STreehugger Robot                        #print "not a normal transition"
104*16467b97STreehugger Robot                        # was in range but not a normal transition
105*16467b97STreehugger Robot                        # must check EOT, which is like the else clause.
106*16467b97STreehugger Robot                        # eot[s]>=0 indicates that an EOT edge goes to another
107*16467b97STreehugger Robot                        # state.
108*16467b97STreehugger Robot                        if self.eot[s] >= 0: # EOT Transition to accept state?
109*16467b97STreehugger Robot                            #print "EOT trans to accept state %d" % self.eot[s]
110*16467b97STreehugger Robot
111*16467b97STreehugger Robot                            s = self.eot[s]
112*16467b97STreehugger Robot                            input.consume()
113*16467b97STreehugger Robot                            # TODO: I had this as return accept[eot[s]]
114*16467b97STreehugger Robot                            # which assumed here that the EOT edge always
115*16467b97STreehugger Robot                            # went to an accept...faster to do this, but
116*16467b97STreehugger Robot                            # what about predicated edges coming from EOT
117*16467b97STreehugger Robot                            # target?
118*16467b97STreehugger Robot                            continue
119*16467b97STreehugger Robot
120*16467b97STreehugger Robot                        #print "no viable alt"
121*16467b97STreehugger Robot                        self.noViableAlt(s, input)
122*16467b97STreehugger Robot                        return 0
123*16467b97STreehugger Robot
124*16467b97STreehugger Robot                    s = snext
125*16467b97STreehugger Robot                    input.consume()
126*16467b97STreehugger Robot                    continue
127*16467b97STreehugger Robot
128*16467b97STreehugger Robot                if self.eot[s] >= 0:
129*16467b97STreehugger Robot                    #print "EOT to %d" % self.eot[s]
130*16467b97STreehugger Robot
131*16467b97STreehugger Robot                    s = self.eot[s]
132*16467b97STreehugger Robot                    input.consume()
133*16467b97STreehugger Robot                    continue
134*16467b97STreehugger Robot
135*16467b97STreehugger Robot                # EOF Transition to accept state?
136*16467b97STreehugger Robot                if c == EOF and self.eof[s] >= 0:
137*16467b97STreehugger Robot                    #print "EOF Transition to accept state %d" \
138*16467b97STreehugger Robot                    #  % self.accept[self.eof[s]]
139*16467b97STreehugger Robot                    return self.accept[self.eof[s]]
140*16467b97STreehugger Robot
141*16467b97STreehugger Robot                # not in range and not EOF/EOT, must be invalid symbol
142*16467b97STreehugger Robot                self.noViableAlt(s, input)
143*16467b97STreehugger Robot                return 0
144*16467b97STreehugger Robot
145*16467b97STreehugger Robot            else:
146*16467b97STreehugger Robot                raise RuntimeError("DFA bang!")
147*16467b97STreehugger Robot
148*16467b97STreehugger Robot        finally:
149*16467b97STreehugger Robot            input.rewind(mark)
150*16467b97STreehugger Robot
151*16467b97STreehugger Robot
152*16467b97STreehugger Robot    def noViableAlt(self, s, input):
153*16467b97STreehugger Robot        if self.recognizer._state.backtracking > 0:
154*16467b97STreehugger Robot            raise BacktrackingFailed
155*16467b97STreehugger Robot
156*16467b97STreehugger Robot        nvae = NoViableAltException(
157*16467b97STreehugger Robot            self.getDescription(),
158*16467b97STreehugger Robot            self.decisionNumber,
159*16467b97STreehugger Robot            s,
160*16467b97STreehugger Robot            input
161*16467b97STreehugger Robot            )
162*16467b97STreehugger Robot
163*16467b97STreehugger Robot        self.error(nvae)
164*16467b97STreehugger Robot        raise nvae
165*16467b97STreehugger Robot
166*16467b97STreehugger Robot
167*16467b97STreehugger Robot    def error(self, nvae):
168*16467b97STreehugger Robot        """A hook for debugging interface"""
169*16467b97STreehugger Robot        pass
170*16467b97STreehugger Robot
171*16467b97STreehugger Robot
172*16467b97STreehugger Robot    def specialStateTransition(self, s, input):
173*16467b97STreehugger Robot        return -1
174*16467b97STreehugger Robot
175*16467b97STreehugger Robot
176*16467b97STreehugger Robot    def getDescription(self):
177*16467b97STreehugger Robot        return "n/a"
178*16467b97STreehugger Robot
179*16467b97STreehugger Robot
180*16467b97STreehugger Robot##     def specialTransition(self, state, symbol):
181*16467b97STreehugger Robot##         return 0
182*16467b97STreehugger Robot
183*16467b97STreehugger Robot
184*16467b97STreehugger Robot    def unpack(cls, string):
185*16467b97STreehugger Robot        """@brief Unpack the runlength encoded table data.
186*16467b97STreehugger Robot
187*16467b97STreehugger Robot        Terence implemented packed table initializers, because Java has a
188*16467b97STreehugger Robot        size restriction on .class files and the lookup tables can grow
189*16467b97STreehugger Robot        pretty large. The generated JavaLexer.java of the Java.g example
190*16467b97STreehugger Robot        would be about 15MB with uncompressed array initializers.
191*16467b97STreehugger Robot
192*16467b97STreehugger Robot        Python does not have any size restrictions, but the compilation of
193*16467b97STreehugger Robot        such large source files seems to be pretty memory hungry. The memory
194*16467b97STreehugger Robot        consumption of the python process grew to >1.5GB when importing a
195*16467b97STreehugger Robot        15MB lexer, eating all my swap space and I was to impacient to see,
196*16467b97STreehugger Robot        if it could finish at all. With packed initializers that are unpacked
197*16467b97STreehugger Robot        at import time of the lexer module, everything works like a charm.
198*16467b97STreehugger Robot
199*16467b97STreehugger Robot        """
200*16467b97STreehugger Robot
201*16467b97STreehugger Robot        ret = []
202*16467b97STreehugger Robot        for i in range(len(string) / 2):
203*16467b97STreehugger Robot            (n, v) = ord(string[i*2]), ord(string[i*2+1])
204*16467b97STreehugger Robot
205*16467b97STreehugger Robot            # Is there a bitwise operation to do this?
206*16467b97STreehugger Robot            if v == 0xFFFF:
207*16467b97STreehugger Robot                v = -1
208*16467b97STreehugger Robot
209*16467b97STreehugger Robot            ret += [v] * n
210*16467b97STreehugger Robot
211*16467b97STreehugger Robot        return ret
212*16467b97STreehugger Robot
213*16467b97STreehugger Robot    unpack = classmethod(unpack)
214