xref: /aosp_15_r20/external/antlr/runtime/Python3/antlr3/dfa.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot"""ANTLR3 runtime package"""
2*16467b97STreehugger Robot
3*16467b97STreehugger Robot# begin[licence]
4*16467b97STreehugger Robot#
5*16467b97STreehugger Robot# [The "BSD licence"]
6*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr
7*16467b97STreehugger Robot# All rights reserved.
8*16467b97STreehugger Robot#
9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
11*16467b97STreehugger Robot# are met:
12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
13*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
15*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
16*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
18*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
19*16467b97STreehugger Robot#
20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*16467b97STreehugger Robot#
31*16467b97STreehugger Robot# end[licence]
32*16467b97STreehugger Robot
33*16467b97STreehugger Robotfrom .constants import EOF
34*16467b97STreehugger Robotfrom .exceptions import NoViableAltException, BacktrackingFailed
35*16467b97STreehugger Robot
36*16467b97STreehugger Robot
37*16467b97STreehugger Robotclass DFA(object):
38*16467b97STreehugger Robot    """@brief A DFA implemented as a set of transition tables.
39*16467b97STreehugger Robot
40*16467b97STreehugger Robot    Any state that has a semantic predicate edge is special; those states
41*16467b97STreehugger Robot    are generated with if-then-else structures in a specialStateTransition()
42*16467b97STreehugger Robot    which is generated by cyclicDFA template.
43*16467b97STreehugger Robot
44*16467b97STreehugger Robot    """
45*16467b97STreehugger Robot
46*16467b97STreehugger Robot    def __init__(
47*16467b97STreehugger Robot        self,
48*16467b97STreehugger Robot        recognizer, decisionNumber,
49*16467b97STreehugger Robot        eot, eof, min, max, accept, special, transition
50*16467b97STreehugger Robot        ):
51*16467b97STreehugger Robot        ## Which recognizer encloses this DFA?  Needed to check backtracking
52*16467b97STreehugger Robot        self.recognizer = recognizer
53*16467b97STreehugger Robot
54*16467b97STreehugger Robot        self.decisionNumber = decisionNumber
55*16467b97STreehugger Robot        self.eot = eot
56*16467b97STreehugger Robot        self.eof = eof
57*16467b97STreehugger Robot        self.min = min
58*16467b97STreehugger Robot        self.max = max
59*16467b97STreehugger Robot        self.accept = accept
60*16467b97STreehugger Robot        self.special = special
61*16467b97STreehugger Robot        self.transition = transition
62*16467b97STreehugger Robot
63*16467b97STreehugger Robot
64*16467b97STreehugger Robot    def predict(self, input):
65*16467b97STreehugger Robot        """
66*16467b97STreehugger Robot        From the input stream, predict what alternative will succeed
67*16467b97STreehugger Robot        using this DFA (representing the covering regular approximation
68*16467b97STreehugger Robot        to the underlying CFL).  Return an alternative number 1..n.  Throw
69*16467b97STreehugger Robot        an exception upon error.
70*16467b97STreehugger Robot        """
71*16467b97STreehugger Robot        mark = input.mark()
72*16467b97STreehugger Robot        s = 0 # we always start at s0
73*16467b97STreehugger Robot        try:
74*16467b97STreehugger Robot            for _ in range(50000):
75*16467b97STreehugger Robot                specialState = self.special[s]
76*16467b97STreehugger Robot                if specialState >= 0:
77*16467b97STreehugger Robot                    s = self.specialStateTransition(specialState, input)
78*16467b97STreehugger Robot                    if s == -1:
79*16467b97STreehugger Robot                        self.noViableAlt(s, input)
80*16467b97STreehugger Robot                        return 0
81*16467b97STreehugger Robot                    input.consume()
82*16467b97STreehugger Robot                    continue
83*16467b97STreehugger Robot
84*16467b97STreehugger Robot                if self.accept[s] >= 1:
85*16467b97STreehugger Robot                    return self.accept[s]
86*16467b97STreehugger Robot
87*16467b97STreehugger Robot                # look for a normal char transition
88*16467b97STreehugger Robot                c = input.LA(1)
89*16467b97STreehugger Robot
90*16467b97STreehugger Robot                if c >= self.min[s] and c <= self.max[s]:
91*16467b97STreehugger Robot                    # move to next state
92*16467b97STreehugger Robot                    snext = self.transition[s][c-self.min[s]]
93*16467b97STreehugger Robot
94*16467b97STreehugger Robot                    if snext < 0:
95*16467b97STreehugger Robot                        # was in range but not a normal transition
96*16467b97STreehugger Robot                        # must check EOT, which is like the else clause.
97*16467b97STreehugger Robot                        # eot[s]>=0 indicates that an EOT edge goes to another
98*16467b97STreehugger Robot                        # state.
99*16467b97STreehugger Robot                        if self.eot[s] >= 0: # EOT Transition to accept state?
100*16467b97STreehugger Robot                            s = self.eot[s]
101*16467b97STreehugger Robot                            input.consume()
102*16467b97STreehugger Robot                            # TODO: I had this as return accept[eot[s]]
103*16467b97STreehugger Robot                            # which assumed here that the EOT edge always
104*16467b97STreehugger Robot                            # went to an accept...faster to do this, but
105*16467b97STreehugger Robot                            # what about predicated edges coming from EOT
106*16467b97STreehugger Robot                            # target?
107*16467b97STreehugger Robot                            continue
108*16467b97STreehugger Robot
109*16467b97STreehugger Robot                        self.noViableAlt(s, input)
110*16467b97STreehugger Robot                        return 0
111*16467b97STreehugger Robot
112*16467b97STreehugger Robot                    s = snext
113*16467b97STreehugger Robot                    input.consume()
114*16467b97STreehugger Robot                    continue
115*16467b97STreehugger Robot
116*16467b97STreehugger Robot                if self.eot[s] >= 0:
117*16467b97STreehugger Robot                    s = self.eot[s]
118*16467b97STreehugger Robot                    input.consume()
119*16467b97STreehugger Robot                    continue
120*16467b97STreehugger Robot
121*16467b97STreehugger Robot                # EOF Transition to accept state?
122*16467b97STreehugger Robot                if c == EOF and self.eof[s] >= 0:
123*16467b97STreehugger Robot                    return self.accept[self.eof[s]]
124*16467b97STreehugger Robot
125*16467b97STreehugger Robot                # not in range and not EOF/EOT, must be invalid symbol
126*16467b97STreehugger Robot                self.noViableAlt(s, input)
127*16467b97STreehugger Robot                return 0
128*16467b97STreehugger Robot
129*16467b97STreehugger Robot            else:
130*16467b97STreehugger Robot                raise RuntimeError("DFA bang!")
131*16467b97STreehugger Robot
132*16467b97STreehugger Robot        finally:
133*16467b97STreehugger Robot            input.rewind(mark)
134*16467b97STreehugger Robot
135*16467b97STreehugger Robot
136*16467b97STreehugger Robot    def noViableAlt(self, s, input):
137*16467b97STreehugger Robot        if self.recognizer._state.backtracking > 0:
138*16467b97STreehugger Robot            raise BacktrackingFailed
139*16467b97STreehugger Robot
140*16467b97STreehugger Robot        nvae = NoViableAltException(
141*16467b97STreehugger Robot            self.getDescription(),
142*16467b97STreehugger Robot            self.decisionNumber,
143*16467b97STreehugger Robot            s,
144*16467b97STreehugger Robot            input
145*16467b97STreehugger Robot            )
146*16467b97STreehugger Robot
147*16467b97STreehugger Robot        self.error(nvae)
148*16467b97STreehugger Robot        raise nvae
149*16467b97STreehugger Robot
150*16467b97STreehugger Robot
151*16467b97STreehugger Robot    def error(self, nvae):
152*16467b97STreehugger Robot        """A hook for debugging interface"""
153*16467b97STreehugger Robot        pass
154*16467b97STreehugger Robot
155*16467b97STreehugger Robot
156*16467b97STreehugger Robot    def specialStateTransition(self, s, input):
157*16467b97STreehugger Robot        return -1
158*16467b97STreehugger Robot
159*16467b97STreehugger Robot
160*16467b97STreehugger Robot    def getDescription(self):
161*16467b97STreehugger Robot        return "n/a"
162*16467b97STreehugger Robot
163*16467b97STreehugger Robot
164*16467b97STreehugger Robot##     def specialTransition(self, state, symbol):
165*16467b97STreehugger Robot##         return 0
166*16467b97STreehugger Robot
167*16467b97STreehugger Robot
168*16467b97STreehugger Robot    @classmethod
169*16467b97STreehugger Robot    def unpack(cls, string):
170*16467b97STreehugger Robot        """@brief Unpack the runlength encoded table data.
171*16467b97STreehugger Robot
172*16467b97STreehugger Robot        Terence implemented packed table initializers, because Java has a
173*16467b97STreehugger Robot        size restriction on .class files and the lookup tables can grow
174*16467b97STreehugger Robot        pretty large. The generated JavaLexer.java of the Java.g example
175*16467b97STreehugger Robot        would be about 15MB with uncompressed array initializers.
176*16467b97STreehugger Robot
177*16467b97STreehugger Robot        Python does not have any size restrictions, but the compilation of
178*16467b97STreehugger Robot        such large source files seems to be pretty memory hungry. The memory
179*16467b97STreehugger Robot        consumption of the python process grew to >1.5GB when importing a
180*16467b97STreehugger Robot        15MB lexer, eating all my swap space and I was to impacient to see,
181*16467b97STreehugger Robot        if it could finish at all. With packed initializers that are unpacked
182*16467b97STreehugger Robot        at import time of the lexer module, everything works like a charm.
183*16467b97STreehugger Robot
184*16467b97STreehugger Robot        """
185*16467b97STreehugger Robot
186*16467b97STreehugger Robot        ret = []
187*16467b97STreehugger Robot        for i in range(0, len(string) - 1, 2):
188*16467b97STreehugger Robot            (n, v) = ord(string[i]), ord(string[i + 1])
189*16467b97STreehugger Robot
190*16467b97STreehugger Robot            if v == 0xFFFF:
191*16467b97STreehugger Robot                v = -1
192*16467b97STreehugger Robot
193*16467b97STreehugger Robot            ret += [v] * n
194*16467b97STreehugger Robot
195*16467b97STreehugger Robot        return ret
196