1*16467b97STreehugger Robot"""ANTLR3 runtime package""" 2*16467b97STreehugger Robot 3*16467b97STreehugger Robot# begin[licence] 4*16467b97STreehugger Robot# 5*16467b97STreehugger Robot# [The "BSD licence"] 6*16467b97STreehugger Robot# Copyright (c) 2005-2008 Terence Parr 7*16467b97STreehugger Robot# All rights reserved. 8*16467b97STreehugger Robot# 9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without 10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions 11*16467b97STreehugger Robot# are met: 12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright 13*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer. 14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright 15*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer in the 16*16467b97STreehugger Robot# documentation and/or other materials provided with the distribution. 17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products 18*16467b97STreehugger Robot# derived from this software without specific prior written permission. 19*16467b97STreehugger Robot# 20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*16467b97STreehugger Robot# 31*16467b97STreehugger Robot# end[licensc] 32*16467b97STreehugger Robot 33*16467b97STreehugger Robotfrom antlr3.constants import EOF 34*16467b97STreehugger Robotfrom antlr3.exceptions import NoViableAltException, BacktrackingFailed 35*16467b97STreehugger Robot 36*16467b97STreehugger Robot 37*16467b97STreehugger Robotclass DFA(object): 38*16467b97STreehugger Robot """@brief A DFA implemented as a set of transition tables. 39*16467b97STreehugger Robot 40*16467b97STreehugger Robot Any state that has a semantic predicate edge is special; those states 41*16467b97STreehugger Robot are generated with if-then-else structures in a specialStateTransition() 42*16467b97STreehugger Robot which is generated by cyclicDFA template. 43*16467b97STreehugger Robot 44*16467b97STreehugger Robot """ 45*16467b97STreehugger Robot 46*16467b97STreehugger Robot def __init__( 47*16467b97STreehugger Robot self, 48*16467b97STreehugger Robot recognizer, decisionNumber, 49*16467b97STreehugger Robot eot, eof, min, max, accept, special, transition 50*16467b97STreehugger Robot ): 51*16467b97STreehugger Robot ## Which recognizer encloses this DFA? Needed to check backtracking 52*16467b97STreehugger Robot self.recognizer = recognizer 53*16467b97STreehugger Robot 54*16467b97STreehugger Robot self.decisionNumber = decisionNumber 55*16467b97STreehugger Robot self.eot = eot 56*16467b97STreehugger Robot self.eof = eof 57*16467b97STreehugger Robot self.min = min 58*16467b97STreehugger Robot self.max = max 59*16467b97STreehugger Robot self.accept = accept 60*16467b97STreehugger Robot self.special = special 61*16467b97STreehugger Robot self.transition = transition 62*16467b97STreehugger Robot 63*16467b97STreehugger Robot 64*16467b97STreehugger Robot def predict(self, input): 65*16467b97STreehugger Robot """ 66*16467b97STreehugger Robot From the input stream, predict what alternative will succeed 67*16467b97STreehugger Robot using this DFA (representing the covering regular approximation 68*16467b97STreehugger Robot to the underlying CFL). Return an alternative number 1..n. Throw 69*16467b97STreehugger Robot an exception upon error. 70*16467b97STreehugger Robot """ 71*16467b97STreehugger Robot mark = input.mark() 72*16467b97STreehugger Robot s = 0 # we always start at s0 73*16467b97STreehugger Robot try: 74*16467b97STreehugger Robot for _ in xrange(50000): 75*16467b97STreehugger Robot #print "***Current state = %d" % s 76*16467b97STreehugger Robot 77*16467b97STreehugger Robot specialState = self.special[s] 78*16467b97STreehugger Robot if specialState >= 0: 79*16467b97STreehugger Robot #print "is special" 80*16467b97STreehugger Robot s = self.specialStateTransition(specialState, input) 81*16467b97STreehugger Robot if s == -1: 82*16467b97STreehugger Robot self.noViableAlt(s, input) 83*16467b97STreehugger Robot return 0 84*16467b97STreehugger Robot input.consume() 85*16467b97STreehugger Robot continue 86*16467b97STreehugger Robot 87*16467b97STreehugger Robot if self.accept[s] >= 1: 88*16467b97STreehugger Robot #print "accept state for alt %d" % self.accept[s] 89*16467b97STreehugger Robot return self.accept[s] 90*16467b97STreehugger Robot 91*16467b97STreehugger Robot # look for a normal char transition 92*16467b97STreehugger Robot c = input.LA(1) 93*16467b97STreehugger Robot 94*16467b97STreehugger Robot #print "LA = %d (%r)" % (c, unichr(c) if c >= 0 else 'EOF') 95*16467b97STreehugger Robot #print "range = %d..%d" % (self.min[s], self.max[s]) 96*16467b97STreehugger Robot 97*16467b97STreehugger Robot if c >= self.min[s] and c <= self.max[s]: 98*16467b97STreehugger Robot # move to next state 99*16467b97STreehugger Robot snext = self.transition[s][c-self.min[s]] 100*16467b97STreehugger Robot #print "in range, next state = %d" % snext 101*16467b97STreehugger Robot 102*16467b97STreehugger Robot if snext < 0: 103*16467b97STreehugger Robot #print "not a normal transition" 104*16467b97STreehugger Robot # was in range but not a normal transition 105*16467b97STreehugger Robot # must check EOT, which is like the else clause. 106*16467b97STreehugger Robot # eot[s]>=0 indicates that an EOT edge goes to another 107*16467b97STreehugger Robot # state. 108*16467b97STreehugger Robot if self.eot[s] >= 0: # EOT Transition to accept state? 109*16467b97STreehugger Robot #print "EOT trans to accept state %d" % self.eot[s] 110*16467b97STreehugger Robot 111*16467b97STreehugger Robot s = self.eot[s] 112*16467b97STreehugger Robot input.consume() 113*16467b97STreehugger Robot # TODO: I had this as return accept[eot[s]] 114*16467b97STreehugger Robot # which assumed here that the EOT edge always 115*16467b97STreehugger Robot # went to an accept...faster to do this, but 116*16467b97STreehugger Robot # what about predicated edges coming from EOT 117*16467b97STreehugger Robot # target? 118*16467b97STreehugger Robot continue 119*16467b97STreehugger Robot 120*16467b97STreehugger Robot #print "no viable alt" 121*16467b97STreehugger Robot self.noViableAlt(s, input) 122*16467b97STreehugger Robot return 0 123*16467b97STreehugger Robot 124*16467b97STreehugger Robot s = snext 125*16467b97STreehugger Robot input.consume() 126*16467b97STreehugger Robot continue 127*16467b97STreehugger Robot 128*16467b97STreehugger Robot if self.eot[s] >= 0: 129*16467b97STreehugger Robot #print "EOT to %d" % self.eot[s] 130*16467b97STreehugger Robot 131*16467b97STreehugger Robot s = self.eot[s] 132*16467b97STreehugger Robot input.consume() 133*16467b97STreehugger Robot continue 134*16467b97STreehugger Robot 135*16467b97STreehugger Robot # EOF Transition to accept state? 136*16467b97STreehugger Robot if c == EOF and self.eof[s] >= 0: 137*16467b97STreehugger Robot #print "EOF Transition to accept state %d" \ 138*16467b97STreehugger Robot # % self.accept[self.eof[s]] 139*16467b97STreehugger Robot return self.accept[self.eof[s]] 140*16467b97STreehugger Robot 141*16467b97STreehugger Robot # not in range and not EOF/EOT, must be invalid symbol 142*16467b97STreehugger Robot self.noViableAlt(s, input) 143*16467b97STreehugger Robot return 0 144*16467b97STreehugger Robot 145*16467b97STreehugger Robot else: 146*16467b97STreehugger Robot raise RuntimeError("DFA bang!") 147*16467b97STreehugger Robot 148*16467b97STreehugger Robot finally: 149*16467b97STreehugger Robot input.rewind(mark) 150*16467b97STreehugger Robot 151*16467b97STreehugger Robot 152*16467b97STreehugger Robot def noViableAlt(self, s, input): 153*16467b97STreehugger Robot if self.recognizer._state.backtracking > 0: 154*16467b97STreehugger Robot raise BacktrackingFailed 155*16467b97STreehugger Robot 156*16467b97STreehugger Robot nvae = NoViableAltException( 157*16467b97STreehugger Robot self.getDescription(), 158*16467b97STreehugger Robot self.decisionNumber, 159*16467b97STreehugger Robot s, 160*16467b97STreehugger Robot input 161*16467b97STreehugger Robot ) 162*16467b97STreehugger Robot 163*16467b97STreehugger Robot self.error(nvae) 164*16467b97STreehugger Robot raise nvae 165*16467b97STreehugger Robot 166*16467b97STreehugger Robot 167*16467b97STreehugger Robot def error(self, nvae): 168*16467b97STreehugger Robot """A hook for debugging interface""" 169*16467b97STreehugger Robot pass 170*16467b97STreehugger Robot 171*16467b97STreehugger Robot 172*16467b97STreehugger Robot def specialStateTransition(self, s, input): 173*16467b97STreehugger Robot return -1 174*16467b97STreehugger Robot 175*16467b97STreehugger Robot 176*16467b97STreehugger Robot def getDescription(self): 177*16467b97STreehugger Robot return "n/a" 178*16467b97STreehugger Robot 179*16467b97STreehugger Robot 180*16467b97STreehugger Robot## def specialTransition(self, state, symbol): 181*16467b97STreehugger Robot## return 0 182*16467b97STreehugger Robot 183*16467b97STreehugger Robot 184*16467b97STreehugger Robot def unpack(cls, string): 185*16467b97STreehugger Robot """@brief Unpack the runlength encoded table data. 186*16467b97STreehugger Robot 187*16467b97STreehugger Robot Terence implemented packed table initializers, because Java has a 188*16467b97STreehugger Robot size restriction on .class files and the lookup tables can grow 189*16467b97STreehugger Robot pretty large. The generated JavaLexer.java of the Java.g example 190*16467b97STreehugger Robot would be about 15MB with uncompressed array initializers. 191*16467b97STreehugger Robot 192*16467b97STreehugger Robot Python does not have any size restrictions, but the compilation of 193*16467b97STreehugger Robot such large source files seems to be pretty memory hungry. The memory 194*16467b97STreehugger Robot consumption of the python process grew to >1.5GB when importing a 195*16467b97STreehugger Robot 15MB lexer, eating all my swap space and I was to impacient to see, 196*16467b97STreehugger Robot if it could finish at all. With packed initializers that are unpacked 197*16467b97STreehugger Robot at import time of the lexer module, everything works like a charm. 198*16467b97STreehugger Robot 199*16467b97STreehugger Robot """ 200*16467b97STreehugger Robot 201*16467b97STreehugger Robot ret = [] 202*16467b97STreehugger Robot for i in range(len(string) / 2): 203*16467b97STreehugger Robot (n, v) = ord(string[i*2]), ord(string[i*2+1]) 204*16467b97STreehugger Robot 205*16467b97STreehugger Robot # Is there a bitwise operation to do this? 206*16467b97STreehugger Robot if v == 0xFFFF: 207*16467b97STreehugger Robot v = -1 208*16467b97STreehugger Robot 209*16467b97STreehugger Robot ret += [v] * n 210*16467b97STreehugger Robot 211*16467b97STreehugger Robot return ret 212*16467b97STreehugger Robot 213*16467b97STreehugger Robot unpack = classmethod(unpack) 214