"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE

############################################################################
#
# basic token interface
#
############################################################################

class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes shared by all token kinds (type, channel, text,
    index, line, column) and exposes each of them as a property.
    getInputStream()/setInputStream() are left to the subclasses.
    """

    # Optional mapping from token type to token name, shared by all token
    # classes. Stays None until registerTokenNamesMap() is called.
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        Note that the mapping is stored by reference and an entry for EOF
        is added to it in place.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type                Token type.
        @param channel             Channel the token belongs to.
        @param text                Text of the token, may be None.
        @param index               Index of the token in the token stream.
        @param line                Line on which the token was found.
        @param charPositionInLine  Column at which the token starts.
        @param input               Stream the token was created from.
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Honour the caller's value. This used to be hard-coded to 0, which
        # silently dropped the line number whenever a token was copied via
        # the oldToken argument of a subclass constructor.
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        """The text of this token."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value


    @property
    def type(self):
        """The type of this token."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type (method form of the `type` property)."""
        return self._type

    @property
    def typeName(self):
        """Name of the token type.

        Looked up in TOKEN_NAMES_MAP when a non-empty map is registered;
        types missing from the map yield "INVALID_TOKEN_TYPE". Without a
        map, the type converted with str() is returned.
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value


    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos


    @property
    def channel(self):
        """The channel this token is assigned to."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value


    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value


    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError


############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################

class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a token, either from scratch or as a copy.

        When oldToken is given, type, channel, text, index, line, column
        and input are copied from it and the corresponding arguments of
        this constructor are ignored. start/stop are copied as well if
        oldToken is a CommonToken; otherwise the start/stop arguments are
        used.

        @param type      Token type.
        @param channel   Channel the token belongs to.
        @param text      Explicit text; None means "read it from input".
        @param input     Character stream the token points into.
        @param start     Position of the first character in input.
        @param stop      Position of the last character in input.
        @param oldToken  Token to copy, or None.
        """

        if oldToken is not None:
            super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                             oldToken.index, oldToken.line,
                             oldToken.charPositionInLine, oldToken.input)
            if isinstance(oldToken, CommonToken):
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                # Other token kinds carry no start/stop of their own.
                self.start = start
                self.stop = stop

        else:
            super().__init__(type=type, channel=channel, input=input)

            # We need to be able to change the text once in a while.  If
            # this is non-null, then getText should return this.  Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    @property
    def text(self):
        """The text of this token.

        An explicitly set text wins. Otherwise the text is taken from the
        input stream between start and stop; None is returned when there is
        no input stream, and '<EOF>' when start or stop is not below the
        size of the stream.
        """
        # Could be the empty string, and we want to return that.
        if self._text is not None:
            return self._text

        if not self.input:
            return None

        size = self.input.size()
        if self.start < size and self.stop < size:
            return self.input.substring(self.start, self.stop)

        return '<EOF>'

    @text.setter
    def text(self, value):
        """
        Override the text for this token.  getText() will return this text
        rather than pulling from the buffer.  Note that this does not mean
        that start/stop indexes are not valid.  It means that that input
        was converted to a new string in the token object.
        """
        self._text = value


    def getInputStream(self):
        """Return the stream this token was created from."""
        return self.input

    def setInputStream(self, input):
        """Set the stream this token was created from."""
        self.input = input


    def __str__(self):
        """Return a debugging representation of the token.

        EOF tokens are shown as "<EOF>". The channel is only included
        when it is greater than 0.
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt:
            # Replace newline, carriage return and tab by their escape
            # sequence written with 2 backslashes. The !r conversion used
            # below applies repr() on top of this.
            txt = txt.replace("\n", r"\\n")
            txt = txt.replace("\r", r"\\r")
            txt = txt.replace("\t", r"\\t")
        else:
            txt = "<no text>"

        return ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                "<{0.typeName}>{channelStr},"
                "{0.line}:{0.charPositionInLine}]"
                .format(self, txt=txt, channelStr=channelStr))


class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Create a token, either from scratch or as a copy.

        When oldToken is given, its type, channel, text, line and column
        are copied and the other arguments are ignored; the index is not
        copied. Without oldToken, index, line and column are set to None.

        @param type      Token type.
        @param text      Text of the token.
        @param channel   Channel the token belongs to.
        @param oldToken  Token to copy, or None.
        """
        if oldToken is not None:
            super().__init__(type=oldToken.type, channel=oldToken.channel,
                             text=oldToken.text, line=oldToken.line,
                             charPositionInLine=oldToken.charPositionInLine)

        else:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)


    def getInputStream(self):
        """Always return None; this token kind keeps no input stream."""
        return None

    def setInputStream(self, input):
        """Ignore the given stream."""
        pass


    def toString(self):
        """Return a debugging representation of the token.

        The channel is only included when it is greater than 0. All other
        attributes are formatted with repr(), so None values are shown as
        'None'.
        """
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if not txt:
            txt = "<no text>"

        return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                "{0.line!r}:{0.charPositionInLine!r}]"
                .format(self, txt=txt, channelStr=channelStr))

    __str__ = toString
    __repr__ = toString


INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)