xref: /aosp_15_r20/external/antlr/runtime/Python3/antlr3/tokens.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot"""ANTLR3 runtime package"""
2*16467b97STreehugger Robot
3*16467b97STreehugger Robot# begin[licence]
4*16467b97STreehugger Robot#
5*16467b97STreehugger Robot# [The "BSD licence"]
6*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr
7*16467b97STreehugger Robot# All rights reserved.
8*16467b97STreehugger Robot#
9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
11*16467b97STreehugger Robot# are met:
12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
13*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
15*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
16*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
18*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
19*16467b97STreehugger Robot#
20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*16467b97STreehugger Robot#
31*16467b97STreehugger Robot# end[licence]
32*16467b97STreehugger Robot
33*16467b97STreehugger Robotfrom .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE
34*16467b97STreehugger Robot
35*16467b97STreehugger Robot############################################################################
36*16467b97STreehugger Robot#
37*16467b97STreehugger Robot# basic token interface
38*16467b97STreehugger Robot#
39*16467b97STreehugger Robot############################################################################
40*16467b97STreehugger Robot
class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes common to every token (type, channel, text,
    index, line and column) and exposes them through properties so that
    subclasses can change how a value is produced.  The input stream
    accessors are left abstract.
    """

    # Optional mapping from token type to a readable name.  Stays None
    # until registerTokenNamesMap() has been called.
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        The mapping is stored by reference and an entry for EOF is added
        to it, so the object passed in by the caller is modified.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type                token type
        @param channel             channel the token is emitted on
        @param text                explicit token text, or None
        @param index               position of the token in the token stream
        @param line                line on which the token starts
        @param charPositionInLine  column at which the token starts
        @param input               stream the token was created from
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Store the caller's value; a hard-coded 0 here would discard the
        # line whenever a token is built from explicit arguments or copied
        # from another token.
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        """The text of this token."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value


    @property
    def type(self):
        """The type of this token."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type (method form of the `type` property)."""
        return self._type

    @property
    def typeName(self):
        """Readable name for the token type.

        Looks the type up in TOKEN_NAMES_MAP when a non-empty map has been
        registered ("INVALID_TOKEN_TYPE" if the type is not in the map);
        otherwise returns the type converted to a string.
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value


    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos


    @property
    def channel(self):
        """The channel this token belongs to."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value


    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value


    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError."""

        raise NotImplementedError
153*16467b97STreehugger Robot
154*16467b97STreehugger Robot
155*16467b97STreehugger Robot############################################################################
156*16467b97STreehugger Robot#
157*16467b97STreehugger Robot# token implementations
158*16467b97STreehugger Robot#
159*16467b97STreehugger Robot# Token
160*16467b97STreehugger Robot# +- CommonToken
161*16467b97STreehugger Robot# \- ClassicToken
162*16467b97STreehugger Robot#
163*16467b97STreehugger Robot############################################################################
164*16467b97STreehugger Robot
class CommonToken(Token):
    """@brief Basic token implementation.

    Rather than copying its text out of the input stream when it is
    created, this token remembers start/stop positions in the stream and
    fetches the text on demand, which avoids unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Build a token from explicit values or from another token.

        When oldToken is given, its attributes are copied and the type,
        channel, text and input arguments are ignored.  start/stop are
        taken from oldToken only if it is a CommonToken itself; otherwise
        the start/stop arguments are used.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, input=input)

            # Explicit text override.  When this is not None the text
            # property returns it; start/stop are unaffected by it.
            self._text = text

            # Position in the input buffer of the token's first char.
            self.start = start

            # Position in the input buffer of the token's last char
            # (inclusive -- *not* the index after it).
            self.stop = stop
            return

        super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                         oldToken.index, oldToken.line,
                         oldToken.charPositionInLine, oldToken.input)

        # Only another CommonToken carries buffer positions to copy.
        copyBounds = isinstance(oldToken, CommonToken)
        self.start = oldToken.start if copyBounds else start
        self.stop = oldToken.stop if copyBounds else stop


    @property
    def text(self):
        """Text of the token.

        An explicit override wins (even the empty string); otherwise the
        text is read from the input stream, or None is returned when
        there is no input.  Positions outside the stream give '<EOF>'.
        """
        override = self._text
        if override is not None:
            return override

        stream = self.input
        if not stream:
            return None

        inBounds = (self.start < stream.size()
                    and self.stop < stream.size())
        if inBounds:
            return stream.substring(self.start, self.stop)
        return '<EOF>'

    @text.setter
    def text(self, value):
        """
        Replace the text of this token.  The text property then returns
        this value instead of reading from the buffer.  The start/stop
        indexes stay as they are; the token simply carries its own string
        from now on.
        """
        self._text = value


    def getInputStream(self):
        """Return the stream this token was created from."""
        return self.input

    def setInputStream(self, input):
        """Set the stream this token was created from."""
        self.input = input


    def __str__(self):
        """Return a debugging representation of the token."""
        if self.type == EOF:
            return "<EOF>"

        # Only channels above 0 are mentioned in the output.
        channelStr = (",channel=" + str(self.channel)
                      if self.channel > 0 else "")

        shown = self.text
        if not shown:
            shown = "<no text>"
        else:
            # Each control character becomes two backslashes plus a letter.
            for raw, escaped in (("\n", r"\\n"),
                                 ("\r", r"\\r"),
                                 ("\t", r"\\t")):
                shown = shown.replace(raw, escaped)

        template = ("[@{index},{start}:{stop}={txt!r},"
                    "<{typeName}>{channelStr},"
                    "{line}:{column}]")
        return template.format(index=self.index,
                               start=self.start,
                               stop=self.stop,
                               txt=shown,
                               typeName=self.typeName,
                               channelStr=channelStr,
                               line=self.line,
                               column=self.charPositionInLine)
257*16467b97STreehugger Robot
258*16467b97STreehugger Robot
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object in the ANTLR 2.x manner: it owns an actual string.
    Imaginary tree nodes with payload objects need such a token, because
    the tree node points at a token that must carry its own text.
    CommonToken works with indexes into a char stream and therefore
    cannot be used to introduce new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Build a token from explicit values or from another token.

        When oldToken is given, its type, channel, text, line and column
        are copied and the other arguments are ignored.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, text=text,
                             index=None, line=None, charPositionInLine=None)
            return

        super().__init__(type=oldToken.type, channel=oldToken.channel,
                         text=oldToken.text, line=oldToken.line,
                         charPositionInLine=oldToken.charPositionInLine)


    def getInputStream(self):
        """A classic token has no input stream; always returns None."""
        return None

    def setInputStream(self, input):
        """Accepted for interface compatibility; the argument is ignored."""
        pass


    def toString(self):
        """Return a debugging representation of the token."""
        # Only channels above 0 are mentioned in the output.
        channelStr = (",channel=" + str(self.channel)
                      if self.channel > 0 else "")

        shown = self.text or "<no text>"

        template = ("[@{index!r},{txt!r},<{type!r}>{channelStr},"
                    "{line!r}:{column!r}]")
        return template.format(index=self.index,
                               txt=shown,
                               type=self.type,
                               channelStr=channelStr,
                               line=self.line,
                               column=self.charPositionInLine)

    __str__ = toString
    __repr__ = toString
304*16467b97STreehugger Robot
305*16467b97STreehugger Robot
# Shared token instance whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(INVALID_TOKEN_TYPE)
311