xref: /aosp_15_r20/external/antlr/runtime/Python3/antlr3/streams.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot"""ANTLR3 runtime package"""
2*16467b97STreehugger Robot
3*16467b97STreehugger Robot# begin[licence]
4*16467b97STreehugger Robot#
5*16467b97STreehugger Robot# [The "BSD licence"]
6*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr
7*16467b97STreehugger Robot# All rights reserved.
8*16467b97STreehugger Robot#
9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
11*16467b97STreehugger Robot# are met:
12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
13*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
15*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
16*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
18*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
19*16467b97STreehugger Robot#
20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*16467b97STreehugger Robot#
31*16467b97STreehugger Robot# end[licence]
32*16467b97STreehugger Robot
33*16467b97STreehugger Robotfrom io import StringIO
34*16467b97STreehugger Robot
35*16467b97STreehugger Robotfrom .constants import DEFAULT_CHANNEL, EOF
36*16467b97STreehugger Robotfrom .tokens import Token
37*16467b97STreehugger Robot
38*16467b97STreehugger Robot
39*16467b97STreehugger Robot############################################################################
40*16467b97STreehugger Robot#
41*16467b97STreehugger Robot# basic interfaces
42*16467b97STreehugger Robot#   IntStream
43*16467b97STreehugger Robot#    +- CharStream
44*16467b97STreehugger Robot#    \- TokenStream
45*16467b97STreehugger Robot#
# subclasses must implement all methods
47*16467b97STreehugger Robot#
48*16467b97STreehugger Robot############################################################################
49*16467b97STreehugger Robot
class IntStream:
    """
    @brief Base interface for streams of integer values.

    The minimal stream abstraction: a sequence of ints, used whenever only
    the char or token *type* sequence matters (such as interpretation).
    Every method here raises NotImplementedError; concrete streams must
    override all of them.
    """

    def consume(self):
        """Advance the input cursor by one symbol."""

        raise NotImplementedError()


    def LA(self, i):
        """
        Return the int found i symbols ahead of the current input pointer,
        where i=1 is the next int.

        Negative values of i look backwards: LA(-1) is the previous symbol
        (the one just matched).  A lookback that reaches before the first
        symbol should yield -1, the invalid char / EOF value.
        """

        raise NotImplementedError()


    def mark(self):
        """
        Ask the stream to start buffering, if it is not doing so already,
        and return a marker for the current position.

        The marker may be index() or any other value, as long as handing
        it to rewind() brings the stream back to the same spot;
        rewind(mark()) should leave the input cursor where it was.  The
        Lexer tracks line/col info in addition to the input index, so its
        markers are not pure input indexes.  Same for tree node streams.
        """

        raise NotImplementedError()


    def index(self):
        """
        Return the current input symbol index, 0..n, where n means the
        last symbol has been read.  This is the index of the symbol about
        to be read, not of the most recently read one.
        """

        raise NotImplementedError()


    def rewind(self, marker=None):
        """
        Put the stream back into the state it was in when marker was
        created, so that the next call to index() returns that position.

        The marker is usually index(), but need not be; it only identifies
        a stream state.  Rewinding is essentially release() followed by
        seek().  Markers created after the given one must be unrolled like
        a stack.

        If marker is None:
        Rewind to the input position of the last marker.  This is
        currently used only after a cyclic DFA, just before starting a
        sem/syn predicate, to get the input position back to the start of
        the decision.  The marker should not be "popped" off the state in
        this case: mark(i) and rewind(i) should still balance.  It is like
        seek(last marker's input position).
        """

        raise NotImplementedError()


    def release(self, marker=None):
        """
        Drop the bookkeeping kept for a marker that is no longer needed,
        for instance when committing to a backtrack.

        This behaves like rewind() except that no backward seek is done.
        Resources of all markers back to the given one must be thrown
        away: when nested 5 levels of mark() deep, release(2) has to
        release the resources for depths 2..5.
        """

        raise NotImplementedError()


    def seek(self, index):
        """
        Move the input cursor to the position given by index.

        Normally used to seek ahead in the input.  No buffering is needed
        for that, unless the stream is known to be asked to seek backwards
        as well, such as when backtracking.

        Unlike rewind(), seek() works in both directions and its argument
        is strictly an input cursor (index).

        For char streams, seeking forward must update stream state such as
        the line number.  Backward seeks presumably happen as part of the
        mark/rewind mechanism, which restores that state itself, so this
        method need not update state when seeking backwards.

        Currently this is only used for efficient backtracking with
        memoization; in the future it may be used for incremental parsing.

        The index is 0..n-1.  After a seek to position i, LA(1) returns
        the ith symbol; seeking to 0 makes LA(1) return the first element
        of the stream.
        """

        raise NotImplementedError()


    def size(self):
        """
        Return the number of symbols in the stream, including a single
        EOF.

        Probably only meaningful for streams that buffer everything, but
        may be useful to display the entire stream or for testing.
        """

        raise NotImplementedError()


    def getSourceName(self):
        """
        Tell where the symbols are coming from.

        Implementations normally pass the buck all the way to the lexer,
        which can ask its input stream for the file name or whatever.
        """

        raise NotImplementedError()
180*16467b97STreehugger Robot
class CharStream(IntStream):
    """
    @brief A source of characters for an ANTLR lexer.

    Abstract: adds line/column bookkeeping on top of IntStream and
    declares substring() and LT(), which subclasses must implement.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    # Value used to signal end of input.
    EOF = -1

    def __init__(self):
        # The index of the character relative to the beginning of the
        # line 0..n-1
        self._charPositionInLine = 0

        # line number 1..n within the input
        self._line = 1


    def substring(self, start, stop):
        """
        Return the text between start and stop.

        Infinite streams do not need this; it is primarily a convenience
        for action code.  Just make sure actions do not use it on streams
        that do not support it.
        """

        raise NotImplementedError()


    def LT(self, i):
        """
        Get the ith character of lookahead; usually the same as LA(i).

        Used for labels in the generated lexer code.  Returning a char
        would be nicer type-wise, but it is probably better to be 32-bit
        clean and consistent with LA.
        """

        raise NotImplementedError()


    @property
    def line(self):
        """Current line number; ANTLR tracks it automatically."""
        return self._line

    @line.setter
    def line(self, value):
        """
        The stream can rewind, so the line number must be resettable.
        """
        self._line = value


    @property
    def charPositionInLine(self):
        """
        Index of the character relative to the beginning of the line,
        0..n-1.
        """
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos
248*16467b97STreehugger Robot
class TokenStream(IntStream):
    """

    @brief A stream of tokens accessing tokens from a TokenSource

    Abstract: every method raises NotImplementedError and must be
    implemented by a subclass.

    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    def LT(self, k):
        """
        Get the Token k positions ahead of the current input pointer,
        where k=1 is the next Token.

        k<0 addresses tokens in the past: -1 is the previous token and -2
        is two tokens ago.  LT(0) is undefined.  For k>=n, return
        Token.EOFToken.  Return None for LT(0) and for any k that results
        in a negative absolute address.
        """

        raise NotImplementedError()


    def range(self):
        """
        How far ahead has the stream been asked to look?  The result is a
        valid index from 0..n-1.
        """

        raise NotImplementedError()


    def get(self, i):
        """
        Get the token at absolute index i; 0..n-1.

        This is really only needed for profiling, debugging and token
        stream rewriting.  A stream that does not buffer its tokens has no
        sensible implementation of it, and naturally cannot be used with
        the rewrite stream feature.  DebugTokenStream could probably be
        altered not to use this method, removing the dependency.
        """

        raise NotImplementedError()


    def getTokenSource(self):
        """
        Return the object this stream pulls its Token objects from (the
        object itself, not its name).
        """

        raise NotImplementedError()


    def toString(self, start=None, stop=None):
        """
        Return the text of all tokens from start to stop, inclusive.

        A stream that does not buffer all tokens can just return "" or
        None; users should of course not access $ruleLabel.text in an
        action in that case.

        Because the user is not required to use tokens that store their
        index, two token objects themselves may be given to indicate the
        start/end location.  Most often this will just delegate to the
        index based toString(int,int).  This is also parallel with
        TreeNodeStream.toString(Object,Object).
        """

        raise NotImplementedError()
320*16467b97STreehugger Robot
321*16467b97STreehugger Robot############################################################################
322*16467b97STreehugger Robot#
323*16467b97STreehugger Robot# character streams for use in lexers
324*16467b97STreehugger Robot#   CharStream
325*16467b97STreehugger Robot#   \- ANTLRStringStream
326*16467b97STreehugger Robot#
327*16467b97STreehugger Robot############################################################################
328*16467b97STreehugger Robot
329*16467b97STreehugger Robot
class ANTLRStringStream(CharStream):
    """
    @brief CharStream that pulls data from a unicode string.

    The whole input is kept in memory twice: as the string itself
    (strdata, used by LT() and substring()) and as a list of code points
    (data, used by LA() and consume()), so that no conversion is needed
    per call.  Every method call counts in the lexer.

    """


    def __init__(self, data):
        """
        @param data This should be a unicode string holding the data you want
        to parse.  Any other object is converted with str() first; decode
        byte strings yourself before passing them in.
        """

        super().__init__()

        # The data being scanned, as text and as a list of code points
        self.strdata = str(data)
        self.data = [ord(c) for c in self.strdata]

        # How many characters are actually in the buffer.  Measured on the
        # converted buffer, not on the raw argument, so that n always
        # agrees with data/strdata even when data was not a str.
        self.n = len(self.data)

        # 0..n-1 index into string of next char
        self.p = 0

        # A list of (p, line, charPositionInLine) tuples that tracks the
        # stream state values that can change as you move through the
        # input stream.  Indexed from 0..markDepth-1.
        self._markers = [ ]
        self.lastMarker = None
        self.markDepth = 0

        # What is name or source of this char stream?
        self.name = None


    def reset(self):
        """
        Reset the stream so that it's in the same state it was
        when the object was created *except* the data array is not
        touched.
        """

        self.p = 0
        self._line = 1
        self.charPositionInLine = 0
        self._markers = [ ]
        self.lastMarker = None
        self.markDepth = 0


    def consume(self):
        """
        Advance by one character, keeping line and charPositionInLine up
        to date.  Does nothing once the end of the input is reached.
        """

        if self.p < self.n:
            if self.data[self.p] == 10: # ord('\n')
                self._line += 1
                self.charPositionInLine = 0
            else:
                self.charPositionInLine += 1

            self.p += 1

        # else we reached EOF
        # just do nothing


    def LA(self, i):
        """
        Return the code point i characters ahead (i=1 is the next one,
        i=-1 the previous one).  LA(0) is undefined and yields 0; any
        position outside the buffer, before the start as well as past the
        end, yields EOF.
        """

        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]

        pos = self.p + i - 1
        # The lower bound matters: a negative pos would otherwise be taken
        # by Python as an index counted from the end of the buffer.
        if 0 <= pos < self.n:
            return self.data[pos]

        return EOF


    def LT(self, i):
        """
        Same lookahead as LA(), but the character is returned as a
        one-character string.  LT(0) yields 0 and positions outside the
        buffer yield EOF, exactly as in LA().
        """

        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LT(-1) to use offset i=0; then strdata[p+0-1]

        pos = self.p + i - 1
        # Guard the lower bound too, see LA().
        if 0 <= pos < self.n:
            return self.strdata[pos]

        return EOF


    def index(self):
        """
        Return the current input symbol index 0..n where n indicates the
        last symbol has been read.  The index is the index of char to
        be returned from LA(1).
        """

        return self.p


    def size(self):
        """Return the number of characters in the buffer."""

        return self.n


    def mark(self):
        """
        Record the current (p, line, charPositionInLine) and return its
        marker, which is the new mark depth (1 for the first mark).

        Slots left behind by released markers are overwritten rather than
        appended to.
        """

        state = (self.p, self.line, self.charPositionInLine)
        if self.markDepth < len(self._markers):
            self._markers[self.markDepth] = state
        else:
            self._markers.append(state)
        self.markDepth += 1

        self.lastMarker = self.markDepth

        return self.lastMarker


    def rewind(self, marker=None):
        """
        Restore the position, line and column recorded by mark() for the
        given marker and then release that marker.  Without an argument
        the most recently created marker (lastMarker) is used.
        """

        if marker is None:
            marker = self.lastMarker

        p, line, charPositionInLine = self._markers[marker - 1]

        # seek() only fixes up line/column when moving forward, so the
        # recorded values are put back explicitly afterwards.
        self.seek(p)
        self._line = line
        self.charPositionInLine = charPositionInLine
        self.release(marker)


    def release(self, marker=None):
        """
        Unwind the mark depth to just below the given marker, which makes
        the slots of that marker and of all newer ones reusable.  Without
        an argument the most recently created marker (lastMarker) is used.
        """

        if marker is None:
            marker = self.lastMarker

        self.markDepth = marker - 1


    def seek(self, index):
        """
        consume() ahead until p==index; can't just set p=index as we must
        update line and charPositionInLine.

        Seeking backwards (or to the current position) just moves p and
        leaves line/charPositionInLine alone.  A forward seek beyond the
        end of the input stops at the end.
        """

        if index <= self.p:
            self.p = index # just jump; don't update stream state (line, ...)
            return

        # consume() is a no-op at EOF, so an unclamped target beyond n
        # could never be reached and the loop would not terminate.
        target = min(index, self.n)

        # seek forward, consume until p hits the target
        while self.p < target:
            self.consume()


    def substring(self, start, stop):
        """Return the text from index start to index stop, inclusive."""

        return self.strdata[start:stop + 1]


    def getSourceName(self):
        """Return the name attribute (None unless the user assigned one)."""

        return self.name
494*16467b97STreehugger Robot
class ANTLRFileStream(ANTLRStringStream):
    """
    @brief CharStream that opens a file to read the data.

    This is a char buffer stream that is loaded from a file
    all at once when you construct the object.
    """

    def __init__(self, fileName, encoding=None):
        """
        @param fileName The path to the file to be opened. The file will be
           opened with mode 'r'.

        @param encoding Name of the text encoding used to decode the file.
           The default, None, is passed straight to open(), which then
           picks its own default encoding, as it did before this parameter
           existed.

        """

        self._fileName = fileName

        # The file is read completely and closed again before the lexer
        # ever sees the data.
        with open(fileName, 'r', encoding=encoding) as fp:
            super().__init__(fp.read())


    @property
    def fileName(self):
        """The path this stream was loaded from, as given to __init__."""
        return self._fileName
520*16467b97STreehugger Robot
class ANTLRInputStream(ANTLRStringStream):
    """
    @brief CharStream that reads data from a file-like object.

    The complete content of the file-like object is pulled into the char
    buffer at construction time.

    All input is consumed from the file, but it is not closed.
    """

    def __init__(self, file):
        """
        @param file A file-like object holding your input. Only the read()
           method must be implemented.

        """

        # One read() call fetches everything; closing stays with the caller.
        super().__init__(file.read())
542*16467b97STreehugger Robot
# The ANTLR prefix presumably exists only to avoid a name clash in the Java
# runtime.  The plain names below are aliases bound to the very same class
# objects and are the preferred spelling in Python.
StringStream = ANTLRStringStream
FileStream = ANTLRFileStream
InputStream = ANTLRInputStream
549*16467b97STreehugger Robot
550*16467b97STreehugger Robot
551*16467b97STreehugger Robot############################################################################
552*16467b97STreehugger Robot#
553*16467b97STreehugger Robot# Token streams
554*16467b97STreehugger Robot#   TokenStream
555*16467b97STreehugger Robot#   +- CommonTokenStream
556*16467b97STreehugger Robot#   \- TokenRewriteStream
557*16467b97STreehugger Robot#
558*16467b97STreehugger Robot############################################################################
559*16467b97STreehugger Robot
560*16467b97STreehugger Robot
class CommonTokenStream(TokenStream):
    """
    @brief The most common stream of tokens

    Every token delivered by the token source is buffered in a list, and
    the parser only gets to see the tokens that sit on one particular
    channel.  That channel is picked when the stream is set up; the parser
    cannot change the filter channel number during the parse.
    """

    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
        """
        @param tokenSource A TokenSource instance (usually a Lexer) to pull
            the tokens from.

        @param channel The one channel the parser listens to; tokens on any
            other channel are skipped (this is how whitespace is ignored).

        """

        super().__init__()

        self.tokenSource = tokenSource

        # Buffer of the tokens pulled from the source, so that chunks of
        # the input can be reproduced later on.
        self.tokens = []

        # token type -> channel; consulted while filling the buffer
        self.channelOverrideMap = {}

        # token types that are dropped while filling the buffer
        self.discardSet = set()

        # The channel the parser listens to; anything else is skipped.
        self.channel = channel

        # When False (the default) off-channel tokens are kept in the
        # buffer; when True they are dropped while filling it.
        self.discardOffChannelTokens = False

        # Index into self.tokens of the current token (the next one to be
        # consumed).  -1 means the buffer has not been filled yet.
        self.p = -1

        # Position recorded by the most recent mark() call
        self.lastMarker = None

        # Largest buffer index LT() has looked at so far
        self._range = -1


    def makeEOFToken(self):
        """Have the token source create an EOF token and return it."""

        return self.tokenSource.makeEOFToken()


    def setTokenSource(self, tokenSource):
        """
        Reset this token stream by setting its token source.

        The token buffer is emptied, the stream goes back to its
        "not filled yet" state and the channel returns to DEFAULT_CHANNEL.
        """

        self.tokenSource = tokenSource
        self.tokens = []
        self.p = -1
        self.channel = DEFAULT_CHANNEL


    def reset(self):
        """Move back to buffer index 0 and forget the last marker."""

        self.p = 0
        self.lastMarker = None


    def fillBuffer(self):
        """
        Pull tokens from the token source until EOF (or a false-ish token)
        shows up and store the ones that survive filtering in self.tokens.

        This happens lazily, on the first request that needs the buffer,
        so that token type / channel overrides can still be registered
        before any token is read.
        """

        source = self.tokenSource
        nextIndex = 0

        t = source.nextToken()
        while t and t.type != EOF:
            # Dropped either because its type was registered for
            # discarding, or because it is off-channel and off-channel
            # tokens are not being tracked.
            discard = bool(
                (self.discardSet and t.type in self.discardSet)
                or (self.discardOffChannelTokens
                    and t.channel != self.channel)
                )

            # A channel override only keeps the token when it moves the
            # token onto the channel this stream listens to; any other
            # override drops the token.
            if t.type in self.channelOverrideMap:
                wantedChannel = self.channelOverrideMap[t.type]

                if wantedChannel == self.channel:
                    t.channel = wantedChannel
                else:
                    discard = True

            if not discard:
                # surviving tokens are numbered consecutively from 0
                t.index = nextIndex
                self.tokens.append(t)
                nextIndex += 1

            t = source.nextToken()

        # leave p pointing at first token on channel
        self.p = 0
        self.p = self.skipOffTokenChannels(0)


    def consume(self):
        """
        Advance the input pointer by at least one token, so that LT(1)
        refers to the next input symbol afterwards.

        Tokens that are not on the channel the parser is listening to are
        walked past as well.  Nothing happens once the pointer has reached
        the end of the buffer.
        """

        if self.p >= len(self.tokens):
            return

        self.p += 1
        self.p = self.skipOffTokenChannels(self.p) # leave p on valid token


    def skipOffTokenChannels(self, i):
        """
        Starting at index i and moving forward, return the index of the
        first on-channel token.  If there is none, the buffer length is
        returned; an i at or beyond the end is returned unchanged.
        """

        end = len(self.tokens)
        pos = i
        while pos < end and self.tokens[pos].channel != self.channel:
            pos += 1

        return pos


    def skipOffTokenChannelsReverse(self, i):
        """
        Starting at index i and moving backwards, return the index of the
        first on-channel token, or -1 if the front of the buffer is reached
        without finding one.
        """

        pos = i
        while pos >= 0 and self.tokens[pos].channel != self.channel:
            pos -= 1

        return pos


    def setTokenTypeChannel(self, ttype, channel):
        """
        A simple filter mechanism: record that all tokens of type ttype
        are to be forced onto the given channel.  For example, when
        interpreting, actions cannot be executed, so WS and NEWLINE have
        to be forced onto a different, ignored channel.

        The override is applied while the buffer is being filled.
        """

        self.channelOverrideMap[ttype] = channel


    def discardTokenType(self, ttype):
        """Register ttype as a token type to drop while filling the buffer."""

        self.discardSet.add(ttype)


    def getTokens(self, start=None, stop=None, types=None):
        """
        Return the buffered tokens in the index range start..stop-1 whose
        type is contained in types.  Both on and off channel tokens are
        looked at.

        start defaults to (and is clamped to) 0, stop defaults to (and is
        clamped to) the buffer length.  types may be a single int, any
        container supporting "in", or None to accept every type.

        None is returned if start lies behind stop or nothing matched.
        """

        if self.p == -1:
            self.fillBuffer()

        total = len(self.tokens)
        if stop is None or stop > total:
            stop = total

        if start is None or start < 0:
            start = 0

        if start > stop:
            return None

        if isinstance(types, int):
            # called with a single type, wrap into set
            types = {types}

        window = self.tokens[start:stop]
        if types is None:
            matches = window
        else:
            matches = [tok for tok in window if tok.type in types]

        return matches or None


    def LT(self, k):
        """
        Return the k-th on-channel token counted from the current
        position, where k=1 is the first symbol of lookahead.

        k == 0 yields None and a negative k looks backwards via LB(-k).
        When the lookahead runs past the end of the buffer, an EOF token
        obtained from makeEOFToken() is returned.
        """

        if self.p == -1:
            self.fillBuffer()

        if k == 0:
            return None

        if k < 0:
            return self.LB(-k)

        # hop forward over k-1 further on-channel tokens
        pos = self.p
        for _ in range(k - 1):
            pos = self.skipOffTokenChannels(pos + 1)

        self._range = max(self._range, pos)

        if pos >= len(self.tokens):
            return self.makeEOFToken()

        return self.tokens[pos]


    def LB(self, k):
        """
        Look backwards k on-channel tokens.

        None is returned for k == 0 and whenever the search would run off
        the front of the buffer.
        """

        if self.p == -1:
            self.fillBuffer()

        if k == 0 or self.p - k < 0:
            return None

        # hop backwards over k on-channel tokens
        pos = self.p
        for _ in range(k):
            pos = self.skipOffTokenChannelsReverse(pos - 1)

        if pos < 0:
            return None

        return self.tokens[pos]


    def get(self, i):
        """
        Return the token at absolute buffer index i.  Channels play no
        role here; all buffered tokens count, not just on-channel ones.
        """

        return self.tokens[i]


    def slice(self, start, stop):
        """
        Return the buffered tokens from index start up to and including
        index stop, or None if either index is negative.
        """

        if self.p == -1:
            self.fillBuffer()

        if start < 0 or stop < 0:
            return None

        return self.tokens[start:stop + 1]


    def LA(self, i):
        """Return the token type of LT(i)."""

        return self.LT(i).type


    def mark(self):
        """Remember the current position as the last marker and return it."""

        marker = self.index()
        self.lastMarker = marker
        return marker


    def release(self, marker=None):
        """Does nothing; a marker holds no resources to release."""

        pass


    def size(self):
        """Return the number of buffered tokens."""

        return len(self.tokens)


    def range(self):
        """Return the largest buffer index LT() has looked at (-1 if none)."""

        return self._range


    def index(self):
        """Return the current position in the token buffer."""

        return self.p


    def rewind(self, marker=None):
        """
        Seek back to marker; without a marker, the position recorded by
        the most recent mark() call is used.
        """

        target = self.lastMarker if marker is None else marker
        self.seek(target)


    def seek(self, index):
        """Set the current position to index."""

        self.p = index


    def getTokenSource(self):
        """Return the token source this stream pulls its tokens from."""

        return self.tokenSource


    def getSourceName(self):
        """Return the source name as reported by the token source."""

        return self.tokenSource.getSourceName()


    def toString(self, start=None, stop=None):
        """
        Returns a string of all tokens between start and stop (inclusive).

        start and stop may each be a buffer index, an object with an index
        attribute (such as a token), or None.  A missing start means the
        first token, a missing stop means the last one; a stop beyond the
        end of the buffer is clamped to the last token.
        """

        if self.p == -1:
            self.fillBuffer()

        lastIndex = len(self.tokens) - 1

        if start is None:
            start = 0
        elif not isinstance(start, int):
            start = start.index

        if stop is None:
            stop = lastIndex
        elif not isinstance(stop, int):
            stop = stop.index

        stop = min(stop, lastIndex)

        return ''.join(tok.text for tok in self.tokens[start:stop + 1])
898*16467b97STreehugger Robot
899*16467b97STreehugger Robot
class RewriteOperation(object):
    """
    @brief Internal helper class.

    Base class of the rewrite instructions.  An instance records the
    stream it belongs to, a token buffer index and a piece of text.
    """

    def __init__(self, stream, index, text):
        self.stream = stream

        # Position of this operation within the list of rewrites; None
        # until a value is assigned from outside.
        self.instructionIndex = None

        # Token buffer index.
        self.index = index
        self.text = text

    def execute(self, buf):
        """Execute the rewrite operation by possibly adding to the buffer.
        Return the index of the next token to operate on.

        This base implementation writes nothing and returns its own index.
        """

        return self.index

    def toString(self):
        """Return '<ClassName@index:"text">' for this operation."""

        return '<{name}@{op.index}:"{op.text}">'.format(
            name=type(self).__name__, op=self)

    __str__ = toString
    __repr__ = toString
926*16467b97STreehugger Robot
927*16467b97STreehugger Robot
class InsertBeforeOp(RewriteOperation):
    """
    @brief Internal helper class.

    Rewrite instruction that emits its text in front of the token at its
    index.
    """

    def execute(self, buf):
        """
        Write the inserted text to buf, followed by the text of the token
        at this operation's index unless that token is EOF.  Return the
        index of the token after it.
        """

        buf.write(self.text)

        token = self.stream.tokens[self.index]
        if token.type != EOF:
            buf.write(token.text)

        return self.index + 1
936*16467b97STreehugger Robot
937*16467b97STreehugger Robot
class ReplaceOp(RewriteOperation):
    """
    @brief Internal helper class.

    Rewrite instruction covering the token index range index..lastIndex.
    Executing it writes the replacement text (if any) and continues after
    lastIndex; a text of None therefore acts as a pure deletion.

    I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
    instructions.
    """

    def __init__(self, stream, first, last, text):
        """
        @param stream The token stream this operation belongs to.

        @param first Token buffer index of the first token of the range.

        @param last Token buffer index of the last token of the range.

        @param text Replacement text, or None to just drop the range.

        """

        super().__init__(stream, first, text)
        self.lastIndex = last


    def execute(self, buf):
        """
        Write the replacement text to buf unless it is None.  Return the
        index of the first token after the replaced range.
        """

        if self.text is not None:
            buf.write(self.text)

        return self.lastIndex + 1


    def toString(self):
        """
        Return '<DeleteOp@first..last>' when there is no replacement text
        and '<ReplaceOp@first..last:"text">' otherwise.
        """

        if self.text is None:
            # The attribute is spelled lastIndex; the former "lastindex"
            # field name made str()/repr() of a delete operation raise
            # AttributeError.
            return '<DeleteOp@{0.index}..{0.lastIndex}>'.format(self)

        return '<ReplaceOp@{0.index}..{0.lastIndex}:"{0.text}">'.format(self)

    __str__ = toString
    __repr__ = toString
966*16467b97STreehugger Robot
967*16467b97STreehugger Robot
968*16467b97STreehugger Robotclass TokenRewriteStream(CommonTokenStream):
969*16467b97STreehugger Robot    """@brief CommonTokenStream that can be modified.
970*16467b97STreehugger Robot
971*16467b97STreehugger Robot    Useful for dumping out the input stream after doing some
972*16467b97STreehugger Robot    augmentation or other manipulations.
973*16467b97STreehugger Robot
974*16467b97STreehugger Robot    You can insert stuff, replace, and delete chunks.  Note that the
975*16467b97STreehugger Robot    operations are done lazily--only if you convert the buffer to a
976*16467b97STreehugger Robot    String.  This is very efficient because you are not moving data around
977*16467b97STreehugger Robot    all the time.  As the buffer of tokens is converted to strings, the
978*16467b97STreehugger Robot    toString() method(s) check to see if there is an operation at the
979*16467b97STreehugger Robot    current index.  If so, the operation is done and then normal String
980*16467b97STreehugger Robot    rendering continues on the buffer.  This is like having multiple Turing
981*16467b97STreehugger Robot    machine instruction streams (programs) operating on a single input tape. :)
982*16467b97STreehugger Robot
983*16467b97STreehugger Robot    Since the operations are done lazily at toString-time, operations do not
984*16467b97STreehugger Robot    screw up the token index values.  That is, an insert operation at token
985*16467b97STreehugger Robot    index i does not change the index values for tokens i+1..n-1.
986*16467b97STreehugger Robot
987*16467b97STreehugger Robot    Because operations never actually alter the buffer, you may always get
988*16467b97STreehugger Robot    the original token stream back without undoing anything.  Since
989*16467b97STreehugger Robot    the instructions are queued up, you can easily simulate transactions and
990*16467b97STreehugger Robot    roll back any changes if there is an error just by removing instructions.
991*16467b97STreehugger Robot    For example,
992*16467b97STreehugger Robot
993*16467b97STreehugger Robot     CharStream input = new ANTLRFileStream("input");
994*16467b97STreehugger Robot     TLexer lex = new TLexer(input);
995*16467b97STreehugger Robot     TokenRewriteStream tokens = new TokenRewriteStream(lex);
996*16467b97STreehugger Robot     T parser = new T(tokens);
997*16467b97STreehugger Robot     parser.startRule();
998*16467b97STreehugger Robot
999*16467b97STreehugger Robot     Then in the rules, you can execute
1000*16467b97STreehugger Robot        Token t,u;
1001*16467b97STreehugger Robot        ...
1002*16467b97STreehugger Robot        input.insertAfter(t, "text to put after t");}
1003*16467b97STreehugger Robot        input.insertAfter(u, "text after u");}
1004*16467b97STreehugger Robot        System.out.println(tokens.toString());
1005*16467b97STreehugger Robot
1006*16467b97STreehugger Robot    Actually, you have to cast the 'input' to a TokenRewriteStream. :(
1007*16467b97STreehugger Robot
1008*16467b97STreehugger Robot    You can also have multiple "instruction streams" and get multiple
1009*16467b97STreehugger Robot    rewrites from a single pass over the input.  Just name the instruction
1010*16467b97STreehugger Robot    streams and use that name again when printing the buffer.  This could be
1011*16467b97STreehugger Robot    useful for generating a C file and also its header file--all from the
1012*16467b97STreehugger Robot    same buffer:
1013*16467b97STreehugger Robot
1014*16467b97STreehugger Robot        tokens.insertAfter("pass1", t, "text to put after t");}
1015*16467b97STreehugger Robot        tokens.insertAfter("pass2", u, "text after u");}
1016*16467b97STreehugger Robot        System.out.println(tokens.toString("pass1"));
1017*16467b97STreehugger Robot        System.out.println(tokens.toString("pass2"));
1018*16467b97STreehugger Robot
1019*16467b97STreehugger Robot    If you don't use named rewrite streams, a "default" stream is used as
1020*16467b97STreehugger Robot    the first example shows.
1021*16467b97STreehugger Robot    """
1022*16467b97STreehugger Robot
1023*16467b97STreehugger Robot    DEFAULT_PROGRAM_NAME = "default"
1024*16467b97STreehugger Robot    MIN_TOKEN_INDEX = 0
1025*16467b97STreehugger Robot
1026*16467b97STreehugger Robot    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
1027*16467b97STreehugger Robot        super().__init__(tokenSource, channel)
1028*16467b97STreehugger Robot
1029*16467b97STreehugger Robot        # You may have multiple, named streams of rewrite operations.
1030*16467b97STreehugger Robot        # I'm calling these things "programs."
1031*16467b97STreehugger Robot        #  Maps String (name) -> rewrite (List)
1032*16467b97STreehugger Robot        self.programs = {}
1033*16467b97STreehugger Robot        self.programs[self.DEFAULT_PROGRAM_NAME] = []
1034*16467b97STreehugger Robot
1035*16467b97STreehugger Robot        # Map String (program name) -> Integer index
1036*16467b97STreehugger Robot        self.lastRewriteTokenIndexes = {}
1037*16467b97STreehugger Robot
1038*16467b97STreehugger Robot
1039*16467b97STreehugger Robot    def rollback(self, *args):
1040*16467b97STreehugger Robot        """
1041*16467b97STreehugger Robot        Rollback the instruction stream for a program so that
1042*16467b97STreehugger Robot        the indicated instruction (via instructionIndex) is no
1043*16467b97STreehugger Robot        longer in the stream.  UNTESTED!
1044*16467b97STreehugger Robot        """
1045*16467b97STreehugger Robot
1046*16467b97STreehugger Robot        if len(args) == 2:
1047*16467b97STreehugger Robot            programName = args[0]
1048*16467b97STreehugger Robot            instructionIndex = args[1]
1049*16467b97STreehugger Robot        elif len(args) == 1:
1050*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1051*16467b97STreehugger Robot            instructionIndex = args[0]
1052*16467b97STreehugger Robot        else:
1053*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1054*16467b97STreehugger Robot
1055*16467b97STreehugger Robot        p = self.programs.get(programName)
1056*16467b97STreehugger Robot        if p:
1057*16467b97STreehugger Robot            self.programs[programName] = (
1058*16467b97STreehugger Robot                p[self.MIN_TOKEN_INDEX:instructionIndex])
1059*16467b97STreehugger Robot
1060*16467b97STreehugger Robot
1061*16467b97STreehugger Robot    def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME):
1062*16467b97STreehugger Robot        """Reset the program so that no instructions exist"""
1063*16467b97STreehugger Robot
1064*16467b97STreehugger Robot        self.rollback(programName, self.MIN_TOKEN_INDEX)
1065*16467b97STreehugger Robot
1066*16467b97STreehugger Robot
1067*16467b97STreehugger Robot    def insertAfter(self, *args):
1068*16467b97STreehugger Robot        if len(args) == 2:
1069*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1070*16467b97STreehugger Robot            index = args[0]
1071*16467b97STreehugger Robot            text = args[1]
1072*16467b97STreehugger Robot
1073*16467b97STreehugger Robot        elif len(args) == 3:
1074*16467b97STreehugger Robot            programName = args[0]
1075*16467b97STreehugger Robot            index = args[1]
1076*16467b97STreehugger Robot            text = args[2]
1077*16467b97STreehugger Robot
1078*16467b97STreehugger Robot        else:
1079*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1080*16467b97STreehugger Robot
1081*16467b97STreehugger Robot        if isinstance(index, Token):
1082*16467b97STreehugger Robot            # index is a Token, grap the stream index from it
1083*16467b97STreehugger Robot            index = index.index
1084*16467b97STreehugger Robot
1085*16467b97STreehugger Robot        # to insert after, just insert before next index (even if past end)
1086*16467b97STreehugger Robot        self.insertBefore(programName, index + 1, text)
1087*16467b97STreehugger Robot
1088*16467b97STreehugger Robot
1089*16467b97STreehugger Robot    def insertBefore(self, *args):
1090*16467b97STreehugger Robot        if len(args) == 2:
1091*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1092*16467b97STreehugger Robot            index = args[0]
1093*16467b97STreehugger Robot            text = args[1]
1094*16467b97STreehugger Robot
1095*16467b97STreehugger Robot        elif len(args) == 3:
1096*16467b97STreehugger Robot            programName = args[0]
1097*16467b97STreehugger Robot            index = args[1]
1098*16467b97STreehugger Robot            text = args[2]
1099*16467b97STreehugger Robot
1100*16467b97STreehugger Robot        else:
1101*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1102*16467b97STreehugger Robot
1103*16467b97STreehugger Robot        if isinstance(index, Token):
1104*16467b97STreehugger Robot            # index is a Token, grab the stream index from it
1105*16467b97STreehugger Robot            index = index.index
1106*16467b97STreehugger Robot
1107*16467b97STreehugger Robot        op = InsertBeforeOp(self, index, text)
1108*16467b97STreehugger Robot        rewrites = self.getProgram(programName)
1109*16467b97STreehugger Robot        op.instructionIndex = len(rewrites)
1110*16467b97STreehugger Robot        rewrites.append(op)
1111*16467b97STreehugger Robot
1112*16467b97STreehugger Robot
1113*16467b97STreehugger Robot    def replace(self, *args):
1114*16467b97STreehugger Robot        if len(args) == 2:
1115*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1116*16467b97STreehugger Robot            first = args[0]
1117*16467b97STreehugger Robot            last = args[0]
1118*16467b97STreehugger Robot            text = args[1]
1119*16467b97STreehugger Robot
1120*16467b97STreehugger Robot        elif len(args) == 3:
1121*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1122*16467b97STreehugger Robot            first = args[0]
1123*16467b97STreehugger Robot            last = args[1]
1124*16467b97STreehugger Robot            text = args[2]
1125*16467b97STreehugger Robot
1126*16467b97STreehugger Robot        elif len(args) == 4:
1127*16467b97STreehugger Robot            programName = args[0]
1128*16467b97STreehugger Robot            first = args[1]
1129*16467b97STreehugger Robot            last = args[2]
1130*16467b97STreehugger Robot            text = args[3]
1131*16467b97STreehugger Robot
1132*16467b97STreehugger Robot        else:
1133*16467b97STreehugger Robot            raise TypeError("Invalid arguments")
1134*16467b97STreehugger Robot
1135*16467b97STreehugger Robot        if isinstance(first, Token):
1136*16467b97STreehugger Robot            # first is a Token, grap the stream index from it
1137*16467b97STreehugger Robot            first = first.index
1138*16467b97STreehugger Robot
1139*16467b97STreehugger Robot        if isinstance(last, Token):
1140*16467b97STreehugger Robot            # last is a Token, grap the stream index from it
1141*16467b97STreehugger Robot            last = last.index
1142*16467b97STreehugger Robot
1143*16467b97STreehugger Robot        if first > last or first < 0 or last < 0 or last >= len(self.tokens):
1144*16467b97STreehugger Robot            raise ValueError(
1145*16467b97STreehugger Robot                "replace: range invalid: {}..{} (size={})"
1146*16467b97STreehugger Robot                .format(first, last, len(self.tokens)))
1147*16467b97STreehugger Robot
1148*16467b97STreehugger Robot        op = ReplaceOp(self, first, last, text)
1149*16467b97STreehugger Robot        rewrites = self.getProgram(programName)
1150*16467b97STreehugger Robot        op.instructionIndex = len(rewrites)
1151*16467b97STreehugger Robot        rewrites.append(op)
1152*16467b97STreehugger Robot
1153*16467b97STreehugger Robot
1154*16467b97STreehugger Robot    def delete(self, *args):
1155*16467b97STreehugger Robot        self.replace(*(list(args) + [None]))
1156*16467b97STreehugger Robot
1157*16467b97STreehugger Robot
1158*16467b97STreehugger Robot    def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME):
1159*16467b97STreehugger Robot        return self.lastRewriteTokenIndexes.get(programName, -1)
1160*16467b97STreehugger Robot
1161*16467b97STreehugger Robot
1162*16467b97STreehugger Robot    def setLastRewriteTokenIndex(self, programName, i):
1163*16467b97STreehugger Robot        self.lastRewriteTokenIndexes[programName] = i
1164*16467b97STreehugger Robot
1165*16467b97STreehugger Robot
1166*16467b97STreehugger Robot    def getProgram(self, name):
1167*16467b97STreehugger Robot        p = self.programs.get(name)
1168*16467b97STreehugger Robot        if not p:
1169*16467b97STreehugger Robot            p = self.initializeProgram(name)
1170*16467b97STreehugger Robot
1171*16467b97STreehugger Robot        return p
1172*16467b97STreehugger Robot
1173*16467b97STreehugger Robot
1174*16467b97STreehugger Robot    def initializeProgram(self, name):
1175*16467b97STreehugger Robot        p = []
1176*16467b97STreehugger Robot        self.programs[name] = p
1177*16467b97STreehugger Robot        return p
1178*16467b97STreehugger Robot
1179*16467b97STreehugger Robot
1180*16467b97STreehugger Robot    def toOriginalString(self, start=None, end=None):
1181*16467b97STreehugger Robot        if self.p == -1:
1182*16467b97STreehugger Robot            self.fillBuffer()
1183*16467b97STreehugger Robot
1184*16467b97STreehugger Robot        if start is None:
1185*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1186*16467b97STreehugger Robot        if end is None:
1187*16467b97STreehugger Robot            end = self.size() - 1
1188*16467b97STreehugger Robot
1189*16467b97STreehugger Robot        buf = StringIO()
1190*16467b97STreehugger Robot        i = start
1191*16467b97STreehugger Robot        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1192*16467b97STreehugger Robot            if self.get(i).type != EOF:
1193*16467b97STreehugger Robot                buf.write(self.get(i).text)
1194*16467b97STreehugger Robot            i += 1
1195*16467b97STreehugger Robot
1196*16467b97STreehugger Robot        return buf.getvalue()
1197*16467b97STreehugger Robot
1198*16467b97STreehugger Robot
1199*16467b97STreehugger Robot    def toString(self, *args):
1200*16467b97STreehugger Robot        if self.p == -1:
1201*16467b97STreehugger Robot            self.fillBuffer()
1202*16467b97STreehugger Robot
1203*16467b97STreehugger Robot        if len(args) == 0:
1204*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1205*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1206*16467b97STreehugger Robot            end = self.size() - 1
1207*16467b97STreehugger Robot
1208*16467b97STreehugger Robot        elif len(args) == 1:
1209*16467b97STreehugger Robot            programName = args[0]
1210*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1211*16467b97STreehugger Robot            end = self.size() - 1
1212*16467b97STreehugger Robot
1213*16467b97STreehugger Robot        elif len(args) == 2:
1214*16467b97STreehugger Robot            programName = self.DEFAULT_PROGRAM_NAME
1215*16467b97STreehugger Robot            start = args[0]
1216*16467b97STreehugger Robot            end = args[1]
1217*16467b97STreehugger Robot
1218*16467b97STreehugger Robot        if start is None:
1219*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1220*16467b97STreehugger Robot        elif not isinstance(start, int):
1221*16467b97STreehugger Robot            start = start.index
1222*16467b97STreehugger Robot
1223*16467b97STreehugger Robot        if end is None:
1224*16467b97STreehugger Robot            end = len(self.tokens) - 1
1225*16467b97STreehugger Robot        elif not isinstance(end, int):
1226*16467b97STreehugger Robot            end = end.index
1227*16467b97STreehugger Robot
1228*16467b97STreehugger Robot        # ensure start/end are in range
1229*16467b97STreehugger Robot        if end >= len(self.tokens):
1230*16467b97STreehugger Robot            end = len(self.tokens) - 1
1231*16467b97STreehugger Robot
1232*16467b97STreehugger Robot        if start < 0:
1233*16467b97STreehugger Robot            start = 0
1234*16467b97STreehugger Robot
1235*16467b97STreehugger Robot        rewrites = self.programs.get(programName)
1236*16467b97STreehugger Robot        if not rewrites:
1237*16467b97STreehugger Robot            # no instructions to execute
1238*16467b97STreehugger Robot            return self.toOriginalString(start, end)
1239*16467b97STreehugger Robot
1240*16467b97STreehugger Robot        buf = StringIO()
1241*16467b97STreehugger Robot
1242*16467b97STreehugger Robot        # First, optimize instruction stream
1243*16467b97STreehugger Robot        indexToOp = self.reduceToSingleOperationPerIndex(rewrites)
1244*16467b97STreehugger Robot
1245*16467b97STreehugger Robot        # Walk buffer, executing instructions and emitting tokens
1246*16467b97STreehugger Robot        i = start
1247*16467b97STreehugger Robot        while i <= end and i < len(self.tokens):
1248*16467b97STreehugger Robot            # remove so any left have index size-1
1249*16467b97STreehugger Robot            op = indexToOp.pop(i, None)
1250*16467b97STreehugger Robot
1251*16467b97STreehugger Robot            t = self.tokens[i]
1252*16467b97STreehugger Robot            if op is None:
1253*16467b97STreehugger Robot                # no operation at that index, just dump token
1254*16467b97STreehugger Robot                if t.type != EOF:
1255*16467b97STreehugger Robot                    buf.write(t.text)
1256*16467b97STreehugger Robot                i += 1 # move to next token
1257*16467b97STreehugger Robot
1258*16467b97STreehugger Robot            else:
1259*16467b97STreehugger Robot                i = op.execute(buf) # execute operation and skip
1260*16467b97STreehugger Robot
1261*16467b97STreehugger Robot        # include stuff after end if it's last index in buffer
1262*16467b97STreehugger Robot        # So, if they did an insertAfter(lastValidIndex, "foo"), include
1263*16467b97STreehugger Robot        # foo if end == lastValidIndex.
1264*16467b97STreehugger Robot        if end == len(self.tokens) - 1:
1265*16467b97STreehugger Robot            # Scan any remaining operations after last token
1266*16467b97STreehugger Robot            # should be included (they will be inserts).
1267*16467b97STreehugger Robot            for i, op in sorted(indexToOp.items()):
1268*16467b97STreehugger Robot                if op.index >= len(self.tokens) - 1:
1269*16467b97STreehugger Robot                    buf.write(op.text)
1270*16467b97STreehugger Robot
1271*16467b97STreehugger Robot        return buf.getvalue()
1272*16467b97STreehugger Robot
1273*16467b97STreehugger Robot    __str__ = toString
1274*16467b97STreehugger Robot
1275*16467b97STreehugger Robot
    def reduceToSingleOperationPerIndex(self, rewrites):
        """
        Collapse a rewrite program to at most one operation per token index.

        We need to combine operations and report invalid operations (like
        overlapping replaces that are not completely nested).  Inserts to
        same index need to be combined etc...   Here are the cases:

        I.i.u I.j.v                           leave alone, nonoverlapping
        I.i.u I.i.v                           combine: Iivu

        R.i-j.u R.x-y.v | i-j in x-y          delete first R
        R.i-j.u R.i-j.v                       delete first R
        R.i-j.u R.x-y.v | x-y in i-j          ERROR
        R.i-j.u R.x-y.v | boundaries overlap  ERROR

        Delete special case of replace (text is None):
        D.i-j.u D.x-y.v | boundaries overlap  combine to
                                              min(left)..max(right)

        I.i.u R.x-y.v   | i in (x+1)-y        delete I (since
                                              insert before we're not deleting
                                              i)
        I.i.u R.x-y.v   | i not in (x+1)-y    leave alone,
                                              nonoverlapping

        R.x-y.v I.i.u   | i in x-y            ERROR
        R.x-y.v I.x.u                         R.x-y.uv (combine, delete I)
        R.x-y.v I.i.u   | i not in x-y        leave alone, nonoverlapping

        I.i.u = insert u before op @ index i
        R.x-y.u = replace x-y indexed tokens with u

        First we need to examine replaces.  For any replace op:

          1. wipe out any insertions before op within that range.
          2. Drop any replace op before that is contained completely within
             that range.
          3. Throw exception upon boundary overlap with any previous replace.

        Then we can deal with inserts:

          1. for any inserts to same index, combine even if not adjacent.
          2. for any prior replace with same left boundary, combine this
             insert with replace and delete this replace.
          3. throw exception if index in same range as previous replace

        Don't actually delete; make op null in list. Easier to walk list.
        Later we can throw as we add to index -> op map.

        Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
        inserted stuff would be before the replace range.  But, if you
        add tokens in front of a method body '{' and then delete the method
        body, I think the stuff before the '{' you added should disappear too.

        NOTE(review): the code below does not discard the text of an insert
        located at a replace's left boundary; it removes the insert op but
        prepends its text to the replace's text via catOpText.  The
        paragraph above appears to describe older behavior -- confirm.

        Side effects: ``rewrites`` is modified in place (discarded ops are
        overwritten with None) and the surviving ops may have their
        ``text``, ``index`` and ``lastIndex`` attributes updated.

        Raises ValueError when a replace partially overlaps an earlier
        replace, or when an insert falls inside the range of an earlier
        replace.  An assert guards the one-op-per-index invariant.

        Return a map from token index to operation.
        """

        # WALK REPLACES
        for i, rop in enumerate(rewrites):
            # skip slots already cleared and anything that is not a replace
            if not rop:
                continue

            if not isinstance(rop, ReplaceOp):
                continue

            # Wipe prior inserts within range
            # (getKindOfOps only yields ops stored before position i)
            for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if iop.index == rop.index:
                    # E.g., insert before 2, delete 2..2; update replace
                    # text to include insert before, kill insert
                    rewrites[iop.instructionIndex] = None
                    rop.text = self.catOpText(iop.text, rop.text)

                elif iop.index > rop.index and iop.index <= rop.lastIndex:
                    # delete insert as it's a no-op.
                    rewrites[j] = None

            # Drop any prior replaces contained within
            for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if (prevRop.index >= rop.index
                    and prevRop.lastIndex <= rop.lastIndex):
                    # delete replace as it's a no-op.
                    rewrites[j] = None
                    continue

                # throw exception unless disjoint or identical
                disjoint = (prevRop.lastIndex < rop.index
                            or prevRop.index > rop.lastIndex)
                same = (prevRop.index == rop.index
                        and prevRop.lastIndex == rop.lastIndex)

                # Delete special case of replace (text is None):
                # D.i-j.u D.x-y.v | boundaries overlap => combine to
                # min(left)..max(right)
                if prevRop.text is None and rop.text is None and not disjoint:
                    # kill first delete
                    rewrites[prevRop.instructionIndex] = None

                    # widen the surviving delete to cover both ranges
                    rop.index = min(prevRop.index, rop.index)
                    rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)

                elif not disjoint and not same:
                    raise ValueError(
                        "replace op boundaries of {} overlap with previous {}"
                        .format(rop, prevRop))

        # WALK INSERTS
        for i, iop in enumerate(rewrites):
            # skip slots already cleared and anything that is not an insert
            if iop is None:
                continue

            if not isinstance(iop, InsertBeforeOp):
                continue

            # combine current insert with prior if any at same index
            for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if prevIop.index == iop.index: # combine objects
                    # convert to strings...we're in process of toString'ing
                    # whole token buffer so no lazy eval issue with any
                    # templates
                    # (the later insert's text goes first: "Iivu" above)
                    iop.text = self.catOpText(iop.text, prevIop.text)
                    # delete redundant prior insert
                    rewrites[j] = None

            # look for replaces where iop.index is in range; error
            for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if iop.index == rop.index:
                    # insert at the replace's left boundary: fold the
                    # insert text into the front of the replace text
                    rop.text = self.catOpText(iop.text, rop.text)
                    # delete current insert
                    rewrites[i] = None
                    continue

                if iop.index >= rop.index and iop.index <= rop.lastIndex:
                    raise ValueError(
                        "insert op {} within boundaries of previous {}"
                        .format(iop, rop))

        # Build the index -> op map from whatever survived the two passes.
        m = {}
        for i, op in enumerate(rewrites):
            if op is None:
                # ignore deleted ops
                continue

            assert op.index not in m, "should only be one op per index"
            m[op.index] = op

        return m
1422*16467b97STreehugger Robot
1423*16467b97STreehugger Robot
1424*16467b97STreehugger Robot    def catOpText(self, a, b):
1425*16467b97STreehugger Robot        x = ""
1426*16467b97STreehugger Robot        y = ""
1427*16467b97STreehugger Robot        if a:
1428*16467b97STreehugger Robot            x = a
1429*16467b97STreehugger Robot        if b:
1430*16467b97STreehugger Robot            y = b
1431*16467b97STreehugger Robot        return x + y
1432*16467b97STreehugger Robot
1433*16467b97STreehugger Robot
1434*16467b97STreehugger Robot    def getKindOfOps(self, rewrites, kind, before=None):
1435*16467b97STreehugger Robot        """Get all operations before an index of a particular kind."""
1436*16467b97STreehugger Robot
1437*16467b97STreehugger Robot        if before is None:
1438*16467b97STreehugger Robot            before = len(rewrites)
1439*16467b97STreehugger Robot        elif before > len(rewrites):
1440*16467b97STreehugger Robot            before = len(rewrites)
1441*16467b97STreehugger Robot
1442*16467b97STreehugger Robot        for i, op in enumerate(rewrites[:before]):
1443*16467b97STreehugger Robot            # ignore deleted
1444*16467b97STreehugger Robot            if op and op.__class__ == kind:
1445*16467b97STreehugger Robot                yield i, op
1446*16467b97STreehugger Robot
1447*16467b97STreehugger Robot
1448*16467b97STreehugger Robot    def toDebugString(self, start=None, end=None):
1449*16467b97STreehugger Robot        if start is None:
1450*16467b97STreehugger Robot            start = self.MIN_TOKEN_INDEX
1451*16467b97STreehugger Robot        if end is None:
1452*16467b97STreehugger Robot            end = self.size() - 1
1453*16467b97STreehugger Robot
1454*16467b97STreehugger Robot        buf = StringIO()
1455*16467b97STreehugger Robot        i = start
1456*16467b97STreehugger Robot        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1457*16467b97STreehugger Robot            buf.write(self.get(i))
1458*16467b97STreehugger Robot            i += 1
1459*16467b97STreehugger Robot
1460*16467b97STreehugger Robot        return buf.getvalue()
1461