1*16467b97STreehugger Robot"""ANTLR3 runtime package""" 2*16467b97STreehugger Robot 3*16467b97STreehugger Robot# begin[licence] 4*16467b97STreehugger Robot# 5*16467b97STreehugger Robot# [The "BSD licence"] 6*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr 7*16467b97STreehugger Robot# All rights reserved. 8*16467b97STreehugger Robot# 9*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without 10*16467b97STreehugger Robot# modification, are permitted provided that the following conditions 11*16467b97STreehugger Robot# are met: 12*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright 13*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer. 14*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright 15*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer in the 16*16467b97STreehugger Robot# documentation and/or other materials provided with the distribution. 17*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products 18*16467b97STreehugger Robot# derived from this software without specific prior written permission. 19*16467b97STreehugger Robot# 20*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*16467b97STreehugger Robot# 31*16467b97STreehugger Robot# end[licence] 32*16467b97STreehugger Robot 33*16467b97STreehugger Robotfrom io import StringIO 34*16467b97STreehugger Robot 35*16467b97STreehugger Robotfrom .constants import DEFAULT_CHANNEL, EOF 36*16467b97STreehugger Robotfrom .tokens import Token 37*16467b97STreehugger Robot 38*16467b97STreehugger Robot 39*16467b97STreehugger Robot############################################################################ 40*16467b97STreehugger Robot# 41*16467b97STreehugger Robot# basic interfaces 42*16467b97STreehugger Robot# IntStream 43*16467b97STreehugger Robot# +- CharStream 44*16467b97STreehugger Robot# \- TokenStream 45*16467b97STreehugger Robot# 46*16467b97STreehugger Robot# subclasses must implemented all methods 47*16467b97STreehugger Robot# 48*16467b97STreehugger Robot############################################################################ 49*16467b97STreehugger Robot 50*16467b97STreehugger Robotclass IntStream(object): 51*16467b97STreehugger Robot """ 52*16467b97STreehugger Robot @brief Base interface for streams of integer values. 53*16467b97STreehugger Robot 54*16467b97STreehugger Robot A simple stream of integers used when all I care about is the char 55*16467b97STreehugger Robot or token type sequence (such as interpretation). 56*16467b97STreehugger Robot """ 57*16467b97STreehugger Robot 58*16467b97STreehugger Robot def consume(self): 59*16467b97STreehugger Robot raise NotImplementedError 60*16467b97STreehugger Robot 61*16467b97STreehugger Robot 62*16467b97STreehugger Robot def LA(self, i): 63*16467b97STreehugger Robot """Get int at current input pointer + i ahead where i=1 is next int. 64*16467b97STreehugger Robot 65*16467b97STreehugger Robot Negative indexes are allowed. LA(-1) is previous token (token 66*16467b97STreehugger Robot just matched). LA(-i) where i is before first token should 67*16467b97STreehugger Robot yield -1, invalid char / EOF. 68*16467b97STreehugger Robot """ 69*16467b97STreehugger Robot 70*16467b97STreehugger Robot raise NotImplementedError 71*16467b97STreehugger Robot 72*16467b97STreehugger Robot 73*16467b97STreehugger Robot def mark(self): 74*16467b97STreehugger Robot """ 75*16467b97STreehugger Robot Tell the stream to start buffering if it hasn't already. Return 76*16467b97STreehugger Robot current input position, index(), or some other marker so that 77*16467b97STreehugger Robot when passed to rewind() you get back to the same spot. 78*16467b97STreehugger Robot rewind(mark()) should not affect the input cursor. The Lexer 79*16467b97STreehugger Robot track line/col info as well as input index so its markers are 80*16467b97STreehugger Robot not pure input indexes. Same for tree node streams. 81*16467b97STreehugger Robot """ 82*16467b97STreehugger Robot 83*16467b97STreehugger Robot raise NotImplementedError 84*16467b97STreehugger Robot 85*16467b97STreehugger Robot 86*16467b97STreehugger Robot def index(self): 87*16467b97STreehugger Robot """ 88*16467b97STreehugger Robot Return the current input symbol index 0..n where n indicates the 89*16467b97STreehugger Robot last symbol has been read. The index is the symbol about to be 90*16467b97STreehugger Robot read not the most recently read symbol. 91*16467b97STreehugger Robot """ 92*16467b97STreehugger Robot 93*16467b97STreehugger Robot raise NotImplementedError 94*16467b97STreehugger Robot 95*16467b97STreehugger Robot 96*16467b97STreehugger Robot def rewind(self, marker=None): 97*16467b97STreehugger Robot """ 98*16467b97STreehugger Robot Reset the stream so that next call to index would return marker. 99*16467b97STreehugger Robot The marker will usually be index() but it doesn't have to be. It's 100*16467b97STreehugger Robot just a marker to indicate what state the stream was in. This is 101*16467b97STreehugger Robot essentially calling release() and seek(). If there are markers 102*16467b97STreehugger Robot created after this marker argument, this routine must unroll them 103*16467b97STreehugger Robot like a stack. Assume the state the stream was in when this marker 104*16467b97STreehugger Robot was created. 105*16467b97STreehugger Robot 106*16467b97STreehugger Robot If marker is None: 107*16467b97STreehugger Robot Rewind to the input position of the last marker. 108*16467b97STreehugger Robot Used currently only after a cyclic DFA and just 109*16467b97STreehugger Robot before starting a sem/syn predicate to get the 110*16467b97STreehugger Robot input position back to the start of the decision. 111*16467b97STreehugger Robot Do not "pop" the marker off the state. mark(i) 112*16467b97STreehugger Robot and rewind(i) should balance still. It is 113*16467b97STreehugger Robot like invoking rewind(last marker) but it should not "pop" 114*16467b97STreehugger Robot the marker off. It's like seek(last marker's input position). 115*16467b97STreehugger Robot """ 116*16467b97STreehugger Robot 117*16467b97STreehugger Robot raise NotImplementedError 118*16467b97STreehugger Robot 119*16467b97STreehugger Robot 120*16467b97STreehugger Robot def release(self, marker=None): 121*16467b97STreehugger Robot """ 122*16467b97STreehugger Robot You may want to commit to a backtrack but don't want to force the 123*16467b97STreehugger Robot stream to keep bookkeeping objects around for a marker that is 124*16467b97STreehugger Robot no longer necessary. This will have the same behavior as 125*16467b97STreehugger Robot rewind() except it releases resources without the backward seek. 126*16467b97STreehugger Robot This must throw away resources for all markers back to the marker 127*16467b97STreehugger Robot argument. So if you're nested 5 levels of mark(), and then release(2) 128*16467b97STreehugger Robot you have to release resources for depths 2..5. 129*16467b97STreehugger Robot """ 130*16467b97STreehugger Robot 131*16467b97STreehugger Robot raise NotImplementedError 132*16467b97STreehugger Robot 133*16467b97STreehugger Robot 134*16467b97STreehugger Robot def seek(self, index): 135*16467b97STreehugger Robot """ 136*16467b97STreehugger Robot Set the input cursor to the position indicated by index. This is 137*16467b97STreehugger Robot normally used to seek ahead in the input stream. No buffering is 138*16467b97STreehugger Robot required to do this unless you know your stream will use seek to 139*16467b97STreehugger Robot move backwards such as when backtracking. 140*16467b97STreehugger Robot 141*16467b97STreehugger Robot This is different from rewind in its multi-directional 142*16467b97STreehugger Robot requirement and in that its argument is strictly an input cursor 143*16467b97STreehugger Robot (index). 144*16467b97STreehugger Robot 145*16467b97STreehugger Robot For char streams, seeking forward must update the stream state such 146*16467b97STreehugger Robot as line number. For seeking backwards, you will be presumably 147*16467b97STreehugger Robot backtracking using the mark/rewind mechanism that restores state and 148*16467b97STreehugger Robot so this method does not need to update state when seeking backwards. 149*16467b97STreehugger Robot 150*16467b97STreehugger Robot Currently, this method is only used for efficient backtracking using 151*16467b97STreehugger Robot memoization, but in the future it may be used for incremental parsing. 152*16467b97STreehugger Robot 153*16467b97STreehugger Robot The index is 0..n-1. A seek to position i means that LA(1) will 154*16467b97STreehugger Robot return the ith symbol. So, seeking to 0 means LA(1) will return the 155*16467b97STreehugger Robot first element in the stream. 156*16467b97STreehugger Robot """ 157*16467b97STreehugger Robot 158*16467b97STreehugger Robot raise NotImplementedError 159*16467b97STreehugger Robot 160*16467b97STreehugger Robot 161*16467b97STreehugger Robot def size(self): 162*16467b97STreehugger Robot """ 163*16467b97STreehugger Robot Only makes sense for streams that buffer everything up probably, but 164*16467b97STreehugger Robot might be useful to display the entire stream or for testing. This 165*16467b97STreehugger Robot value includes a single EOF. 166*16467b97STreehugger Robot """ 167*16467b97STreehugger Robot 168*16467b97STreehugger Robot raise NotImplementedError 169*16467b97STreehugger Robot 170*16467b97STreehugger Robot 171*16467b97STreehugger Robot def getSourceName(self): 172*16467b97STreehugger Robot """ 173*16467b97STreehugger Robot Where are you getting symbols from? Normally, implementations will 174*16467b97STreehugger Robot pass the buck all the way to the lexer who can ask its input stream 175*16467b97STreehugger Robot for the file name or whatever. 176*16467b97STreehugger Robot """ 177*16467b97STreehugger Robot 178*16467b97STreehugger Robot raise NotImplementedError 179*16467b97STreehugger Robot 180*16467b97STreehugger Robot 181*16467b97STreehugger Robotclass CharStream(IntStream): 182*16467b97STreehugger Robot """ 183*16467b97STreehugger Robot @brief A source of characters for an ANTLR lexer. 184*16467b97STreehugger Robot 185*16467b97STreehugger Robot This is an abstract class that must be implemented by a subclass. 186*16467b97STreehugger Robot 187*16467b97STreehugger Robot """ 188*16467b97STreehugger Robot 189*16467b97STreehugger Robot # pylint does not realize that this is an interface, too 190*16467b97STreehugger Robot #pylint: disable-msg=W0223 191*16467b97STreehugger Robot 192*16467b97STreehugger Robot EOF = -1 193*16467b97STreehugger Robot 194*16467b97STreehugger Robot def __init__(self): 195*16467b97STreehugger Robot # line number 1..n within the input 196*16467b97STreehugger Robot self._line = 1 197*16467b97STreehugger Robot 198*16467b97STreehugger Robot # The index of the character relative to the beginning of the 199*16467b97STreehugger Robot # line 0..n-1 200*16467b97STreehugger Robot self._charPositionInLine = 0 201*16467b97STreehugger Robot 202*16467b97STreehugger Robot 203*16467b97STreehugger Robot def substring(self, start, stop): 204*16467b97STreehugger Robot """ 205*16467b97STreehugger Robot For infinite streams, you don't need this; primarily I'm providing 206*16467b97STreehugger Robot a useful interface for action code. Just make sure actions don't 207*16467b97STreehugger Robot use this on streams that don't support it. 208*16467b97STreehugger Robot """ 209*16467b97STreehugger Robot 210*16467b97STreehugger Robot raise NotImplementedError 211*16467b97STreehugger Robot 212*16467b97STreehugger Robot 213*16467b97STreehugger Robot def LT(self, i): 214*16467b97STreehugger Robot """ 215*16467b97STreehugger Robot Get the ith character of lookahead. This is the same usually as 216*16467b97STreehugger Robot LA(i). This will be used for labels in the generated 217*16467b97STreehugger Robot lexer code. I'd prefer to return a char here type-wise, but it's 218*16467b97STreehugger Robot probably better to be 32-bit clean and be consistent with LA. 219*16467b97STreehugger Robot """ 220*16467b97STreehugger Robot 221*16467b97STreehugger Robot raise NotImplementedError 222*16467b97STreehugger Robot 223*16467b97STreehugger Robot 224*16467b97STreehugger Robot @property 225*16467b97STreehugger Robot def line(self): 226*16467b97STreehugger Robot """ANTLR tracks the line information automatically""" 227*16467b97STreehugger Robot return self._line 228*16467b97STreehugger Robot 229*16467b97STreehugger Robot @line.setter 230*16467b97STreehugger Robot def line(self, value): 231*16467b97STreehugger Robot """ 232*16467b97STreehugger Robot Because this stream can rewind, we need to be able to reset the line 233*16467b97STreehugger Robot """ 234*16467b97STreehugger Robot self._line = value 235*16467b97STreehugger Robot 236*16467b97STreehugger Robot 237*16467b97STreehugger Robot @property 238*16467b97STreehugger Robot def charPositionInLine(self): 239*16467b97STreehugger Robot """ 240*16467b97STreehugger Robot The index of the character relative to the beginning of the line 0..n-1 241*16467b97STreehugger Robot """ 242*16467b97STreehugger Robot return self._charPositionInLine 243*16467b97STreehugger Robot 244*16467b97STreehugger Robot @charPositionInLine.setter 245*16467b97STreehugger Robot def charPositionInLine(self, pos): 246*16467b97STreehugger Robot self._charPositionInLine = pos 247*16467b97STreehugger Robot 248*16467b97STreehugger Robot 249*16467b97STreehugger Robotclass TokenStream(IntStream): 250*16467b97STreehugger Robot """ 251*16467b97STreehugger Robot 252*16467b97STreehugger Robot @brief A stream of tokens accessing tokens from a TokenSource 253*16467b97STreehugger Robot 254*16467b97STreehugger Robot This is an abstract class that must be implemented by a subclass. 255*16467b97STreehugger Robot 256*16467b97STreehugger Robot """ 257*16467b97STreehugger Robot 258*16467b97STreehugger Robot # pylint does not realize that this is an interface, too 259*16467b97STreehugger Robot #pylint: disable-msg=W0223 260*16467b97STreehugger Robot 261*16467b97STreehugger Robot def LT(self, k): 262*16467b97STreehugger Robot """ 263*16467b97STreehugger Robot Get Token at current input pointer + i ahead where i=1 is next Token. 264*16467b97STreehugger Robot i<0 indicates tokens in the past. So -1 is previous token and -2 is 265*16467b97STreehugger Robot two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. 266*16467b97STreehugger Robot Return null for LT(0) and any index that results in an absolute address 267*16467b97STreehugger Robot that is negative. 268*16467b97STreehugger Robot """ 269*16467b97STreehugger Robot 270*16467b97STreehugger Robot raise NotImplementedError 271*16467b97STreehugger Robot 272*16467b97STreehugger Robot 273*16467b97STreehugger Robot def range(self): 274*16467b97STreehugger Robot """ 275*16467b97STreehugger Robot How far ahead has the stream been asked to look? The return 276*16467b97STreehugger Robot value is a valid index from 0..n-1. 277*16467b97STreehugger Robot """ 278*16467b97STreehugger Robot 279*16467b97STreehugger Robot raise NotImplementedError 280*16467b97STreehugger Robot 281*16467b97STreehugger Robot 282*16467b97STreehugger Robot def get(self, i): 283*16467b97STreehugger Robot """ 284*16467b97STreehugger Robot Get a token at an absolute index i; 0..n-1. This is really only 285*16467b97STreehugger Robot needed for profiling and debugging and token stream rewriting. 286*16467b97STreehugger Robot If you don't want to buffer up tokens, then this method makes no 287*16467b97STreehugger Robot sense for you. Naturally you can't use the rewrite stream feature. 288*16467b97STreehugger Robot I believe DebugTokenStream can easily be altered to not use 289*16467b97STreehugger Robot this method, removing the dependency. 290*16467b97STreehugger Robot """ 291*16467b97STreehugger Robot 292*16467b97STreehugger Robot raise NotImplementedError 293*16467b97STreehugger Robot 294*16467b97STreehugger Robot 295*16467b97STreehugger Robot def getTokenSource(self): 296*16467b97STreehugger Robot """ 297*16467b97STreehugger Robot Where is this stream pulling tokens from? This is not the name, but 298*16467b97STreehugger Robot the object that provides Token objects. 299*16467b97STreehugger Robot """ 300*16467b97STreehugger Robot 301*16467b97STreehugger Robot raise NotImplementedError 302*16467b97STreehugger Robot 303*16467b97STreehugger Robot 304*16467b97STreehugger Robot def toString(self, start=None, stop=None): 305*16467b97STreehugger Robot """ 306*16467b97STreehugger Robot Return the text of all tokens from start to stop, inclusive. 307*16467b97STreehugger Robot If the stream does not buffer all the tokens then it can just 308*16467b97STreehugger Robot return "" or null; Users should not access $ruleLabel.text in 309*16467b97STreehugger Robot an action of course in that case. 310*16467b97STreehugger Robot 311*16467b97STreehugger Robot Because the user is not required to use a token with an index stored 312*16467b97STreehugger Robot in it, we must provide a means for two token objects themselves to 313*16467b97STreehugger Robot indicate the start/end location. Most often this will just delegate 314*16467b97STreehugger Robot to the other toString(int,int). This is also parallel with 315*16467b97STreehugger Robot the TreeNodeStream.toString(Object,Object). 316*16467b97STreehugger Robot """ 317*16467b97STreehugger Robot 318*16467b97STreehugger Robot raise NotImplementedError 319*16467b97STreehugger Robot 320*16467b97STreehugger Robot 321*16467b97STreehugger Robot############################################################################ 322*16467b97STreehugger Robot# 323*16467b97STreehugger Robot# character streams for use in lexers 324*16467b97STreehugger Robot# CharStream 325*16467b97STreehugger Robot# \- ANTLRStringStream 326*16467b97STreehugger Robot# 327*16467b97STreehugger Robot############################################################################ 328*16467b97STreehugger Robot 329*16467b97STreehugger Robot 330*16467b97STreehugger Robotclass ANTLRStringStream(CharStream): 331*16467b97STreehugger Robot """ 332*16467b97STreehugger Robot @brief CharStream that pull data from a unicode string. 333*16467b97STreehugger Robot 334*16467b97STreehugger Robot A pretty quick CharStream that pulls all data from an array 335*16467b97STreehugger Robot directly. Every method call counts in the lexer. 336*16467b97STreehugger Robot 337*16467b97STreehugger Robot """ 338*16467b97STreehugger Robot 339*16467b97STreehugger Robot 340*16467b97STreehugger Robot def __init__(self, data): 341*16467b97STreehugger Robot """ 342*16467b97STreehugger Robot @param data This should be a unicode string holding the data you want 343*16467b97STreehugger Robot to parse. If you pass in a byte string, the Lexer will choke on 344*16467b97STreehugger Robot non-ascii data. 345*16467b97STreehugger Robot """ 346*16467b97STreehugger Robot 347*16467b97STreehugger Robot super().__init__() 348*16467b97STreehugger Robot 349*16467b97STreehugger Robot # The data being scanned 350*16467b97STreehugger Robot self.strdata = str(data) 351*16467b97STreehugger Robot self.data = [ord(c) for c in self.strdata] 352*16467b97STreehugger Robot 353*16467b97STreehugger Robot # How many characters are actually in the buffer 354*16467b97STreehugger Robot self.n = len(data) 355*16467b97STreehugger Robot 356*16467b97STreehugger Robot # 0..n-1 index into string of next char 357*16467b97STreehugger Robot self.p = 0 358*16467b97STreehugger Robot 359*16467b97STreehugger Robot # A list of CharStreamState objects that tracks the stream state 360*16467b97STreehugger Robot # values line, charPositionInLine, and p that can change as you 361*16467b97STreehugger Robot # move through the input stream. Indexed from 0..markDepth-1. 362*16467b97STreehugger Robot self._markers = [ ] 363*16467b97STreehugger Robot self.lastMarker = None 364*16467b97STreehugger Robot self.markDepth = 0 365*16467b97STreehugger Robot 366*16467b97STreehugger Robot # What is name or source of this char stream? 367*16467b97STreehugger Robot self.name = None 368*16467b97STreehugger Robot 369*16467b97STreehugger Robot 370*16467b97STreehugger Robot def reset(self): 371*16467b97STreehugger Robot """ 372*16467b97STreehugger Robot Reset the stream so that it's in the same state it was 373*16467b97STreehugger Robot when the object was created *except* the data array is not 374*16467b97STreehugger Robot touched. 375*16467b97STreehugger Robot """ 376*16467b97STreehugger Robot 377*16467b97STreehugger Robot self.p = 0 378*16467b97STreehugger Robot self._line = 1 379*16467b97STreehugger Robot self.charPositionInLine = 0 380*16467b97STreehugger Robot self._markers = [ ] 381*16467b97STreehugger Robot self.lastMarker = None 382*16467b97STreehugger Robot self.markDepth = 0 383*16467b97STreehugger Robot 384*16467b97STreehugger Robot 385*16467b97STreehugger Robot def consume(self): 386*16467b97STreehugger Robot if self.p < self.n: 387*16467b97STreehugger Robot if self.data[self.p] == 10: # ord('\n') 388*16467b97STreehugger Robot self._line += 1 389*16467b97STreehugger Robot self.charPositionInLine = 0 390*16467b97STreehugger Robot else: 391*16467b97STreehugger Robot self.charPositionInLine += 1 392*16467b97STreehugger Robot 393*16467b97STreehugger Robot self.p += 1 394*16467b97STreehugger Robot 395*16467b97STreehugger Robot # else we reached EOF 396*16467b97STreehugger Robot # just do nothing 397*16467b97STreehugger Robot 398*16467b97STreehugger Robot 399*16467b97STreehugger Robot def LA(self, i): 400*16467b97STreehugger Robot if i == 0: 401*16467b97STreehugger Robot return 0 # undefined 402*16467b97STreehugger Robot 403*16467b97STreehugger Robot if i < 0: 404*16467b97STreehugger Robot i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] 405*16467b97STreehugger Robot 406*16467b97STreehugger Robot if self.p + i - 1 < self.n: 407*16467b97STreehugger Robot return self.data[self.p + i - 1] 408*16467b97STreehugger Robot else: 409*16467b97STreehugger Robot return EOF 410*16467b97STreehugger Robot 411*16467b97STreehugger Robot 412*16467b97STreehugger Robot 413*16467b97STreehugger Robot def LT(self, i): 414*16467b97STreehugger Robot if i == 0: 415*16467b97STreehugger Robot return 0 # undefined 416*16467b97STreehugger Robot 417*16467b97STreehugger Robot if i < 0: 418*16467b97STreehugger Robot i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1] 419*16467b97STreehugger Robot 420*16467b97STreehugger Robot if self.p + i - 1 < self.n: 421*16467b97STreehugger Robot return self.strdata[self.p + i - 1] 422*16467b97STreehugger Robot else: 423*16467b97STreehugger Robot return EOF 424*16467b97STreehugger Robot 425*16467b97STreehugger Robot 426*16467b97STreehugger Robot def index(self): 427*16467b97STreehugger Robot """ 428*16467b97STreehugger Robot Return the current input symbol index 0..n where n indicates the 429*16467b97STreehugger Robot last symbol has been read. The index is the index of char to 430*16467b97STreehugger Robot be returned from LA(1). 431*16467b97STreehugger Robot """ 432*16467b97STreehugger Robot 433*16467b97STreehugger Robot return self.p 434*16467b97STreehugger Robot 435*16467b97STreehugger Robot 436*16467b97STreehugger Robot def size(self): 437*16467b97STreehugger Robot return self.n 438*16467b97STreehugger Robot 439*16467b97STreehugger Robot 440*16467b97STreehugger Robot def mark(self): 441*16467b97STreehugger Robot state = (self.p, self.line, self.charPositionInLine) 442*16467b97STreehugger Robot if self.markDepth < len(self._markers): 443*16467b97STreehugger Robot self._markers[self.markDepth] = state 444*16467b97STreehugger Robot else: 445*16467b97STreehugger Robot self._markers.append(state) 446*16467b97STreehugger Robot self.markDepth += 1 447*16467b97STreehugger Robot 448*16467b97STreehugger Robot self.lastMarker = self.markDepth 449*16467b97STreehugger Robot 450*16467b97STreehugger Robot return self.lastMarker 451*16467b97STreehugger Robot 452*16467b97STreehugger Robot 453*16467b97STreehugger Robot def rewind(self, marker=None): 454*16467b97STreehugger Robot if marker is None: 455*16467b97STreehugger Robot marker = self.lastMarker 456*16467b97STreehugger Robot 457*16467b97STreehugger Robot p, line, charPositionInLine = self._markers[marker - 1] 458*16467b97STreehugger Robot 459*16467b97STreehugger Robot self.seek(p) 460*16467b97STreehugger Robot self._line = line 461*16467b97STreehugger Robot self.charPositionInLine = charPositionInLine 462*16467b97STreehugger Robot self.release(marker) 463*16467b97STreehugger Robot 464*16467b97STreehugger Robot 465*16467b97STreehugger Robot def release(self, marker=None): 466*16467b97STreehugger Robot if marker is None: 467*16467b97STreehugger Robot marker = self.lastMarker 468*16467b97STreehugger Robot 469*16467b97STreehugger Robot self.markDepth = marker - 1 470*16467b97STreehugger Robot 471*16467b97STreehugger Robot 472*16467b97STreehugger Robot def seek(self, index): 473*16467b97STreehugger Robot """ 474*16467b97STreehugger Robot consume() ahead until p==index; can't just set p=index as we must 475*16467b97STreehugger Robot update line and charPositionInLine. 476*16467b97STreehugger Robot """ 477*16467b97STreehugger Robot 478*16467b97STreehugger Robot if index <= self.p: 479*16467b97STreehugger Robot self.p = index # just jump; don't update stream state (line, ...) 480*16467b97STreehugger Robot return 481*16467b97STreehugger Robot 482*16467b97STreehugger Robot # seek forward, consume until p hits index 483*16467b97STreehugger Robot while self.p < index: 484*16467b97STreehugger Robot self.consume() 485*16467b97STreehugger Robot 486*16467b97STreehugger Robot 487*16467b97STreehugger Robot def substring(self, start, stop): 488*16467b97STreehugger Robot return self.strdata[start:stop + 1] 489*16467b97STreehugger Robot 490*16467b97STreehugger Robot 491*16467b97STreehugger Robot def getSourceName(self): 492*16467b97STreehugger Robot return self.name 493*16467b97STreehugger Robot 494*16467b97STreehugger Robot 495*16467b97STreehugger Robotclass ANTLRFileStream(ANTLRStringStream): 496*16467b97STreehugger Robot """ 497*16467b97STreehugger Robot @brief CharStream that opens a file to read the data. 498*16467b97STreehugger Robot 499*16467b97STreehugger Robot This is a char buffer stream that is loaded from a file 500*16467b97STreehugger Robot all at once when you construct the object. 501*16467b97STreehugger Robot """ 502*16467b97STreehugger Robot 503*16467b97STreehugger Robot def __init__(self, fileName): 504*16467b97STreehugger Robot """ 505*16467b97STreehugger Robot @param fileName The path to the file to be opened. The file will be 506*16467b97STreehugger Robot opened with mode 'r'. 507*16467b97STreehugger Robot 508*16467b97STreehugger Robot """ 509*16467b97STreehugger Robot 510*16467b97STreehugger Robot self._fileName = fileName 511*16467b97STreehugger Robot 512*16467b97STreehugger Robot with open(fileName, 'r') as fp: 513*16467b97STreehugger Robot super().__init__(fp.read()) 514*16467b97STreehugger Robot 515*16467b97STreehugger Robot 516*16467b97STreehugger Robot @property 517*16467b97STreehugger Robot def fileName(self): 518*16467b97STreehugger Robot return self._fileName 519*16467b97STreehugger Robot 520*16467b97STreehugger Robot 521*16467b97STreehugger Robotclass ANTLRInputStream(ANTLRStringStream): 522*16467b97STreehugger Robot """ 523*16467b97STreehugger Robot @brief CharStream that reads data from a file-like object. 524*16467b97STreehugger Robot 525*16467b97STreehugger Robot This is a char buffer stream that is loaded from a file like object 526*16467b97STreehugger Robot all at once when you construct the object. 527*16467b97STreehugger Robot 528*16467b97STreehugger Robot All input is consumed from the file, but it is not closed. 529*16467b97STreehugger Robot """ 530*16467b97STreehugger Robot 531*16467b97STreehugger Robot def __init__(self, file): 532*16467b97STreehugger Robot """ 533*16467b97STreehugger Robot @param file A file-like object holding your input. Only the read() 534*16467b97STreehugger Robot method must be implemented. 535*16467b97STreehugger Robot 536*16467b97STreehugger Robot """ 537*16467b97STreehugger Robot 538*16467b97STreehugger Robot data = file.read() 539*16467b97STreehugger Robot 540*16467b97STreehugger Robot super().__init__(data) 541*16467b97STreehugger Robot 542*16467b97STreehugger Robot 543*16467b97STreehugger Robot# I guess the ANTLR prefix exists only to avoid a name clash with some Java 544*16467b97STreehugger Robot# mumbojumbo. A plain "StringStream" looks better to me, which should be 545*16467b97STreehugger Robot# the preferred name in Python. 546*16467b97STreehugger RobotStringStream = ANTLRStringStream 547*16467b97STreehugger RobotFileStream = ANTLRFileStream 548*16467b97STreehugger RobotInputStream = ANTLRInputStream 549*16467b97STreehugger Robot 550*16467b97STreehugger Robot 551*16467b97STreehugger Robot############################################################################ 552*16467b97STreehugger Robot# 553*16467b97STreehugger Robot# Token streams 554*16467b97STreehugger Robot# TokenStream 555*16467b97STreehugger Robot# +- CommonTokenStream 556*16467b97STreehugger Robot# \- TokenRewriteStream 557*16467b97STreehugger Robot# 558*16467b97STreehugger Robot############################################################################ 559*16467b97STreehugger Robot 560*16467b97STreehugger Robot 561*16467b97STreehugger Robotclass CommonTokenStream(TokenStream): 562*16467b97STreehugger Robot """ 563*16467b97STreehugger Robot @brief The most common stream of tokens 564*16467b97STreehugger Robot 565*16467b97STreehugger Robot The most common stream of tokens is one where every token is buffered up 566*16467b97STreehugger Robot and tokens are prefiltered for a certain channel (the parser will only 567*16467b97STreehugger Robot see these tokens and cannot change the filter channel number during the 568*16467b97STreehugger Robot parse). 569*16467b97STreehugger Robot """ 570*16467b97STreehugger Robot 571*16467b97STreehugger Robot def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 572*16467b97STreehugger Robot """ 573*16467b97STreehugger Robot @param tokenSource A TokenSource instance (usually a Lexer) to pull 574*16467b97STreehugger Robot the tokens from. 575*16467b97STreehugger Robot 576*16467b97STreehugger Robot @param channel Skip tokens on any channel but this one; this is how we 577*16467b97STreehugger Robot skip whitespace... 578*16467b97STreehugger Robot 579*16467b97STreehugger Robot """ 580*16467b97STreehugger Robot 581*16467b97STreehugger Robot super().__init__() 582*16467b97STreehugger Robot 583*16467b97STreehugger Robot self.tokenSource = tokenSource 584*16467b97STreehugger Robot 585*16467b97STreehugger Robot # Record every single token pulled from the source so we can reproduce 586*16467b97STreehugger Robot # chunks of it later. 587*16467b97STreehugger Robot self.tokens = [] 588*16467b97STreehugger Robot 589*16467b97STreehugger Robot # Map<tokentype, channel> to override some Tokens' channel numbers 590*16467b97STreehugger Robot self.channelOverrideMap = {} 591*16467b97STreehugger Robot 592*16467b97STreehugger Robot # Set<tokentype>; discard any tokens with this type 593*16467b97STreehugger Robot self.discardSet = set() 594*16467b97STreehugger Robot 595*16467b97STreehugger Robot # Skip tokens on any channel but this one; this is how we skip 596*16467b97STreehugger Robot # whitespace... 597*16467b97STreehugger Robot self.channel = channel 598*16467b97STreehugger Robot 599*16467b97STreehugger Robot # By default, track all incoming tokens 600*16467b97STreehugger Robot self.discardOffChannelTokens = False 601*16467b97STreehugger Robot 602*16467b97STreehugger Robot # The index into the tokens list of the current token (next token 603*16467b97STreehugger Robot # to consume). p==-1 indicates that the tokens list is empty 604*16467b97STreehugger Robot self.p = -1 605*16467b97STreehugger Robot 606*16467b97STreehugger Robot # Remember last marked position 607*16467b97STreehugger Robot self.lastMarker = None 608*16467b97STreehugger Robot 609*16467b97STreehugger Robot # how deep have we gone? 610*16467b97STreehugger Robot self._range = -1 611*16467b97STreehugger Robot 612*16467b97STreehugger Robot 613*16467b97STreehugger Robot def makeEOFToken(self): 614*16467b97STreehugger Robot return self.tokenSource.makeEOFToken() 615*16467b97STreehugger Robot 616*16467b97STreehugger Robot 617*16467b97STreehugger Robot def setTokenSource(self, tokenSource): 618*16467b97STreehugger Robot """Reset this token stream by setting its token source.""" 619*16467b97STreehugger Robot 620*16467b97STreehugger Robot self.tokenSource = tokenSource 621*16467b97STreehugger Robot self.tokens = [] 622*16467b97STreehugger Robot self.p = -1 623*16467b97STreehugger Robot self.channel = DEFAULT_CHANNEL 624*16467b97STreehugger Robot 625*16467b97STreehugger Robot 626*16467b97STreehugger Robot def reset(self): 627*16467b97STreehugger Robot self.p = 0 628*16467b97STreehugger Robot self.lastMarker = None 629*16467b97STreehugger Robot 630*16467b97STreehugger Robot 631*16467b97STreehugger Robot def fillBuffer(self): 632*16467b97STreehugger Robot """ 633*16467b97STreehugger Robot Load all tokens from the token source and put in tokens. 634*16467b97STreehugger Robot This is done upon first LT request because you might want to 635*16467b97STreehugger Robot set some token type / channel overrides before filling buffer. 636*16467b97STreehugger Robot """ 637*16467b97STreehugger Robot 638*16467b97STreehugger Robot 639*16467b97STreehugger Robot index = 0 640*16467b97STreehugger Robot t = self.tokenSource.nextToken() 641*16467b97STreehugger Robot while t and t.type != EOF: 642*16467b97STreehugger Robot discard = False 643*16467b97STreehugger Robot 644*16467b97STreehugger Robot if self.discardSet and t.type in self.discardSet: 645*16467b97STreehugger Robot discard = True 646*16467b97STreehugger Robot 647*16467b97STreehugger Robot elif self.discardOffChannelTokens and t.channel != self.channel: 648*16467b97STreehugger Robot discard = True 649*16467b97STreehugger Robot 650*16467b97STreehugger Robot # is there a channel override for token type? 651*16467b97STreehugger Robot if t.type in self.channelOverrideMap: 652*16467b97STreehugger Robot overrideChannel = self.channelOverrideMap[t.type] 653*16467b97STreehugger Robot 654*16467b97STreehugger Robot if overrideChannel == self.channel: 655*16467b97STreehugger Robot t.channel = overrideChannel 656*16467b97STreehugger Robot else: 657*16467b97STreehugger Robot discard = True 658*16467b97STreehugger Robot 659*16467b97STreehugger Robot if not discard: 660*16467b97STreehugger Robot t.index = index 661*16467b97STreehugger Robot self.tokens.append(t) 662*16467b97STreehugger Robot index += 1 663*16467b97STreehugger Robot 664*16467b97STreehugger Robot t = self.tokenSource.nextToken() 665*16467b97STreehugger Robot 666*16467b97STreehugger Robot # leave p pointing at first token on channel 667*16467b97STreehugger Robot self.p = 0 668*16467b97STreehugger Robot self.p = self.skipOffTokenChannels(self.p) 669*16467b97STreehugger Robot 670*16467b97STreehugger Robot 671*16467b97STreehugger Robot def consume(self): 672*16467b97STreehugger Robot """ 673*16467b97STreehugger Robot Move the input pointer to the next incoming token. The stream 674*16467b97STreehugger Robot must become active with LT(1) available. consume() simply 675*16467b97STreehugger Robot moves the input pointer so that LT(1) points at the next 676*16467b97STreehugger Robot input symbol. Consume at least one token. 677*16467b97STreehugger Robot 678*16467b97STreehugger Robot Walk past any token not on the channel the parser is listening to. 679*16467b97STreehugger Robot """ 680*16467b97STreehugger Robot 681*16467b97STreehugger Robot if self.p < len(self.tokens): 682*16467b97STreehugger Robot self.p += 1 683*16467b97STreehugger Robot 684*16467b97STreehugger Robot self.p = self.skipOffTokenChannels(self.p) # leave p on valid token 685*16467b97STreehugger Robot 686*16467b97STreehugger Robot 687*16467b97STreehugger Robot def skipOffTokenChannels(self, i): 688*16467b97STreehugger Robot """ 689*16467b97STreehugger Robot Given a starting index, return the index of the first on-channel 690*16467b97STreehugger Robot token. 691*16467b97STreehugger Robot """ 692*16467b97STreehugger Robot 693*16467b97STreehugger Robot n = len(self.tokens) 694*16467b97STreehugger Robot while i < n and self.tokens[i].channel != self.channel: 695*16467b97STreehugger Robot i += 1 696*16467b97STreehugger Robot 697*16467b97STreehugger Robot return i 698*16467b97STreehugger Robot 699*16467b97STreehugger Robot 700*16467b97STreehugger Robot def skipOffTokenChannelsReverse(self, i): 701*16467b97STreehugger Robot while i >= 0 and self.tokens[i].channel != self.channel: 702*16467b97STreehugger Robot i -= 1 703*16467b97STreehugger Robot 704*16467b97STreehugger Robot return i 705*16467b97STreehugger Robot 706*16467b97STreehugger Robot 707*16467b97STreehugger Robot def setTokenTypeChannel(self, ttype, channel): 708*16467b97STreehugger Robot """ 709*16467b97STreehugger Robot A simple filter mechanism whereby you can tell this token stream 710*16467b97STreehugger Robot to force all tokens of type ttype to be on channel. For example, 711*16467b97STreehugger Robot when interpreting, we cannot exec actions so we need to tell 712*16467b97STreehugger Robot the stream to force all WS and NEWLINE to be a different, ignored 713*16467b97STreehugger Robot channel. 714*16467b97STreehugger Robot """ 715*16467b97STreehugger Robot 716*16467b97STreehugger Robot self.channelOverrideMap[ttype] = channel 717*16467b97STreehugger Robot 718*16467b97STreehugger Robot 719*16467b97STreehugger Robot def discardTokenType(self, ttype): 720*16467b97STreehugger Robot self.discardSet.add(ttype) 721*16467b97STreehugger Robot 722*16467b97STreehugger Robot 723*16467b97STreehugger Robot def getTokens(self, start=None, stop=None, types=None): 724*16467b97STreehugger Robot """ 725*16467b97STreehugger Robot Given a start and stop index, return a list of all tokens in 726*16467b97STreehugger Robot the token type set. Return None if no tokens were found. This 727*16467b97STreehugger Robot method looks at both on and off channel tokens. 728*16467b97STreehugger Robot """ 729*16467b97STreehugger Robot 730*16467b97STreehugger Robot if self.p == -1: 731*16467b97STreehugger Robot self.fillBuffer() 732*16467b97STreehugger Robot 733*16467b97STreehugger Robot if stop is None or stop > len(self.tokens): 734*16467b97STreehugger Robot stop = len(self.tokens) 735*16467b97STreehugger Robot 736*16467b97STreehugger Robot if start is None or start < 0: 737*16467b97STreehugger Robot start = 0 738*16467b97STreehugger Robot 739*16467b97STreehugger Robot if start > stop: 740*16467b97STreehugger Robot return None 741*16467b97STreehugger Robot 742*16467b97STreehugger Robot if isinstance(types, int): 743*16467b97STreehugger Robot # called with a single type, wrap into set 744*16467b97STreehugger Robot types = set([types]) 745*16467b97STreehugger Robot 746*16467b97STreehugger Robot filteredTokens = [ 747*16467b97STreehugger Robot token for token in self.tokens[start:stop] 748*16467b97STreehugger Robot if types is None or token.type in types 749*16467b97STreehugger Robot ] 750*16467b97STreehugger Robot 751*16467b97STreehugger Robot if len(filteredTokens) == 0: 752*16467b97STreehugger Robot return None 753*16467b97STreehugger Robot 754*16467b97STreehugger Robot return filteredTokens 755*16467b97STreehugger Robot 756*16467b97STreehugger Robot 757*16467b97STreehugger Robot def LT(self, k): 758*16467b97STreehugger Robot """ 759*16467b97STreehugger Robot Get the ith token from the current position 1..n where k=1 is the 760*16467b97STreehugger Robot first symbol of lookahead. 761*16467b97STreehugger Robot """ 762*16467b97STreehugger Robot 763*16467b97STreehugger Robot if self.p == -1: 764*16467b97STreehugger Robot self.fillBuffer() 765*16467b97STreehugger Robot 766*16467b97STreehugger Robot if k == 0: 767*16467b97STreehugger Robot return None 768*16467b97STreehugger Robot 769*16467b97STreehugger Robot if k < 0: 770*16467b97STreehugger Robot return self.LB(-k) 771*16467b97STreehugger Robot 772*16467b97STreehugger Robot i = self.p 773*16467b97STreehugger Robot n = 1 774*16467b97STreehugger Robot # find k good tokens 775*16467b97STreehugger Robot while n < k: 776*16467b97STreehugger Robot # skip off-channel tokens 777*16467b97STreehugger Robot i = self.skipOffTokenChannels(i + 1) # leave p on valid token 778*16467b97STreehugger Robot n += 1 779*16467b97STreehugger Robot 780*16467b97STreehugger Robot if i > self._range: 781*16467b97STreehugger Robot self._range = i 782*16467b97STreehugger Robot 783*16467b97STreehugger Robot if i < len(self.tokens): 784*16467b97STreehugger Robot return self.tokens[i] 785*16467b97STreehugger Robot else: 786*16467b97STreehugger Robot return self.makeEOFToken() 787*16467b97STreehugger Robot 788*16467b97STreehugger Robot 789*16467b97STreehugger Robot def LB(self, k): 790*16467b97STreehugger Robot """Look backwards k tokens on-channel tokens""" 791*16467b97STreehugger Robot 792*16467b97STreehugger Robot if self.p == -1: 793*16467b97STreehugger Robot self.fillBuffer() 794*16467b97STreehugger Robot 795*16467b97STreehugger Robot if k == 0: 796*16467b97STreehugger Robot return None 797*16467b97STreehugger Robot 798*16467b97STreehugger Robot if self.p - k < 0: 799*16467b97STreehugger Robot return None 800*16467b97STreehugger Robot 801*16467b97STreehugger Robot i = self.p 802*16467b97STreehugger Robot n = 1 803*16467b97STreehugger Robot # find k good tokens looking backwards 804*16467b97STreehugger Robot while n <= k: 805*16467b97STreehugger Robot # skip off-channel tokens 806*16467b97STreehugger Robot i = self.skipOffTokenChannelsReverse(i - 1) # leave p on valid token 807*16467b97STreehugger Robot n += 1 808*16467b97STreehugger Robot 809*16467b97STreehugger Robot if i < 0: 810*16467b97STreehugger Robot return None 811*16467b97STreehugger Robot 812*16467b97STreehugger Robot return self.tokens[i] 813*16467b97STreehugger Robot 814*16467b97STreehugger Robot 815*16467b97STreehugger Robot def get(self, i): 816*16467b97STreehugger Robot """ 817*16467b97STreehugger Robot Return absolute token i; ignore which channel the tokens are on; 818*16467b97STreehugger Robot that is, count all tokens not just on-channel tokens. 819*16467b97STreehugger Robot """ 820*16467b97STreehugger Robot 821*16467b97STreehugger Robot return self.tokens[i] 822*16467b97STreehugger Robot 823*16467b97STreehugger Robot 824*16467b97STreehugger Robot def slice(self, start, stop): 825*16467b97STreehugger Robot if self.p == -1: 826*16467b97STreehugger Robot self.fillBuffer() 827*16467b97STreehugger Robot 828*16467b97STreehugger Robot if start < 0 or stop < 0: 829*16467b97STreehugger Robot return None 830*16467b97STreehugger Robot 831*16467b97STreehugger Robot return self.tokens[start:stop + 1] 832*16467b97STreehugger Robot 833*16467b97STreehugger Robot 834*16467b97STreehugger Robot def LA(self, i): 835*16467b97STreehugger Robot return self.LT(i).type 836*16467b97STreehugger Robot 837*16467b97STreehugger Robot 838*16467b97STreehugger Robot def mark(self): 839*16467b97STreehugger Robot self.lastMarker = self.index() 840*16467b97STreehugger Robot return self.lastMarker 841*16467b97STreehugger Robot 842*16467b97STreehugger Robot 843*16467b97STreehugger Robot def release(self, marker=None): 844*16467b97STreehugger Robot # no resources to release 845*16467b97STreehugger Robot pass 846*16467b97STreehugger Robot 847*16467b97STreehugger Robot 848*16467b97STreehugger Robot def size(self): 849*16467b97STreehugger Robot return len(self.tokens) 850*16467b97STreehugger Robot 851*16467b97STreehugger Robot 852*16467b97STreehugger Robot def range(self): 853*16467b97STreehugger Robot return self._range 854*16467b97STreehugger Robot 855*16467b97STreehugger Robot 856*16467b97STreehugger Robot def index(self): 857*16467b97STreehugger Robot return self.p 858*16467b97STreehugger Robot 859*16467b97STreehugger Robot 860*16467b97STreehugger Robot def rewind(self, marker=None): 861*16467b97STreehugger Robot if marker is None: 862*16467b97STreehugger Robot marker = self.lastMarker 863*16467b97STreehugger Robot 864*16467b97STreehugger Robot self.seek(marker) 865*16467b97STreehugger Robot 866*16467b97STreehugger Robot 867*16467b97STreehugger Robot def seek(self, index): 868*16467b97STreehugger Robot self.p = index 869*16467b97STreehugger Robot 870*16467b97STreehugger Robot 871*16467b97STreehugger Robot def getTokenSource(self): 872*16467b97STreehugger Robot return self.tokenSource 873*16467b97STreehugger Robot 874*16467b97STreehugger Robot 875*16467b97STreehugger Robot def getSourceName(self): 876*16467b97STreehugger Robot return self.tokenSource.getSourceName() 877*16467b97STreehugger Robot 878*16467b97STreehugger Robot 879*16467b97STreehugger Robot def toString(self, start=None, stop=None): 880*16467b97STreehugger Robot """Returns a string of all tokens between start and stop (inclusive).""" 881*16467b97STreehugger Robot if self.p == -1: 882*16467b97STreehugger Robot self.fillBuffer() 883*16467b97STreehugger Robot 884*16467b97STreehugger Robot if start is None: 885*16467b97STreehugger Robot start = 0 886*16467b97STreehugger Robot elif not isinstance(start, int): 887*16467b97STreehugger Robot start = start.index 888*16467b97STreehugger Robot 889*16467b97STreehugger Robot if stop is None: 890*16467b97STreehugger Robot stop = len(self.tokens) - 1 891*16467b97STreehugger Robot elif not isinstance(stop, int): 892*16467b97STreehugger Robot stop = stop.index 893*16467b97STreehugger Robot 894*16467b97STreehugger Robot if stop >= len(self.tokens): 895*16467b97STreehugger Robot stop = len(self.tokens) - 1 896*16467b97STreehugger Robot 897*16467b97STreehugger Robot return ''.join([t.text for t in self.tokens[start:stop + 1]]) 898*16467b97STreehugger Robot 899*16467b97STreehugger Robot 900*16467b97STreehugger Robotclass RewriteOperation(object): 901*16467b97STreehugger Robot """@brief Internal helper class.""" 902*16467b97STreehugger Robot 903*16467b97STreehugger Robot def __init__(self, stream, index, text): 904*16467b97STreehugger Robot self.stream = stream 905*16467b97STreehugger Robot 906*16467b97STreehugger Robot # What index into rewrites List are we? 907*16467b97STreehugger Robot self.instructionIndex = None 908*16467b97STreehugger Robot 909*16467b97STreehugger Robot # Token buffer index. 910*16467b97STreehugger Robot self.index = index 911*16467b97STreehugger Robot self.text = text 912*16467b97STreehugger Robot 913*16467b97STreehugger Robot def execute(self, buf): 914*16467b97STreehugger Robot """Execute the rewrite operation by possibly adding to the buffer. 915*16467b97STreehugger Robot Return the index of the next token to operate on. 916*16467b97STreehugger Robot """ 917*16467b97STreehugger Robot 918*16467b97STreehugger Robot return self.index 919*16467b97STreehugger Robot 920*16467b97STreehugger Robot def toString(self): 921*16467b97STreehugger Robot opName = self.__class__.__name__ 922*16467b97STreehugger Robot return '<{opName}@{0.index}:"{0.text}">'.format(self, opName=opName) 923*16467b97STreehugger Robot 924*16467b97STreehugger Robot __str__ = toString 925*16467b97STreehugger Robot __repr__ = toString 926*16467b97STreehugger Robot 927*16467b97STreehugger Robot 928*16467b97STreehugger Robotclass InsertBeforeOp(RewriteOperation): 929*16467b97STreehugger Robot """@brief Internal helper class.""" 930*16467b97STreehugger Robot 931*16467b97STreehugger Robot def execute(self, buf): 932*16467b97STreehugger Robot buf.write(self.text) 933*16467b97STreehugger Robot if self.stream.tokens[self.index].type != EOF: 934*16467b97STreehugger Robot buf.write(self.stream.tokens[self.index].text) 935*16467b97STreehugger Robot return self.index + 1 936*16467b97STreehugger Robot 937*16467b97STreehugger Robot 938*16467b97STreehugger Robotclass ReplaceOp(RewriteOperation): 939*16467b97STreehugger Robot """ 940*16467b97STreehugger Robot @brief Internal helper class. 941*16467b97STreehugger Robot 942*16467b97STreehugger Robot I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp 943*16467b97STreehugger Robot instructions. 944*16467b97STreehugger Robot """ 945*16467b97STreehugger Robot 946*16467b97STreehugger Robot def __init__(self, stream, first, last, text): 947*16467b97STreehugger Robot super().__init__(stream, first, text) 948*16467b97STreehugger Robot self.lastIndex = last 949*16467b97STreehugger Robot 950*16467b97STreehugger Robot 951*16467b97STreehugger Robot def execute(self, buf): 952*16467b97STreehugger Robot if self.text is not None: 953*16467b97STreehugger Robot buf.write(self.text) 954*16467b97STreehugger Robot 955*16467b97STreehugger Robot return self.lastIndex + 1 956*16467b97STreehugger Robot 957*16467b97STreehugger Robot 958*16467b97STreehugger Robot def toString(self): 959*16467b97STreehugger Robot if self.text is None: 960*16467b97STreehugger Robot return '<DeleteOp@{0.index}..{0.lastindex}>'.format(self) 961*16467b97STreehugger Robot 962*16467b97STreehugger Robot return '<ReplaceOp@{0.index}..{0.lastIndex}:"{0.text}">'.format(self) 963*16467b97STreehugger Robot 964*16467b97STreehugger Robot __str__ = toString 965*16467b97STreehugger Robot __repr__ = toString 966*16467b97STreehugger Robot 967*16467b97STreehugger Robot 968*16467b97STreehugger Robotclass TokenRewriteStream(CommonTokenStream): 969*16467b97STreehugger Robot """@brief CommonTokenStream that can be modified. 970*16467b97STreehugger Robot 971*16467b97STreehugger Robot Useful for dumping out the input stream after doing some 972*16467b97STreehugger Robot augmentation or other manipulations. 973*16467b97STreehugger Robot 974*16467b97STreehugger Robot You can insert stuff, replace, and delete chunks. Note that the 975*16467b97STreehugger Robot operations are done lazily--only if you convert the buffer to a 976*16467b97STreehugger Robot String. This is very efficient because you are not moving data around 977*16467b97STreehugger Robot all the time. As the buffer of tokens is converted to strings, the 978*16467b97STreehugger Robot toString() method(s) check to see if there is an operation at the 979*16467b97STreehugger Robot current index. If so, the operation is done and then normal String 980*16467b97STreehugger Robot rendering continues on the buffer. This is like having multiple Turing 981*16467b97STreehugger Robot machine instruction streams (programs) operating on a single input tape. :) 982*16467b97STreehugger Robot 983*16467b97STreehugger Robot Since the operations are done lazily at toString-time, operations do not 984*16467b97STreehugger Robot screw up the token index values. That is, an insert operation at token 985*16467b97STreehugger Robot index i does not change the index values for tokens i+1..n-1. 986*16467b97STreehugger Robot 987*16467b97STreehugger Robot Because operations never actually alter the buffer, you may always get 988*16467b97STreehugger Robot the original token stream back without undoing anything. Since 989*16467b97STreehugger Robot the instructions are queued up, you can easily simulate transactions and 990*16467b97STreehugger Robot roll back any changes if there is an error just by removing instructions. 991*16467b97STreehugger Robot For example, 992*16467b97STreehugger Robot 993*16467b97STreehugger Robot CharStream input = new ANTLRFileStream("input"); 994*16467b97STreehugger Robot TLexer lex = new TLexer(input); 995*16467b97STreehugger Robot TokenRewriteStream tokens = new TokenRewriteStream(lex); 996*16467b97STreehugger Robot T parser = new T(tokens); 997*16467b97STreehugger Robot parser.startRule(); 998*16467b97STreehugger Robot 999*16467b97STreehugger Robot Then in the rules, you can execute 1000*16467b97STreehugger Robot Token t,u; 1001*16467b97STreehugger Robot ... 1002*16467b97STreehugger Robot input.insertAfter(t, "text to put after t");} 1003*16467b97STreehugger Robot input.insertAfter(u, "text after u");} 1004*16467b97STreehugger Robot System.out.println(tokens.toString()); 1005*16467b97STreehugger Robot 1006*16467b97STreehugger Robot Actually, you have to cast the 'input' to a TokenRewriteStream. :( 1007*16467b97STreehugger Robot 1008*16467b97STreehugger Robot You can also have multiple "instruction streams" and get multiple 1009*16467b97STreehugger Robot rewrites from a single pass over the input. Just name the instruction 1010*16467b97STreehugger Robot streams and use that name again when printing the buffer. This could be 1011*16467b97STreehugger Robot useful for generating a C file and also its header file--all from the 1012*16467b97STreehugger Robot same buffer: 1013*16467b97STreehugger Robot 1014*16467b97STreehugger Robot tokens.insertAfter("pass1", t, "text to put after t");} 1015*16467b97STreehugger Robot tokens.insertAfter("pass2", u, "text after u");} 1016*16467b97STreehugger Robot System.out.println(tokens.toString("pass1")); 1017*16467b97STreehugger Robot System.out.println(tokens.toString("pass2")); 1018*16467b97STreehugger Robot 1019*16467b97STreehugger Robot If you don't use named rewrite streams, a "default" stream is used as 1020*16467b97STreehugger Robot the first example shows. 1021*16467b97STreehugger Robot """ 1022*16467b97STreehugger Robot 1023*16467b97STreehugger Robot DEFAULT_PROGRAM_NAME = "default" 1024*16467b97STreehugger Robot MIN_TOKEN_INDEX = 0 1025*16467b97STreehugger Robot 1026*16467b97STreehugger Robot def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL): 1027*16467b97STreehugger Robot super().__init__(tokenSource, channel) 1028*16467b97STreehugger Robot 1029*16467b97STreehugger Robot # You may have multiple, named streams of rewrite operations. 1030*16467b97STreehugger Robot # I'm calling these things "programs." 1031*16467b97STreehugger Robot # Maps String (name) -> rewrite (List) 1032*16467b97STreehugger Robot self.programs = {} 1033*16467b97STreehugger Robot self.programs[self.DEFAULT_PROGRAM_NAME] = [] 1034*16467b97STreehugger Robot 1035*16467b97STreehugger Robot # Map String (program name) -> Integer index 1036*16467b97STreehugger Robot self.lastRewriteTokenIndexes = {} 1037*16467b97STreehugger Robot 1038*16467b97STreehugger Robot 1039*16467b97STreehugger Robot def rollback(self, *args): 1040*16467b97STreehugger Robot """ 1041*16467b97STreehugger Robot Rollback the instruction stream for a program so that 1042*16467b97STreehugger Robot the indicated instruction (via instructionIndex) is no 1043*16467b97STreehugger Robot longer in the stream. UNTESTED! 1044*16467b97STreehugger Robot """ 1045*16467b97STreehugger Robot 1046*16467b97STreehugger Robot if len(args) == 2: 1047*16467b97STreehugger Robot programName = args[0] 1048*16467b97STreehugger Robot instructionIndex = args[1] 1049*16467b97STreehugger Robot elif len(args) == 1: 1050*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1051*16467b97STreehugger Robot instructionIndex = args[0] 1052*16467b97STreehugger Robot else: 1053*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1054*16467b97STreehugger Robot 1055*16467b97STreehugger Robot p = self.programs.get(programName) 1056*16467b97STreehugger Robot if p: 1057*16467b97STreehugger Robot self.programs[programName] = ( 1058*16467b97STreehugger Robot p[self.MIN_TOKEN_INDEX:instructionIndex]) 1059*16467b97STreehugger Robot 1060*16467b97STreehugger Robot 1061*16467b97STreehugger Robot def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME): 1062*16467b97STreehugger Robot """Reset the program so that no instructions exist""" 1063*16467b97STreehugger Robot 1064*16467b97STreehugger Robot self.rollback(programName, self.MIN_TOKEN_INDEX) 1065*16467b97STreehugger Robot 1066*16467b97STreehugger Robot 1067*16467b97STreehugger Robot def insertAfter(self, *args): 1068*16467b97STreehugger Robot if len(args) == 2: 1069*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1070*16467b97STreehugger Robot index = args[0] 1071*16467b97STreehugger Robot text = args[1] 1072*16467b97STreehugger Robot 1073*16467b97STreehugger Robot elif len(args) == 3: 1074*16467b97STreehugger Robot programName = args[0] 1075*16467b97STreehugger Robot index = args[1] 1076*16467b97STreehugger Robot text = args[2] 1077*16467b97STreehugger Robot 1078*16467b97STreehugger Robot else: 1079*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1080*16467b97STreehugger Robot 1081*16467b97STreehugger Robot if isinstance(index, Token): 1082*16467b97STreehugger Robot # index is a Token, grap the stream index from it 1083*16467b97STreehugger Robot index = index.index 1084*16467b97STreehugger Robot 1085*16467b97STreehugger Robot # to insert after, just insert before next index (even if past end) 1086*16467b97STreehugger Robot self.insertBefore(programName, index + 1, text) 1087*16467b97STreehugger Robot 1088*16467b97STreehugger Robot 1089*16467b97STreehugger Robot def insertBefore(self, *args): 1090*16467b97STreehugger Robot if len(args) == 2: 1091*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1092*16467b97STreehugger Robot index = args[0] 1093*16467b97STreehugger Robot text = args[1] 1094*16467b97STreehugger Robot 1095*16467b97STreehugger Robot elif len(args) == 3: 1096*16467b97STreehugger Robot programName = args[0] 1097*16467b97STreehugger Robot index = args[1] 1098*16467b97STreehugger Robot text = args[2] 1099*16467b97STreehugger Robot 1100*16467b97STreehugger Robot else: 1101*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1102*16467b97STreehugger Robot 1103*16467b97STreehugger Robot if isinstance(index, Token): 1104*16467b97STreehugger Robot # index is a Token, grab the stream index from it 1105*16467b97STreehugger Robot index = index.index 1106*16467b97STreehugger Robot 1107*16467b97STreehugger Robot op = InsertBeforeOp(self, index, text) 1108*16467b97STreehugger Robot rewrites = self.getProgram(programName) 1109*16467b97STreehugger Robot op.instructionIndex = len(rewrites) 1110*16467b97STreehugger Robot rewrites.append(op) 1111*16467b97STreehugger Robot 1112*16467b97STreehugger Robot 1113*16467b97STreehugger Robot def replace(self, *args): 1114*16467b97STreehugger Robot if len(args) == 2: 1115*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1116*16467b97STreehugger Robot first = args[0] 1117*16467b97STreehugger Robot last = args[0] 1118*16467b97STreehugger Robot text = args[1] 1119*16467b97STreehugger Robot 1120*16467b97STreehugger Robot elif len(args) == 3: 1121*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1122*16467b97STreehugger Robot first = args[0] 1123*16467b97STreehugger Robot last = args[1] 1124*16467b97STreehugger Robot text = args[2] 1125*16467b97STreehugger Robot 1126*16467b97STreehugger Robot elif len(args) == 4: 1127*16467b97STreehugger Robot programName = args[0] 1128*16467b97STreehugger Robot first = args[1] 1129*16467b97STreehugger Robot last = args[2] 1130*16467b97STreehugger Robot text = args[3] 1131*16467b97STreehugger Robot 1132*16467b97STreehugger Robot else: 1133*16467b97STreehugger Robot raise TypeError("Invalid arguments") 1134*16467b97STreehugger Robot 1135*16467b97STreehugger Robot if isinstance(first, Token): 1136*16467b97STreehugger Robot # first is a Token, grap the stream index from it 1137*16467b97STreehugger Robot first = first.index 1138*16467b97STreehugger Robot 1139*16467b97STreehugger Robot if isinstance(last, Token): 1140*16467b97STreehugger Robot # last is a Token, grap the stream index from it 1141*16467b97STreehugger Robot last = last.index 1142*16467b97STreehugger Robot 1143*16467b97STreehugger Robot if first > last or first < 0 or last < 0 or last >= len(self.tokens): 1144*16467b97STreehugger Robot raise ValueError( 1145*16467b97STreehugger Robot "replace: range invalid: {}..{} (size={})" 1146*16467b97STreehugger Robot .format(first, last, len(self.tokens))) 1147*16467b97STreehugger Robot 1148*16467b97STreehugger Robot op = ReplaceOp(self, first, last, text) 1149*16467b97STreehugger Robot rewrites = self.getProgram(programName) 1150*16467b97STreehugger Robot op.instructionIndex = len(rewrites) 1151*16467b97STreehugger Robot rewrites.append(op) 1152*16467b97STreehugger Robot 1153*16467b97STreehugger Robot 1154*16467b97STreehugger Robot def delete(self, *args): 1155*16467b97STreehugger Robot self.replace(*(list(args) + [None])) 1156*16467b97STreehugger Robot 1157*16467b97STreehugger Robot 1158*16467b97STreehugger Robot def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME): 1159*16467b97STreehugger Robot return self.lastRewriteTokenIndexes.get(programName, -1) 1160*16467b97STreehugger Robot 1161*16467b97STreehugger Robot 1162*16467b97STreehugger Robot def setLastRewriteTokenIndex(self, programName, i): 1163*16467b97STreehugger Robot self.lastRewriteTokenIndexes[programName] = i 1164*16467b97STreehugger Robot 1165*16467b97STreehugger Robot 1166*16467b97STreehugger Robot def getProgram(self, name): 1167*16467b97STreehugger Robot p = self.programs.get(name) 1168*16467b97STreehugger Robot if not p: 1169*16467b97STreehugger Robot p = self.initializeProgram(name) 1170*16467b97STreehugger Robot 1171*16467b97STreehugger Robot return p 1172*16467b97STreehugger Robot 1173*16467b97STreehugger Robot 1174*16467b97STreehugger Robot def initializeProgram(self, name): 1175*16467b97STreehugger Robot p = [] 1176*16467b97STreehugger Robot self.programs[name] = p 1177*16467b97STreehugger Robot return p 1178*16467b97STreehugger Robot 1179*16467b97STreehugger Robot 1180*16467b97STreehugger Robot def toOriginalString(self, start=None, end=None): 1181*16467b97STreehugger Robot if self.p == -1: 1182*16467b97STreehugger Robot self.fillBuffer() 1183*16467b97STreehugger Robot 1184*16467b97STreehugger Robot if start is None: 1185*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1186*16467b97STreehugger Robot if end is None: 1187*16467b97STreehugger Robot end = self.size() - 1 1188*16467b97STreehugger Robot 1189*16467b97STreehugger Robot buf = StringIO() 1190*16467b97STreehugger Robot i = start 1191*16467b97STreehugger Robot while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1192*16467b97STreehugger Robot if self.get(i).type != EOF: 1193*16467b97STreehugger Robot buf.write(self.get(i).text) 1194*16467b97STreehugger Robot i += 1 1195*16467b97STreehugger Robot 1196*16467b97STreehugger Robot return buf.getvalue() 1197*16467b97STreehugger Robot 1198*16467b97STreehugger Robot 1199*16467b97STreehugger Robot def toString(self, *args): 1200*16467b97STreehugger Robot if self.p == -1: 1201*16467b97STreehugger Robot self.fillBuffer() 1202*16467b97STreehugger Robot 1203*16467b97STreehugger Robot if len(args) == 0: 1204*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1205*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1206*16467b97STreehugger Robot end = self.size() - 1 1207*16467b97STreehugger Robot 1208*16467b97STreehugger Robot elif len(args) == 1: 1209*16467b97STreehugger Robot programName = args[0] 1210*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1211*16467b97STreehugger Robot end = self.size() - 1 1212*16467b97STreehugger Robot 1213*16467b97STreehugger Robot elif len(args) == 2: 1214*16467b97STreehugger Robot programName = self.DEFAULT_PROGRAM_NAME 1215*16467b97STreehugger Robot start = args[0] 1216*16467b97STreehugger Robot end = args[1] 1217*16467b97STreehugger Robot 1218*16467b97STreehugger Robot if start is None: 1219*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1220*16467b97STreehugger Robot elif not isinstance(start, int): 1221*16467b97STreehugger Robot start = start.index 1222*16467b97STreehugger Robot 1223*16467b97STreehugger Robot if end is None: 1224*16467b97STreehugger Robot end = len(self.tokens) - 1 1225*16467b97STreehugger Robot elif not isinstance(end, int): 1226*16467b97STreehugger Robot end = end.index 1227*16467b97STreehugger Robot 1228*16467b97STreehugger Robot # ensure start/end are in range 1229*16467b97STreehugger Robot if end >= len(self.tokens): 1230*16467b97STreehugger Robot end = len(self.tokens) - 1 1231*16467b97STreehugger Robot 1232*16467b97STreehugger Robot if start < 0: 1233*16467b97STreehugger Robot start = 0 1234*16467b97STreehugger Robot 1235*16467b97STreehugger Robot rewrites = self.programs.get(programName) 1236*16467b97STreehugger Robot if not rewrites: 1237*16467b97STreehugger Robot # no instructions to execute 1238*16467b97STreehugger Robot return self.toOriginalString(start, end) 1239*16467b97STreehugger Robot 1240*16467b97STreehugger Robot buf = StringIO() 1241*16467b97STreehugger Robot 1242*16467b97STreehugger Robot # First, optimize instruction stream 1243*16467b97STreehugger Robot indexToOp = self.reduceToSingleOperationPerIndex(rewrites) 1244*16467b97STreehugger Robot 1245*16467b97STreehugger Robot # Walk buffer, executing instructions and emitting tokens 1246*16467b97STreehugger Robot i = start 1247*16467b97STreehugger Robot while i <= end and i < len(self.tokens): 1248*16467b97STreehugger Robot # remove so any left have index size-1 1249*16467b97STreehugger Robot op = indexToOp.pop(i, None) 1250*16467b97STreehugger Robot 1251*16467b97STreehugger Robot t = self.tokens[i] 1252*16467b97STreehugger Robot if op is None: 1253*16467b97STreehugger Robot # no operation at that index, just dump token 1254*16467b97STreehugger Robot if t.type != EOF: 1255*16467b97STreehugger Robot buf.write(t.text) 1256*16467b97STreehugger Robot i += 1 # move to next token 1257*16467b97STreehugger Robot 1258*16467b97STreehugger Robot else: 1259*16467b97STreehugger Robot i = op.execute(buf) # execute operation and skip 1260*16467b97STreehugger Robot 1261*16467b97STreehugger Robot # include stuff after end if it's last index in buffer 1262*16467b97STreehugger Robot # So, if they did an insertAfter(lastValidIndex, "foo"), include 1263*16467b97STreehugger Robot # foo if end == lastValidIndex. 1264*16467b97STreehugger Robot if end == len(self.tokens) - 1: 1265*16467b97STreehugger Robot # Scan any remaining operations after last token 1266*16467b97STreehugger Robot # should be included (they will be inserts). 1267*16467b97STreehugger Robot for i, op in sorted(indexToOp.items()): 1268*16467b97STreehugger Robot if op.index >= len(self.tokens) - 1: 1269*16467b97STreehugger Robot buf.write(op.text) 1270*16467b97STreehugger Robot 1271*16467b97STreehugger Robot return buf.getvalue() 1272*16467b97STreehugger Robot 1273*16467b97STreehugger Robot __str__ = toString 1274*16467b97STreehugger Robot 1275*16467b97STreehugger Robot 1276*16467b97STreehugger Robot def reduceToSingleOperationPerIndex(self, rewrites): 1277*16467b97STreehugger Robot """ 1278*16467b97STreehugger Robot We need to combine operations and report invalid operations (like 1279*16467b97STreehugger Robot overlapping replaces that are not completed nested). Inserts to 1280*16467b97STreehugger Robot same index need to be combined etc... Here are the cases: 1281*16467b97STreehugger Robot 1282*16467b97STreehugger Robot I.i.u I.j.v leave alone, nonoverlapping 1283*16467b97STreehugger Robot I.i.u I.i.v combine: Iivu 1284*16467b97STreehugger Robot 1285*16467b97STreehugger Robot R.i-j.u R.x-y.v | i-j in x-y delete first R 1286*16467b97STreehugger Robot R.i-j.u R.i-j.v delete first R 1287*16467b97STreehugger Robot R.i-j.u R.x-y.v | x-y in i-j ERROR 1288*16467b97STreehugger Robot R.i-j.u R.x-y.v | boundaries overlap ERROR 1289*16467b97STreehugger Robot 1290*16467b97STreehugger Robot Delete special case of replace (text==null): 1291*16467b97STreehugger Robot D.i-j.u D.x-y.v | boundaries overlapcombine to 1292*16467b97STreehugger Robot max(min)..max(right) 1293*16467b97STreehugger Robot 1294*16467b97STreehugger Robot I.i.u R.x-y.v | i in (x+1)-ydelete I (since 1295*16467b97STreehugger Robot insert before we're not deleting 1296*16467b97STreehugger Robot i) 1297*16467b97STreehugger Robot I.i.u R.x-y.v | i not in (x+1)-yleave alone, 1298*16467b97STreehugger Robot nonoverlapping 1299*16467b97STreehugger Robot 1300*16467b97STreehugger Robot R.x-y.v I.i.u | i in x-y ERROR 1301*16467b97STreehugger Robot R.x-y.v I.x.u R.x-y.uv (combine, delete I) 1302*16467b97STreehugger Robot R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 1303*16467b97STreehugger Robot 1304*16467b97STreehugger Robot I.i.u = insert u before op @ index i 1305*16467b97STreehugger Robot R.x-y.u = replace x-y indexed tokens with u 1306*16467b97STreehugger Robot 1307*16467b97STreehugger Robot First we need to examine replaces. For any replace op: 1308*16467b97STreehugger Robot 1309*16467b97STreehugger Robot 1. wipe out any insertions before op within that range. 1310*16467b97STreehugger Robot 2. Drop any replace op before that is contained completely within 1311*16467b97STreehugger Robot that range. 1312*16467b97STreehugger Robot 3. Throw exception upon boundary overlap with any previous replace. 1313*16467b97STreehugger Robot 1314*16467b97STreehugger Robot Then we can deal with inserts: 1315*16467b97STreehugger Robot 1316*16467b97STreehugger Robot 1. for any inserts to same index, combine even if not adjacent. 1317*16467b97STreehugger Robot 2. for any prior replace with same left boundary, combine this 1318*16467b97STreehugger Robot insert with replace and delete this replace. 1319*16467b97STreehugger Robot 3. throw exception if index in same range as previous replace 1320*16467b97STreehugger Robot 1321*16467b97STreehugger Robot Don't actually delete; make op null in list. Easier to walk list. 1322*16467b97STreehugger Robot Later we can throw as we add to index -> op map. 1323*16467b97STreehugger Robot 1324*16467b97STreehugger Robot Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 1325*16467b97STreehugger Robot inserted stuff would be before the replace range. But, if you 1326*16467b97STreehugger Robot add tokens in front of a method body '{' and then delete the method 1327*16467b97STreehugger Robot body, I think the stuff before the '{' you added should disappear too. 1328*16467b97STreehugger Robot 1329*16467b97STreehugger Robot Return a map from token index to operation. 1330*16467b97STreehugger Robot """ 1331*16467b97STreehugger Robot 1332*16467b97STreehugger Robot # WALK REPLACES 1333*16467b97STreehugger Robot for i, rop in enumerate(rewrites): 1334*16467b97STreehugger Robot if not rop: 1335*16467b97STreehugger Robot continue 1336*16467b97STreehugger Robot 1337*16467b97STreehugger Robot if not isinstance(rop, ReplaceOp): 1338*16467b97STreehugger Robot continue 1339*16467b97STreehugger Robot 1340*16467b97STreehugger Robot # Wipe prior inserts within range 1341*16467b97STreehugger Robot for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1342*16467b97STreehugger Robot if iop.index == rop.index: 1343*16467b97STreehugger Robot # E.g., insert before 2, delete 2..2; update replace 1344*16467b97STreehugger Robot # text to include insert before, kill insert 1345*16467b97STreehugger Robot rewrites[iop.instructionIndex] = None 1346*16467b97STreehugger Robot rop.text = self.catOpText(iop.text, rop.text) 1347*16467b97STreehugger Robot 1348*16467b97STreehugger Robot elif iop.index > rop.index and iop.index <= rop.lastIndex: 1349*16467b97STreehugger Robot # delete insert as it's a no-op. 1350*16467b97STreehugger Robot rewrites[j] = None 1351*16467b97STreehugger Robot 1352*16467b97STreehugger Robot # Drop any prior replaces contained within 1353*16467b97STreehugger Robot for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i): 1354*16467b97STreehugger Robot if (prevRop.index >= rop.index 1355*16467b97STreehugger Robot and prevRop.lastIndex <= rop.lastIndex): 1356*16467b97STreehugger Robot # delete replace as it's a no-op. 1357*16467b97STreehugger Robot rewrites[j] = None 1358*16467b97STreehugger Robot continue 1359*16467b97STreehugger Robot 1360*16467b97STreehugger Robot # throw exception unless disjoint or identical 1361*16467b97STreehugger Robot disjoint = (prevRop.lastIndex < rop.index 1362*16467b97STreehugger Robot or prevRop.index > rop.lastIndex) 1363*16467b97STreehugger Robot same = (prevRop.index == rop.index 1364*16467b97STreehugger Robot and prevRop.lastIndex == rop.lastIndex) 1365*16467b97STreehugger Robot 1366*16467b97STreehugger Robot # Delete special case of replace (text==null): 1367*16467b97STreehugger Robot # D.i-j.u D.x-y.v| boundaries overlapcombine to 1368*16467b97STreehugger Robot # max(min)..max(right) 1369*16467b97STreehugger Robot if prevRop.text is None and rop.text is None and not disjoint: 1370*16467b97STreehugger Robot # kill first delete 1371*16467b97STreehugger Robot rewrites[prevRop.instructionIndex] = None 1372*16467b97STreehugger Robot 1373*16467b97STreehugger Robot rop.index = min(prevRop.index, rop.index) 1374*16467b97STreehugger Robot rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex) 1375*16467b97STreehugger Robot 1376*16467b97STreehugger Robot elif not disjoint and not same: 1377*16467b97STreehugger Robot raise ValueError( 1378*16467b97STreehugger Robot "replace op boundaries of {} overlap with previous {}" 1379*16467b97STreehugger Robot .format(rop, prevRop)) 1380*16467b97STreehugger Robot 1381*16467b97STreehugger Robot # WALK INSERTS 1382*16467b97STreehugger Robot for i, iop in enumerate(rewrites): 1383*16467b97STreehugger Robot if iop is None: 1384*16467b97STreehugger Robot continue 1385*16467b97STreehugger Robot 1386*16467b97STreehugger Robot if not isinstance(iop, InsertBeforeOp): 1387*16467b97STreehugger Robot continue 1388*16467b97STreehugger Robot 1389*16467b97STreehugger Robot # combine current insert with prior if any at same index 1390*16467b97STreehugger Robot for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i): 1391*16467b97STreehugger Robot if prevIop.index == iop.index: # combine objects 1392*16467b97STreehugger Robot # convert to strings...we're in process of toString'ing 1393*16467b97STreehugger Robot # whole token buffer so no lazy eval issue with any 1394*16467b97STreehugger Robot # templates 1395*16467b97STreehugger Robot iop.text = self.catOpText(iop.text, prevIop.text) 1396*16467b97STreehugger Robot # delete redundant prior insert 1397*16467b97STreehugger Robot rewrites[j] = None 1398*16467b97STreehugger Robot 1399*16467b97STreehugger Robot # look for replaces where iop.index is in range; error 1400*16467b97STreehugger Robot for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i): 1401*16467b97STreehugger Robot if iop.index == rop.index: 1402*16467b97STreehugger Robot rop.text = self.catOpText(iop.text, rop.text) 1403*16467b97STreehugger Robot # delete current insert 1404*16467b97STreehugger Robot rewrites[i] = None 1405*16467b97STreehugger Robot continue 1406*16467b97STreehugger Robot 1407*16467b97STreehugger Robot if iop.index >= rop.index and iop.index <= rop.lastIndex: 1408*16467b97STreehugger Robot raise ValueError( 1409*16467b97STreehugger Robot "insert op {} within boundaries of previous {}" 1410*16467b97STreehugger Robot .format(iop, rop)) 1411*16467b97STreehugger Robot 1412*16467b97STreehugger Robot m = {} 1413*16467b97STreehugger Robot for i, op in enumerate(rewrites): 1414*16467b97STreehugger Robot if op is None: 1415*16467b97STreehugger Robot # ignore deleted ops 1416*16467b97STreehugger Robot continue 1417*16467b97STreehugger Robot 1418*16467b97STreehugger Robot assert op.index not in m, "should only be one op per index" 1419*16467b97STreehugger Robot m[op.index] = op 1420*16467b97STreehugger Robot 1421*16467b97STreehugger Robot return m 1422*16467b97STreehugger Robot 1423*16467b97STreehugger Robot 1424*16467b97STreehugger Robot def catOpText(self, a, b): 1425*16467b97STreehugger Robot x = "" 1426*16467b97STreehugger Robot y = "" 1427*16467b97STreehugger Robot if a: 1428*16467b97STreehugger Robot x = a 1429*16467b97STreehugger Robot if b: 1430*16467b97STreehugger Robot y = b 1431*16467b97STreehugger Robot return x + y 1432*16467b97STreehugger Robot 1433*16467b97STreehugger Robot 1434*16467b97STreehugger Robot def getKindOfOps(self, rewrites, kind, before=None): 1435*16467b97STreehugger Robot """Get all operations before an index of a particular kind.""" 1436*16467b97STreehugger Robot 1437*16467b97STreehugger Robot if before is None: 1438*16467b97STreehugger Robot before = len(rewrites) 1439*16467b97STreehugger Robot elif before > len(rewrites): 1440*16467b97STreehugger Robot before = len(rewrites) 1441*16467b97STreehugger Robot 1442*16467b97STreehugger Robot for i, op in enumerate(rewrites[:before]): 1443*16467b97STreehugger Robot # ignore deleted 1444*16467b97STreehugger Robot if op and op.__class__ == kind: 1445*16467b97STreehugger Robot yield i, op 1446*16467b97STreehugger Robot 1447*16467b97STreehugger Robot 1448*16467b97STreehugger Robot def toDebugString(self, start=None, end=None): 1449*16467b97STreehugger Robot if start is None: 1450*16467b97STreehugger Robot start = self.MIN_TOKEN_INDEX 1451*16467b97STreehugger Robot if end is None: 1452*16467b97STreehugger Robot end = self.size() - 1 1453*16467b97STreehugger Robot 1454*16467b97STreehugger Robot buf = StringIO() 1455*16467b97STreehugger Robot i = start 1456*16467b97STreehugger Robot while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens): 1457*16467b97STreehugger Robot buf.write(self.get(i)) 1458*16467b97STreehugger Robot i += 1 1459*16467b97STreehugger Robot 1460*16467b97STreehugger Robot return buf.getvalue() 1461