1*16467b97STreehugger Robot""" @package antlr3.tree 2*16467b97STreehugger Robot@brief ANTLR3 runtime package, treewizard module 3*16467b97STreehugger Robot 4*16467b97STreehugger RobotA utility module to create ASTs at runtime. 5*16467b97STreehugger RobotSee <http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard> for an overview. Note that the API of the Python implementation is slightly different. 6*16467b97STreehugger Robot 7*16467b97STreehugger Robot""" 8*16467b97STreehugger Robot 9*16467b97STreehugger Robot# begin[licence] 10*16467b97STreehugger Robot# 11*16467b97STreehugger Robot# [The "BSD licence"] 12*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr 13*16467b97STreehugger Robot# All rights reserved. 14*16467b97STreehugger Robot# 15*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without 16*16467b97STreehugger Robot# modification, are permitted provided that the following conditions 17*16467b97STreehugger Robot# are met: 18*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright 19*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer. 20*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright 21*16467b97STreehugger Robot# notice, this list of conditions and the following disclaimer in the 22*16467b97STreehugger Robot# documentation and/or other materials provided with the distribution. 23*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products 24*16467b97STreehugger Robot# derived from this software without specific prior written permission. 25*16467b97STreehugger Robot# 26*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 27*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 28*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 29*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 30*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 31*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 35*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36*16467b97STreehugger Robot# 37*16467b97STreehugger Robot# end[licence] 38*16467b97STreehugger Robot 39*16467b97STreehugger Robotfrom .constants import INVALID_TOKEN_TYPE 40*16467b97STreehugger Robotfrom .tokens import CommonToken 41*16467b97STreehugger Robotfrom .tree import CommonTree, CommonTreeAdaptor 42*16467b97STreehugger Robot 43*16467b97STreehugger Robot 44*16467b97STreehugger Robotdef computeTokenTypes(tokenNames): 45*16467b97STreehugger Robot """ 46*16467b97STreehugger Robot Compute a dict that is an inverted index of 47*16467b97STreehugger Robot tokenNames (which maps int token types to names). 48*16467b97STreehugger Robot """ 49*16467b97STreehugger Robot 50*16467b97STreehugger Robot if tokenNames: 51*16467b97STreehugger Robot return dict((name, type) for type, name in enumerate(tokenNames)) 52*16467b97STreehugger Robot 53*16467b97STreehugger Robot return {} 54*16467b97STreehugger Robot 55*16467b97STreehugger Robot 56*16467b97STreehugger Robot## token types for pattern parser 57*16467b97STreehugger RobotEOF = -1 58*16467b97STreehugger RobotBEGIN = 1 59*16467b97STreehugger RobotEND = 2 60*16467b97STreehugger RobotID = 3 61*16467b97STreehugger RobotARG = 4 62*16467b97STreehugger RobotPERCENT = 5 63*16467b97STreehugger RobotCOLON = 6 64*16467b97STreehugger RobotDOT = 7 65*16467b97STreehugger Robot 66*16467b97STreehugger Robotclass TreePatternLexer(object): 67*16467b97STreehugger Robot def __init__(self, pattern): 68*16467b97STreehugger Robot ## The tree pattern to lex like "(A B C)" 69*16467b97STreehugger Robot self.pattern = pattern 70*16467b97STreehugger Robot 71*16467b97STreehugger Robot ## Index into input string 72*16467b97STreehugger Robot self.p = -1 73*16467b97STreehugger Robot 74*16467b97STreehugger Robot ## Current char 75*16467b97STreehugger Robot self.c = None 76*16467b97STreehugger Robot 77*16467b97STreehugger Robot ## How long is the pattern in char? 78*16467b97STreehugger Robot self.n = len(pattern) 79*16467b97STreehugger Robot 80*16467b97STreehugger Robot ## Set when token type is ID or ARG 81*16467b97STreehugger Robot self.sval = None 82*16467b97STreehugger Robot 83*16467b97STreehugger Robot self.error = False 84*16467b97STreehugger Robot 85*16467b97STreehugger Robot self.consume() 86*16467b97STreehugger Robot 87*16467b97STreehugger Robot 88*16467b97STreehugger Robot __idStartChar = frozenset( 89*16467b97STreehugger Robot 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' 90*16467b97STreehugger Robot ) 91*16467b97STreehugger Robot __idChar = __idStartChar | frozenset('0123456789') 92*16467b97STreehugger Robot 93*16467b97STreehugger Robot def nextToken(self): 94*16467b97STreehugger Robot self.sval = "" 95*16467b97STreehugger Robot while self.c != EOF: 96*16467b97STreehugger Robot if self.c in (' ', '\n', '\r', '\t'): 97*16467b97STreehugger Robot self.consume() 98*16467b97STreehugger Robot continue 99*16467b97STreehugger Robot 100*16467b97STreehugger Robot if self.c in self.__idStartChar: 101*16467b97STreehugger Robot self.sval += self.c 102*16467b97STreehugger Robot self.consume() 103*16467b97STreehugger Robot while self.c in self.__idChar: 104*16467b97STreehugger Robot self.sval += self.c 105*16467b97STreehugger Robot self.consume() 106*16467b97STreehugger Robot 107*16467b97STreehugger Robot return ID 108*16467b97STreehugger Robot 109*16467b97STreehugger Robot if self.c == '(': 110*16467b97STreehugger Robot self.consume() 111*16467b97STreehugger Robot return BEGIN 112*16467b97STreehugger Robot 113*16467b97STreehugger Robot if self.c == ')': 114*16467b97STreehugger Robot self.consume() 115*16467b97STreehugger Robot return END 116*16467b97STreehugger Robot 117*16467b97STreehugger Robot if self.c == '%': 118*16467b97STreehugger Robot self.consume() 119*16467b97STreehugger Robot return PERCENT 120*16467b97STreehugger Robot 121*16467b97STreehugger Robot if self.c == ':': 122*16467b97STreehugger Robot self.consume() 123*16467b97STreehugger Robot return COLON 124*16467b97STreehugger Robot 125*16467b97STreehugger Robot if self.c == '.': 126*16467b97STreehugger Robot self.consume() 127*16467b97STreehugger Robot return DOT 128*16467b97STreehugger Robot 129*16467b97STreehugger Robot if self.c == '[': # grab [x] as a string, returning x 130*16467b97STreehugger Robot self.consume() 131*16467b97STreehugger Robot while self.c != ']': 132*16467b97STreehugger Robot if self.c == '\\': 133*16467b97STreehugger Robot self.consume() 134*16467b97STreehugger Robot if self.c != ']': 135*16467b97STreehugger Robot self.sval += '\\' 136*16467b97STreehugger Robot 137*16467b97STreehugger Robot self.sval += self.c 138*16467b97STreehugger Robot 139*16467b97STreehugger Robot else: 140*16467b97STreehugger Robot self.sval += self.c 141*16467b97STreehugger Robot 142*16467b97STreehugger Robot self.consume() 143*16467b97STreehugger Robot 144*16467b97STreehugger Robot self.consume() 145*16467b97STreehugger Robot return ARG 146*16467b97STreehugger Robot 147*16467b97STreehugger Robot self.consume() 148*16467b97STreehugger Robot self.error = True 149*16467b97STreehugger Robot return EOF 150*16467b97STreehugger Robot 151*16467b97STreehugger Robot return EOF 152*16467b97STreehugger Robot 153*16467b97STreehugger Robot 154*16467b97STreehugger Robot def consume(self): 155*16467b97STreehugger Robot self.p += 1 156*16467b97STreehugger Robot if self.p >= self.n: 157*16467b97STreehugger Robot self.c = EOF 158*16467b97STreehugger Robot 159*16467b97STreehugger Robot else: 160*16467b97STreehugger Robot self.c = self.pattern[self.p] 161*16467b97STreehugger Robot 162*16467b97STreehugger Robot 163*16467b97STreehugger Robotclass TreePatternParser(object): 164*16467b97STreehugger Robot def __init__(self, tokenizer, wizard, adaptor): 165*16467b97STreehugger Robot self.tokenizer = tokenizer 166*16467b97STreehugger Robot self.wizard = wizard 167*16467b97STreehugger Robot self.adaptor = adaptor 168*16467b97STreehugger Robot self.ttype = tokenizer.nextToken() # kickstart 169*16467b97STreehugger Robot 170*16467b97STreehugger Robot 171*16467b97STreehugger Robot def pattern(self): 172*16467b97STreehugger Robot if self.ttype == BEGIN: 173*16467b97STreehugger Robot return self.parseTree() 174*16467b97STreehugger Robot 175*16467b97STreehugger Robot elif self.ttype == ID: 176*16467b97STreehugger Robot node = self.parseNode() 177*16467b97STreehugger Robot if self.ttype == EOF: 178*16467b97STreehugger Robot return node 179*16467b97STreehugger Robot 180*16467b97STreehugger Robot return None # extra junk on end 181*16467b97STreehugger Robot 182*16467b97STreehugger Robot return None 183*16467b97STreehugger Robot 184*16467b97STreehugger Robot 185*16467b97STreehugger Robot def parseTree(self): 186*16467b97STreehugger Robot if self.ttype != BEGIN: 187*16467b97STreehugger Robot return None 188*16467b97STreehugger Robot 189*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 190*16467b97STreehugger Robot root = self.parseNode() 191*16467b97STreehugger Robot if root is None: 192*16467b97STreehugger Robot return None 193*16467b97STreehugger Robot 194*16467b97STreehugger Robot while self.ttype in (BEGIN, ID, PERCENT, DOT): 195*16467b97STreehugger Robot if self.ttype == BEGIN: 196*16467b97STreehugger Robot subtree = self.parseTree() 197*16467b97STreehugger Robot self.adaptor.addChild(root, subtree) 198*16467b97STreehugger Robot 199*16467b97STreehugger Robot else: 200*16467b97STreehugger Robot child = self.parseNode() 201*16467b97STreehugger Robot if child is None: 202*16467b97STreehugger Robot return None 203*16467b97STreehugger Robot 204*16467b97STreehugger Robot self.adaptor.addChild(root, child) 205*16467b97STreehugger Robot 206*16467b97STreehugger Robot if self.ttype != END: 207*16467b97STreehugger Robot return None 208*16467b97STreehugger Robot 209*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 210*16467b97STreehugger Robot return root 211*16467b97STreehugger Robot 212*16467b97STreehugger Robot 213*16467b97STreehugger Robot def parseNode(self): 214*16467b97STreehugger Robot # "%label:" prefix 215*16467b97STreehugger Robot label = None 216*16467b97STreehugger Robot 217*16467b97STreehugger Robot if self.ttype == PERCENT: 218*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 219*16467b97STreehugger Robot if self.ttype != ID: 220*16467b97STreehugger Robot return None 221*16467b97STreehugger Robot 222*16467b97STreehugger Robot label = self.tokenizer.sval 223*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 224*16467b97STreehugger Robot if self.ttype != COLON: 225*16467b97STreehugger Robot return None 226*16467b97STreehugger Robot 227*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() # move to ID following colon 228*16467b97STreehugger Robot 229*16467b97STreehugger Robot # Wildcard? 230*16467b97STreehugger Robot if self.ttype == DOT: 231*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 232*16467b97STreehugger Robot wildcardPayload = CommonToken(0, ".") 233*16467b97STreehugger Robot node = WildcardTreePattern(wildcardPayload) 234*16467b97STreehugger Robot if label is not None: 235*16467b97STreehugger Robot node.label = label 236*16467b97STreehugger Robot return node 237*16467b97STreehugger Robot 238*16467b97STreehugger Robot # "ID" or "ID[arg]" 239*16467b97STreehugger Robot if self.ttype != ID: 240*16467b97STreehugger Robot return None 241*16467b97STreehugger Robot 242*16467b97STreehugger Robot tokenName = self.tokenizer.sval 243*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 244*16467b97STreehugger Robot 245*16467b97STreehugger Robot if tokenName == "nil": 246*16467b97STreehugger Robot return self.adaptor.nil() 247*16467b97STreehugger Robot 248*16467b97STreehugger Robot text = tokenName 249*16467b97STreehugger Robot # check for arg 250*16467b97STreehugger Robot arg = None 251*16467b97STreehugger Robot if self.ttype == ARG: 252*16467b97STreehugger Robot arg = self.tokenizer.sval 253*16467b97STreehugger Robot text = arg 254*16467b97STreehugger Robot self.ttype = self.tokenizer.nextToken() 255*16467b97STreehugger Robot 256*16467b97STreehugger Robot # create node 257*16467b97STreehugger Robot treeNodeType = self.wizard.getTokenType(tokenName) 258*16467b97STreehugger Robot if treeNodeType == INVALID_TOKEN_TYPE: 259*16467b97STreehugger Robot return None 260*16467b97STreehugger Robot 261*16467b97STreehugger Robot node = self.adaptor.createFromType(treeNodeType, text) 262*16467b97STreehugger Robot if label is not None and isinstance(node, TreePattern): 263*16467b97STreehugger Robot node.label = label 264*16467b97STreehugger Robot 265*16467b97STreehugger Robot if arg is not None and isinstance(node, TreePattern): 266*16467b97STreehugger Robot node.hasTextArg = True 267*16467b97STreehugger Robot 268*16467b97STreehugger Robot return node 269*16467b97STreehugger Robot 270*16467b97STreehugger Robot 271*16467b97STreehugger Robotclass TreePattern(CommonTree): 272*16467b97STreehugger Robot """ 273*16467b97STreehugger Robot When using %label:TOKENNAME in a tree for parse(), we must 274*16467b97STreehugger Robot track the label. 275*16467b97STreehugger Robot """ 276*16467b97STreehugger Robot 277*16467b97STreehugger Robot def __init__(self, payload): 278*16467b97STreehugger Robot super().__init__(payload) 279*16467b97STreehugger Robot 280*16467b97STreehugger Robot self.label = None 281*16467b97STreehugger Robot self.hasTextArg = None 282*16467b97STreehugger Robot 283*16467b97STreehugger Robot 284*16467b97STreehugger Robot def toString(self): 285*16467b97STreehugger Robot if self.label: 286*16467b97STreehugger Robot return '%' + self.label + ':' + super().toString() 287*16467b97STreehugger Robot 288*16467b97STreehugger Robot else: 289*16467b97STreehugger Robot return super().toString() 290*16467b97STreehugger Robot 291*16467b97STreehugger Robot 292*16467b97STreehugger Robotclass WildcardTreePattern(TreePattern): 293*16467b97STreehugger Robot pass 294*16467b97STreehugger Robot 295*16467b97STreehugger Robot 296*16467b97STreehugger Robotclass TreePatternTreeAdaptor(CommonTreeAdaptor): 297*16467b97STreehugger Robot """This adaptor creates TreePattern objects for use during scan()""" 298*16467b97STreehugger Robot 299*16467b97STreehugger Robot def createWithPayload(self, payload): 300*16467b97STreehugger Robot return TreePattern(payload) 301*16467b97STreehugger Robot 302*16467b97STreehugger Robot 303*16467b97STreehugger Robotclass TreeWizard(object): 304*16467b97STreehugger Robot """ 305*16467b97STreehugger Robot Build and navigate trees with this object. Must know about the names 306*16467b97STreehugger Robot of tokens so you have to pass in a map or array of token names (from which 307*16467b97STreehugger Robot this class can build the map). I.e., Token DECL means nothing unless the 308*16467b97STreehugger Robot class can translate it to a token type. 309*16467b97STreehugger Robot 310*16467b97STreehugger Robot In order to create nodes and navigate, this class needs a TreeAdaptor. 311*16467b97STreehugger Robot 312*16467b97STreehugger Robot This class can build a token type -> node index for repeated use or for 313*16467b97STreehugger Robot iterating over the various nodes with a particular type. 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot This class works in conjunction with the TreeAdaptor rather than moving 316*16467b97STreehugger Robot all this functionality into the adaptor. An adaptor helps build and 317*16467b97STreehugger Robot navigate trees using methods. This class helps you do it with string 318*16467b97STreehugger Robot patterns like "(A B C)". You can create a tree from that pattern or 319*16467b97STreehugger Robot match subtrees against it. 320*16467b97STreehugger Robot """ 321*16467b97STreehugger Robot 322*16467b97STreehugger Robot def __init__(self, adaptor=None, tokenNames=None, typeMap=None): 323*16467b97STreehugger Robot if adaptor is None: 324*16467b97STreehugger Robot self.adaptor = CommonTreeAdaptor() 325*16467b97STreehugger Robot 326*16467b97STreehugger Robot else: 327*16467b97STreehugger Robot self.adaptor = adaptor 328*16467b97STreehugger Robot 329*16467b97STreehugger Robot if typeMap is None: 330*16467b97STreehugger Robot self.tokenNameToTypeMap = computeTokenTypes(tokenNames) 331*16467b97STreehugger Robot 332*16467b97STreehugger Robot else: 333*16467b97STreehugger Robot if tokenNames: 334*16467b97STreehugger Robot raise ValueError("Can't have both tokenNames and typeMap") 335*16467b97STreehugger Robot 336*16467b97STreehugger Robot self.tokenNameToTypeMap = typeMap 337*16467b97STreehugger Robot 338*16467b97STreehugger Robot 339*16467b97STreehugger Robot def getTokenType(self, tokenName): 340*16467b97STreehugger Robot """Using the map of token names to token types, return the type.""" 341*16467b97STreehugger Robot 342*16467b97STreehugger Robot if tokenName in self.tokenNameToTypeMap: 343*16467b97STreehugger Robot return self.tokenNameToTypeMap[tokenName] 344*16467b97STreehugger Robot else: 345*16467b97STreehugger Robot return INVALID_TOKEN_TYPE 346*16467b97STreehugger Robot 347*16467b97STreehugger Robot 348*16467b97STreehugger Robot def create(self, pattern): 349*16467b97STreehugger Robot """ 350*16467b97STreehugger Robot Create a tree or node from the indicated tree pattern that closely 351*16467b97STreehugger Robot follows ANTLR tree grammar tree element syntax: 352*16467b97STreehugger Robot 353*16467b97STreehugger Robot (root child1 ... child2). 354*16467b97STreehugger Robot 355*16467b97STreehugger Robot You can also just pass in a node: ID 356*16467b97STreehugger Robot 357*16467b97STreehugger Robot Any node can have a text argument: ID[foo] 358*16467b97STreehugger Robot (notice there are no quotes around foo--it's clear it's a string). 359*16467b97STreehugger Robot 360*16467b97STreehugger Robot nil is a special name meaning "give me a nil node". Useful for 361*16467b97STreehugger Robot making lists: (nil A B C) is a list of A B C. 362*16467b97STreehugger Robot """ 363*16467b97STreehugger Robot 364*16467b97STreehugger Robot tokenizer = TreePatternLexer(pattern) 365*16467b97STreehugger Robot parser = TreePatternParser(tokenizer, self, self.adaptor) 366*16467b97STreehugger Robot return parser.pattern() 367*16467b97STreehugger Robot 368*16467b97STreehugger Robot 369*16467b97STreehugger Robot def index(self, tree): 370*16467b97STreehugger Robot """Walk the entire tree and make a node name to nodes mapping. 371*16467b97STreehugger Robot 372*16467b97STreehugger Robot For now, use recursion but later nonrecursive version may be 373*16467b97STreehugger Robot more efficient. Returns a dict int -> list where the list is 374*16467b97STreehugger Robot of your AST node type. The int is the token type of the node. 375*16467b97STreehugger Robot """ 376*16467b97STreehugger Robot 377*16467b97STreehugger Robot m = {} 378*16467b97STreehugger Robot self._index(tree, m) 379*16467b97STreehugger Robot return m 380*16467b97STreehugger Robot 381*16467b97STreehugger Robot 382*16467b97STreehugger Robot def _index(self, t, m): 383*16467b97STreehugger Robot """Do the work for index""" 384*16467b97STreehugger Robot 385*16467b97STreehugger Robot if t is None: 386*16467b97STreehugger Robot return 387*16467b97STreehugger Robot 388*16467b97STreehugger Robot ttype = self.adaptor.getType(t) 389*16467b97STreehugger Robot elements = m.get(ttype) 390*16467b97STreehugger Robot if elements is None: 391*16467b97STreehugger Robot m[ttype] = elements = [] 392*16467b97STreehugger Robot 393*16467b97STreehugger Robot elements.append(t) 394*16467b97STreehugger Robot for i in range(self.adaptor.getChildCount(t)): 395*16467b97STreehugger Robot child = self.adaptor.getChild(t, i) 396*16467b97STreehugger Robot self._index(child, m) 397*16467b97STreehugger Robot 398*16467b97STreehugger Robot 399*16467b97STreehugger Robot def find(self, tree, what): 400*16467b97STreehugger Robot """Return a list of matching token. 401*16467b97STreehugger Robot 402*16467b97STreehugger Robot what may either be an integer specifzing the token type to find or 403*16467b97STreehugger Robot a string with a pattern that must be matched. 404*16467b97STreehugger Robot 405*16467b97STreehugger Robot """ 406*16467b97STreehugger Robot 407*16467b97STreehugger Robot if isinstance(what, int): 408*16467b97STreehugger Robot return self._findTokenType(tree, what) 409*16467b97STreehugger Robot 410*16467b97STreehugger Robot elif isinstance(what, str): 411*16467b97STreehugger Robot return self._findPattern(tree, what) 412*16467b97STreehugger Robot 413*16467b97STreehugger Robot else: 414*16467b97STreehugger Robot raise TypeError("'what' must be string or integer") 415*16467b97STreehugger Robot 416*16467b97STreehugger Robot 417*16467b97STreehugger Robot def _findTokenType(self, t, ttype): 418*16467b97STreehugger Robot """Return a List of tree nodes with token type ttype""" 419*16467b97STreehugger Robot 420*16467b97STreehugger Robot nodes = [] 421*16467b97STreehugger Robot 422*16467b97STreehugger Robot def visitor(tree, parent, childIndex, labels): 423*16467b97STreehugger Robot nodes.append(tree) 424*16467b97STreehugger Robot 425*16467b97STreehugger Robot self.visit(t, ttype, visitor) 426*16467b97STreehugger Robot 427*16467b97STreehugger Robot return nodes 428*16467b97STreehugger Robot 429*16467b97STreehugger Robot 430*16467b97STreehugger Robot def _findPattern(self, t, pattern): 431*16467b97STreehugger Robot """Return a List of subtrees matching pattern.""" 432*16467b97STreehugger Robot 433*16467b97STreehugger Robot subtrees = [] 434*16467b97STreehugger Robot 435*16467b97STreehugger Robot # Create a TreePattern from the pattern 436*16467b97STreehugger Robot tokenizer = TreePatternLexer(pattern) 437*16467b97STreehugger Robot parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) 438*16467b97STreehugger Robot tpattern = parser.pattern() 439*16467b97STreehugger Robot 440*16467b97STreehugger Robot # don't allow invalid patterns 441*16467b97STreehugger Robot if (tpattern is None or tpattern.isNil() 442*16467b97STreehugger Robot or isinstance(tpattern, WildcardTreePattern)): 443*16467b97STreehugger Robot return None 444*16467b97STreehugger Robot 445*16467b97STreehugger Robot rootTokenType = tpattern.getType() 446*16467b97STreehugger Robot 447*16467b97STreehugger Robot def visitor(tree, parent, childIndex, label): 448*16467b97STreehugger Robot if self._parse(tree, tpattern, None): 449*16467b97STreehugger Robot subtrees.append(tree) 450*16467b97STreehugger Robot 451*16467b97STreehugger Robot self.visit(t, rootTokenType, visitor) 452*16467b97STreehugger Robot 453*16467b97STreehugger Robot return subtrees 454*16467b97STreehugger Robot 455*16467b97STreehugger Robot 456*16467b97STreehugger Robot def visit(self, tree, what, visitor): 457*16467b97STreehugger Robot """Visit every node in tree matching what, invoking the visitor. 458*16467b97STreehugger Robot 459*16467b97STreehugger Robot If what is a string, it is parsed as a pattern and only matching 460*16467b97STreehugger Robot subtrees will be visited. 461*16467b97STreehugger Robot The implementation uses the root node of the pattern in combination 462*16467b97STreehugger Robot with visit(t, ttype, visitor) so nil-rooted patterns are not allowed. 463*16467b97STreehugger Robot Patterns with wildcard roots are also not allowed. 464*16467b97STreehugger Robot 465*16467b97STreehugger Robot If what is an integer, it is used as a token type and visit will match 466*16467b97STreehugger Robot all nodes of that type (this is faster than the pattern match). 467*16467b97STreehugger Robot The labels arg of the visitor action method is never set (it's None) 468*16467b97STreehugger Robot since using a token type rather than a pattern doesn't let us set a 469*16467b97STreehugger Robot label. 470*16467b97STreehugger Robot """ 471*16467b97STreehugger Robot 472*16467b97STreehugger Robot if isinstance(what, int): 473*16467b97STreehugger Robot self._visitType(tree, None, 0, what, visitor) 474*16467b97STreehugger Robot 475*16467b97STreehugger Robot elif isinstance(what, str): 476*16467b97STreehugger Robot self._visitPattern(tree, what, visitor) 477*16467b97STreehugger Robot 478*16467b97STreehugger Robot else: 479*16467b97STreehugger Robot raise TypeError("'what' must be string or integer") 480*16467b97STreehugger Robot 481*16467b97STreehugger Robot 482*16467b97STreehugger Robot def _visitType(self, t, parent, childIndex, ttype, visitor): 483*16467b97STreehugger Robot """Do the recursive work for visit""" 484*16467b97STreehugger Robot 485*16467b97STreehugger Robot if t is None: 486*16467b97STreehugger Robot return 487*16467b97STreehugger Robot 488*16467b97STreehugger Robot if self.adaptor.getType(t) == ttype: 489*16467b97STreehugger Robot visitor(t, parent, childIndex, None) 490*16467b97STreehugger Robot 491*16467b97STreehugger Robot for i in range(self.adaptor.getChildCount(t)): 492*16467b97STreehugger Robot child = self.adaptor.getChild(t, i) 493*16467b97STreehugger Robot self._visitType(child, t, i, ttype, visitor) 494*16467b97STreehugger Robot 495*16467b97STreehugger Robot 496*16467b97STreehugger Robot def _visitPattern(self, tree, pattern, visitor): 497*16467b97STreehugger Robot """ 498*16467b97STreehugger Robot For all subtrees that match the pattern, execute the visit action. 499*16467b97STreehugger Robot """ 500*16467b97STreehugger Robot 501*16467b97STreehugger Robot # Create a TreePattern from the pattern 502*16467b97STreehugger Robot tokenizer = TreePatternLexer(pattern) 503*16467b97STreehugger Robot parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) 504*16467b97STreehugger Robot tpattern = parser.pattern() 505*16467b97STreehugger Robot 506*16467b97STreehugger Robot # don't allow invalid patterns 507*16467b97STreehugger Robot if (tpattern is None or tpattern.isNil() 508*16467b97STreehugger Robot or isinstance(tpattern, WildcardTreePattern)): 509*16467b97STreehugger Robot return 510*16467b97STreehugger Robot 511*16467b97STreehugger Robot rootTokenType = tpattern.getType() 512*16467b97STreehugger Robot 513*16467b97STreehugger Robot def rootvisitor(tree, parent, childIndex, labels): 514*16467b97STreehugger Robot labels = {} 515*16467b97STreehugger Robot if self._parse(tree, tpattern, labels): 516*16467b97STreehugger Robot visitor(tree, parent, childIndex, labels) 517*16467b97STreehugger Robot 518*16467b97STreehugger Robot self.visit(tree, rootTokenType, rootvisitor) 519*16467b97STreehugger Robot 520*16467b97STreehugger Robot 521*16467b97STreehugger Robot def parse(self, t, pattern, labels=None): 522*16467b97STreehugger Robot """ 523*16467b97STreehugger Robot Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels 524*16467b97STreehugger Robot on the various nodes and '.' (dot) as the node/subtree wildcard, 525*16467b97STreehugger Robot return true if the pattern matches and fill the labels Map with 526*16467b97STreehugger Robot the labels pointing at the appropriate nodes. Return false if 527*16467b97STreehugger Robot the pattern is malformed or the tree does not match. 528*16467b97STreehugger Robot 529*16467b97STreehugger Robot If a node specifies a text arg in pattern, then that must match 530*16467b97STreehugger Robot for that node in t. 531*16467b97STreehugger Robot """ 532*16467b97STreehugger Robot 533*16467b97STreehugger Robot tokenizer = TreePatternLexer(pattern) 534*16467b97STreehugger Robot parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor()) 535*16467b97STreehugger Robot tpattern = parser.pattern() 536*16467b97STreehugger Robot 537*16467b97STreehugger Robot return self._parse(t, tpattern, labels) 538*16467b97STreehugger Robot 539*16467b97STreehugger Robot 540*16467b97STreehugger Robot def _parse(self, t1, tpattern, labels): 541*16467b97STreehugger Robot """ 542*16467b97STreehugger Robot Do the work for parse. Check to see if the tpattern fits the 543*16467b97STreehugger Robot structure and token types in t1. Check text if the pattern has 544*16467b97STreehugger Robot text arguments on nodes. Fill labels map with pointers to nodes 545*16467b97STreehugger Robot in tree matched against nodes in pattern with labels. 546*16467b97STreehugger Robot """ 547*16467b97STreehugger Robot 548*16467b97STreehugger Robot # make sure both are non-null 549*16467b97STreehugger Robot if t1 is None or tpattern is None: 550*16467b97STreehugger Robot return False 551*16467b97STreehugger Robot 552*16467b97STreehugger Robot # check roots (wildcard matches anything) 553*16467b97STreehugger Robot if not isinstance(tpattern, WildcardTreePattern): 554*16467b97STreehugger Robot if self.adaptor.getType(t1) != tpattern.getType(): 555*16467b97STreehugger Robot return False 556*16467b97STreehugger Robot 557*16467b97STreehugger Robot # if pattern has text, check node text 558*16467b97STreehugger Robot if (tpattern.hasTextArg 559*16467b97STreehugger Robot and self.adaptor.getText(t1) != tpattern.getText()): 560*16467b97STreehugger Robot return False 561*16467b97STreehugger Robot 562*16467b97STreehugger Robot if tpattern.label is not None and labels is not None: 563*16467b97STreehugger Robot # map label in pattern to node in t1 564*16467b97STreehugger Robot labels[tpattern.label] = t1 565*16467b97STreehugger Robot 566*16467b97STreehugger Robot # check children 567*16467b97STreehugger Robot n1 = self.adaptor.getChildCount(t1) 568*16467b97STreehugger Robot n2 = tpattern.getChildCount() 569*16467b97STreehugger Robot if n1 != n2: 570*16467b97STreehugger Robot return False 571*16467b97STreehugger Robot 572*16467b97STreehugger Robot for i in range(n1): 573*16467b97STreehugger Robot child1 = self.adaptor.getChild(t1, i) 574*16467b97STreehugger Robot child2 = tpattern.getChild(i) 575*16467b97STreehugger Robot if not self._parse(child1, child2, labels): 576*16467b97STreehugger Robot return False 577*16467b97STreehugger Robot 578*16467b97STreehugger Robot return True 579*16467b97STreehugger Robot 580*16467b97STreehugger Robot 581*16467b97STreehugger Robot def equals(self, t1, t2, adaptor=None): 582*16467b97STreehugger Robot """ 583*16467b97STreehugger Robot Compare t1 and t2; return true if token types/text, structure match 584*16467b97STreehugger Robot exactly. 585*16467b97STreehugger Robot The trees are examined in their entirety so that (A B) does not match 586*16467b97STreehugger Robot (A B C) nor (A (B C)). 587*16467b97STreehugger Robot """ 588*16467b97STreehugger Robot 589*16467b97STreehugger Robot if adaptor is None: 590*16467b97STreehugger Robot adaptor = self.adaptor 591*16467b97STreehugger Robot 592*16467b97STreehugger Robot return self._equals(t1, t2, adaptor) 593*16467b97STreehugger Robot 594*16467b97STreehugger Robot 595*16467b97STreehugger Robot def _equals(self, t1, t2, adaptor): 596*16467b97STreehugger Robot # make sure both are non-null 597*16467b97STreehugger Robot if t1 is None or t2 is None: 598*16467b97STreehugger Robot return False 599*16467b97STreehugger Robot 600*16467b97STreehugger Robot # check roots 601*16467b97STreehugger Robot if adaptor.getType(t1) != adaptor.getType(t2): 602*16467b97STreehugger Robot return False 603*16467b97STreehugger Robot 604*16467b97STreehugger Robot if adaptor.getText(t1) != adaptor.getText(t2): 605*16467b97STreehugger Robot return False 606*16467b97STreehugger Robot 607*16467b97STreehugger Robot # check children 608*16467b97STreehugger Robot n1 = adaptor.getChildCount(t1) 609*16467b97STreehugger Robot n2 = adaptor.getChildCount(t2) 610*16467b97STreehugger Robot if n1 != n2: 611*16467b97STreehugger Robot return False 612*16467b97STreehugger Robot 613*16467b97STreehugger Robot for i in range(n1): 614*16467b97STreehugger Robot child1 = adaptor.getChild(t1, i) 615*16467b97STreehugger Robot child2 = adaptor.getChild(t2, i) 616*16467b97STreehugger Robot if not self._equals(child1, child2, adaptor): 617*16467b97STreehugger Robot return False 618*16467b97STreehugger Robot 619*16467b97STreehugger Robot return True 620