xref: /aosp_15_r20/external/antlr/runtime/Python3/antlr3/treewizard.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot""" @package antlr3.tree
2*16467b97STreehugger Robot@brief ANTLR3 runtime package, treewizard module
3*16467b97STreehugger Robot
4*16467b97STreehugger RobotA utility module to create ASTs at runtime.
5*16467b97STreehugger RobotSee <http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard> for an overview. Note that the API of the Python implementation is slightly different.
6*16467b97STreehugger Robot
7*16467b97STreehugger Robot"""
8*16467b97STreehugger Robot
9*16467b97STreehugger Robot# begin[licence]
10*16467b97STreehugger Robot#
11*16467b97STreehugger Robot# [The "BSD licence"]
12*16467b97STreehugger Robot# Copyright (c) 2005-2012 Terence Parr
13*16467b97STreehugger Robot# All rights reserved.
14*16467b97STreehugger Robot#
15*16467b97STreehugger Robot# Redistribution and use in source and binary forms, with or without
16*16467b97STreehugger Robot# modification, are permitted provided that the following conditions
17*16467b97STreehugger Robot# are met:
18*16467b97STreehugger Robot# 1. Redistributions of source code must retain the above copyright
19*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer.
20*16467b97STreehugger Robot# 2. Redistributions in binary form must reproduce the above copyright
21*16467b97STreehugger Robot#    notice, this list of conditions and the following disclaimer in the
22*16467b97STreehugger Robot#    documentation and/or other materials provided with the distribution.
23*16467b97STreehugger Robot# 3. The name of the author may not be used to endorse or promote products
24*16467b97STreehugger Robot#    derived from this software without specific prior written permission.
25*16467b97STreehugger Robot#
26*16467b97STreehugger Robot# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
27*16467b97STreehugger Robot# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28*16467b97STreehugger Robot# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29*16467b97STreehugger Robot# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
30*16467b97STreehugger Robot# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31*16467b97STreehugger Robot# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32*16467b97STreehugger Robot# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33*16467b97STreehugger Robot# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34*16467b97STreehugger Robot# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
35*16467b97STreehugger Robot# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36*16467b97STreehugger Robot#
37*16467b97STreehugger Robot# end[licence]
38*16467b97STreehugger Robot
39*16467b97STreehugger Robotfrom .constants import INVALID_TOKEN_TYPE
40*16467b97STreehugger Robotfrom .tokens import CommonToken
41*16467b97STreehugger Robotfrom .tree import CommonTree, CommonTreeAdaptor
42*16467b97STreehugger Robot
43*16467b97STreehugger Robot
44*16467b97STreehugger Robotdef computeTokenTypes(tokenNames):
45*16467b97STreehugger Robot    """
46*16467b97STreehugger Robot    Compute a dict that is an inverted index of
47*16467b97STreehugger Robot    tokenNames (which maps int token types to names).
48*16467b97STreehugger Robot    """
49*16467b97STreehugger Robot
50*16467b97STreehugger Robot    if tokenNames:
51*16467b97STreehugger Robot        return dict((name, type) for type, name in enumerate(tokenNames))
52*16467b97STreehugger Robot
53*16467b97STreehugger Robot    return {}
54*16467b97STreehugger Robot
55*16467b97STreehugger Robot
56*16467b97STreehugger Robot## token types for pattern parser
57*16467b97STreehugger RobotEOF = -1
58*16467b97STreehugger RobotBEGIN = 1
59*16467b97STreehugger RobotEND = 2
60*16467b97STreehugger RobotID = 3
61*16467b97STreehugger RobotARG = 4
62*16467b97STreehugger RobotPERCENT = 5
63*16467b97STreehugger RobotCOLON = 6
64*16467b97STreehugger RobotDOT = 7
65*16467b97STreehugger Robot
66*16467b97STreehugger Robotclass TreePatternLexer(object):
67*16467b97STreehugger Robot    def __init__(self, pattern):
68*16467b97STreehugger Robot        ## The tree pattern to lex like "(A B C)"
69*16467b97STreehugger Robot        self.pattern = pattern
70*16467b97STreehugger Robot
71*16467b97STreehugger Robot	## Index into input string
72*16467b97STreehugger Robot        self.p = -1
73*16467b97STreehugger Robot
74*16467b97STreehugger Robot	## Current char
75*16467b97STreehugger Robot        self.c = None
76*16467b97STreehugger Robot
77*16467b97STreehugger Robot	## How long is the pattern in char?
78*16467b97STreehugger Robot        self.n = len(pattern)
79*16467b97STreehugger Robot
80*16467b97STreehugger Robot	## Set when token type is ID or ARG
81*16467b97STreehugger Robot        self.sval = None
82*16467b97STreehugger Robot
83*16467b97STreehugger Robot        self.error = False
84*16467b97STreehugger Robot
85*16467b97STreehugger Robot        self.consume()
86*16467b97STreehugger Robot
87*16467b97STreehugger Robot
88*16467b97STreehugger Robot    __idStartChar = frozenset(
89*16467b97STreehugger Robot        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
90*16467b97STreehugger Robot        )
91*16467b97STreehugger Robot    __idChar = __idStartChar | frozenset('0123456789')
92*16467b97STreehugger Robot
93*16467b97STreehugger Robot    def nextToken(self):
94*16467b97STreehugger Robot        self.sval = ""
95*16467b97STreehugger Robot        while self.c != EOF:
96*16467b97STreehugger Robot            if self.c in (' ', '\n', '\r', '\t'):
97*16467b97STreehugger Robot                self.consume()
98*16467b97STreehugger Robot                continue
99*16467b97STreehugger Robot
100*16467b97STreehugger Robot            if self.c in self.__idStartChar:
101*16467b97STreehugger Robot                self.sval += self.c
102*16467b97STreehugger Robot                self.consume()
103*16467b97STreehugger Robot                while self.c in self.__idChar:
104*16467b97STreehugger Robot                    self.sval += self.c
105*16467b97STreehugger Robot                    self.consume()
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot                return ID
108*16467b97STreehugger Robot
109*16467b97STreehugger Robot            if self.c == '(':
110*16467b97STreehugger Robot                self.consume()
111*16467b97STreehugger Robot                return BEGIN
112*16467b97STreehugger Robot
113*16467b97STreehugger Robot            if self.c == ')':
114*16467b97STreehugger Robot                self.consume()
115*16467b97STreehugger Robot                return END
116*16467b97STreehugger Robot
117*16467b97STreehugger Robot            if self.c == '%':
118*16467b97STreehugger Robot                self.consume()
119*16467b97STreehugger Robot                return PERCENT
120*16467b97STreehugger Robot
121*16467b97STreehugger Robot            if self.c == ':':
122*16467b97STreehugger Robot                self.consume()
123*16467b97STreehugger Robot                return COLON
124*16467b97STreehugger Robot
125*16467b97STreehugger Robot            if self.c == '.':
126*16467b97STreehugger Robot                self.consume()
127*16467b97STreehugger Robot                return DOT
128*16467b97STreehugger Robot
129*16467b97STreehugger Robot            if self.c == '[': # grab [x] as a string, returning x
130*16467b97STreehugger Robot                self.consume()
131*16467b97STreehugger Robot                while self.c != ']':
132*16467b97STreehugger Robot                    if self.c == '\\':
133*16467b97STreehugger Robot                        self.consume()
134*16467b97STreehugger Robot                        if self.c != ']':
135*16467b97STreehugger Robot                            self.sval += '\\'
136*16467b97STreehugger Robot
137*16467b97STreehugger Robot                        self.sval += self.c
138*16467b97STreehugger Robot
139*16467b97STreehugger Robot                    else:
140*16467b97STreehugger Robot                        self.sval += self.c
141*16467b97STreehugger Robot
142*16467b97STreehugger Robot                    self.consume()
143*16467b97STreehugger Robot
144*16467b97STreehugger Robot                self.consume()
145*16467b97STreehugger Robot                return ARG
146*16467b97STreehugger Robot
147*16467b97STreehugger Robot            self.consume()
148*16467b97STreehugger Robot            self.error = True
149*16467b97STreehugger Robot            return EOF
150*16467b97STreehugger Robot
151*16467b97STreehugger Robot        return EOF
152*16467b97STreehugger Robot
153*16467b97STreehugger Robot
154*16467b97STreehugger Robot    def consume(self):
155*16467b97STreehugger Robot        self.p += 1
156*16467b97STreehugger Robot        if self.p >= self.n:
157*16467b97STreehugger Robot            self.c = EOF
158*16467b97STreehugger Robot
159*16467b97STreehugger Robot        else:
160*16467b97STreehugger Robot            self.c = self.pattern[self.p]
161*16467b97STreehugger Robot
162*16467b97STreehugger Robot
163*16467b97STreehugger Robotclass TreePatternParser(object):
164*16467b97STreehugger Robot    def __init__(self, tokenizer, wizard, adaptor):
165*16467b97STreehugger Robot        self.tokenizer = tokenizer
166*16467b97STreehugger Robot        self.wizard = wizard
167*16467b97STreehugger Robot        self.adaptor = adaptor
168*16467b97STreehugger Robot        self.ttype = tokenizer.nextToken() # kickstart
169*16467b97STreehugger Robot
170*16467b97STreehugger Robot
171*16467b97STreehugger Robot    def pattern(self):
172*16467b97STreehugger Robot        if self.ttype == BEGIN:
173*16467b97STreehugger Robot            return self.parseTree()
174*16467b97STreehugger Robot
175*16467b97STreehugger Robot        elif self.ttype == ID:
176*16467b97STreehugger Robot            node = self.parseNode()
177*16467b97STreehugger Robot            if self.ttype == EOF:
178*16467b97STreehugger Robot                return node
179*16467b97STreehugger Robot
180*16467b97STreehugger Robot            return None # extra junk on end
181*16467b97STreehugger Robot
182*16467b97STreehugger Robot        return None
183*16467b97STreehugger Robot
184*16467b97STreehugger Robot
185*16467b97STreehugger Robot    def parseTree(self):
186*16467b97STreehugger Robot        if self.ttype != BEGIN:
187*16467b97STreehugger Robot            return None
188*16467b97STreehugger Robot
189*16467b97STreehugger Robot        self.ttype = self.tokenizer.nextToken()
190*16467b97STreehugger Robot        root = self.parseNode()
191*16467b97STreehugger Robot        if root is None:
192*16467b97STreehugger Robot            return None
193*16467b97STreehugger Robot
194*16467b97STreehugger Robot        while self.ttype in (BEGIN, ID, PERCENT, DOT):
195*16467b97STreehugger Robot            if self.ttype == BEGIN:
196*16467b97STreehugger Robot                subtree = self.parseTree()
197*16467b97STreehugger Robot                self.adaptor.addChild(root, subtree)
198*16467b97STreehugger Robot
199*16467b97STreehugger Robot            else:
200*16467b97STreehugger Robot                child = self.parseNode()
201*16467b97STreehugger Robot                if child is None:
202*16467b97STreehugger Robot                    return None
203*16467b97STreehugger Robot
204*16467b97STreehugger Robot                self.adaptor.addChild(root, child)
205*16467b97STreehugger Robot
206*16467b97STreehugger Robot        if self.ttype != END:
207*16467b97STreehugger Robot            return None
208*16467b97STreehugger Robot
209*16467b97STreehugger Robot        self.ttype = self.tokenizer.nextToken()
210*16467b97STreehugger Robot        return root
211*16467b97STreehugger Robot
212*16467b97STreehugger Robot
213*16467b97STreehugger Robot    def parseNode(self):
214*16467b97STreehugger Robot        # "%label:" prefix
215*16467b97STreehugger Robot        label = None
216*16467b97STreehugger Robot
217*16467b97STreehugger Robot        if self.ttype == PERCENT:
218*16467b97STreehugger Robot            self.ttype = self.tokenizer.nextToken()
219*16467b97STreehugger Robot            if self.ttype != ID:
220*16467b97STreehugger Robot                return None
221*16467b97STreehugger Robot
222*16467b97STreehugger Robot            label = self.tokenizer.sval
223*16467b97STreehugger Robot            self.ttype = self.tokenizer.nextToken()
224*16467b97STreehugger Robot            if self.ttype != COLON:
225*16467b97STreehugger Robot                return None
226*16467b97STreehugger Robot
227*16467b97STreehugger Robot            self.ttype = self.tokenizer.nextToken() # move to ID following colon
228*16467b97STreehugger Robot
229*16467b97STreehugger Robot        # Wildcard?
230*16467b97STreehugger Robot        if self.ttype == DOT:
231*16467b97STreehugger Robot            self.ttype = self.tokenizer.nextToken()
232*16467b97STreehugger Robot            wildcardPayload = CommonToken(0, ".")
233*16467b97STreehugger Robot            node = WildcardTreePattern(wildcardPayload)
234*16467b97STreehugger Robot            if label is not None:
235*16467b97STreehugger Robot                node.label = label
236*16467b97STreehugger Robot            return node
237*16467b97STreehugger Robot
238*16467b97STreehugger Robot        # "ID" or "ID[arg]"
239*16467b97STreehugger Robot        if self.ttype != ID:
240*16467b97STreehugger Robot            return None
241*16467b97STreehugger Robot
242*16467b97STreehugger Robot        tokenName = self.tokenizer.sval
243*16467b97STreehugger Robot        self.ttype = self.tokenizer.nextToken()
244*16467b97STreehugger Robot
245*16467b97STreehugger Robot        if tokenName == "nil":
246*16467b97STreehugger Robot            return self.adaptor.nil()
247*16467b97STreehugger Robot
248*16467b97STreehugger Robot        text = tokenName
249*16467b97STreehugger Robot        # check for arg
250*16467b97STreehugger Robot        arg = None
251*16467b97STreehugger Robot        if self.ttype == ARG:
252*16467b97STreehugger Robot            arg = self.tokenizer.sval
253*16467b97STreehugger Robot            text = arg
254*16467b97STreehugger Robot            self.ttype = self.tokenizer.nextToken()
255*16467b97STreehugger Robot
256*16467b97STreehugger Robot        # create node
257*16467b97STreehugger Robot        treeNodeType = self.wizard.getTokenType(tokenName)
258*16467b97STreehugger Robot        if treeNodeType == INVALID_TOKEN_TYPE:
259*16467b97STreehugger Robot            return None
260*16467b97STreehugger Robot
261*16467b97STreehugger Robot        node = self.adaptor.createFromType(treeNodeType, text)
262*16467b97STreehugger Robot        if label is not None and isinstance(node, TreePattern):
263*16467b97STreehugger Robot            node.label = label
264*16467b97STreehugger Robot
265*16467b97STreehugger Robot        if arg is not None and isinstance(node, TreePattern):
266*16467b97STreehugger Robot            node.hasTextArg = True
267*16467b97STreehugger Robot
268*16467b97STreehugger Robot        return node
269*16467b97STreehugger Robot
270*16467b97STreehugger Robot
271*16467b97STreehugger Robotclass TreePattern(CommonTree):
272*16467b97STreehugger Robot    """
273*16467b97STreehugger Robot    When using %label:TOKENNAME in a tree for parse(), we must
274*16467b97STreehugger Robot    track the label.
275*16467b97STreehugger Robot    """
276*16467b97STreehugger Robot
277*16467b97STreehugger Robot    def __init__(self, payload):
278*16467b97STreehugger Robot        super().__init__(payload)
279*16467b97STreehugger Robot
280*16467b97STreehugger Robot        self.label = None
281*16467b97STreehugger Robot        self.hasTextArg = None
282*16467b97STreehugger Robot
283*16467b97STreehugger Robot
284*16467b97STreehugger Robot    def toString(self):
285*16467b97STreehugger Robot        if self.label:
286*16467b97STreehugger Robot            return '%' + self.label + ':' + super().toString()
287*16467b97STreehugger Robot
288*16467b97STreehugger Robot        else:
289*16467b97STreehugger Robot            return super().toString()
290*16467b97STreehugger Robot
291*16467b97STreehugger Robot
292*16467b97STreehugger Robotclass WildcardTreePattern(TreePattern):
293*16467b97STreehugger Robot    pass
294*16467b97STreehugger Robot
295*16467b97STreehugger Robot
296*16467b97STreehugger Robotclass TreePatternTreeAdaptor(CommonTreeAdaptor):
297*16467b97STreehugger Robot    """This adaptor creates TreePattern objects for use during scan()"""
298*16467b97STreehugger Robot
299*16467b97STreehugger Robot    def createWithPayload(self, payload):
300*16467b97STreehugger Robot        return TreePattern(payload)
301*16467b97STreehugger Robot
302*16467b97STreehugger Robot
303*16467b97STreehugger Robotclass TreeWizard(object):
304*16467b97STreehugger Robot    """
305*16467b97STreehugger Robot    Build and navigate trees with this object.  Must know about the names
306*16467b97STreehugger Robot    of tokens so you have to pass in a map or array of token names (from which
307*16467b97STreehugger Robot    this class can build the map).  I.e., Token DECL means nothing unless the
308*16467b97STreehugger Robot    class can translate it to a token type.
309*16467b97STreehugger Robot
310*16467b97STreehugger Robot    In order to create nodes and navigate, this class needs a TreeAdaptor.
311*16467b97STreehugger Robot
312*16467b97STreehugger Robot    This class can build a token type -> node index for repeated use or for
313*16467b97STreehugger Robot    iterating over the various nodes with a particular type.
314*16467b97STreehugger Robot
315*16467b97STreehugger Robot    This class works in conjunction with the TreeAdaptor rather than moving
316*16467b97STreehugger Robot    all this functionality into the adaptor.  An adaptor helps build and
317*16467b97STreehugger Robot    navigate trees using methods.  This class helps you do it with string
318*16467b97STreehugger Robot    patterns like "(A B C)".  You can create a tree from that pattern or
319*16467b97STreehugger Robot    match subtrees against it.
320*16467b97STreehugger Robot    """
321*16467b97STreehugger Robot
322*16467b97STreehugger Robot    def __init__(self, adaptor=None, tokenNames=None, typeMap=None):
323*16467b97STreehugger Robot        if adaptor is None:
324*16467b97STreehugger Robot            self.adaptor = CommonTreeAdaptor()
325*16467b97STreehugger Robot
326*16467b97STreehugger Robot        else:
327*16467b97STreehugger Robot            self.adaptor = adaptor
328*16467b97STreehugger Robot
329*16467b97STreehugger Robot        if typeMap is None:
330*16467b97STreehugger Robot            self.tokenNameToTypeMap = computeTokenTypes(tokenNames)
331*16467b97STreehugger Robot
332*16467b97STreehugger Robot        else:
333*16467b97STreehugger Robot            if tokenNames:
334*16467b97STreehugger Robot                raise ValueError("Can't have both tokenNames and typeMap")
335*16467b97STreehugger Robot
336*16467b97STreehugger Robot            self.tokenNameToTypeMap = typeMap
337*16467b97STreehugger Robot
338*16467b97STreehugger Robot
339*16467b97STreehugger Robot    def getTokenType(self, tokenName):
340*16467b97STreehugger Robot        """Using the map of token names to token types, return the type."""
341*16467b97STreehugger Robot
342*16467b97STreehugger Robot        if tokenName in self.tokenNameToTypeMap:
343*16467b97STreehugger Robot            return self.tokenNameToTypeMap[tokenName]
344*16467b97STreehugger Robot        else:
345*16467b97STreehugger Robot            return INVALID_TOKEN_TYPE
346*16467b97STreehugger Robot
347*16467b97STreehugger Robot
348*16467b97STreehugger Robot    def create(self, pattern):
349*16467b97STreehugger Robot        """
350*16467b97STreehugger Robot        Create a tree or node from the indicated tree pattern that closely
351*16467b97STreehugger Robot        follows ANTLR tree grammar tree element syntax:
352*16467b97STreehugger Robot
353*16467b97STreehugger Robot        (root child1 ... child2).
354*16467b97STreehugger Robot
355*16467b97STreehugger Robot        You can also just pass in a node: ID
356*16467b97STreehugger Robot
357*16467b97STreehugger Robot        Any node can have a text argument: ID[foo]
358*16467b97STreehugger Robot        (notice there are no quotes around foo--it's clear it's a string).
359*16467b97STreehugger Robot
360*16467b97STreehugger Robot        nil is a special name meaning "give me a nil node".  Useful for
361*16467b97STreehugger Robot        making lists: (nil A B C) is a list of A B C.
362*16467b97STreehugger Robot        """
363*16467b97STreehugger Robot
364*16467b97STreehugger Robot        tokenizer = TreePatternLexer(pattern)
365*16467b97STreehugger Robot        parser = TreePatternParser(tokenizer, self, self.adaptor)
366*16467b97STreehugger Robot        return parser.pattern()
367*16467b97STreehugger Robot
368*16467b97STreehugger Robot
369*16467b97STreehugger Robot    def index(self, tree):
370*16467b97STreehugger Robot        """Walk the entire tree and make a node name to nodes mapping.
371*16467b97STreehugger Robot
372*16467b97STreehugger Robot        For now, use recursion but later nonrecursive version may be
373*16467b97STreehugger Robot        more efficient.  Returns a dict int -> list where the list is
374*16467b97STreehugger Robot        of your AST node type.  The int is the token type of the node.
375*16467b97STreehugger Robot        """
376*16467b97STreehugger Robot
377*16467b97STreehugger Robot        m = {}
378*16467b97STreehugger Robot        self._index(tree, m)
379*16467b97STreehugger Robot        return m
380*16467b97STreehugger Robot
381*16467b97STreehugger Robot
382*16467b97STreehugger Robot    def _index(self, t, m):
383*16467b97STreehugger Robot        """Do the work for index"""
384*16467b97STreehugger Robot
385*16467b97STreehugger Robot        if t is None:
386*16467b97STreehugger Robot            return
387*16467b97STreehugger Robot
388*16467b97STreehugger Robot        ttype = self.adaptor.getType(t)
389*16467b97STreehugger Robot        elements = m.get(ttype)
390*16467b97STreehugger Robot        if elements is None:
391*16467b97STreehugger Robot            m[ttype] = elements = []
392*16467b97STreehugger Robot
393*16467b97STreehugger Robot        elements.append(t)
394*16467b97STreehugger Robot        for i in range(self.adaptor.getChildCount(t)):
395*16467b97STreehugger Robot            child = self.adaptor.getChild(t, i)
396*16467b97STreehugger Robot            self._index(child, m)
397*16467b97STreehugger Robot
398*16467b97STreehugger Robot
399*16467b97STreehugger Robot    def find(self, tree, what):
400*16467b97STreehugger Robot        """Return a list of matching token.
401*16467b97STreehugger Robot
402*16467b97STreehugger Robot        what may either be an integer specifzing the token type to find or
403*16467b97STreehugger Robot        a string with a pattern that must be matched.
404*16467b97STreehugger Robot
405*16467b97STreehugger Robot        """
406*16467b97STreehugger Robot
407*16467b97STreehugger Robot        if isinstance(what, int):
408*16467b97STreehugger Robot            return self._findTokenType(tree, what)
409*16467b97STreehugger Robot
410*16467b97STreehugger Robot        elif isinstance(what, str):
411*16467b97STreehugger Robot            return self._findPattern(tree, what)
412*16467b97STreehugger Robot
413*16467b97STreehugger Robot        else:
414*16467b97STreehugger Robot            raise TypeError("'what' must be string or integer")
415*16467b97STreehugger Robot
416*16467b97STreehugger Robot
417*16467b97STreehugger Robot    def _findTokenType(self, t, ttype):
418*16467b97STreehugger Robot        """Return a List of tree nodes with token type ttype"""
419*16467b97STreehugger Robot
420*16467b97STreehugger Robot        nodes = []
421*16467b97STreehugger Robot
422*16467b97STreehugger Robot        def visitor(tree, parent, childIndex, labels):
423*16467b97STreehugger Robot            nodes.append(tree)
424*16467b97STreehugger Robot
425*16467b97STreehugger Robot        self.visit(t, ttype, visitor)
426*16467b97STreehugger Robot
427*16467b97STreehugger Robot        return nodes
428*16467b97STreehugger Robot
429*16467b97STreehugger Robot
430*16467b97STreehugger Robot    def _findPattern(self, t, pattern):
431*16467b97STreehugger Robot        """Return a List of subtrees matching pattern."""
432*16467b97STreehugger Robot
433*16467b97STreehugger Robot        subtrees = []
434*16467b97STreehugger Robot
435*16467b97STreehugger Robot        # Create a TreePattern from the pattern
436*16467b97STreehugger Robot        tokenizer = TreePatternLexer(pattern)
437*16467b97STreehugger Robot        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
438*16467b97STreehugger Robot        tpattern = parser.pattern()
439*16467b97STreehugger Robot
440*16467b97STreehugger Robot        # don't allow invalid patterns
441*16467b97STreehugger Robot        if (tpattern is None or tpattern.isNil()
442*16467b97STreehugger Robot            or isinstance(tpattern, WildcardTreePattern)):
443*16467b97STreehugger Robot            return None
444*16467b97STreehugger Robot
445*16467b97STreehugger Robot        rootTokenType = tpattern.getType()
446*16467b97STreehugger Robot
447*16467b97STreehugger Robot        def visitor(tree, parent, childIndex, label):
448*16467b97STreehugger Robot            if self._parse(tree, tpattern, None):
449*16467b97STreehugger Robot                subtrees.append(tree)
450*16467b97STreehugger Robot
451*16467b97STreehugger Robot        self.visit(t, rootTokenType, visitor)
452*16467b97STreehugger Robot
453*16467b97STreehugger Robot        return subtrees
454*16467b97STreehugger Robot
455*16467b97STreehugger Robot
456*16467b97STreehugger Robot    def visit(self, tree, what, visitor):
457*16467b97STreehugger Robot        """Visit every node in tree matching what, invoking the visitor.
458*16467b97STreehugger Robot
459*16467b97STreehugger Robot        If what is a string, it is parsed as a pattern and only matching
460*16467b97STreehugger Robot        subtrees will be visited.
461*16467b97STreehugger Robot        The implementation uses the root node of the pattern in combination
462*16467b97STreehugger Robot        with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
463*16467b97STreehugger Robot        Patterns with wildcard roots are also not allowed.
464*16467b97STreehugger Robot
465*16467b97STreehugger Robot        If what is an integer, it is used as a token type and visit will match
466*16467b97STreehugger Robot        all nodes of that type (this is faster than the pattern match).
467*16467b97STreehugger Robot        The labels arg of the visitor action method is never set (it's None)
468*16467b97STreehugger Robot        since using a token type rather than a pattern doesn't let us set a
469*16467b97STreehugger Robot        label.
470*16467b97STreehugger Robot        """
471*16467b97STreehugger Robot
472*16467b97STreehugger Robot        if isinstance(what, int):
473*16467b97STreehugger Robot            self._visitType(tree, None, 0, what, visitor)
474*16467b97STreehugger Robot
475*16467b97STreehugger Robot        elif isinstance(what, str):
476*16467b97STreehugger Robot            self._visitPattern(tree, what, visitor)
477*16467b97STreehugger Robot
478*16467b97STreehugger Robot        else:
479*16467b97STreehugger Robot            raise TypeError("'what' must be string or integer")
480*16467b97STreehugger Robot
481*16467b97STreehugger Robot
482*16467b97STreehugger Robot    def _visitType(self, t, parent, childIndex, ttype, visitor):
483*16467b97STreehugger Robot        """Do the recursive work for visit"""
484*16467b97STreehugger Robot
485*16467b97STreehugger Robot        if t is None:
486*16467b97STreehugger Robot            return
487*16467b97STreehugger Robot
488*16467b97STreehugger Robot        if self.adaptor.getType(t) == ttype:
489*16467b97STreehugger Robot            visitor(t, parent, childIndex, None)
490*16467b97STreehugger Robot
491*16467b97STreehugger Robot        for i in range(self.adaptor.getChildCount(t)):
492*16467b97STreehugger Robot            child = self.adaptor.getChild(t, i)
493*16467b97STreehugger Robot            self._visitType(child, t, i, ttype, visitor)
494*16467b97STreehugger Robot
495*16467b97STreehugger Robot
496*16467b97STreehugger Robot    def _visitPattern(self, tree, pattern, visitor):
497*16467b97STreehugger Robot        """
498*16467b97STreehugger Robot        For all subtrees that match the pattern, execute the visit action.
499*16467b97STreehugger Robot        """
500*16467b97STreehugger Robot
501*16467b97STreehugger Robot        # Create a TreePattern from the pattern
502*16467b97STreehugger Robot        tokenizer = TreePatternLexer(pattern)
503*16467b97STreehugger Robot        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
504*16467b97STreehugger Robot        tpattern = parser.pattern()
505*16467b97STreehugger Robot
506*16467b97STreehugger Robot        # don't allow invalid patterns
507*16467b97STreehugger Robot        if (tpattern is None or tpattern.isNil()
508*16467b97STreehugger Robot            or isinstance(tpattern, WildcardTreePattern)):
509*16467b97STreehugger Robot            return
510*16467b97STreehugger Robot
511*16467b97STreehugger Robot        rootTokenType = tpattern.getType()
512*16467b97STreehugger Robot
513*16467b97STreehugger Robot        def rootvisitor(tree, parent, childIndex, labels):
514*16467b97STreehugger Robot            labels = {}
515*16467b97STreehugger Robot            if self._parse(tree, tpattern, labels):
516*16467b97STreehugger Robot                visitor(tree, parent, childIndex, labels)
517*16467b97STreehugger Robot
518*16467b97STreehugger Robot        self.visit(tree, rootTokenType, rootvisitor)
519*16467b97STreehugger Robot
520*16467b97STreehugger Robot
521*16467b97STreehugger Robot    def parse(self, t, pattern, labels=None):
522*16467b97STreehugger Robot        """
523*16467b97STreehugger Robot        Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels
524*16467b97STreehugger Robot        on the various nodes and '.' (dot) as the node/subtree wildcard,
525*16467b97STreehugger Robot        return true if the pattern matches and fill the labels Map with
526*16467b97STreehugger Robot        the labels pointing at the appropriate nodes.  Return false if
527*16467b97STreehugger Robot        the pattern is malformed or the tree does not match.
528*16467b97STreehugger Robot
529*16467b97STreehugger Robot        If a node specifies a text arg in pattern, then that must match
530*16467b97STreehugger Robot        for that node in t.
531*16467b97STreehugger Robot        """
532*16467b97STreehugger Robot
533*16467b97STreehugger Robot        tokenizer = TreePatternLexer(pattern)
534*16467b97STreehugger Robot        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
535*16467b97STreehugger Robot        tpattern = parser.pattern()
536*16467b97STreehugger Robot
537*16467b97STreehugger Robot        return self._parse(t, tpattern, labels)
538*16467b97STreehugger Robot
539*16467b97STreehugger Robot
540*16467b97STreehugger Robot    def _parse(self, t1, tpattern, labels):
541*16467b97STreehugger Robot        """
542*16467b97STreehugger Robot        Do the work for parse. Check to see if the tpattern fits the
543*16467b97STreehugger Robot        structure and token types in t1.  Check text if the pattern has
544*16467b97STreehugger Robot        text arguments on nodes.  Fill labels map with pointers to nodes
545*16467b97STreehugger Robot        in tree matched against nodes in pattern with labels.
546*16467b97STreehugger Robot	"""
547*16467b97STreehugger Robot
548*16467b97STreehugger Robot        # make sure both are non-null
549*16467b97STreehugger Robot        if t1 is None or tpattern is None:
550*16467b97STreehugger Robot            return False
551*16467b97STreehugger Robot
552*16467b97STreehugger Robot        # check roots (wildcard matches anything)
553*16467b97STreehugger Robot        if not isinstance(tpattern, WildcardTreePattern):
554*16467b97STreehugger Robot            if self.adaptor.getType(t1) != tpattern.getType():
555*16467b97STreehugger Robot                return False
556*16467b97STreehugger Robot
557*16467b97STreehugger Robot            # if pattern has text, check node text
558*16467b97STreehugger Robot            if (tpattern.hasTextArg
559*16467b97STreehugger Robot                and self.adaptor.getText(t1) != tpattern.getText()):
560*16467b97STreehugger Robot                return False
561*16467b97STreehugger Robot
562*16467b97STreehugger Robot        if tpattern.label is not None and labels is not None:
563*16467b97STreehugger Robot            # map label in pattern to node in t1
564*16467b97STreehugger Robot            labels[tpattern.label] = t1
565*16467b97STreehugger Robot
566*16467b97STreehugger Robot        # check children
567*16467b97STreehugger Robot        n1 = self.adaptor.getChildCount(t1)
568*16467b97STreehugger Robot        n2 = tpattern.getChildCount()
569*16467b97STreehugger Robot        if n1 != n2:
570*16467b97STreehugger Robot            return False
571*16467b97STreehugger Robot
572*16467b97STreehugger Robot        for i in range(n1):
573*16467b97STreehugger Robot            child1 = self.adaptor.getChild(t1, i)
574*16467b97STreehugger Robot            child2 = tpattern.getChild(i)
575*16467b97STreehugger Robot            if not self._parse(child1, child2, labels):
576*16467b97STreehugger Robot                return False
577*16467b97STreehugger Robot
578*16467b97STreehugger Robot        return True
579*16467b97STreehugger Robot
580*16467b97STreehugger Robot
581*16467b97STreehugger Robot    def equals(self, t1, t2, adaptor=None):
582*16467b97STreehugger Robot        """
583*16467b97STreehugger Robot        Compare t1 and t2; return true if token types/text, structure match
584*16467b97STreehugger Robot        exactly.
585*16467b97STreehugger Robot        The trees are examined in their entirety so that (A B) does not match
586*16467b97STreehugger Robot        (A B C) nor (A (B C)).
587*16467b97STreehugger Robot        """
588*16467b97STreehugger Robot
589*16467b97STreehugger Robot        if adaptor is None:
590*16467b97STreehugger Robot            adaptor = self.adaptor
591*16467b97STreehugger Robot
592*16467b97STreehugger Robot        return self._equals(t1, t2, adaptor)
593*16467b97STreehugger Robot
594*16467b97STreehugger Robot
595*16467b97STreehugger Robot    def _equals(self, t1, t2, adaptor):
596*16467b97STreehugger Robot        # make sure both are non-null
597*16467b97STreehugger Robot        if t1 is None or t2 is None:
598*16467b97STreehugger Robot            return False
599*16467b97STreehugger Robot
600*16467b97STreehugger Robot        # check roots
601*16467b97STreehugger Robot        if adaptor.getType(t1) != adaptor.getType(t2):
602*16467b97STreehugger Robot            return False
603*16467b97STreehugger Robot
604*16467b97STreehugger Robot        if adaptor.getText(t1) != adaptor.getText(t2):
605*16467b97STreehugger Robot            return False
606*16467b97STreehugger Robot
607*16467b97STreehugger Robot        # check children
608*16467b97STreehugger Robot        n1 = adaptor.getChildCount(t1)
609*16467b97STreehugger Robot        n2 = adaptor.getChildCount(t2)
610*16467b97STreehugger Robot        if n1 != n2:
611*16467b97STreehugger Robot            return False
612*16467b97STreehugger Robot
613*16467b97STreehugger Robot        for i in range(n1):
614*16467b97STreehugger Robot            child1 = adaptor.getChild(t1, i)
615*16467b97STreehugger Robot            child2 = adaptor.getChild(t2, i)
616*16467b97STreehugger Robot            if not self._equals(child1, child2, adaptor):
617*16467b97STreehugger Robot                return False
618*16467b97STreehugger Robot
619*16467b97STreehugger Robot        return True
620