1*16467b97STreehugger Robot /// \file 2*16467b97STreehugger Robot /// Definition of the ANTLR3 common tree node stream. 3*16467b97STreehugger Robot /// 4*16467b97STreehugger Robot 5*16467b97STreehugger Robot #ifndef _ANTLR_COMMON_TREE_NODE_STREAM__HPP 6*16467b97STreehugger Robot #define _ANTLR_COMMON_TREE_NODE_STREAM__HPP 7*16467b97STreehugger Robot 8*16467b97STreehugger Robot // [The "BSD licence"] 9*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 10*16467b97STreehugger Robot 11*16467b97STreehugger Robot // 12*16467b97STreehugger Robot // All rights reserved. 13*16467b97STreehugger Robot // 14*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 15*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 16*16467b97STreehugger Robot // are met: 17*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 18*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 19*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 20*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 21*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 22*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 23*16467b97STreehugger Robot // derived from this software without specific prior written permission. 24*16467b97STreehugger Robot // 25*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 30*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 31*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 32*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 33*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 34*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35*16467b97STreehugger Robot 36*16467b97STreehugger Robot #include "antlr3defs.hpp" 37*16467b97STreehugger Robot 38*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE() 39*16467b97STreehugger Robot 40*16467b97STreehugger Robot template<class ImplTraits> 41*16467b97STreehugger Robot class CommonTreeNodeStream : public ImplTraits::TreeNodeIntStreamType 42*16467b97STreehugger Robot { 43*16467b97STreehugger Robot public: 44*16467b97STreehugger Robot enum Constants 45*16467b97STreehugger Robot { 46*16467b97STreehugger Robot /// Token buffer initial size settings ( will auto increase) 47*16467b97STreehugger Robot /// 48*16467b97STreehugger Robot DEFAULT_INITIAL_BUFFER_SIZE = 100 49*16467b97STreehugger Robot , INITIAL_CALL_STACK_SIZE = 10 50*16467b97STreehugger Robot }; 51*16467b97STreehugger Robot 52*16467b97STreehugger Robot typedef typename ImplTraits::TreeType TreeType; 53*16467b97STreehugger Robot typedef TreeType UnitType; 54*16467b97STreehugger Robot typedef typename ImplTraits::StringType StringType; 55*16467b97STreehugger Robot typedef typename ImplTraits::StringStreamType StringStreamType; 56*16467b97STreehugger Robot typedef typename ImplTraits::TreeAdaptorType TreeAdaptorType; 57*16467b97STreehugger Robot typedef typename ImplTraits::TreeNodeIntStreamType IntStreamType; 58*16467b97STreehugger Robot typedef typename ImplTraits::AllocPolicyType AllocPolicyType; 59*16467b97STreehugger Robot typedef typename AllocPolicyType::template VectorType<TreeType*> NodesType; 60*16467b97STreehugger Robot typedef typename AllocPolicyType::template VectorType< TreeWalkState<ImplTraits> > MarkersType; 61*16467b97STreehugger Robot typedef typename AllocPolicyType::template StackType< ANTLR_INT32 > NodeStackType; 62*16467b97STreehugger Robot typedef typename ImplTraits::TreeParserType ComponentType; 63*16467b97STreehugger Robot typedef typename ImplTraits::CommonTokenType CommonTokenType; 64*16467b97STreehugger Robot typedef typename ImplTraits::TreeNodeIntStreamType BaseType; 65*16467b97STreehugger Robot 66*16467b97STreehugger Robot public: 67*16467b97STreehugger Robot /// Dummy tree node that indicates a descent into a child 68*16467b97STreehugger Robot /// tree. Initialized by a call to create a new interface. 69*16467b97STreehugger Robot /// 70*16467b97STreehugger Robot TreeType m_DOWN; 71*16467b97STreehugger Robot 72*16467b97STreehugger Robot /// Dummy tree node that indicates a descent up to a parent 73*16467b97STreehugger Robot /// tree. Initialized by a call to create a new interface. 74*16467b97STreehugger Robot /// 75*16467b97STreehugger Robot TreeType m_UP; 76*16467b97STreehugger Robot 77*16467b97STreehugger Robot /// Dummy tree node that indicates the termination point of the 78*16467b97STreehugger Robot /// tree. Initialized by a call to create a new interface. 79*16467b97STreehugger Robot /// 80*16467b97STreehugger Robot TreeType m_EOF_NODE; 81*16467b97STreehugger Robot 82*16467b97STreehugger Robot /// Dummy node that is returned if we need to indicate an invalid node 83*16467b97STreehugger Robot /// for any reason. 84*16467b97STreehugger Robot /// 85*16467b97STreehugger Robot TreeType m_INVALID_NODE; 86*16467b97STreehugger Robot 87*16467b97STreehugger Robot /// The complete mapping from stream index to tree node. 88*16467b97STreehugger Robot /// This buffer includes pointers to DOWN, UP, and EOF nodes. 89*16467b97STreehugger Robot /// It is built upon ctor invocation. The elements are type 90*16467b97STreehugger Robot /// Object as we don't what the trees look like. 91*16467b97STreehugger Robot /// 92*16467b97STreehugger Robot /// Load upon first need of the buffer so we can set token types 93*16467b97STreehugger Robot /// of interest for reverseIndexing. Slows us down a wee bit to 94*16467b97STreehugger Robot /// do all of the if p==-1 testing everywhere though, though in C 95*16467b97STreehugger Robot /// you won't really be able to measure this. 96*16467b97STreehugger Robot /// 97*16467b97STreehugger Robot /// Must be freed when the tree node stream is torn down. 98*16467b97STreehugger Robot /// 99*16467b97STreehugger Robot NodesType m_nodes; 100*16467b97STreehugger Robot 101*16467b97STreehugger Robot /// Which tree are we navigating ? 102*16467b97STreehugger Robot /// 103*16467b97STreehugger Robot TreeType* m_root; 104*16467b97STreehugger Robot 105*16467b97STreehugger Robot /// Pointer to tree adaptor interface that manipulates/builds 106*16467b97STreehugger Robot /// the tree. 107*16467b97STreehugger Robot /// 108*16467b97STreehugger Robot TreeAdaptorType* m_adaptor; 109*16467b97STreehugger Robot 110*16467b97STreehugger Robot /// As we walk down the nodes, we must track parent nodes so we know 111*16467b97STreehugger Robot /// where to go after walking the last child of a node. When visiting 112*16467b97STreehugger Robot /// a child, push current node and current index (current index 113*16467b97STreehugger Robot /// is first stored in the tree node structure to avoid two stacks. 114*16467b97STreehugger Robot /// 115*16467b97STreehugger Robot NodeStackType m_nodeStack; 116*16467b97STreehugger Robot 117*16467b97STreehugger Robot /// The current index into the nodes vector of the current tree 118*16467b97STreehugger Robot /// we are parsing and possibly rewriting. 119*16467b97STreehugger Robot /// 120*16467b97STreehugger Robot ANTLR_INT32 m_p; 121*16467b97STreehugger Robot 122*16467b97STreehugger Robot /// Which node are we currently visiting? 123*16467b97STreehugger Robot /// 124*16467b97STreehugger Robot TreeType* m_currentNode; 125*16467b97STreehugger Robot 126*16467b97STreehugger Robot /// Which node did we last visit? Used for LT(-1) 127*16467b97STreehugger Robot /// 128*16467b97STreehugger Robot TreeType* m_previousNode; 129*16467b97STreehugger Robot 130*16467b97STreehugger Robot /// Which child are we currently visiting? If -1 we have not visited 131*16467b97STreehugger Robot /// this node yet; next consume() request will set currentIndex to 0. 132*16467b97STreehugger Robot /// 133*16467b97STreehugger Robot ANTLR_INT32 m_currentChildIndex; 134*16467b97STreehugger Robot 135*16467b97STreehugger Robot /// What node index did we just consume? i=0..n-1 for n node trees. 136*16467b97STreehugger Robot /// IntStream.next is hence 1 + this value. Size will be same. 137*16467b97STreehugger Robot /// 138*16467b97STreehugger Robot ANTLR_MARKER m_absoluteNodeIndex; 139*16467b97STreehugger Robot 140*16467b97STreehugger Robot /// Buffer tree node stream for use with LT(i). This list grows 141*16467b97STreehugger Robot /// to fit new lookahead depths, but consume() wraps like a circular 142*16467b97STreehugger Robot /// buffer. 143*16467b97STreehugger Robot /// 144*16467b97STreehugger Robot TreeType** m_lookAhead; 145*16467b97STreehugger Robot 146*16467b97STreehugger Robot /// Number of elements available in the lookahead buffer at any point in 147*16467b97STreehugger Robot /// time. This is the current size of the array. 148*16467b97STreehugger Robot /// 149*16467b97STreehugger Robot ANTLR_UINT32 m_lookAheadLength; 150*16467b97STreehugger Robot 151*16467b97STreehugger Robot /// lookAhead[head] is the first symbol of lookahead, LT(1). 152*16467b97STreehugger Robot /// 153*16467b97STreehugger Robot ANTLR_UINT32 m_head; 154*16467b97STreehugger Robot 155*16467b97STreehugger Robot /// Add new lookahead at lookahead[tail]. tail wraps around at the 156*16467b97STreehugger Robot /// end of the lookahead buffer so tail could be less than head. 157*16467b97STreehugger Robot /// 158*16467b97STreehugger Robot ANTLR_UINT32 m_tail; 159*16467b97STreehugger Robot 160*16467b97STreehugger Robot /// Calls to mark() may be nested so we have to track a stack of 161*16467b97STreehugger Robot /// them. The marker is an index into this stack. Index 0 is 162*16467b97STreehugger Robot /// the first marker. This is a List<TreeWalkState> 163*16467b97STreehugger Robot /// 164*16467b97STreehugger Robot MarkersType m_markers; 165*16467b97STreehugger Robot 166*16467b97STreehugger Robot /// Indicates whether this node stream was derived from a prior 167*16467b97STreehugger Robot /// node stream to be used by a rewriting tree parser for instance. 168*16467b97STreehugger Robot /// If this flag is set to ANTLR_TRUE, then when this stream is 169*16467b97STreehugger Robot /// closed it will not free the root tree as this tree always 170*16467b97STreehugger Robot /// belongs to the origniating node stream. 171*16467b97STreehugger Robot /// 172*16467b97STreehugger Robot bool m_isRewriter; 173*16467b97STreehugger Robot 174*16467b97STreehugger Robot /// If set to ANTLR_TRUE then the navigation nodes UP, DOWN are 175*16467b97STreehugger Robot /// duplicated rather than reused within the tree. 176*16467b97STreehugger Robot /// 177*16467b97STreehugger Robot bool m_uniqueNavigationNodes; 178*16467b97STreehugger Robot 179*16467b97STreehugger Robot public: 180*16467b97STreehugger Robot // INTERFACE 181*16467b97STreehugger Robot // 182*16467b97STreehugger Robot CommonTreeNodeStream( ANTLR_UINT32 hint ); 183*16467b97STreehugger Robot CommonTreeNodeStream( const CommonTreeNodeStream& ctn ); 184*16467b97STreehugger Robot CommonTreeNodeStream( TreeType* tree, ANTLR_UINT32 hint ); 185*16467b97STreehugger Robot 186*16467b97STreehugger Robot void init( ANTLR_UINT32 hint ); 187*16467b97STreehugger Robot ~CommonTreeNodeStream(); 188*16467b97STreehugger Robot 189*16467b97STreehugger Robot /// Get tree node at current input pointer + i ahead where i=1 is next node. 190*16467b97STreehugger Robot /// i<0 indicates nodes in the past. So LT(-1) is previous node, but 191*16467b97STreehugger Robot /// implementations are not required to provide results for k < -1. 192*16467b97STreehugger Robot /// LT(0) is undefined. For i>=n, return null. 193*16467b97STreehugger Robot /// Return NULL for LT(0) and any index that results in an absolute address 194*16467b97STreehugger Robot /// that is negative (beyond the start of the list). 195*16467b97STreehugger Robot /// 196*16467b97STreehugger Robot /// This is analogous to the LT() method of the TokenStream, but this 197*16467b97STreehugger Robot /// returns a tree node instead of a token. Makes code gen identical 198*16467b97STreehugger Robot /// for both parser and tree grammars. :) 199*16467b97STreehugger Robot /// 200*16467b97STreehugger Robot TreeType* _LT(ANTLR_INT32 k); 201*16467b97STreehugger Robot 202*16467b97STreehugger Robot /// Where is this stream pulling nodes from? This is not the name, but 203*16467b97STreehugger Robot /// the object that provides node objects. 204*16467b97STreehugger Robot /// 205*16467b97STreehugger Robot TreeType* getTreeSource(); 206*16467b97STreehugger Robot 207*16467b97STreehugger Robot /// What adaptor can tell me how to interpret/navigate nodes and 208*16467b97STreehugger Robot /// trees. E.g., get text of a node. 209*16467b97STreehugger Robot /// 210*16467b97STreehugger Robot TreeAdaptorType* getTreeAdaptor(); 211*16467b97STreehugger Robot 212*16467b97STreehugger Robot /// As we flatten the tree, we use UP, DOWN nodes to represent 213*16467b97STreehugger Robot /// the tree structure. When debugging we need unique nodes 214*16467b97STreehugger Robot /// so we have to instantiate new ones. When doing normal tree 215*16467b97STreehugger Robot /// parsing, it's slow and a waste of memory to create unique 216*16467b97STreehugger Robot /// navigation nodes. Default should be false; 217*16467b97STreehugger Robot /// 218*16467b97STreehugger Robot void set_uniqueNavigationNodes(bool uniqueNavigationNodes); 219*16467b97STreehugger Robot 220*16467b97STreehugger Robot StringType toString(); 221*16467b97STreehugger Robot 222*16467b97STreehugger Robot /// Return the text of all nodes from start to stop, inclusive. 223*16467b97STreehugger Robot /// If the stream does not buffer all the nodes then it can still 224*16467b97STreehugger Robot /// walk recursively from start until stop. You can always return 225*16467b97STreehugger Robot /// null or "" too, but users should not access $ruleLabel.text in 226*16467b97STreehugger Robot /// an action of course in that case. 227*16467b97STreehugger Robot /// 228*16467b97STreehugger Robot StringType toStringSS(TreeType* start, TreeType* stop); 229*16467b97STreehugger Robot 230*16467b97STreehugger Robot /// Return the text of all nodes from start to stop, inclusive, into the 231*16467b97STreehugger Robot /// supplied buffer. 232*16467b97STreehugger Robot /// If the stream does not buffer all the nodes then it can still 233*16467b97STreehugger Robot /// walk recursively from start until stop. You can always return 234*16467b97STreehugger Robot /// null or "" too, but users should not access $ruleLabel.text in 235*16467b97STreehugger Robot /// an action of course in that case. 236*16467b97STreehugger Robot /// 237*16467b97STreehugger Robot void toStringWork(TreeType* start, TreeType* stop, StringType& buf); 238*16467b97STreehugger Robot 239*16467b97STreehugger Robot /// Get a tree node at an absolute index i; 0..n-1. 240*16467b97STreehugger Robot /// If you don't want to buffer up nodes, then this method makes no 241*16467b97STreehugger Robot /// sense for you. 242*16467b97STreehugger Robot /// 243*16467b97STreehugger Robot TreeType* get(ANTLR_INT32 i); 244*16467b97STreehugger Robot 245*16467b97STreehugger Robot // REWRITING TREES (used by tree parser) 246*16467b97STreehugger Robot 247*16467b97STreehugger Robot /// Replace from start to stop child index of parent with t, which might 248*16467b97STreehugger Robot /// be a list. Number of children may be different 249*16467b97STreehugger Robot /// after this call. The stream is notified because it is walking the 250*16467b97STreehugger Robot /// tree and might need to know you are monkeying with the underlying 251*16467b97STreehugger Robot /// tree. Also, it might be able to modify the node stream to avoid 252*16467b97STreehugger Robot /// restreaming for future phases. 253*16467b97STreehugger Robot /// 254*16467b97STreehugger Robot /// If parent is null, don't do anything; must be at root of overall tree. 255*16467b97STreehugger Robot /// Can't replace whatever points to the parent externally. Do nothing. 256*16467b97STreehugger Robot /// 257*16467b97STreehugger Robot void replaceChildren(TreeType* parent, ANTLR_INT32 startChildIndex, 258*16467b97STreehugger Robot ANTLR_INT32 stopChildIndex, TreeType* t); 259*16467b97STreehugger Robot 260*16467b97STreehugger Robot TreeType* LB(ANTLR_INT32 k); 261*16467b97STreehugger Robot 262*16467b97STreehugger Robot /// As we flatten the tree, we use UP, DOWN nodes to represent 263*16467b97STreehugger Robot /// the tree structure. When debugging we need unique nodes 264*16467b97STreehugger Robot /// so instantiate new ones when uniqueNavigationNodes is true. 265*16467b97STreehugger Robot /// 266*16467b97STreehugger Robot void addNavigationNode(ANTLR_UINT32 ttype); 267*16467b97STreehugger Robot 268*16467b97STreehugger Robot TreeType* newDownNode(); 269*16467b97STreehugger Robot 270*16467b97STreehugger Robot TreeType* newUpNode(); 271*16467b97STreehugger Robot 272*16467b97STreehugger Robot bool hasUniqueNavigationNodes() const; 273*16467b97STreehugger Robot 274*16467b97STreehugger Robot ANTLR_UINT32 getLookaheadSize(); 275*16467b97STreehugger Robot 276*16467b97STreehugger Robot void push(ANTLR_INT32 index); 277*16467b97STreehugger Robot 278*16467b97STreehugger Robot ANTLR_INT32 pop(); 279*16467b97STreehugger Robot 280*16467b97STreehugger Robot void reset(); 281*16467b97STreehugger Robot 282*16467b97STreehugger Robot void fillBufferRoot(); 283*16467b97STreehugger Robot void fillBuffer(TreeType* t); 284*16467b97STreehugger Robot 285*16467b97STreehugger Robot }; 286*16467b97STreehugger Robot 287*16467b97STreehugger Robot /** This structure is used to save the state information in the treenodestream 288*16467b97STreehugger Robot * when walking ahead with cyclic DFA or for syntactic predicates, 289*16467b97STreehugger Robot * we need to record the state of the tree node stream. This 290*16467b97STreehugger Robot * class wraps up the current state of the CommonTreeNodeStream. 291*16467b97STreehugger Robot * Calling mark() will push another of these on the markers stack. 292*16467b97STreehugger Robot */ 293*16467b97STreehugger Robot template<class ImplTraits> 294*16467b97STreehugger Robot class TreeWalkState : public ImplTraits::AllocPolicyType 295*16467b97STreehugger Robot { 296*16467b97STreehugger Robot public: 297*16467b97STreehugger Robot typedef typename ImplTraits::TreeType TreeType; 298*16467b97STreehugger Robot 299*16467b97STreehugger Robot private: 300*16467b97STreehugger Robot ANTLR_UINT32 m_currentChildIndex; 301*16467b97STreehugger Robot ANTLR_MARKER m_absoluteNodeIndex; 302*16467b97STreehugger Robot TreeType* m_currentNode; 303*16467b97STreehugger Robot TreeType* m_previousNode; 304*16467b97STreehugger Robot ANTLR_UINT32 m_nodeStackSize; 305*16467b97STreehugger Robot TreeType* m_lookAhead; 306*16467b97STreehugger Robot ANTLR_UINT32 m_lookAheadLength; 307*16467b97STreehugger Robot ANTLR_UINT32 m_tail; 308*16467b97STreehugger Robot ANTLR_UINT32 m_head; 309*16467b97STreehugger Robot 310*16467b97STreehugger Robot 311*16467b97STreehugger Robot }; 312*16467b97STreehugger Robot 313*16467b97STreehugger Robot ANTLR_END_NAMESPACE() 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot #include "antlr3commontreenodestream.inl" 316*16467b97STreehugger Robot 317*16467b97STreehugger Robot #endif 318