1*16467b97STreehugger Robot #ifndef ANTLR3COLLECTIONS_HPP 2*16467b97STreehugger Robot #define ANTLR3COLLECTIONS_HPP 3*16467b97STreehugger Robot 4*16467b97STreehugger Robot // [The "BSD licence"] 5*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 6*16467b97STreehugger Robot 7*16467b97STreehugger Robot // 8*16467b97STreehugger Robot // All rights reserved. 9*16467b97STreehugger Robot // 10*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 11*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 12*16467b97STreehugger Robot // are met: 13*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 14*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 15*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 16*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 17*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 18*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 19*16467b97STreehugger Robot // derived from this software without specific prior written permission. 20*16467b97STreehugger Robot // 21*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31*16467b97STreehugger Robot 32*16467b97STreehugger Robot #include "antlr3defs.hpp" 33*16467b97STreehugger Robot 34*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE() 35*16467b97STreehugger Robot 36*16467b97STreehugger Robot /* -------------- TRIE Interfaces ---------------- */ 37*16467b97STreehugger Robot 38*16467b97STreehugger Robot /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE 39*16467b97STreehugger Robot */ 40*16467b97STreehugger Robot template< class ImplTraits, class DataType > 41*16467b97STreehugger Robot class TrieEntry : public ImplTraits::AllocPolicyType 42*16467b97STreehugger Robot { 43*16467b97STreehugger Robot public: 44*16467b97STreehugger Robot typedef typename ImplTraits::AllocPolicyType AllocPolicy; 45*16467b97STreehugger Robot 46*16467b97STreehugger Robot private: 47*16467b97STreehugger Robot DataType m_data; 48*16467b97STreehugger Robot TrieEntry* m_next; /* Allows duplicate entries for same key in insertion order */ 49*16467b97STreehugger Robot 50*16467b97STreehugger Robot public: 51*16467b97STreehugger Robot TrieEntry(const DataType& data, TrieEntry* next); 52*16467b97STreehugger Robot DataType& get_data(); 53*16467b97STreehugger Robot const DataType& get_data() const; 54*16467b97STreehugger Robot TrieEntry* get_next() const; 55*16467b97STreehugger Robot void set_next( TrieEntry* next ); 56*16467b97STreehugger Robot }; 57*16467b97STreehugger Robot 58*16467b97STreehugger Robot /** Structure that defines an element/node in an ANTLR_INT_TRIE 59*16467b97STreehugger Robot */ 60*16467b97STreehugger Robot template< class ImplTraits, class DataType > 61*16467b97STreehugger Robot class IntTrieNode : public ImplTraits::AllocPolicyType 62*16467b97STreehugger Robot { 63*16467b97STreehugger Robot public: 64*16467b97STreehugger Robot typedef TrieEntry<ImplTraits, DataType> TrieEntryType; 65*16467b97STreehugger Robot typedef TrieEntryType BucketsType; 66*16467b97STreehugger Robot 67*16467b97STreehugger Robot private: 68*16467b97STreehugger Robot ANTLR_UINT32 m_bitNum; /**< This is the left/right bit index for traversal along the nodes */ 69*16467b97STreehugger Robot ANTLR_INTKEY m_key; /**< This is the actual key that the entry represents if it is a terminal node */ 70*16467b97STreehugger Robot BucketsType* m_buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ 71*16467b97STreehugger Robot IntTrieNode* m_leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ 72*16467b97STreehugger Robot IntTrieNode* m_rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ 73*16467b97STreehugger Robot 74*16467b97STreehugger Robot public: 75*16467b97STreehugger Robot IntTrieNode(); 76*16467b97STreehugger Robot ~IntTrieNode(); 77*16467b97STreehugger Robot 78*16467b97STreehugger Robot ANTLR_UINT32 get_bitNum() const; 79*16467b97STreehugger Robot ANTLR_INTKEY get_key() const; 80*16467b97STreehugger Robot BucketsType* get_buckets() const; 81*16467b97STreehugger Robot IntTrieNode* get_leftN() const; 82*16467b97STreehugger Robot IntTrieNode* get_rightN() const; 83*16467b97STreehugger Robot void set_bitNum( ANTLR_UINT32 bitNum ); 84*16467b97STreehugger Robot void set_key( ANTLR_INTKEY key ); 85*16467b97STreehugger Robot void set_buckets( BucketsType* buckets ); 86*16467b97STreehugger Robot void set_leftN( IntTrieNode* leftN ); 87*16467b97STreehugger Robot void set_rightN( IntTrieNode* rightN ); 88*16467b97STreehugger Robot }; 89*16467b97STreehugger Robot 90*16467b97STreehugger Robot /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, 91*16467b97STreehugger Robot * as you might expect, the key is turned into a "string" by looking at bit(key, depth) 92*16467b97STreehugger Robot * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) 93*16467b97STreehugger Robot * and potentially a huge trie. This is the algorithm for a Patricia Trie. 94*16467b97STreehugger Robot * Note also that this trie [can] accept multiple entries for the same key and is 95*16467b97STreehugger Robot * therefore a kind of elastic bucket patricia trie. 96*16467b97STreehugger Robot * 97*16467b97STreehugger Robot * If you find this code useful, please feel free to 'steal' it for any purpose 98*16467b97STreehugger Robot * as covered by the BSD license under which ANTLR is issued. You can cut the code 99*16467b97STreehugger Robot * but as the ANTLR library is only about 50K (Windows Vista), you might find it 100*16467b97STreehugger Robot * easier to just link the library. Please keep all comments and licenses and so on 101*16467b97STreehugger Robot * in any version of this you create of course. 102*16467b97STreehugger Robot * 103*16467b97STreehugger Robot * Jim Idle. 104*16467b97STreehugger Robot * 105*16467b97STreehugger Robot */ 106*16467b97STreehugger Robot class IntTrieBase 107*16467b97STreehugger Robot { 108*16467b97STreehugger Robot public: 109*16467b97STreehugger Robot static const ANTLR_UINT8* get_bitIndex(); 110*16467b97STreehugger Robot static const ANTLR_UINT64* get_bitMask(); 111*16467b97STreehugger Robot }; 112*16467b97STreehugger Robot 113*16467b97STreehugger Robot template< class ImplTraits, class DataType > 114*16467b97STreehugger Robot class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase 115*16467b97STreehugger Robot { 116*16467b97STreehugger Robot public: 117*16467b97STreehugger Robot typedef TrieEntry<ImplTraits, DataType> TrieEntryType; 118*16467b97STreehugger Robot typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType; 119*16467b97STreehugger Robot 120*16467b97STreehugger Robot private: 121*16467b97STreehugger Robot IntTrieNodeType* m_root; /* Root node of this integer trie */ 122*16467b97STreehugger Robot IntTrieNodeType* m_current; /* Used to traverse the TRIE with the next() method */ 123*16467b97STreehugger Robot ANTLR_UINT32 m_count; /* Current entry count */ 124*16467b97STreehugger Robot bool m_allowDups; /* Whether this trie accepts duplicate keys */ 125*16467b97STreehugger Robot 126*16467b97STreehugger Robot public: 127*16467b97STreehugger Robot /* INT TRIE Implementation of depth 64 bits, being the number of bits 128*16467b97STreehugger Robot * in a 64 bit integer. 129*16467b97STreehugger Robot */ 130*16467b97STreehugger Robot IntTrie( ANTLR_UINT32 depth ); 131*16467b97STreehugger Robot 132*16467b97STreehugger Robot /** Search the int Trie and return a pointer to the first bucket indexed 133*16467b97STreehugger Robot * by the key if it is contained in the trie, otherwise NULL. 134*16467b97STreehugger Robot */ 135*16467b97STreehugger Robot TrieEntryType* get( ANTLR_INTKEY key); 136*16467b97STreehugger Robot bool del( ANTLR_INTKEY key); 137*16467b97STreehugger Robot 138*16467b97STreehugger Robot /** Add an entry into the INT trie. 139*16467b97STreehugger Robot * Basically we descend the trie as we do when searching it, which will 140*16467b97STreehugger Robot * locate the only node in the trie that can be reached by the bit pattern of the 141*16467b97STreehugger Robot * key. If the key is actually at that node, then if the trie accepts duplicates 142*16467b97STreehugger Robot * we add the supplied data in a new chained bucket to that data node. If it does 143*16467b97STreehugger Robot * not accept duplicates then we merely return FALSE in case the caller wants to know 144*16467b97STreehugger Robot * whether the key was already in the trie. 145*16467b97STreehugger Robot * If the node we locate is not the key we are looking to add, then we insert a new node 146*16467b97STreehugger Robot * into the trie with a bit index of the leftmost differing bit and the left or right 147*16467b97STreehugger Robot * node pointing to itself or the data node we are inserting 'before'. 148*16467b97STreehugger Robot */ 149*16467b97STreehugger Robot bool add( ANTLR_INTKEY key, const DataType& data ); 150*16467b97STreehugger Robot ~IntTrie(); 151*16467b97STreehugger Robot }; 152*16467b97STreehugger Robot 153*16467b97STreehugger Robot /** 154*16467b97STreehugger Robot * A topological sort system that given a set of dependencies of a node m on node n, 155*16467b97STreehugger Robot * can sort them in dependency order. This is a generally useful utility object 156*16467b97STreehugger Robot * that does not care what the things are it is sorting. Generally the set 157*16467b97STreehugger Robot * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. 158*16467b97STreehugger Robot * I have provided a sort method that given ANTLR3_VECTOR as an input will sort 159*16467b97STreehugger Robot * the vector entries in place, as well as a sort method that just returns an 160*16467b97STreehugger Robot * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but 161*16467b97STreehugger Robot * some set of your own device. 162*16467b97STreehugger Robot * 163*16467b97STreehugger Robot * Of the two main algorithms that could be used, I chose to use the depth first 164*16467b97STreehugger Robot * search for unvisited nodes as a) This runs in linear time, and b) it is what 165*16467b97STreehugger Robot * we used in the ANTLR Tool to perform a topological sort of the input grammar files 166*16467b97STreehugger Robot * based on their dependencies. 167*16467b97STreehugger Robot */ 168*16467b97STreehugger Robot template<class ImplTraits> 169*16467b97STreehugger Robot class Topo : public ImplTraits::AllocPolicyType 170*16467b97STreehugger Robot { 171*16467b97STreehugger Robot public: 172*16467b97STreehugger Robot typedef typename ImplTraits::BitsetType BitsetType; 173*16467b97STreehugger Robot typedef typename ImplTraits::AllocPolicyType AllocPolicyType; 174*16467b97STreehugger Robot 175*16467b97STreehugger Robot private: 176*16467b97STreehugger Robot /** 177*16467b97STreehugger Robot * A vector of vectors of edges, built by calling the addEdge method() 178*16467b97STreehugger Robot * to indicate that node number n depends on node number m. Each entry in the vector 179*16467b97STreehugger Robot * contains a bitset, which has a bit index set for each node upon which the 180*16467b97STreehugger Robot * entry node depends. 181*16467b97STreehugger Robot */ 182*16467b97STreehugger Robot BitsetType** m_edges; 183*16467b97STreehugger Robot 184*16467b97STreehugger Robot /** 185*16467b97STreehugger Robot * A vector used to build up the sorted output order. Note that 186*16467b97STreehugger Robot * as the vector contains UINT32 then the maximum node index is 187*16467b97STreehugger Robot * 'limited' to 2^32, as nodes should be zero based. 188*16467b97STreehugger Robot */ 189*16467b97STreehugger Robot ANTLR_UINT32* m_sorted; 190*16467b97STreehugger Robot 191*16467b97STreehugger Robot /** 192*16467b97STreehugger Robot * A vector used to detect cycles in the edge dependecies. It is used 193*16467b97STreehugger Robot * as a stack and each time we descend a node to one of its edges we 194*16467b97STreehugger Robot * add the node into this stack. If we find a node that we have already 195*16467b97STreehugger Robot * visited in the stack, then it means there wasa cycle such as 9->8->1->9 196*16467b97STreehugger Robot * as the only way a node can be on the stack is if we are currently 197*16467b97STreehugger Robot * descnding from it as we remove it from the stack as we exit from 198*16467b97STreehugger Robot * descending its dependencies 199*16467b97STreehugger Robot */ 200*16467b97STreehugger Robot ANTLR_UINT32* m_cycle; 201*16467b97STreehugger Robot 202*16467b97STreehugger Robot /** 203*16467b97STreehugger Robot * A flag that indicates the algorithm found a cycle in the edges 204*16467b97STreehugger Robot * such as 9->8->1->9 205*16467b97STreehugger Robot * If this flag is set after you have called one of the sort routines 206*16467b97STreehugger Robot * then the detected cycle will be contained in the cycle array and 207*16467b97STreehugger Robot * cycleLimit will point to the one after the last entry in the cycle. 208*16467b97STreehugger Robot */ 209*16467b97STreehugger Robot bool m_hasCycle; 210*16467b97STreehugger Robot 211*16467b97STreehugger Robot /** 212*16467b97STreehugger Robot * A watermark used to accumulate potential cycles in the cycle array. 213*16467b97STreehugger Robot * This should be zero when we are done. Check hasCycle after calling one 214*16467b97STreehugger Robot * of the sort methods and if it is true then you can find the cycle 215*16467b97STreehugger Robot * in cycle[0]...cycle[cycleMark-1] 216*16467b97STreehugger Robot */ 217*16467b97STreehugger Robot ANTLR_UINT32 m_cycleMark; 218*16467b97STreehugger Robot 219*16467b97STreehugger Robot /** 220*16467b97STreehugger Robot * One more than the largest node index that is contained in edges/sorted. 221*16467b97STreehugger Robot */ 222*16467b97STreehugger Robot ANTLR_UINT32 m_limit; 223*16467b97STreehugger Robot 224*16467b97STreehugger Robot /** 225*16467b97STreehugger Robot * The set of visited nodes as determined by a set entry in 226*16467b97STreehugger Robot * the bitmap. 227*16467b97STreehugger Robot */ 228*16467b97STreehugger Robot BitsetType* m_visited; 229*16467b97STreehugger Robot 230*16467b97STreehugger Robot public: 231*16467b97STreehugger Robot Topo(); 232*16467b97STreehugger Robot /** 233*16467b97STreehugger Robot * A method that adds an edge from one node to another. An edge 234*16467b97STreehugger Robot * of n -> m indicates that node n is dependent on node m. Note that 235*16467b97STreehugger Robot * while building these edges, it is perfectly OK to add nodes out of 236*16467b97STreehugger Robot * sequence. So, if you have edges: 237*16467b97STreehugger Robot * 238*16467b97STreehugger Robot * 3 -> 0 239*16467b97STreehugger Robot * 2 -> 1 240*16467b97STreehugger Robot * 1 -> 3 241*16467b97STreehugger Robot * 242*16467b97STreehugger Robot * The you can add them in that order and so add node 3 before nodes 2 and 1 243*16467b97STreehugger Robot * 244*16467b97STreehugger Robot */ 245*16467b97STreehugger Robot void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency); 246*16467b97STreehugger Robot 247*16467b97STreehugger Robot 248*16467b97STreehugger Robot /** 249*16467b97STreehugger Robot * A method that returns a pointer to an array of sorted node indexes. 250*16467b97STreehugger Robot * The array is sorted in topological sorted order. Note that the array 251*16467b97STreehugger Robot * is only as large as the largest node index you created an edge for. This means 252*16467b97STreehugger Robot * that if you had an input of 32 nodes, but that largest node with an edge 253*16467b97STreehugger Robot * was 16, then the returned array will be the sorted order of the first 16 254*16467b97STreehugger Robot * nodes and the last 16 nodes of your array are basically fine as they are 255*16467b97STreehugger Robot * as they had no dependencies and do not need any particular sort order. 256*16467b97STreehugger Robot * 257*16467b97STreehugger Robot * NB: If the structure that contains the array is freed, then the sorted 258*16467b97STreehugger Robot * array will be freed too so you should use the value of limit to 259*16467b97STreehugger Robot * make a long term copy of this array if you do not want to keep the topo 260*16467b97STreehugger Robot * structure around as well. 261*16467b97STreehugger Robot */ 262*16467b97STreehugger Robot ANTLR_UINT32* sortToArray(); 263*16467b97STreehugger Robot 264*16467b97STreehugger Robot /** 265*16467b97STreehugger Robot * A method that sorts the supplied ANTLR3_VECTOR in place based 266*16467b97STreehugger Robot * on the previously supplied edge data. 267*16467b97STreehugger Robot */ 268*16467b97STreehugger Robot template<typename DataType> 269*16467b97STreehugger Robot void sortVector( typename ImplTraits::template VectorType<DataType>& v); 270*16467b97STreehugger Robot 271*16467b97STreehugger Robot void DFS(ANTLR_UINT32 node); 272*16467b97STreehugger Robot 273*16467b97STreehugger Robot /** 274*16467b97STreehugger Robot * A method to free this structure and any associated memory. 275*16467b97STreehugger Robot */ 276*16467b97STreehugger Robot ~Topo(); 277*16467b97STreehugger Robot }; 278*16467b97STreehugger Robot 279*16467b97STreehugger Robot ANTLR_END_NAMESPACE() 280*16467b97STreehugger Robot 281*16467b97STreehugger Robot #include "antlr3collections.inl" 282*16467b97STreehugger Robot 283*16467b97STreehugger Robot #endif 284*16467b97STreehugger Robot 285*16467b97STreehugger Robot 286