xref: /aosp_15_r20/external/antlr/runtime/Cpp/include/antlr3collections.hpp (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot #ifndef	ANTLR3COLLECTIONS_HPP
2*16467b97STreehugger Robot #define	ANTLR3COLLECTIONS_HPP
3*16467b97STreehugger Robot 
4*16467b97STreehugger Robot // [The "BSD licence"]
5*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
6*16467b97STreehugger Robot 
7*16467b97STreehugger Robot //
8*16467b97STreehugger Robot // All rights reserved.
9*16467b97STreehugger Robot //
10*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
11*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
12*16467b97STreehugger Robot // are met:
13*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
14*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
15*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
16*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
17*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
18*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
19*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
20*16467b97STreehugger Robot //
21*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*16467b97STreehugger Robot 
32*16467b97STreehugger Robot #include    "antlr3defs.hpp"
33*16467b97STreehugger Robot 
34*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE()
35*16467b97STreehugger Robot 
36*16467b97STreehugger Robot /* -------------- TRIE Interfaces ---------------- */
37*16467b97STreehugger Robot 
38*16467b97STreehugger Robot /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
39*16467b97STreehugger Robot  */
40*16467b97STreehugger Robot template< class ImplTraits, class DataType >
41*16467b97STreehugger Robot class TrieEntry : public ImplTraits::AllocPolicyType
42*16467b97STreehugger Robot {
43*16467b97STreehugger Robot public:
44*16467b97STreehugger Robot 	typedef typename ImplTraits::AllocPolicyType AllocPolicy;
45*16467b97STreehugger Robot 
46*16467b97STreehugger Robot private:
47*16467b97STreehugger Robot 	DataType			m_data;
48*16467b97STreehugger Robot 	TrieEntry*			m_next;	    /* Allows duplicate entries for same key in insertion order	*/
49*16467b97STreehugger Robot 
50*16467b97STreehugger Robot public:
51*16467b97STreehugger Robot 	TrieEntry(const DataType& data, TrieEntry* next);
52*16467b97STreehugger Robot 	DataType& get_data();
53*16467b97STreehugger Robot 	const DataType& get_data() const;
54*16467b97STreehugger Robot 	TrieEntry* get_next() const;
55*16467b97STreehugger Robot 	void set_next( TrieEntry* next );
56*16467b97STreehugger Robot };
57*16467b97STreehugger Robot 
58*16467b97STreehugger Robot /** Structure that defines an element/node in an ANTLR_INT_TRIE
59*16467b97STreehugger Robot  */
60*16467b97STreehugger Robot template< class ImplTraits, class DataType >
61*16467b97STreehugger Robot class IntTrieNode : public ImplTraits::AllocPolicyType
62*16467b97STreehugger Robot {
63*16467b97STreehugger Robot public:
64*16467b97STreehugger Robot 	typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
65*16467b97STreehugger Robot 	typedef TrieEntryType BucketsType;
66*16467b97STreehugger Robot 
67*16467b97STreehugger Robot private:
68*16467b97STreehugger Robot     ANTLR_UINT32	m_bitNum;	/**< This is the left/right bit index for traversal along the nodes				*/
69*16467b97STreehugger Robot     ANTLR_INTKEY	m_key;		/**< This is the actual key that the entry represents if it is a terminal node  */
70*16467b97STreehugger Robot     BucketsType*	m_buckets;	/**< This is the data bucket(s) that the key indexes, which may be NULL			*/
71*16467b97STreehugger Robot     IntTrieNode*	m_leftN;	/**< Pointer to the left node from here when sKey & bitNum = 0					*/
72*16467b97STreehugger Robot     IntTrieNode*	m_rightN;	/**< Pointer to the right node from here when sKey & bitNum, = 1				*/
73*16467b97STreehugger Robot 
74*16467b97STreehugger Robot public:
75*16467b97STreehugger Robot 	IntTrieNode();
76*16467b97STreehugger Robot 	~IntTrieNode();
77*16467b97STreehugger Robot 
78*16467b97STreehugger Robot 	ANTLR_UINT32 get_bitNum() const;
79*16467b97STreehugger Robot 	ANTLR_INTKEY get_key() const;
80*16467b97STreehugger Robot 	BucketsType* get_buckets() const;
81*16467b97STreehugger Robot 	IntTrieNode* get_leftN() const;
82*16467b97STreehugger Robot 	IntTrieNode* get_rightN() const;
83*16467b97STreehugger Robot 	void  set_bitNum( ANTLR_UINT32 bitNum );
84*16467b97STreehugger Robot 	void  set_key( ANTLR_INTKEY key );
85*16467b97STreehugger Robot 	void  set_buckets( BucketsType* buckets );
86*16467b97STreehugger Robot 	void  set_leftN( IntTrieNode* leftN );
87*16467b97STreehugger Robot 	void  set_rightN( IntTrieNode* rightN );
88*16467b97STreehugger Robot };
89*16467b97STreehugger Robot 
90*16467b97STreehugger Robot /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
91*16467b97STreehugger Robot  *  as you might expect, the key is turned into a "string" by looking at bit(key, depth)
92*16467b97STreehugger Robot  *  of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
93*16467b97STreehugger Robot  *  and potentially a huge trie. This is the algorithm for a Patricia Trie.
94*16467b97STreehugger Robot  *  Note also that this trie [can] accept multiple entries for the same key and is
95*16467b97STreehugger Robot  *  therefore a kind of elastic bucket patricia trie.
96*16467b97STreehugger Robot  *
97*16467b97STreehugger Robot  *  If you find this code useful, please feel free to 'steal' it for any purpose
98*16467b97STreehugger Robot  *  as covered by the BSD license under which ANTLR is issued. You can cut the code
99*16467b97STreehugger Robot  *  but as the ANTLR library is only about 50K (Windows Vista), you might find it
100*16467b97STreehugger Robot  *  easier to just link the library. Please keep all comments and licenses and so on
101*16467b97STreehugger Robot  *  in any version of this you create of course.
102*16467b97STreehugger Robot  *
103*16467b97STreehugger Robot  *  Jim Idle.
104*16467b97STreehugger Robot  *
105*16467b97STreehugger Robot  */
106*16467b97STreehugger Robot class IntTrieBase
107*16467b97STreehugger Robot {
108*16467b97STreehugger Robot public:
109*16467b97STreehugger Robot 	static const ANTLR_UINT8* get_bitIndex();
110*16467b97STreehugger Robot 	static const ANTLR_UINT64* get_bitMask();
111*16467b97STreehugger Robot };
112*16467b97STreehugger Robot 
113*16467b97STreehugger Robot template< class ImplTraits, class DataType >
114*16467b97STreehugger Robot class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase
115*16467b97STreehugger Robot {
116*16467b97STreehugger Robot public:
117*16467b97STreehugger Robot 	typedef TrieEntry<ImplTraits, DataType> TrieEntryType;
118*16467b97STreehugger Robot 	typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType;
119*16467b97STreehugger Robot 
120*16467b97STreehugger Robot private:
121*16467b97STreehugger Robot     IntTrieNodeType*	m_root;			/* Root node of this integer trie					*/
122*16467b97STreehugger Robot     IntTrieNodeType*	m_current;		/* Used to traverse the TRIE with the next() method	*/
123*16467b97STreehugger Robot     ANTLR_UINT32	m_count;			/* Current entry count								*/
124*16467b97STreehugger Robot     bool			m_allowDups;		/* Whether this trie accepts duplicate keys			*/
125*16467b97STreehugger Robot 
126*16467b97STreehugger Robot public:
127*16467b97STreehugger Robot 	/* INT TRIE Implementation of depth 64 bits, being the number of bits
128*16467b97STreehugger Robot 	 * in a 64 bit integer.
129*16467b97STreehugger Robot 	 */
130*16467b97STreehugger Robot     IntTrie( ANTLR_UINT32 depth );
131*16467b97STreehugger Robot 
132*16467b97STreehugger Robot 	/** Search the int Trie and return a pointer to the first bucket indexed
133*16467b97STreehugger Robot 	 *  by the key if it is contained in the trie, otherwise NULL.
134*16467b97STreehugger Robot 	 */
135*16467b97STreehugger Robot     TrieEntryType*	get( ANTLR_INTKEY key);
136*16467b97STreehugger Robot     bool		del( ANTLR_INTKEY key);
137*16467b97STreehugger Robot 
138*16467b97STreehugger Robot 	/** Add an entry into the INT trie.
139*16467b97STreehugger Robot 	 *  Basically we descend the trie as we do when searching it, which will
140*16467b97STreehugger Robot 	 *  locate the only node in the trie that can be reached by the bit pattern of the
141*16467b97STreehugger Robot 	 *  key. If the key is actually at that node, then if the trie accepts duplicates
142*16467b97STreehugger Robot 	 *  we add the supplied data in a new chained bucket to that data node. If it does
143*16467b97STreehugger Robot 	 *  not accept duplicates then we merely return FALSE in case the caller wants to know
144*16467b97STreehugger Robot 	 *  whether the key was already in the trie.
145*16467b97STreehugger Robot 	 *  If the node we locate is not the key we are looking to add, then we insert a new node
146*16467b97STreehugger Robot 	 *  into the trie with a bit index of the leftmost differing bit and the left or right
147*16467b97STreehugger Robot 	 *  node pointing to itself or the data node we are inserting 'before'.
148*16467b97STreehugger Robot 	 */
149*16467b97STreehugger Robot     bool		add( ANTLR_INTKEY key, const DataType& data );
150*16467b97STreehugger Robot     ~IntTrie();
151*16467b97STreehugger Robot };
152*16467b97STreehugger Robot 
153*16467b97STreehugger Robot /**
154*16467b97STreehugger Robot  * A topological sort system that given a set of dependencies of a node m on node n,
155*16467b97STreehugger Robot  * can sort them in dependency order. This is a generally useful utility object
156*16467b97STreehugger Robot  * that does not care what the things are it is sorting. Generally the set
157*16467b97STreehugger Robot  * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
158*16467b97STreehugger Robot  * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
159*16467b97STreehugger Robot  * the vector entries in place, as well as a sort method that just returns an
160*16467b97STreehugger Robot  * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
161*16467b97STreehugger Robot  * some set of your own device.
162*16467b97STreehugger Robot  *
163*16467b97STreehugger Robot  * Of the two main algorithms that could be used, I chose to use the depth first
164*16467b97STreehugger Robot  * search for unvisited nodes as a) This runs in linear time, and b) it is what
165*16467b97STreehugger Robot  * we used in the ANTLR Tool to perform a topological sort of the input grammar files
166*16467b97STreehugger Robot  * based on their dependencies.
167*16467b97STreehugger Robot  */
168*16467b97STreehugger Robot template<class ImplTraits>
169*16467b97STreehugger Robot class Topo : public ImplTraits::AllocPolicyType
170*16467b97STreehugger Robot {
171*16467b97STreehugger Robot public:
172*16467b97STreehugger Robot 	typedef typename ImplTraits::BitsetType BitsetType;
173*16467b97STreehugger Robot 	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
174*16467b97STreehugger Robot 
175*16467b97STreehugger Robot private:
176*16467b97STreehugger Robot     /**
177*16467b97STreehugger Robot      * A vector of vectors of edges, built by calling the addEdge method()
178*16467b97STreehugger Robot      * to indicate that node number n depends on node number m. Each entry in the vector
179*16467b97STreehugger Robot      * contains a bitset, which has a bit index set for each node upon which the
180*16467b97STreehugger Robot      * entry node depends.
181*16467b97STreehugger Robot      */
182*16467b97STreehugger Robot     BitsetType**	m_edges;
183*16467b97STreehugger Robot 
184*16467b97STreehugger Robot     /**
185*16467b97STreehugger Robot      * A vector used to build up the sorted output order. Note that
186*16467b97STreehugger Robot      * as the vector contains UINT32 then the maximum node index is
187*16467b97STreehugger Robot      * 'limited' to 2^32, as nodes should be zero based.
188*16467b97STreehugger Robot      */
189*16467b97STreehugger Robot     ANTLR_UINT32*				m_sorted;
190*16467b97STreehugger Robot 
191*16467b97STreehugger Robot     /**
192*16467b97STreehugger Robot      * A vector used to detect cycles in the edge dependecies. It is used
193*16467b97STreehugger Robot      * as a stack and each time we descend a node to one of its edges we
194*16467b97STreehugger Robot      * add the node into this stack. If we find a node that we have already
195*16467b97STreehugger Robot      * visited in the stack, then it means there wasa cycle such as 9->8->1->9
196*16467b97STreehugger Robot      * as the only way a node can be on the stack is if we are currently
197*16467b97STreehugger Robot      * descnding from it as we remove it from the stack as we exit from
198*16467b97STreehugger Robot      * descending its dependencies
199*16467b97STreehugger Robot      */
200*16467b97STreehugger Robot     ANTLR_UINT32*		m_cycle;
201*16467b97STreehugger Robot 
202*16467b97STreehugger Robot     /**
203*16467b97STreehugger Robot      * A flag that indicates the algorithm found a cycle in the edges
204*16467b97STreehugger Robot      * such as 9->8->1->9
205*16467b97STreehugger Robot      * If this flag is set after you have called one of the sort routines
206*16467b97STreehugger Robot      * then the detected cycle will be contained in the cycle array and
207*16467b97STreehugger Robot      * cycleLimit will point to the one after the last entry in the cycle.
208*16467b97STreehugger Robot      */
209*16467b97STreehugger Robot     bool				m_hasCycle;
210*16467b97STreehugger Robot 
211*16467b97STreehugger Robot     /**
212*16467b97STreehugger Robot      * A watermark used to accumulate potential cycles in the cycle array.
213*16467b97STreehugger Robot      * This should be zero when we are done. Check hasCycle after calling one
214*16467b97STreehugger Robot      * of the sort methods and if it is true then you can find the cycle
215*16467b97STreehugger Robot      * in cycle[0]...cycle[cycleMark-1]
216*16467b97STreehugger Robot      */
217*16467b97STreehugger Robot     ANTLR_UINT32		m_cycleMark;
218*16467b97STreehugger Robot 
219*16467b97STreehugger Robot     /**
220*16467b97STreehugger Robot      * One more than the largest node index that is contained in edges/sorted.
221*16467b97STreehugger Robot      */
222*16467b97STreehugger Robot     ANTLR_UINT32		m_limit;
223*16467b97STreehugger Robot 
224*16467b97STreehugger Robot     /**
225*16467b97STreehugger Robot      * The set of visited nodes as determined by a set entry in
226*16467b97STreehugger Robot      * the bitmap.
227*16467b97STreehugger Robot      */
228*16467b97STreehugger Robot     BitsetType*			m_visited;
229*16467b97STreehugger Robot 
230*16467b97STreehugger Robot public:
231*16467b97STreehugger Robot 	Topo();
232*16467b97STreehugger Robot     /**
233*16467b97STreehugger Robot      * A method that adds an edge from one node to another. An edge
234*16467b97STreehugger Robot      * of n -> m indicates that node n is dependent on node m. Note that
235*16467b97STreehugger Robot      * while building these edges, it is perfectly OK to add nodes out of
236*16467b97STreehugger Robot      * sequence. So, if you have edges:
237*16467b97STreehugger Robot      *
238*16467b97STreehugger Robot      * 3 -> 0
239*16467b97STreehugger Robot      * 2 -> 1
240*16467b97STreehugger Robot      * 1 -> 3
241*16467b97STreehugger Robot      *
242*16467b97STreehugger Robot      * The you can add them in that order and so add node 3 before nodes 2 and 1
243*16467b97STreehugger Robot      *
244*16467b97STreehugger Robot      */
245*16467b97STreehugger Robot     void  addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency);
246*16467b97STreehugger Robot 
247*16467b97STreehugger Robot 
248*16467b97STreehugger Robot     /**
249*16467b97STreehugger Robot      * A method that returns a pointer to an array of sorted node indexes.
250*16467b97STreehugger Robot      * The array is sorted in topological sorted order. Note that the array
251*16467b97STreehugger Robot      * is only as large as the largest node index you created an edge for. This means
252*16467b97STreehugger Robot      * that if you had an input of 32 nodes, but that largest node with an edge
253*16467b97STreehugger Robot      * was 16, then the returned array will be the sorted order of the first 16
254*16467b97STreehugger Robot      * nodes and the last 16 nodes of your array are basically fine as they are
255*16467b97STreehugger Robot      * as they had no dependencies and do not need any particular sort order.
256*16467b97STreehugger Robot      *
257*16467b97STreehugger Robot      * NB: If the structure that contains the array is freed, then the sorted
258*16467b97STreehugger Robot      * array will be freed too so you should use the value of limit to
259*16467b97STreehugger Robot      * make a long term copy of this array if you do not want to keep the topo
260*16467b97STreehugger Robot      * structure around as well.
261*16467b97STreehugger Robot      */
262*16467b97STreehugger Robot     ANTLR_UINT32*  sortToArray();
263*16467b97STreehugger Robot 
264*16467b97STreehugger Robot     /**
265*16467b97STreehugger Robot      * A method that sorts the supplied ANTLR3_VECTOR in place based
266*16467b97STreehugger Robot      * on the previously supplied edge data.
267*16467b97STreehugger Robot      */
268*16467b97STreehugger Robot 	template<typename DataType>
269*16467b97STreehugger Robot     void   sortVector( typename ImplTraits::template VectorType<DataType>& v);
270*16467b97STreehugger Robot 
271*16467b97STreehugger Robot 	void   DFS(ANTLR_UINT32 node);
272*16467b97STreehugger Robot 
273*16467b97STreehugger Robot     /**
274*16467b97STreehugger Robot      *  A method to free this structure and any associated memory.
275*16467b97STreehugger Robot      */
276*16467b97STreehugger Robot 	~Topo();
277*16467b97STreehugger Robot };
278*16467b97STreehugger Robot 
279*16467b97STreehugger Robot ANTLR_END_NAMESPACE()
280*16467b97STreehugger Robot 
281*16467b97STreehugger Robot #include "antlr3collections.inl"
282*16467b97STreehugger Robot 
283*16467b97STreehugger Robot #endif
284*16467b97STreehugger Robot 
285*16467b97STreehugger Robot 
286