1*16467b97STreehugger Robot #ifndef ANTLR3COLLECTIONS_H 2*16467b97STreehugger Robot #define ANTLR3COLLECTIONS_H 3*16467b97STreehugger Robot 4*16467b97STreehugger Robot // [The "BSD licence"] 5*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 6*16467b97STreehugger Robot // http://www.temporal-wave.com 7*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle 8*16467b97STreehugger Robot // 9*16467b97STreehugger Robot // All rights reserved. 10*16467b97STreehugger Robot // 11*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 12*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 13*16467b97STreehugger Robot // are met: 14*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 15*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 16*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 17*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 18*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 19*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 20*16467b97STreehugger Robot // derived from this software without specific prior written permission. 21*16467b97STreehugger Robot // 22*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32*16467b97STreehugger Robot 33*16467b97STreehugger Robot #include <antlr3defs.h> 34*16467b97STreehugger Robot #include <antlr3bitset.h> 35*16467b97STreehugger Robot 36*16467b97STreehugger Robot #define ANTLR3_HASH_TYPE_INT 0 /**< Indicates the hashed file has integer keys */ 37*16467b97STreehugger Robot #define ANTLR3_HASH_TYPE_STR 1 /**< Indicates the hashed file has numeric keys */ 38*16467b97STreehugger Robot 39*16467b97STreehugger Robot #ifdef __cplusplus 40*16467b97STreehugger Robot extern "C" { 41*16467b97STreehugger Robot #endif 42*16467b97STreehugger Robot 43*16467b97STreehugger Robot typedef struct ANTLR3_HASH_KEY_struct 44*16467b97STreehugger Robot { 45*16467b97STreehugger Robot ANTLR3_UINT8 type; /**< One of ##ANTLR3_HASH_TYPE_INT or ##ANTLR3_HASH_TYPE_STR */ 46*16467b97STreehugger Robot 47*16467b97STreehugger Robot union 48*16467b97STreehugger Robot { 49*16467b97STreehugger Robot pANTLR3_UINT8 sKey; /**< Used if type is ANTLR3_HASH_TYPE_STR */ 50*16467b97STreehugger Robot ANTLR3_INTKEY iKey; /**< used if type is ANTLR3_HASH_TYPE_INT */ 51*16467b97STreehugger Robot } 52*16467b97STreehugger Robot key; 53*16467b97STreehugger Robot 54*16467b97STreehugger Robot } ANTLR3_HASH_KEY, *pANTLR3_HASH_KEY; 55*16467b97STreehugger Robot 56*16467b97STreehugger Robot /** Internal structure representing an element in a hash bucket. 57*16467b97STreehugger Robot * Stores the original key so that duplicate keys can be rejected 58*16467b97STreehugger Robot * if necessary, and contains function can be supported. If the hash key 59*16467b97STreehugger Robot * could be unique I would have invented the perfect compression algorithm ;-) 60*16467b97STreehugger Robot */ 61*16467b97STreehugger Robot typedef struct ANTLR3_HASH_ENTRY_struct 62*16467b97STreehugger Robot { 63*16467b97STreehugger Robot /** Key that created this particular entry 64*16467b97STreehugger Robot */ 65*16467b97STreehugger Robot ANTLR3_HASH_KEY keybase; 66*16467b97STreehugger Robot 67*16467b97STreehugger Robot /** Pointer to the data for this particular entry 68*16467b97STreehugger Robot */ 69*16467b97STreehugger Robot void * data; 70*16467b97STreehugger Robot 71*16467b97STreehugger Robot /** Pointer to routine that knows how to release the memory 72*16467b97STreehugger Robot * structure pointed at by data. If this is NULL then we assume 73*16467b97STreehugger Robot * that the data pointer does not need to be freed when the entry 74*16467b97STreehugger Robot * is deleted from the table. 75*16467b97STreehugger Robot */ 76*16467b97STreehugger Robot void (ANTLR3_CDECL *free)(void * data); 77*16467b97STreehugger Robot 78*16467b97STreehugger Robot /** Pointer to the next entry in this bucket if there 79*16467b97STreehugger Robot * is one. Sometimes different keys will hash to the same bucket (especially 80*16467b97STreehugger Robot * if the number of buckets is small). We could implement dual hashing algorithms 81*16467b97STreehugger Robot * to minimize this, but that seems over the top for what this is needed for. 82*16467b97STreehugger Robot */ 83*16467b97STreehugger Robot struct ANTLR3_HASH_ENTRY_struct * nextEntry; 84*16467b97STreehugger Robot } 85*16467b97STreehugger Robot ANTLR3_HASH_ENTRY; 86*16467b97STreehugger Robot 87*16467b97STreehugger Robot /** Internal structure of a hash table bucket, which tracks 88*16467b97STreehugger Robot * all keys that hash to the same bucket. 89*16467b97STreehugger Robot */ 90*16467b97STreehugger Robot typedef struct ANTLR3_HASH_BUCKET_struct 91*16467b97STreehugger Robot { 92*16467b97STreehugger Robot /** Pointer to the first entry in the bucket (if any, it 93*16467b97STreehugger Robot * may be NULL). Duplicate entries are chained from 94*16467b97STreehugger Robot * here. 95*16467b97STreehugger Robot */ 96*16467b97STreehugger Robot pANTLR3_HASH_ENTRY entries; 97*16467b97STreehugger Robot 98*16467b97STreehugger Robot } 99*16467b97STreehugger Robot ANTLR3_HASH_BUCKET; 100*16467b97STreehugger Robot 101*16467b97STreehugger Robot /** Structure that tracks a hash table 102*16467b97STreehugger Robot */ 103*16467b97STreehugger Robot typedef struct ANTLR3_HASH_TABLE_struct 104*16467b97STreehugger Robot { 105*16467b97STreehugger Robot /** Indicates whether the table allows duplicate keys 106*16467b97STreehugger Robot */ 107*16467b97STreehugger Robot int allowDups; 108*16467b97STreehugger Robot 109*16467b97STreehugger Robot /** Number of buckets available in this table 110*16467b97STreehugger Robot */ 111*16467b97STreehugger Robot ANTLR3_UINT32 modulo; 112*16467b97STreehugger Robot 113*16467b97STreehugger Robot /** Points to the memory where the array of buckets 114*16467b97STreehugger Robot * starts. 115*16467b97STreehugger Robot */ 116*16467b97STreehugger Robot pANTLR3_HASH_BUCKET buckets; 117*16467b97STreehugger Robot 118*16467b97STreehugger Robot /** How many elements currently exist in the table. 119*16467b97STreehugger Robot */ 120*16467b97STreehugger Robot ANTLR3_UINT32 count; 121*16467b97STreehugger Robot 122*16467b97STreehugger Robot /** Whether the hash table should strdup the keys it is given or not. 123*16467b97STreehugger Robot */ 124*16467b97STreehugger Robot ANTLR3_BOOLEAN doStrdup; 125*16467b97STreehugger Robot 126*16467b97STreehugger Robot /** Pointer to function to completely delete this table 127*16467b97STreehugger Robot */ 128*16467b97STreehugger Robot void (*free) (struct ANTLR3_HASH_TABLE_struct * table); 129*16467b97STreehugger Robot 130*16467b97STreehugger Robot /* String keyed hashtable functions */ 131*16467b97STreehugger Robot void (*del) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 132*16467b97STreehugger Robot pANTLR3_HASH_ENTRY (*remove) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 133*16467b97STreehugger Robot void * (*get) (struct ANTLR3_HASH_TABLE_struct * table, void * key); 134*16467b97STreehugger Robot ANTLR3_INT32 (*put) (struct ANTLR3_HASH_TABLE_struct * table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 135*16467b97STreehugger Robot 136*16467b97STreehugger Robot /* Integer based hash functions */ 137*16467b97STreehugger Robot void (*delI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 138*16467b97STreehugger Robot pANTLR3_HASH_ENTRY (*removeI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 139*16467b97STreehugger Robot void * (*getI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key); 140*16467b97STreehugger Robot ANTLR3_INT32 (*putI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 141*16467b97STreehugger Robot 142*16467b97STreehugger Robot ANTLR3_UINT32 (*size) (struct ANTLR3_HASH_TABLE_struct * table); 143*16467b97STreehugger Robot } 144*16467b97STreehugger Robot ANTLR3_HASH_TABLE; 145*16467b97STreehugger Robot 146*16467b97STreehugger Robot 147*16467b97STreehugger Robot /** Internal structure representing an enumeration of a table. 148*16467b97STreehugger Robot * This is returned by antlr3Enumeration() 149*16467b97STreehugger Robot * Allows the programmer to traverse the table in hash order without 150*16467b97STreehugger Robot * knowing what is in the actual table. 151*16467b97STreehugger Robot * 152*16467b97STreehugger Robot * Note that it is up to the caller to ensure that the table 153*16467b97STreehugger Robot * structure does not change in the hash bucket that is currently being 154*16467b97STreehugger Robot * enumerated as this structure just tracks the next pointers in the 155*16467b97STreehugger Robot * bucket series. 156*16467b97STreehugger Robot */ 157*16467b97STreehugger Robot typedef struct ANTLR3_HASH_ENUM_struct 158*16467b97STreehugger Robot { 159*16467b97STreehugger Robot /* Pointer to the table we are enumerating 160*16467b97STreehugger Robot */ 161*16467b97STreehugger Robot pANTLR3_HASH_TABLE table; 162*16467b97STreehugger Robot 163*16467b97STreehugger Robot /* Bucket we are currently enumerating (if NULL then we are done) 164*16467b97STreehugger Robot */ 165*16467b97STreehugger Robot ANTLR3_UINT32 bucket; 166*16467b97STreehugger Robot 167*16467b97STreehugger Robot /* Next entry to return, if NULL, then move to next bucket if any 168*16467b97STreehugger Robot */ 169*16467b97STreehugger Robot pANTLR3_HASH_ENTRY entry; 170*16467b97STreehugger Robot 171*16467b97STreehugger Robot /* Interface 172*16467b97STreehugger Robot */ 173*16467b97STreehugger Robot int (*next) (struct ANTLR3_HASH_ENUM_struct * en, pANTLR3_HASH_KEY *key, void ** data); 174*16467b97STreehugger Robot void (*free) (struct ANTLR3_HASH_ENUM_struct * table); 175*16467b97STreehugger Robot } 176*16467b97STreehugger Robot ANTLR3_HASH_ENUM; 177*16467b97STreehugger Robot 178*16467b97STreehugger Robot /** Structure that represents a LIST collection 179*16467b97STreehugger Robot */ 180*16467b97STreehugger Robot typedef struct ANTLR3_LIST_struct 181*16467b97STreehugger Robot { 182*16467b97STreehugger Robot /** Hash table that is storing the list elements 183*16467b97STreehugger Robot */ 184*16467b97STreehugger Robot pANTLR3_HASH_TABLE table; 185*16467b97STreehugger Robot 186*16467b97STreehugger Robot void (*free) (struct ANTLR3_LIST_struct * list); 187*16467b97STreehugger Robot void (*del) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 188*16467b97STreehugger Robot void * (*get) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 189*16467b97STreehugger Robot void * (*remove) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key); 190*16467b97STreehugger Robot ANTLR3_INT32 (*add) (struct ANTLR3_LIST_struct * list, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 191*16467b97STreehugger Robot ANTLR3_INT32 (*put) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 192*16467b97STreehugger Robot ANTLR3_UINT32 (*size) (struct ANTLR3_LIST_struct * list); 193*16467b97STreehugger Robot 194*16467b97STreehugger Robot } 195*16467b97STreehugger Robot ANTLR3_LIST; 196*16467b97STreehugger Robot 197*16467b97STreehugger Robot /** Structure that represents a Stack collection 198*16467b97STreehugger Robot */ 199*16467b97STreehugger Robot typedef struct ANTLR3_STACK_struct 200*16467b97STreehugger Robot { 201*16467b97STreehugger Robot /** List that supports the stack structure 202*16467b97STreehugger Robot */ 203*16467b97STreehugger Robot pANTLR3_VECTOR vector; 204*16467b97STreehugger Robot 205*16467b97STreehugger Robot /** Used for quick access to the top of the stack 206*16467b97STreehugger Robot */ 207*16467b97STreehugger Robot void * top; 208*16467b97STreehugger Robot void (*free) (struct ANTLR3_STACK_struct * stack); 209*16467b97STreehugger Robot void * (*pop) (struct ANTLR3_STACK_struct * stack); 210*16467b97STreehugger Robot void * (*get) (struct ANTLR3_STACK_struct * stack, ANTLR3_INTKEY key); 211*16467b97STreehugger Robot ANTLR3_BOOLEAN (*push) (struct ANTLR3_STACK_struct * stack, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 212*16467b97STreehugger Robot ANTLR3_UINT32 (*size) (struct ANTLR3_STACK_struct * stack); 213*16467b97STreehugger Robot void * (*peek) (struct ANTLR3_STACK_struct * stack); 214*16467b97STreehugger Robot 215*16467b97STreehugger Robot } 216*16467b97STreehugger Robot ANTLR3_STACK; 217*16467b97STreehugger Robot 218*16467b97STreehugger Robot /* Structure that represents a vector element 219*16467b97STreehugger Robot */ 220*16467b97STreehugger Robot typedef struct ANTLR3_VECTOR_ELEMENT_struct 221*16467b97STreehugger Robot { 222*16467b97STreehugger Robot void * element; 223*16467b97STreehugger Robot void (ANTLR3_CDECL *freeptr)(void *); 224*16467b97STreehugger Robot } 225*16467b97STreehugger Robot ANTLR3_VECTOR_ELEMENT, *pANTLR3_VECTOR_ELEMENT; 226*16467b97STreehugger Robot 227*16467b97STreehugger Robot #define ANTLR3_VECTOR_INTERNAL_SIZE 16 228*16467b97STreehugger Robot /* Structure that represents a vector collection. A vector is a simple list 229*16467b97STreehugger Robot * that contains a pointer to the element and a pointer to a function that 230*16467b97STreehugger Robot * that can free the element if it is removed. It auto resizes but does not 231*16467b97STreehugger Robot * use hash techniques as it is referenced by a simple numeric index. It is not a 232*16467b97STreehugger Robot * sparse list, so if any element is deleted, then the ones following are moved 233*16467b97STreehugger Robot * down in memory and the count is adjusted. 234*16467b97STreehugger Robot */ 235*16467b97STreehugger Robot typedef struct ANTLR3_VECTOR_struct 236*16467b97STreehugger Robot { 237*16467b97STreehugger Robot /** Array of pointers to vector elements 238*16467b97STreehugger Robot */ 239*16467b97STreehugger Robot pANTLR3_VECTOR_ELEMENT elements; 240*16467b97STreehugger Robot 241*16467b97STreehugger Robot /** Number of entries currently in the list; 242*16467b97STreehugger Robot */ 243*16467b97STreehugger Robot ANTLR3_UINT32 count; 244*16467b97STreehugger Robot 245*16467b97STreehugger Robot /** Many times, a vector holds just a few nodes in an AST and it 246*16467b97STreehugger Robot * is too much overhead to malloc the space for elements so 247*16467b97STreehugger Robot * at the expense of a few bytes of memory, we hold the first 248*16467b97STreehugger Robot * few elements internally. It means we must copy them when 249*16467b97STreehugger Robot * we grow beyond this initial size, but that is less overhead than 250*16467b97STreehugger Robot * the malloc/free callas we would otherwise require. 251*16467b97STreehugger Robot */ 252*16467b97STreehugger Robot ANTLR3_VECTOR_ELEMENT internal[ANTLR3_VECTOR_INTERNAL_SIZE]; 253*16467b97STreehugger Robot 254*16467b97STreehugger Robot /** Indicates if the structure was made by a factory, in which 255*16467b97STreehugger Robot * case only the factory can free the memory for the actual vector, 256*16467b97STreehugger Robot * though the vector free function is called and will recurse through its 257*16467b97STreehugger Robot * entries calling any free pointers for each entry. 258*16467b97STreehugger Robot */ 259*16467b97STreehugger Robot ANTLR3_BOOLEAN factoryMade; 260*16467b97STreehugger Robot 261*16467b97STreehugger Robot /** Total number of entries in elements at any point in time 262*16467b97STreehugger Robot */ 263*16467b97STreehugger Robot ANTLR3_UINT32 elementsSize; 264*16467b97STreehugger Robot 265*16467b97STreehugger Robot void (ANTLR3_CDECL *free) (struct ANTLR3_VECTOR_struct * vector); 266*16467b97STreehugger Robot void (*del) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 267*16467b97STreehugger Robot void * (*get) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 268*16467b97STreehugger Robot void * (*remove) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry); 269*16467b97STreehugger Robot void (*clear) (struct ANTLR3_VECTOR_struct * vector); 270*16467b97STreehugger Robot ANTLR3_BOOLEAN (*swap) (struct ANTLR3_VECTOR_struct *, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2); 271*16467b97STreehugger Robot ANTLR3_UINT32 (*add) (struct ANTLR3_VECTOR_struct * vector, void * element, void (ANTLR3_CDECL *freeptr)(void *)); 272*16467b97STreehugger Robot ANTLR3_UINT32 (*set) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting); 273*16467b97STreehugger Robot ANTLR3_UINT32 (*size) (struct ANTLR3_VECTOR_struct * vector); 274*16467b97STreehugger Robot } 275*16467b97STreehugger Robot ANTLR3_VECTOR; 276*16467b97STreehugger Robot 277*16467b97STreehugger Robot /** Default vector pool size if otherwise unspecified 278*16467b97STreehugger Robot */ 279*16467b97STreehugger Robot #define ANTLR3_FACTORY_VPOOL_SIZE 256 280*16467b97STreehugger Robot 281*16467b97STreehugger Robot /** Structure that tracks vectors in a vector and auto deletes the vectors 282*16467b97STreehugger Robot * in the vector factory when closed. 283*16467b97STreehugger Robot */ 284*16467b97STreehugger Robot typedef struct ANTLR3_VECTOR_FACTORY_struct 285*16467b97STreehugger Robot { 286*16467b97STreehugger Robot 287*16467b97STreehugger Robot /** List of all vector pools allocated so far 288*16467b97STreehugger Robot */ 289*16467b97STreehugger Robot pANTLR3_VECTOR *pools; 290*16467b97STreehugger Robot 291*16467b97STreehugger Robot /** Count of the vector pools allocated so far (current active pool) 292*16467b97STreehugger Robot */ 293*16467b97STreehugger Robot ANTLR3_INT32 thisPool; 294*16467b97STreehugger Robot 295*16467b97STreehugger Robot /** The next vector available in the pool 296*16467b97STreehugger Robot */ 297*16467b97STreehugger Robot ANTLR3_UINT32 nextVector; 298*16467b97STreehugger Robot 299*16467b97STreehugger Robot /** Trick to quickly initialize a new vector via memcpy and not a function call 300*16467b97STreehugger Robot */ 301*16467b97STreehugger Robot ANTLR3_VECTOR unTruc; 302*16467b97STreehugger Robot 303*16467b97STreehugger Robot /** Consumers from the factory can release a factory produced vector 304*16467b97STreehugger Robot * back to the factory so that it may be reused (and thus conserve memory) 305*16467b97STreehugger Robot * by another caller. The available vectors are stored here. Note that 306*16467b97STreehugger Robot * the only vectors avaible in the free chain are produced by this factory, so they 307*16467b97STreehugger Robot * need not be explicitly freed when the factory is closed. 308*16467b97STreehugger Robot */ 309*16467b97STreehugger Robot pANTLR3_STACK freeStack; 310*16467b97STreehugger Robot 311*16467b97STreehugger Robot /** Function to close the vector factory 312*16467b97STreehugger Robot */ 313*16467b97STreehugger Robot void (*close) (struct ANTLR3_VECTOR_FACTORY_struct * factory); 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot /** Function to supply a new vector 316*16467b97STreehugger Robot */ 317*16467b97STreehugger Robot pANTLR3_VECTOR (*newVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory); 318*16467b97STreehugger Robot 319*16467b97STreehugger Robot /// Function to return a vector to the factory for reuse 320*16467b97STreehugger Robot /// 321*16467b97STreehugger Robot void (*returnVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory, pANTLR3_VECTOR vector); 322*16467b97STreehugger Robot 323*16467b97STreehugger Robot } 324*16467b97STreehugger Robot ANTLR3_VECTOR_FACTORY; 325*16467b97STreehugger Robot 326*16467b97STreehugger Robot 327*16467b97STreehugger Robot /* -------------- TRIE Interfaces ---------------- */ 328*16467b97STreehugger Robot 329*16467b97STreehugger Robot 330*16467b97STreehugger Robot /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE 331*16467b97STreehugger Robot */ 332*16467b97STreehugger Robot typedef struct ANTLR3_TRIE_ENTRY_struct 333*16467b97STreehugger Robot { 334*16467b97STreehugger Robot ANTLR3_UINT32 type; 335*16467b97STreehugger Robot void (ANTLR3_CDECL *freeptr)(void *); 336*16467b97STreehugger Robot union 337*16467b97STreehugger Robot { 338*16467b97STreehugger Robot ANTLR3_INTKEY intVal; 339*16467b97STreehugger Robot void * ptr; 340*16467b97STreehugger Robot } data; 341*16467b97STreehugger Robot 342*16467b97STreehugger Robot struct ANTLR3_TRIE_ENTRY_struct * next; /* Allows duplicate entries for same key in insertion order */ 343*16467b97STreehugger Robot } 344*16467b97STreehugger Robot ANTLR3_TRIE_ENTRY, * pANTLR3_TRIE_ENTRY; 345*16467b97STreehugger Robot 346*16467b97STreehugger Robot 347*16467b97STreehugger Robot /** Structure that defines an element/node in an ANTLR3_INT_TRIE 348*16467b97STreehugger Robot */ 349*16467b97STreehugger Robot typedef struct ANTLR3_INT_TRIE_NODE_struct 350*16467b97STreehugger Robot { 351*16467b97STreehugger Robot ANTLR3_UINT32 bitNum; /**< This is the left/right bit index for traversal along the nodes */ 352*16467b97STreehugger Robot ANTLR3_INTKEY key; /**< This is the actual key that the entry represents if it is a terminal node */ 353*16467b97STreehugger Robot pANTLR3_TRIE_ENTRY buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ 354*16467b97STreehugger Robot struct ANTLR3_INT_TRIE_NODE_struct * leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ 355*16467b97STreehugger Robot struct ANTLR3_INT_TRIE_NODE_struct * rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ 356*16467b97STreehugger Robot } 357*16467b97STreehugger Robot ANTLR3_INT_TRIE_NODE, * pANTLR3_INT_TRIE_NODE; 358*16467b97STreehugger Robot 359*16467b97STreehugger Robot /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, 360*16467b97STreehugger Robot * as you might expect, the key is turned into a "string" by looking at bit(key, depth) 361*16467b97STreehugger Robot * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) 362*16467b97STreehugger Robot * and potentially a huge trie. This is the algorithm for a Patricia Trie. 363*16467b97STreehugger Robot * Note also that this trie [can] accept multiple entries for the same key and is 364*16467b97STreehugger Robot * therefore a kind of elastic bucket patricia trie. 365*16467b97STreehugger Robot * 366*16467b97STreehugger Robot * If you find this code useful, please feel free to 'steal' it for any purpose 367*16467b97STreehugger Robot * as covered by the BSD license under which ANTLR is issued. You can cut the code 368*16467b97STreehugger Robot * but as the ANTLR library is only about 50K (Windows Vista), you might find it 369*16467b97STreehugger Robot * easier to just link the library. Please keep all comments and licenses and so on 370*16467b97STreehugger Robot * in any version of this you create of course. 371*16467b97STreehugger Robot * 372*16467b97STreehugger Robot * Jim Idle. 373*16467b97STreehugger Robot * 374*16467b97STreehugger Robot */ 375*16467b97STreehugger Robot typedef struct ANTLR3_INT_TRIE_struct 376*16467b97STreehugger Robot { 377*16467b97STreehugger Robot pANTLR3_INT_TRIE_NODE root; /* Root node of this integer trie */ 378*16467b97STreehugger Robot pANTLR3_INT_TRIE_NODE current; /* Used to traverse the TRIE with the next() method */ 379*16467b97STreehugger Robot ANTLR3_UINT32 count; /* Current entry count */ 380*16467b97STreehugger Robot ANTLR3_BOOLEAN allowDups; /* Whether this trie accepts duplicate keys */ 381*16467b97STreehugger Robot 382*16467b97STreehugger Robot 383*16467b97STreehugger Robot pANTLR3_TRIE_ENTRY (*get) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key); 384*16467b97STreehugger Robot ANTLR3_BOOLEAN (*del) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key); 385*16467b97STreehugger Robot ANTLR3_BOOLEAN (*add) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intVal, void * data, void (ANTLR3_CDECL *freeptr)(void *)); 386*16467b97STreehugger Robot void (*free) (struct ANTLR3_INT_TRIE_struct * trie); 387*16467b97STreehugger Robot 388*16467b97STreehugger Robot } 389*16467b97STreehugger Robot ANTLR3_INT_TRIE; 390*16467b97STreehugger Robot 391*16467b97STreehugger Robot /** 392*16467b97STreehugger Robot * A topological sort system that given a set of dependencies of a node m on node n, 393*16467b97STreehugger Robot * can sort them in dependency order. This is a generally useful utility object 394*16467b97STreehugger Robot * that does not care what the things are it is sorting. Generally the set 395*16467b97STreehugger Robot * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. 396*16467b97STreehugger Robot * I have provided a sort method that given ANTLR3_VECTOR as an input will sort 397*16467b97STreehugger Robot * the vector entries in place, as well as a sort method that just returns an 398*16467b97STreehugger Robot * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but 399*16467b97STreehugger Robot * some set of your own device. 400*16467b97STreehugger Robot * 401*16467b97STreehugger Robot * Of the two main algorithms that could be used, I chose to use the depth first 402*16467b97STreehugger Robot * search for unvisited nodes as a) This runs in linear time, and b) it is what 403*16467b97STreehugger Robot * we used in the ANTLR Tool to perform a topological sort of the input grammar files 404*16467b97STreehugger Robot * based on their dependencies. 405*16467b97STreehugger Robot */ 406*16467b97STreehugger Robot typedef struct ANTLR3_TOPO_struct 407*16467b97STreehugger Robot { 408*16467b97STreehugger Robot /** 409*16467b97STreehugger Robot * A vector of vectors of edges, built by calling the addEdge method() 410*16467b97STreehugger Robot * to indicate that node number n depends on node number m. Each entry in the vector 411*16467b97STreehugger Robot * contains a bitset, which has a bit index set for each node upon which the 412*16467b97STreehugger Robot * entry node depends. 413*16467b97STreehugger Robot */ 414*16467b97STreehugger Robot pANTLR3_BITSET * edges; 415*16467b97STreehugger Robot 416*16467b97STreehugger Robot /** 417*16467b97STreehugger Robot * A vector used to build up the sorted output order. Note that 418*16467b97STreehugger Robot * as the vector contains UINT32 then the maximum node index is 419*16467b97STreehugger Robot * 'limited' to 2^32, as nodes should be zero based. 420*16467b97STreehugger Robot */ 421*16467b97STreehugger Robot pANTLR3_UINT32 sorted; 422*16467b97STreehugger Robot 423*16467b97STreehugger Robot /** 424*16467b97STreehugger Robot * A vector used to detect cycles in the edge dependecies. It is used 425*16467b97STreehugger Robot * as a stack and each time we descend a node to one of its edges we 426*16467b97STreehugger Robot * add the node into this stack. If we find a node that we have already 427*16467b97STreehugger Robot * visited in the stack, then it means there wasa cycle such as 9->8->1->9 428*16467b97STreehugger Robot * as the only way a node can be on the stack is if we are currently 429*16467b97STreehugger Robot * descnding from it as we remove it from the stack as we exit from 430*16467b97STreehugger Robot * descending its dependencies 431*16467b97STreehugger Robot */ 432*16467b97STreehugger Robot pANTLR3_UINT32 cycle; 433*16467b97STreehugger Robot 434*16467b97STreehugger Robot /** 435*16467b97STreehugger Robot * A flag that indicates the algorithm found a cycle in the edges 436*16467b97STreehugger Robot * such as 9->8->1->9 437*16467b97STreehugger Robot * If this flag is set after you have called one of the sort routines 438*16467b97STreehugger Robot * then the detected cycle will be contained in the cycle array and 439*16467b97STreehugger Robot * cycleLimit will point to the one after the last entry in the cycle. 440*16467b97STreehugger Robot */ 441*16467b97STreehugger Robot ANTLR3_BOOLEAN hasCycle; 442*16467b97STreehugger Robot 443*16467b97STreehugger Robot /** 444*16467b97STreehugger Robot * A watermark used to accumulate potential cycles in the cycle array. 445*16467b97STreehugger Robot * This should be zero when we are done. Check hasCycle after calling one 446*16467b97STreehugger Robot * of the sort methods and if it is ANTLR3_TRUE then you can find the cycle 447*16467b97STreehugger Robot * in cycle[0]...cycle[cycleMark-1] 448*16467b97STreehugger Robot */ 449*16467b97STreehugger Robot ANTLR3_UINT32 cycleMark; 450*16467b97STreehugger Robot 451*16467b97STreehugger Robot /** 452*16467b97STreehugger Robot * One more than the largest node index that is contained in edges/sorted. 453*16467b97STreehugger Robot */ 454*16467b97STreehugger Robot ANTLR3_UINT32 limit; 455*16467b97STreehugger Robot 456*16467b97STreehugger Robot /** 457*16467b97STreehugger Robot * The set of visited nodes as determined by a set entry in 458*16467b97STreehugger Robot * the bitmap. 459*16467b97STreehugger Robot */ 460*16467b97STreehugger Robot pANTLR3_BITSET visited; 461*16467b97STreehugger Robot 462*16467b97STreehugger Robot /** 463*16467b97STreehugger Robot * A method that adds an edge from one node to another. An edge 464*16467b97STreehugger Robot * of n -> m indicates that node n is dependent on node m. Note that 465*16467b97STreehugger Robot * while building these edges, it is perfectly OK to add nodes out of 466*16467b97STreehugger Robot * sequence. So, if you have edges: 467*16467b97STreehugger Robot * 468*16467b97STreehugger Robot * 3 -> 0 469*16467b97STreehugger Robot * 2 -> 1 470*16467b97STreehugger Robot * 1 -> 3 471*16467b97STreehugger Robot * 472*16467b97STreehugger Robot * The you can add them in that order and so add node 3 before nodes 2 and 1 473*16467b97STreehugger Robot * 474*16467b97STreehugger Robot */ 475*16467b97STreehugger Robot void (*addEdge) (struct ANTLR3_TOPO_struct * topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency); 476*16467b97STreehugger Robot 477*16467b97STreehugger Robot 478*16467b97STreehugger Robot /** 479*16467b97STreehugger Robot * A method that returns a pointer to an array of sorted node indexes. 480*16467b97STreehugger Robot * The array is sorted in topological sorted order. Note that the array 481*16467b97STreehugger Robot * is only as large as the largest node index you created an edge for. This means 482*16467b97STreehugger Robot * that if you had an input of 32 nodes, but that largest node with an edge 483*16467b97STreehugger Robot * was 16, then the returned array will be the sorted order of the first 16 484*16467b97STreehugger Robot * nodes and the last 16 nodes of your array are basically fine as they are 485*16467b97STreehugger Robot * as they had no dependencies and do not need any particular sort order. 486*16467b97STreehugger Robot * 487*16467b97STreehugger Robot * NB: If the structure that contains the array is freed, then the sorted 488*16467b97STreehugger Robot * array will be freed too so you should use the value of limit to 489*16467b97STreehugger Robot * make a long term copy of this array if you do not want to keep the topo 490*16467b97STreehugger Robot * structure around as well. 491*16467b97STreehugger Robot */ 492*16467b97STreehugger Robot pANTLR3_UINT32 (*sortToArray) (struct ANTLR3_TOPO_struct * topo); 493*16467b97STreehugger Robot 494*16467b97STreehugger Robot /** 495*16467b97STreehugger Robot * A method that sorts the supplied ANTLR3_VECTOR in place based 496*16467b97STreehugger Robot * on the previously supplied edge data. 497*16467b97STreehugger Robot */ 498*16467b97STreehugger Robot void (*sortVector) (struct ANTLR3_TOPO_struct * topo, pANTLR3_VECTOR v); 499*16467b97STreehugger Robot 500*16467b97STreehugger Robot /** 501*16467b97STreehugger Robot * A method to free this structure and any associated memory. 502*16467b97STreehugger Robot */ 503*16467b97STreehugger Robot void (*free) (struct ANTLR3_TOPO_struct * topo); 504*16467b97STreehugger Robot } 505*16467b97STreehugger Robot ANTLR3_TOPO; 506*16467b97STreehugger Robot 507*16467b97STreehugger Robot #ifdef __cplusplus 508*16467b97STreehugger Robot } 509*16467b97STreehugger Robot #endif 510*16467b97STreehugger Robot 511*16467b97STreehugger Robot #endif 512*16467b97STreehugger Robot 513*16467b97STreehugger Robot 514