1*16467b97STreehugger Robot /** \file 2*16467b97STreehugger Robot * Defines the basic structure to support recognizing by either a lexer, 3*16467b97STreehugger Robot * parser, or tree parser. 4*16467b97STreehugger Robot * \addtogroup ANTLR3_BASE_RECOGNIZER 5*16467b97STreehugger Robot * @{ 6*16467b97STreehugger Robot */ 7*16467b97STreehugger Robot #ifndef _ANTLR3_BASERECOGNIZER_H 8*16467b97STreehugger Robot #define _ANTLR3_BASERECOGNIZER_H 9*16467b97STreehugger Robot 10*16467b97STreehugger Robot // [The "BSD licence"] 11*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 12*16467b97STreehugger Robot // http://www.temporal-wave.com 13*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle 14*16467b97STreehugger Robot // 15*16467b97STreehugger Robot // All rights reserved. 16*16467b97STreehugger Robot // 17*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 18*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 19*16467b97STreehugger Robot // are met: 20*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 21*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 22*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 23*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 24*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 25*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 26*16467b97STreehugger Robot // derived from this software without specific prior written permission. 27*16467b97STreehugger Robot // 28*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 29*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 30*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 31*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 32*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 33*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 37*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38*16467b97STreehugger Robot 39*16467b97STreehugger Robot #include <antlr3defs.h> 40*16467b97STreehugger Robot #include <antlr3exception.h> 41*16467b97STreehugger Robot #include <antlr3input.h> 42*16467b97STreehugger Robot #include <antlr3tokenstream.h> 43*16467b97STreehugger Robot #include <antlr3commontoken.h> 44*16467b97STreehugger Robot #include <antlr3commontreenodestream.h> 45*16467b97STreehugger Robot #include <antlr3debugeventlistener.h> 46*16467b97STreehugger Robot #include <antlr3recognizersharedstate.h> 47*16467b97STreehugger Robot 48*16467b97STreehugger Robot /** Type indicator for a lexer recognizer 49*16467b97STreehugger Robot */ 50*16467b97STreehugger Robot #define ANTLR3_TYPE_LEXER 0x0001 51*16467b97STreehugger Robot 52*16467b97STreehugger Robot /** Type indicator for a parser recognizer 53*16467b97STreehugger Robot */ 54*16467b97STreehugger Robot #define ANTLR3_TYPE_PARSER 0x0002 55*16467b97STreehugger Robot 56*16467b97STreehugger Robot /** Type indicator for a tree parser recognizer 57*16467b97STreehugger Robot */ 58*16467b97STreehugger Robot #define ANTLR3_TYPE_TREE_PARSER 0x0004 59*16467b97STreehugger Robot 60*16467b97STreehugger Robot #ifdef __cplusplus 61*16467b97STreehugger Robot extern "C" { 62*16467b97STreehugger Robot #endif 63*16467b97STreehugger Robot 64*16467b97STreehugger Robot /** \brief Base tracking context structure for all types of 65*16467b97STreehugger Robot * recognizers. 66*16467b97STreehugger Robot */ 67*16467b97STreehugger Robot typedef struct ANTLR3_BASE_RECOGNIZER_struct 68*16467b97STreehugger Robot { 69*16467b97STreehugger Robot /// Whatever super structure is providing this interface needs a pointer to itself 70*16467b97STreehugger Robot /// so that this can be passed back to it whenever the api functions 71*16467b97STreehugger Robot /// are called back from here. 72*16467b97STreehugger Robot /// 73*16467b97STreehugger Robot void * super; 74*16467b97STreehugger Robot 75*16467b97STreehugger Robot /// Indicates the type of recognizer that we are an instance of. 76*16467b97STreehugger Robot /// The programmer may set this to anything of course, but the default 77*16467b97STreehugger Robot /// implementations of the interface only really understand the built in 78*16467b97STreehugger Robot /// types, so new error handlers etc would probably be required to as well. 79*16467b97STreehugger Robot /// 80*16467b97STreehugger Robot /// Valid types are: 81*16467b97STreehugger Robot /// 82*16467b97STreehugger Robot /// - #ANTLR3_TYPE_LEXER 83*16467b97STreehugger Robot /// - #ANTLR3_TYPE_PARSER 84*16467b97STreehugger Robot /// - #ANTLR3_TYPE_TREE_PARSER 85*16467b97STreehugger Robot /// 86*16467b97STreehugger Robot ANTLR3_UINT32 type; 87*16467b97STreehugger Robot 88*16467b97STreehugger Robot /// A pointer to the shared recognizer state, such that multiple 89*16467b97STreehugger Robot /// recognizers can use the same inputs streams and so on (in 90*16467b97STreehugger Robot /// the case of grammar inheritance for instance. 91*16467b97STreehugger Robot /// 92*16467b97STreehugger Robot pANTLR3_RECOGNIZER_SHARED_STATE state; 93*16467b97STreehugger Robot 94*16467b97STreehugger Robot /// If set to something other than NULL, then this structure is 95*16467b97STreehugger Robot /// points to an instance of the debugger interface. In general, the 96*16467b97STreehugger Robot /// debugger is only referenced internally in recovery/error operations 97*16467b97STreehugger Robot /// so that it does not cause overhead by having to check this pointer 98*16467b97STreehugger Robot /// in every function/method 99*16467b97STreehugger Robot /// 100*16467b97STreehugger Robot pANTLR3_DEBUG_EVENT_LISTENER debugger; 101*16467b97STreehugger Robot 102*16467b97STreehugger Robot 103*16467b97STreehugger Robot /// Pointer to a function that matches the current input symbol 104*16467b97STreehugger Robot /// against the supplied type. the function causes an error if a 105*16467b97STreehugger Robot /// match is not found and the default implementation will also 106*16467b97STreehugger Robot /// attempt to perform one token insertion or deletion if that is 107*16467b97STreehugger Robot /// possible with the input stream. You can override the default 108*16467b97STreehugger Robot /// implementation by installing a pointer to your own function 109*16467b97STreehugger Robot /// in this interface after the recognizer has initialized. This can 110*16467b97STreehugger Robot /// perform different recovery options or not recover at all and so on. 111*16467b97STreehugger Robot /// To ignore recovery altogether, see the comments in the default 112*16467b97STreehugger Robot /// implementation of this function in antlr3baserecognizer.c 113*16467b97STreehugger Robot /// 114*16467b97STreehugger Robot /// Note that errors are signalled by setting the error flag below 115*16467b97STreehugger Robot /// and creating a new exception structure and installing it in the 116*16467b97STreehugger Robot /// exception pointer below (you can chain these if you like and handle them 117*16467b97STreehugger Robot /// in some customized way). 118*16467b97STreehugger Robot /// 119*16467b97STreehugger Robot void * (*match) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 120*16467b97STreehugger Robot ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 121*16467b97STreehugger Robot 122*16467b97STreehugger Robot /// Pointer to a function that matches the next token/char in the input stream 123*16467b97STreehugger Robot /// regardless of what it actually is. 124*16467b97STreehugger Robot /// 125*16467b97STreehugger Robot void (*matchAny) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 126*16467b97STreehugger Robot 127*16467b97STreehugger Robot /// Pointer to a function that decides if the token ahead of the current one is the 128*16467b97STreehugger Robot /// one we were loking for, in which case the curernt one is very likely extraneous 129*16467b97STreehugger Robot /// and can be reported that way. 130*16467b97STreehugger Robot /// 131*16467b97STreehugger Robot ANTLR3_BOOLEAN 132*16467b97STreehugger Robot (*mismatchIsUnwantedToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype); 133*16467b97STreehugger Robot 134*16467b97STreehugger Robot /// Pointer to a function that decides if the current token is one that can logically 135*16467b97STreehugger Robot /// follow the one we were looking for, in which case the one we were looking for is 136*16467b97STreehugger Robot /// probably missing from the input. 137*16467b97STreehugger Robot /// 138*16467b97STreehugger Robot ANTLR3_BOOLEAN 139*16467b97STreehugger Robot (*mismatchIsMissingToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow); 140*16467b97STreehugger Robot 141*16467b97STreehugger Robot /** Pointer to a function that works out what to do when a token mismatch 142*16467b97STreehugger Robot * occurs, so that Tree parsers can behave differently to other recognizers. 143*16467b97STreehugger Robot */ 144*16467b97STreehugger Robot void (*mismatch) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 145*16467b97STreehugger Robot ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); 146*16467b97STreehugger Robot 147*16467b97STreehugger Robot /** Pointer to a function to call to report a recognition problem. You may override 148*16467b97STreehugger Robot * this function with your own function, but refer to the standard implementation 149*16467b97STreehugger Robot * in antlr3baserecognizer.c for guidance. The function should recognize whether 150*16467b97STreehugger Robot * error recovery is in force, so that it does not print out more than one error messages 151*16467b97STreehugger Robot * for the same error. From the java comments in BaseRecognizer.java: 152*16467b97STreehugger Robot * 153*16467b97STreehugger Robot * This method sets errorRecovery to indicate the parser is recovering 154*16467b97STreehugger Robot * not parsing. Once in recovery mode, no errors are generated. 155*16467b97STreehugger Robot * To get out of recovery mode, the parser must successfully match 156*16467b97STreehugger Robot * a token (after a resync). So it will go: 157*16467b97STreehugger Robot * 158*16467b97STreehugger Robot * 1. error occurs 159*16467b97STreehugger Robot * 2. enter recovery mode, report error 160*16467b97STreehugger Robot * 3. consume until token found in resynch set 161*16467b97STreehugger Robot * 4. try to resume parsing 162*16467b97STreehugger Robot * 5. next match() will reset errorRecovery mode 163*16467b97STreehugger Robot */ 164*16467b97STreehugger Robot void (*reportError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 165*16467b97STreehugger Robot 166*16467b97STreehugger Robot /** Pointer to a function that is called to display a recognition error message. You may 167*16467b97STreehugger Robot * override this function independently of (*reportError)() above as that function calls 168*16467b97STreehugger Robot * this one to do the actual exception printing. 169*16467b97STreehugger Robot */ 170*16467b97STreehugger Robot void (*displayRecognitionError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames); 171*16467b97STreehugger Robot 172*16467b97STreehugger Robot /// Get number of recognition errors (lexer, parser, tree parser). Each 173*16467b97STreehugger Robot /// recognizer tracks its own number. So parser and lexer each have 174*16467b97STreehugger Robot /// separate count. Does not count the spurious errors found between 175*16467b97STreehugger Robot /// an error and next valid token match 176*16467b97STreehugger Robot /// 177*16467b97STreehugger Robot /// \see reportError() 178*16467b97STreehugger Robot /// 179*16467b97STreehugger Robot ANTLR3_UINT32 180*16467b97STreehugger Robot (*getNumberOfSyntaxErrors) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 181*16467b97STreehugger Robot 182*16467b97STreehugger Robot /** Pointer to a function that recovers from an error found in the input stream. 183*16467b97STreehugger Robot * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also 184*16467b97STreehugger Robot * be from a mismatched token that the (*match)() could not recover from. 185*16467b97STreehugger Robot */ 186*16467b97STreehugger Robot void (*recover) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 187*16467b97STreehugger Robot 188*16467b97STreehugger Robot /** Pointer to a function that is a hook to listen to token consumption during error recovery. 189*16467b97STreehugger Robot * This is mainly used by the debug parser to send events to the listener. 190*16467b97STreehugger Robot */ 191*16467b97STreehugger Robot void (*beginResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 192*16467b97STreehugger Robot 193*16467b97STreehugger Robot /** Pointer to a function that is a hook to listen to token consumption during error recovery. 194*16467b97STreehugger Robot * This is mainly used by the debug parser to send events to the listener. 195*16467b97STreehugger Robot */ 196*16467b97STreehugger Robot void (*endResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 197*16467b97STreehugger Robot 198*16467b97STreehugger Robot /** Pointer to a function that is a hook to listen to token consumption during error recovery. 199*16467b97STreehugger Robot * This is mainly used by the debug parser to send events to the listener. 200*16467b97STreehugger Robot */ 201*16467b97STreehugger Robot void (*beginBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level); 202*16467b97STreehugger Robot 203*16467b97STreehugger Robot /** Pointer to a function that is a hook to listen to token consumption during error recovery. 204*16467b97STreehugger Robot * This is mainly used by the debug parser to send events to the listener. 205*16467b97STreehugger Robot */ 206*16467b97STreehugger Robot void (*endBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful); 207*16467b97STreehugger Robot 208*16467b97STreehugger Robot /** Pointer to a function to computer the error recovery set for the current rule. 209*16467b97STreehugger Robot * \see antlr3ComputeErrorRecoverySet() for details. 210*16467b97STreehugger Robot */ 211*16467b97STreehugger Robot pANTLR3_BITSET (*computeErrorRecoverySet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 212*16467b97STreehugger Robot 213*16467b97STreehugger Robot /** Pointer to a function that computes the context-sensitive FOLLOW set for the 214*16467b97STreehugger Robot * current rule. 215*16467b97STreehugger Robot * \see antlr3ComputeCSRuleFollow() for details. 216*16467b97STreehugger Robot */ 217*16467b97STreehugger Robot pANTLR3_BITSET (*computeCSRuleFollow) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 218*16467b97STreehugger Robot 219*16467b97STreehugger Robot /** Pointer to a function to combine follow bitsets. 220*16467b97STreehugger Robot * \see antlr3CombineFollows() for details. 221*16467b97STreehugger Robot */ 222*16467b97STreehugger Robot pANTLR3_BITSET (*combineFollows) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 223*16467b97STreehugger Robot ANTLR3_BOOLEAN exact); 224*16467b97STreehugger Robot 225*16467b97STreehugger Robot /** Pointer to a function that recovers from a mismatched token in the input stream. 226*16467b97STreehugger Robot * \see antlr3RecoverMismatch() for details. 227*16467b97STreehugger Robot */ 228*16467b97STreehugger Robot void * (*recoverFromMismatchedToken) 229*16467b97STreehugger Robot (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 230*16467b97STreehugger Robot ANTLR3_UINT32 ttype, 231*16467b97STreehugger Robot pANTLR3_BITSET_LIST follow); 232*16467b97STreehugger Robot 233*16467b97STreehugger Robot /** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner 234*16467b97STreehugger Robot * to (*recoverFromMismatchedToken) 235*16467b97STreehugger Robot */ 236*16467b97STreehugger Robot void * (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 237*16467b97STreehugger Robot pANTLR3_BITSET_LIST follow); 238*16467b97STreehugger Robot 239*16467b97STreehugger Robot /** Pointer to common routine to handle single token insertion for recovery functions. 240*16467b97STreehugger Robot */ 241*16467b97STreehugger Robot ANTLR3_BOOLEAN (*recoverFromMismatchedElement) 242*16467b97STreehugger Robot (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 243*16467b97STreehugger Robot pANTLR3_BITSET_LIST follow); 244*16467b97STreehugger Robot 245*16467b97STreehugger Robot /** Pointer to function that consumes input until the next token matches 246*16467b97STreehugger Robot * the given token. 247*16467b97STreehugger Robot */ 248*16467b97STreehugger Robot void (*consumeUntil) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 249*16467b97STreehugger Robot ANTLR3_UINT32 tokenType); 250*16467b97STreehugger Robot 251*16467b97STreehugger Robot /** Pointer to function that consumes input until the next token matches 252*16467b97STreehugger Robot * one in the given set. 253*16467b97STreehugger Robot */ 254*16467b97STreehugger Robot void (*consumeUntilSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 255*16467b97STreehugger Robot pANTLR3_BITSET set); 256*16467b97STreehugger Robot 257*16467b97STreehugger Robot /** Pointer to function that returns an ANTLR3_LIST of the strings that identify 258*16467b97STreehugger Robot * the rules in the parser that got you to this point. Can be overridden by installing your 259*16467b97STreehugger Robot * own function set. 260*16467b97STreehugger Robot * 261*16467b97STreehugger Robot * \todo Document how to override invocation stack functions. 262*16467b97STreehugger Robot */ 263*16467b97STreehugger Robot pANTLR3_STACK (*getRuleInvocationStack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 264*16467b97STreehugger Robot pANTLR3_STACK (*getRuleInvocationStackNamed) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 265*16467b97STreehugger Robot pANTLR3_UINT8 name); 266*16467b97STreehugger Robot 267*16467b97STreehugger Robot /** Pointer to a function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of 268*16467b97STreehugger Robot * string token names. As this is mostly used in string template processing it may not be useful 269*16467b97STreehugger Robot * in the C runtime. 270*16467b97STreehugger Robot */ 271*16467b97STreehugger Robot pANTLR3_HASH_TABLE (*toStrings) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 272*16467b97STreehugger Robot pANTLR3_HASH_TABLE); 273*16467b97STreehugger Robot 274*16467b97STreehugger Robot /** Pointer to a function to return whether the rule has parsed input starting at the supplied 275*16467b97STreehugger Robot * start index before. If the rule has not parsed input starting from the supplied start index, 276*16467b97STreehugger Robot * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point 277*16467b97STreehugger Robot * then it will return the point where it last stopped parsing after that start point. 278*16467b97STreehugger Robot */ 279*16467b97STreehugger Robot ANTLR3_MARKER (*getRuleMemoization) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 280*16467b97STreehugger Robot ANTLR3_INTKEY ruleIndex, 281*16467b97STreehugger Robot ANTLR3_MARKER ruleParseStart); 282*16467b97STreehugger Robot 283*16467b97STreehugger Robot /** Pointer to function that determines whether the rule has parsed input at the current index 284*16467b97STreehugger Robot * in the input stream 285*16467b97STreehugger Robot */ 286*16467b97STreehugger Robot ANTLR3_BOOLEAN (*alreadyParsedRule) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 287*16467b97STreehugger Robot ANTLR3_MARKER ruleIndex); 288*16467b97STreehugger Robot 289*16467b97STreehugger Robot /** Pointer to function that records whether the rule has parsed the input at a 290*16467b97STreehugger Robot * current position successfully or not. 291*16467b97STreehugger Robot */ 292*16467b97STreehugger Robot void (*memoize) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 293*16467b97STreehugger Robot ANTLR3_MARKER ruleIndex, 294*16467b97STreehugger Robot ANTLR3_MARKER ruleParseStart); 295*16467b97STreehugger Robot 296*16467b97STreehugger Robot /// Pointer to a function that returns the current input symbol. 297*16467b97STreehugger Robot /// The is placed into any label for the associated token ref; e.g., x=ID. Token 298*16467b97STreehugger Robot /// and tree parsers need to return different objects. Rather than test 299*16467b97STreehugger Robot /// for input stream type or change the IntStream interface, I use 300*16467b97STreehugger Robot /// a simple method to ask the recognizer to tell me what the current 301*16467b97STreehugger Robot /// input symbol is. 302*16467b97STreehugger Robot /// 303*16467b97STreehugger Robot /// This is ignored for lexers and the lexer implementation of this 304*16467b97STreehugger Robot /// function should return NULL. 305*16467b97STreehugger Robot /// 306*16467b97STreehugger Robot void * (*getCurrentInputSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 307*16467b97STreehugger Robot pANTLR3_INT_STREAM istream); 308*16467b97STreehugger Robot 309*16467b97STreehugger Robot /// Conjure up a missing token during error recovery. 310*16467b97STreehugger Robot /// 311*16467b97STreehugger Robot /// The recognizer attempts to recover from single missing 312*16467b97STreehugger Robot /// symbols. But, actions might refer to that missing symbol. 313*16467b97STreehugger Robot /// For example, x=ID {f($x);}. The action clearly assumes 314*16467b97STreehugger Robot /// that there has been an identifier matched previously and that 315*16467b97STreehugger Robot /// $x points at that token. If that token is missing, but 316*16467b97STreehugger Robot /// the next token in the stream is what we want we assume that 317*16467b97STreehugger Robot /// this token is missing and we keep going. Because we 318*16467b97STreehugger Robot /// have to return some token to replace the missing token, 319*16467b97STreehugger Robot /// we have to conjure one up. This method gives the user control 320*16467b97STreehugger Robot /// over the tokens returned for missing tokens. Mostly, 321*16467b97STreehugger Robot /// you will want to create something special for identifier 322*16467b97STreehugger Robot /// tokens. For literals such as '{' and ',', the default 323*16467b97STreehugger Robot /// action in the parser or tree parser works. It simply creates 324*16467b97STreehugger Robot /// a CommonToken of the appropriate type. The text will be the token. 325*16467b97STreehugger Robot /// If you change what tokens must be created by the lexer, 326*16467b97STreehugger Robot /// override this method to create the appropriate tokens. 327*16467b97STreehugger Robot /// 328*16467b97STreehugger Robot void * (*getMissingSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 329*16467b97STreehugger Robot pANTLR3_INT_STREAM istream, 330*16467b97STreehugger Robot pANTLR3_EXCEPTION e, 331*16467b97STreehugger Robot ANTLR3_UINT32 expectedTokenType, 332*16467b97STreehugger Robot pANTLR3_BITSET_LIST follow); 333*16467b97STreehugger Robot 334*16467b97STreehugger Robot /** Pointer to a function that returns whether the supplied grammar function 335*16467b97STreehugger Robot * will parse the current input stream or not. This is the way that syntactic 336*16467b97STreehugger Robot * predicates are evaluated. Unlike java, C is perfectly happy to invoke code 337*16467b97STreehugger Robot * via a pointer to a function (hence that's what all the ANTLR3 C interfaces 338*16467b97STreehugger Robot * do. 339*16467b97STreehugger Robot */ 340*16467b97STreehugger Robot ANTLR3_BOOLEAN (*synpred) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, void * ctx, 341*16467b97STreehugger Robot void (*predicate)(void * ctx)); 342*16467b97STreehugger Robot 343*16467b97STreehugger Robot /** Pointer to a function that can construct a generic exception structure 344*16467b97STreehugger Robot * with such information as the input stream can provide. 345*16467b97STreehugger Robot */ 346*16467b97STreehugger Robot void (*exConstruct) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 347*16467b97STreehugger Robot 348*16467b97STreehugger Robot /** Reset the recognizer 349*16467b97STreehugger Robot */ 350*16467b97STreehugger Robot void (*reset) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 351*16467b97STreehugger Robot 352*16467b97STreehugger Robot /** Pointer to a function that knows how to free the resources of a base recognizer. 353*16467b97STreehugger Robot */ 354*16467b97STreehugger Robot void (*free) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer); 355*16467b97STreehugger Robot 356*16467b97STreehugger Robot } 357*16467b97STreehugger Robot ANTLR3_BASE_RECOGNIZER; 358*16467b97STreehugger Robot 359*16467b97STreehugger Robot #ifdef __cplusplus 360*16467b97STreehugger Robot } 361*16467b97STreehugger Robot #endif 362*16467b97STreehugger Robot 363*16467b97STreehugger Robot #include <antlr3lexer.h> 364*16467b97STreehugger Robot #include <antlr3parser.h> 365*16467b97STreehugger Robot #include <antlr3treeparser.h> 366*16467b97STreehugger Robot 367*16467b97STreehugger Robot /// @} 368*16467b97STreehugger Robot /// 369*16467b97STreehugger Robot 370*16467b97STreehugger Robot #endif /* _ANTLR3_BASERECOGNIZER_H */ 371*16467b97STreehugger Robot 372