xref: /aosp_15_r20/external/antlr/runtime/C/src/antlr3baserecognizer.c (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  * Contains the base functions that all recognizers require.
3*16467b97STreehugger Robot  * Any function can be overridden by a lexer/parser/tree parser or by the
4*16467b97STreehugger Robot  * ANTLR3 programmer.
5*16467b97STreehugger Robot  *
6*16467b97STreehugger Robot  * \addtogroup pANTLR3_BASE_RECOGNIZER
7*16467b97STreehugger Robot  * @{
8*16467b97STreehugger Robot  */
9*16467b97STreehugger Robot #include    <antlr3baserecognizer.h>
10*16467b97STreehugger Robot 
11*16467b97STreehugger Robot // [The "BSD licence"]
12*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13*16467b97STreehugger Robot // http://www.temporal-wave.com
14*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
15*16467b97STreehugger Robot //
16*16467b97STreehugger Robot // All rights reserved.
17*16467b97STreehugger Robot //
18*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
19*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
20*16467b97STreehugger Robot // are met:
21*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
22*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
23*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
24*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
25*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
26*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
27*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
28*16467b97STreehugger Robot //
29*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39*16467b97STreehugger Robot 
40*16467b97STreehugger Robot #ifdef	ANTLR3_WINDOWS
41*16467b97STreehugger Robot #pragma warning( disable : 4100 )
42*16467b97STreehugger Robot #endif
43*16467b97STreehugger Robot 
44*16467b97STreehugger Robot /* Interface functions -standard implementations cover parser and treeparser
45*16467b97STreehugger Robot  * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46*16467b97STreehugger Robot  * most of these functions.
47*16467b97STreehugger Robot  */
48*16467b97STreehugger Robot static void					beginResync					(pANTLR3_BASE_RECOGNIZER recognizer);
49*16467b97STreehugger Robot static pANTLR3_BITSET		computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer);
50*16467b97STreehugger Robot static void					endResync					(pANTLR3_BASE_RECOGNIZER recognizer);
51*16467b97STreehugger Robot static void					beginBacktrack				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52*16467b97STreehugger Robot static void					endBacktrack				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
53*16467b97STreehugger Robot 
54*16467b97STreehugger Robot static void *				match						(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55*16467b97STreehugger Robot static void					matchAny					(pANTLR3_BASE_RECOGNIZER recognizer);
56*16467b97STreehugger Robot static void					mismatch					(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57*16467b97STreehugger Robot static ANTLR3_BOOLEAN		mismatchIsUnwantedToken		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58*16467b97STreehugger Robot static ANTLR3_BOOLEAN		mismatchIsMissingToken		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59*16467b97STreehugger Robot static void					reportError					(pANTLR3_BASE_RECOGNIZER recognizer);
60*16467b97STreehugger Robot static pANTLR3_BITSET		computeCSRuleFollow			(pANTLR3_BASE_RECOGNIZER recognizer);
61*16467b97STreehugger Robot static pANTLR3_BITSET		combineFollows				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62*16467b97STreehugger Robot static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63*16467b97STreehugger Robot static void					recover						(pANTLR3_BASE_RECOGNIZER recognizer);
64*16467b97STreehugger Robot static void	*				recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65*16467b97STreehugger Robot static void	*				recoverFromMismatchedSet    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66*16467b97STreehugger Robot static ANTLR3_BOOLEAN		recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67*16467b97STreehugger Robot static void					consumeUntil				(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68*16467b97STreehugger Robot static void					consumeUntilSet				(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69*16467b97STreehugger Robot static pANTLR3_STACK		getRuleInvocationStack	    (pANTLR3_BASE_RECOGNIZER recognizer);
70*16467b97STreehugger Robot static pANTLR3_STACK		getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71*16467b97STreehugger Robot static pANTLR3_HASH_TABLE	toStrings					(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72*16467b97STreehugger Robot static ANTLR3_MARKER		getRuleMemoization			(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73*16467b97STreehugger Robot static ANTLR3_BOOLEAN		alreadyParsedRule			(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74*16467b97STreehugger Robot static void					memoize						(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75*16467b97STreehugger Robot static ANTLR3_BOOLEAN		synpred						(pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76*16467b97STreehugger Robot static void					reset						(pANTLR3_BASE_RECOGNIZER recognizer);
77*16467b97STreehugger Robot static void					freeBR						(pANTLR3_BASE_RECOGNIZER recognizer);
78*16467b97STreehugger Robot static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79*16467b97STreehugger Robot static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
80*16467b97STreehugger Robot 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81*16467b97STreehugger Robot static ANTLR3_UINT32		getNumberOfSyntaxErrors		(pANTLR3_BASE_RECOGNIZER recognizer);
82*16467b97STreehugger Robot 
83*16467b97STreehugger Robot ANTLR3_API pANTLR3_BASE_RECOGNIZER
antlr3BaseRecognizerNew(ANTLR3_UINT32 type,ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)84*16467b97STreehugger Robot antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
85*16467b97STreehugger Robot {
86*16467b97STreehugger Robot     pANTLR3_BASE_RECOGNIZER recognizer;
87*16467b97STreehugger Robot 
88*16467b97STreehugger Robot     // Allocate memory for the structure
89*16467b97STreehugger Robot     //
90*16467b97STreehugger Robot     recognizer	    = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
91*16467b97STreehugger Robot 
92*16467b97STreehugger Robot     if	(recognizer == NULL)
93*16467b97STreehugger Robot     {
94*16467b97STreehugger Robot 		// Allocation failed
95*16467b97STreehugger Robot 		//
96*16467b97STreehugger Robot 		return	NULL;
97*16467b97STreehugger Robot     }
98*16467b97STreehugger Robot 
99*16467b97STreehugger Robot 
100*16467b97STreehugger Robot 	// If we have been supplied with a pre-existing recognizer state
101*16467b97STreehugger Robot 	// then we just install it, otherwise we must create one from scratch
102*16467b97STreehugger Robot 	//
103*16467b97STreehugger Robot 	if	(state == NULL)
104*16467b97STreehugger Robot 	{
105*16467b97STreehugger Robot 		recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
106*16467b97STreehugger Robot 
107*16467b97STreehugger Robot 		if	(recognizer->state == NULL)
108*16467b97STreehugger Robot 		{
109*16467b97STreehugger Robot 			ANTLR3_FREE(recognizer);
110*16467b97STreehugger Robot 			return	NULL;
111*16467b97STreehugger Robot 		}
112*16467b97STreehugger Robot 
113*16467b97STreehugger Robot 		// Initialize any new recognizer state
114*16467b97STreehugger Robot 		//
115*16467b97STreehugger Robot 		recognizer->state->errorRecovery	= ANTLR3_FALSE;
116*16467b97STreehugger Robot 		recognizer->state->lastErrorIndex	= -1;
117*16467b97STreehugger Robot 		recognizer->state->failed		= ANTLR3_FALSE;
118*16467b97STreehugger Robot 		recognizer->state->errorCount		= 0;
119*16467b97STreehugger Robot 		recognizer->state->backtracking		= 0;
120*16467b97STreehugger Robot 		recognizer->state->following		= NULL;
121*16467b97STreehugger Robot 		recognizer->state->ruleMemo		= NULL;
122*16467b97STreehugger Robot 		recognizer->state->tokenNames		= NULL;
123*16467b97STreehugger Robot 		recognizer->state->sizeHint             = sizeHint;
124*16467b97STreehugger Robot 		recognizer->state->tokSource		= NULL;
125*16467b97STreehugger Robot                 recognizer->state->tokFactory           = NULL;
126*16467b97STreehugger Robot 
127*16467b97STreehugger Robot 		// Rather than check to see if we must initialize
128*16467b97STreehugger Robot 		// the stack every time we are asked for an new rewrite stream
129*16467b97STreehugger Robot 		// we just always create an empty stack and then just
130*16467b97STreehugger Robot 		// free it when the base recognizer is freed.
131*16467b97STreehugger Robot 		//
132*16467b97STreehugger Robot 		recognizer->state->rStreams		= antlr3VectorNew(0);  // We don't know the size.
133*16467b97STreehugger Robot 
134*16467b97STreehugger Robot 		if	(recognizer->state->rStreams == NULL)
135*16467b97STreehugger Robot 		{
136*16467b97STreehugger Robot 			// Out of memory
137*16467b97STreehugger Robot 			//
138*16467b97STreehugger Robot 			ANTLR3_FREE(recognizer->state);
139*16467b97STreehugger Robot 			ANTLR3_FREE(recognizer);
140*16467b97STreehugger Robot 			return	NULL;
141*16467b97STreehugger Robot 		}
142*16467b97STreehugger Robot 	}
143*16467b97STreehugger Robot 	else
144*16467b97STreehugger Robot 	{
145*16467b97STreehugger Robot 		// Install the one we were given, and do not reset it here
146*16467b97STreehugger Robot 		// as it will either already have been initialized or will
147*16467b97STreehugger Robot 		// be in a state that needs to be preserved.
148*16467b97STreehugger Robot 		//
149*16467b97STreehugger Robot 		recognizer->state = state;
150*16467b97STreehugger Robot 	}
151*16467b97STreehugger Robot 
152*16467b97STreehugger Robot     // Install the BR API
153*16467b97STreehugger Robot     //
154*16467b97STreehugger Robot     recognizer->alreadyParsedRule           = alreadyParsedRule;
155*16467b97STreehugger Robot     recognizer->beginResync                 = beginResync;
156*16467b97STreehugger Robot     recognizer->combineFollows              = combineFollows;
157*16467b97STreehugger Robot     recognizer->beginBacktrack              = beginBacktrack;
158*16467b97STreehugger Robot     recognizer->endBacktrack                = endBacktrack;
159*16467b97STreehugger Robot     recognizer->computeCSRuleFollow         = computeCSRuleFollow;
160*16467b97STreehugger Robot     recognizer->computeErrorRecoverySet     = computeErrorRecoverySet;
161*16467b97STreehugger Robot     recognizer->consumeUntil                = consumeUntil;
162*16467b97STreehugger Robot     recognizer->consumeUntilSet             = consumeUntilSet;
163*16467b97STreehugger Robot     recognizer->displayRecognitionError     = displayRecognitionError;
164*16467b97STreehugger Robot     recognizer->endResync                   = endResync;
165*16467b97STreehugger Robot     recognizer->exConstruct                 = antlr3MTExceptionNew;
166*16467b97STreehugger Robot     recognizer->getRuleInvocationStack      = getRuleInvocationStack;
167*16467b97STreehugger Robot     recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168*16467b97STreehugger Robot     recognizer->getRuleMemoization          = getRuleMemoization;
169*16467b97STreehugger Robot     recognizer->match                       = match;
170*16467b97STreehugger Robot     recognizer->matchAny                    = matchAny;
171*16467b97STreehugger Robot     recognizer->memoize                     = memoize;
172*16467b97STreehugger Robot     recognizer->mismatch                    = mismatch;
173*16467b97STreehugger Robot     recognizer->mismatchIsUnwantedToken     = mismatchIsUnwantedToken;
174*16467b97STreehugger Robot     recognizer->mismatchIsMissingToken      = mismatchIsMissingToken;
175*16467b97STreehugger Robot     recognizer->recover                     = recover;
176*16467b97STreehugger Robot     recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177*16467b97STreehugger Robot     recognizer->recoverFromMismatchedSet    = recoverFromMismatchedSet;
178*16467b97STreehugger Robot     recognizer->recoverFromMismatchedToken  = recoverFromMismatchedToken;
179*16467b97STreehugger Robot     recognizer->getNumberOfSyntaxErrors     = getNumberOfSyntaxErrors;
180*16467b97STreehugger Robot     recognizer->reportError                 = reportError;
181*16467b97STreehugger Robot     recognizer->reset                       = reset;
182*16467b97STreehugger Robot     recognizer->synpred                     = synpred;
183*16467b97STreehugger Robot     recognizer->toStrings                   = toStrings;
184*16467b97STreehugger Robot     recognizer->getCurrentInputSymbol       = getCurrentInputSymbol;
185*16467b97STreehugger Robot     recognizer->getMissingSymbol            = getMissingSymbol;
186*16467b97STreehugger Robot     recognizer->debugger                    = NULL;
187*16467b97STreehugger Robot 
188*16467b97STreehugger Robot     recognizer->free = freeBR;
189*16467b97STreehugger Robot 
190*16467b97STreehugger Robot     /* Initialize variables
191*16467b97STreehugger Robot      */
192*16467b97STreehugger Robot     recognizer->type			= type;
193*16467b97STreehugger Robot 
194*16467b97STreehugger Robot 
195*16467b97STreehugger Robot     return  recognizer;
196*16467b97STreehugger Robot }
197*16467b97STreehugger Robot static void
freeBR(pANTLR3_BASE_RECOGNIZER recognizer)198*16467b97STreehugger Robot freeBR	    (pANTLR3_BASE_RECOGNIZER recognizer)
199*16467b97STreehugger Robot {
200*16467b97STreehugger Robot     pANTLR3_EXCEPTION thisE;
201*16467b97STreehugger Robot 
202*16467b97STreehugger Robot 	// Did we have a state allocated?
203*16467b97STreehugger Robot 	//
204*16467b97STreehugger Robot 	if	(recognizer->state != NULL)
205*16467b97STreehugger Robot 	{
206*16467b97STreehugger Robot 		// Free any rule memoization we set up
207*16467b97STreehugger Robot 		//
208*16467b97STreehugger Robot 		if	(recognizer->state->ruleMemo != NULL)
209*16467b97STreehugger Robot 		{
210*16467b97STreehugger Robot 			recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211*16467b97STreehugger Robot 			recognizer->state->ruleMemo = NULL;
212*16467b97STreehugger Robot 		}
213*16467b97STreehugger Robot 
214*16467b97STreehugger Robot 		// Free any exception space we have left around
215*16467b97STreehugger Robot 		//
216*16467b97STreehugger Robot 		thisE = recognizer->state->exception;
217*16467b97STreehugger Robot 		if	(thisE != NULL)
218*16467b97STreehugger Robot 		{
219*16467b97STreehugger Robot 			thisE->freeEx(thisE);
220*16467b97STreehugger Robot 		}
221*16467b97STreehugger Robot 
222*16467b97STreehugger Robot 		// Free any rewrite streams we have allocated
223*16467b97STreehugger Robot 		//
224*16467b97STreehugger Robot 		if	(recognizer->state->rStreams != NULL)
225*16467b97STreehugger Robot 		{
226*16467b97STreehugger Robot 			recognizer->state->rStreams->free(recognizer->state->rStreams);
227*16467b97STreehugger Robot 		}
228*16467b97STreehugger Robot 
229*16467b97STreehugger Robot 		// Free up any token factory we created (error recovery for instance)
230*16467b97STreehugger Robot 		//
231*16467b97STreehugger Robot 		if	(recognizer->state->tokFactory != NULL)
232*16467b97STreehugger Robot 		{
233*16467b97STreehugger Robot 			recognizer->state->tokFactory->close(recognizer->state->tokFactory);
234*16467b97STreehugger Robot 		}
235*16467b97STreehugger Robot 		// Free the shared state memory
236*16467b97STreehugger Robot 		//
237*16467b97STreehugger Robot 		ANTLR3_FREE(recognizer->state);
238*16467b97STreehugger Robot 	}
239*16467b97STreehugger Robot 
240*16467b97STreehugger Robot 	// Free the actual recognizer space
241*16467b97STreehugger Robot 	//
242*16467b97STreehugger Robot     ANTLR3_FREE(recognizer);
243*16467b97STreehugger Robot }
244*16467b97STreehugger Robot 
245*16467b97STreehugger Robot /**
246*16467b97STreehugger Robot  * Creates a new Mismatched Token Exception and inserts in the recognizer
247*16467b97STreehugger Robot  * exception stack.
248*16467b97STreehugger Robot  *
249*16467b97STreehugger Robot  * \param recognizer
250*16467b97STreehugger Robot  * Context pointer for this recognizer
251*16467b97STreehugger Robot  *
252*16467b97STreehugger Robot  */
253*16467b97STreehugger Robot ANTLR3_API	void
antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)254*16467b97STreehugger Robot antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
255*16467b97STreehugger Robot {
256*16467b97STreehugger Robot     /* Create a basic recognition exception structure
257*16467b97STreehugger Robot      */
258*16467b97STreehugger Robot     antlr3RecognitionExceptionNew(recognizer);
259*16467b97STreehugger Robot 
260*16467b97STreehugger Robot     /* Now update it to indicate this is a Mismatched token exception
261*16467b97STreehugger Robot      */
262*16467b97STreehugger Robot     recognizer->state->exception->name		= ANTLR3_MISMATCHED_EX_NAME;
263*16467b97STreehugger Robot     recognizer->state->exception->type		= ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
264*16467b97STreehugger Robot 
265*16467b97STreehugger Robot     return;
266*16467b97STreehugger Robot }
267*16467b97STreehugger Robot 
268*16467b97STreehugger Robot ANTLR3_API	void
antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)269*16467b97STreehugger Robot antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
270*16467b97STreehugger Robot {
271*16467b97STreehugger Robot 	pANTLR3_EXCEPTION				ex;
272*16467b97STreehugger Robot 	pANTLR3_LEXER					lexer;
273*16467b97STreehugger Robot 	pANTLR3_PARSER					parser;
274*16467b97STreehugger Robot 	pANTLR3_TREE_PARSER				tparser;
275*16467b97STreehugger Robot 
276*16467b97STreehugger Robot 	pANTLR3_INPUT_STREAM			ins;
277*16467b97STreehugger Robot 	pANTLR3_INT_STREAM				is;
278*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN_STREAM	    cts;
279*16467b97STreehugger Robot 	pANTLR3_TREE_NODE_STREAM	    tns;
280*16467b97STreehugger Robot 
281*16467b97STreehugger Robot 	ins	    = NULL;
282*16467b97STreehugger Robot 	cts	    = NULL;
283*16467b97STreehugger Robot 	tns	    = NULL;
284*16467b97STreehugger Robot 	is	    = NULL;
285*16467b97STreehugger Robot 	lexer   = NULL;
286*16467b97STreehugger Robot 	parser  = NULL;
287*16467b97STreehugger Robot 	tparser = NULL;
288*16467b97STreehugger Robot 
289*16467b97STreehugger Robot 	switch	(recognizer->type)
290*16467b97STreehugger Robot 	{
291*16467b97STreehugger Robot 	case	ANTLR3_TYPE_LEXER:
292*16467b97STreehugger Robot 
293*16467b97STreehugger Robot 		lexer	= (pANTLR3_LEXER) (recognizer->super);
294*16467b97STreehugger Robot 		ins	= lexer->input;
295*16467b97STreehugger Robot 		is	= ins->istream;
296*16467b97STreehugger Robot 
297*16467b97STreehugger Robot 		break;
298*16467b97STreehugger Robot 
299*16467b97STreehugger Robot 	case	ANTLR3_TYPE_PARSER:
300*16467b97STreehugger Robot 
301*16467b97STreehugger Robot 		parser  = (pANTLR3_PARSER) (recognizer->super);
302*16467b97STreehugger Robot 		cts	= (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303*16467b97STreehugger Robot 		is	= parser->tstream->istream;
304*16467b97STreehugger Robot 
305*16467b97STreehugger Robot 		break;
306*16467b97STreehugger Robot 
307*16467b97STreehugger Robot 	case	ANTLR3_TYPE_TREE_PARSER:
308*16467b97STreehugger Robot 
309*16467b97STreehugger Robot 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310*16467b97STreehugger Robot 		tns	= tparser->ctnstream->tnstream;
311*16467b97STreehugger Robot 		is	= tns->istream;
312*16467b97STreehugger Robot 
313*16467b97STreehugger Robot 		break;
314*16467b97STreehugger Robot 
315*16467b97STreehugger Robot 	default:
316*16467b97STreehugger Robot 
317*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
318*16467b97STreehugger Robot 		return;
319*16467b97STreehugger Robot 
320*16467b97STreehugger Robot 		break;
321*16467b97STreehugger Robot 	}
322*16467b97STreehugger Robot 
323*16467b97STreehugger Robot 	/* Create a basic exception structure
324*16467b97STreehugger Robot 	 */
325*16467b97STreehugger Robot 	ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326*16467b97STreehugger Robot 		(void *)ANTLR3_RECOGNITION_EX_NAME,
327*16467b97STreehugger Robot 		NULL,
328*16467b97STreehugger Robot 		ANTLR3_FALSE);
329*16467b97STreehugger Robot 
330*16467b97STreehugger Robot 	/* Rest of information depends on the base type of the
331*16467b97STreehugger Robot 	 * input stream.
332*16467b97STreehugger Robot 	 */
333*16467b97STreehugger Robot 	switch  (is->type & ANTLR3_INPUT_MASK)
334*16467b97STreehugger Robot 	{
335*16467b97STreehugger Robot 	case    ANTLR3_CHARSTREAM:
336*16467b97STreehugger Robot 
337*16467b97STreehugger Robot 		ex->c			= is->_LA		    	(is, 1);					/* Current input character			*/
338*16467b97STreehugger Robot 		ex->line		= ins->getLine			(ins);						/* Line number comes from stream		*/
339*16467b97STreehugger Robot 		ex->charPositionInLine	= ins->getCharPositionInLine	(ins);	    /* Line offset also comes from the stream   */
340*16467b97STreehugger Robot 		ex->index		= is->index			(is);
341*16467b97STreehugger Robot 		ex->streamName		= ins->fileName;
342*16467b97STreehugger Robot 		ex->message		= "Unexpected character";
343*16467b97STreehugger Robot 		break;
344*16467b97STreehugger Robot 
345*16467b97STreehugger Robot 	case    ANTLR3_TOKENSTREAM:
346*16467b97STreehugger Robot 
347*16467b97STreehugger Robot 		ex->token		= cts->tstream->_LT						(cts->tstream, 1);	    /* Current input token			    */
348*16467b97STreehugger Robot 		ex->line		= ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine			((pANTLR3_COMMON_TOKEN)(ex->token));
349*16467b97STreehugger Robot 		ex->charPositionInLine	= ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine	((pANTLR3_COMMON_TOKEN)(ex->token));
350*16467b97STreehugger Robot 		ex->index		= cts->tstream->istream->index					(cts->tstream->istream);
351*16467b97STreehugger Robot 		if	(((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
352*16467b97STreehugger Robot 		{
353*16467b97STreehugger Robot 			ex->streamName		= NULL;
354*16467b97STreehugger Robot 		}
355*16467b97STreehugger Robot 		else
356*16467b97STreehugger Robot 		{
357*16467b97STreehugger Robot 			ex->streamName		= ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
358*16467b97STreehugger Robot 		}
359*16467b97STreehugger Robot 		ex->message		= "Unexpected token";
360*16467b97STreehugger Robot 		break;
361*16467b97STreehugger Robot 
362*16467b97STreehugger Robot 	case    ANTLR3_COMMONTREENODE:
363*16467b97STreehugger Robot 
364*16467b97STreehugger Robot 		ex->token		= tns->_LT						    (tns, 1);	    /* Current input tree node			    */
365*16467b97STreehugger Robot 		ex->line		= ((pANTLR3_BASE_TREE)(ex->token))->getLine		    ((pANTLR3_BASE_TREE)(ex->token));
366*16467b97STreehugger Robot 		ex->charPositionInLine	= ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine   ((pANTLR3_BASE_TREE)(ex->token));
367*16467b97STreehugger Robot 		ex->index		= tns->istream->index					    (tns->istream);
368*16467b97STreehugger Robot 
369*16467b97STreehugger Robot 		// Are you ready for this? Deep breath now...
370*16467b97STreehugger Robot 		//
371*16467b97STreehugger Robot 		{
372*16467b97STreehugger Robot 			pANTLR3_COMMON_TREE tnode;
373*16467b97STreehugger Robot 
374*16467b97STreehugger Robot 			tnode		= ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
375*16467b97STreehugger Robot 
376*16467b97STreehugger Robot 			if	(tnode->token    == NULL)
377*16467b97STreehugger Robot 			{
378*16467b97STreehugger Robot 				ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
379*16467b97STreehugger Robot 			}
380*16467b97STreehugger Robot 			else
381*16467b97STreehugger Robot 			{
382*16467b97STreehugger Robot 				if	(tnode->token->input == NULL)
383*16467b97STreehugger Robot 				{
384*16467b97STreehugger Robot 					ex->streamName		= NULL;
385*16467b97STreehugger Robot 				}
386*16467b97STreehugger Robot 				else
387*16467b97STreehugger Robot 				{
388*16467b97STreehugger Robot 					ex->streamName		= tnode->token->input->fileName;
389*16467b97STreehugger Robot 				}
390*16467b97STreehugger Robot 			}
391*16467b97STreehugger Robot 			ex->message		= "Unexpected node";
392*16467b97STreehugger Robot 		}
393*16467b97STreehugger Robot 		break;
394*16467b97STreehugger Robot 	}
395*16467b97STreehugger Robot 
396*16467b97STreehugger Robot 	ex->input						= is;
397*16467b97STreehugger Robot 	ex->nextException				= recognizer->state->exception;	/* So we don't leak the memory */
398*16467b97STreehugger Robot 	recognizer->state->exception	= ex;
399*16467b97STreehugger Robot 	recognizer->state->error	    = ANTLR3_TRUE;	    /* Exception is outstanding	*/
400*16467b97STreehugger Robot 
401*16467b97STreehugger Robot 	return;
402*16467b97STreehugger Robot }
403*16467b97STreehugger Robot 
404*16467b97STreehugger Robot 
405*16467b97STreehugger Robot /// Match current input symbol against ttype.  Upon error, do one token
406*16467b97STreehugger Robot /// insertion or deletion if possible.
407*16467b97STreehugger Robot /// To turn off single token insertion or deletion error
408*16467b97STreehugger Robot /// recovery, override mismatchRecover() and have it call
409*16467b97STreehugger Robot /// plain mismatch(), which does not recover.  Then any error
410*16467b97STreehugger Robot /// in a rule will cause an exception and immediate exit from
411*16467b97STreehugger Robot /// rule.  Rule would recover by resynchronizing to the set of
412*16467b97STreehugger Robot /// symbols that can follow rule ref.
413*16467b97STreehugger Robot ///
414*16467b97STreehugger Robot static void *
match(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)415*16467b97STreehugger Robot match(	pANTLR3_BASE_RECOGNIZER recognizer,
416*16467b97STreehugger Robot 		ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
417*16467b97STreehugger Robot {
418*16467b97STreehugger Robot     pANTLR3_PARSER			parser;
419*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
420*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
421*16467b97STreehugger Robot 	void					* matchedSymbol;
422*16467b97STreehugger Robot 
423*16467b97STreehugger Robot     switch	(recognizer->type)
424*16467b97STreehugger Robot     {
425*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
426*16467b97STreehugger Robot 
427*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
428*16467b97STreehugger Robot 			tparser	= NULL;
429*16467b97STreehugger Robot 			is	= parser->tstream->istream;
430*16467b97STreehugger Robot 
431*16467b97STreehugger Robot 			break;
432*16467b97STreehugger Robot 
433*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
434*16467b97STreehugger Robot 
435*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
436*16467b97STreehugger Robot 			parser	= NULL;
437*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
438*16467b97STreehugger Robot 
439*16467b97STreehugger Robot 			break;
440*16467b97STreehugger Robot 
441*16467b97STreehugger Robot 		default:
442*16467b97STreehugger Robot 
443*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
444*16467b97STreehugger Robot 			return ANTLR3_FALSE;
445*16467b97STreehugger Robot 
446*16467b97STreehugger Robot 			break;
447*16467b97STreehugger Robot     }
448*16467b97STreehugger Robot 
449*16467b97STreehugger Robot 	// Pick up the current input token/node for assignment to labels
450*16467b97STreehugger Robot 	//
451*16467b97STreehugger Robot 	matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
452*16467b97STreehugger Robot 
453*16467b97STreehugger Robot     if	(is->_LA(is, 1) == ttype)
454*16467b97STreehugger Robot     {
455*16467b97STreehugger Robot 		// The token was the one we were told to expect
456*16467b97STreehugger Robot 		//
457*16467b97STreehugger Robot 		is->consume(is);									// Consume that token from the stream
458*16467b97STreehugger Robot 		recognizer->state->errorRecovery	= ANTLR3_FALSE;	// Not in error recovery now (if we were)
459*16467b97STreehugger Robot 		recognizer->state->failed			= ANTLR3_FALSE;	// The match was a success
460*16467b97STreehugger Robot 		return matchedSymbol;								// We are done
461*16467b97STreehugger Robot     }
462*16467b97STreehugger Robot 
463*16467b97STreehugger Robot     // We did not find the expected token type, if we are backtracking then
464*16467b97STreehugger Robot     // we just set the failed flag and return.
465*16467b97STreehugger Robot     //
466*16467b97STreehugger Robot     if	(recognizer->state->backtracking > 0)
467*16467b97STreehugger Robot     {
468*16467b97STreehugger Robot 		// Backtracking is going on
469*16467b97STreehugger Robot 		//
470*16467b97STreehugger Robot 		recognizer->state->failed  = ANTLR3_TRUE;
471*16467b97STreehugger Robot 		return matchedSymbol;
472*16467b97STreehugger Robot 	}
473*16467b97STreehugger Robot 
474*16467b97STreehugger Robot     // We did not find the expected token and there is no backtracking
475*16467b97STreehugger Robot     // going on, so we mismatch, which creates an exception in the recognizer exception
476*16467b97STreehugger Robot     // stack.
477*16467b97STreehugger Robot     //
478*16467b97STreehugger Robot 	matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479*16467b97STreehugger Robot     return matchedSymbol;
480*16467b97STreehugger Robot }
481*16467b97STreehugger Robot 
482*16467b97STreehugger Robot /// Consumes the next token, whatever it is, and resets the recognizer state
483*16467b97STreehugger Robot /// so that it is not in error.
484*16467b97STreehugger Robot ///
485*16467b97STreehugger Robot /// \param recognizer
486*16467b97STreehugger Robot /// Recognizer context pointer
487*16467b97STreehugger Robot ///
488*16467b97STreehugger Robot static void
matchAny(pANTLR3_BASE_RECOGNIZER recognizer)489*16467b97STreehugger Robot matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
490*16467b97STreehugger Robot {
491*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
492*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
493*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
494*16467b97STreehugger Robot 
495*16467b97STreehugger Robot     switch	(recognizer->type)
496*16467b97STreehugger Robot     {
497*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
498*16467b97STreehugger Robot 
499*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
500*16467b97STreehugger Robot 			tparser	= NULL;
501*16467b97STreehugger Robot 			is	= parser->tstream->istream;
502*16467b97STreehugger Robot 
503*16467b97STreehugger Robot 			break;
504*16467b97STreehugger Robot 
505*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
506*16467b97STreehugger Robot 
507*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
508*16467b97STreehugger Robot 			parser	= NULL;
509*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
510*16467b97STreehugger Robot 
511*16467b97STreehugger Robot 			break;
512*16467b97STreehugger Robot 
513*16467b97STreehugger Robot 		default:
514*16467b97STreehugger Robot 
515*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
516*16467b97STreehugger Robot 			return;
517*16467b97STreehugger Robot 
518*16467b97STreehugger Robot 		break;
519*16467b97STreehugger Robot     }
520*16467b97STreehugger Robot     recognizer->state->errorRecovery	= ANTLR3_FALSE;
521*16467b97STreehugger Robot     recognizer->state->failed		    = ANTLR3_FALSE;
522*16467b97STreehugger Robot     is->consume(is);
523*16467b97STreehugger Robot 
524*16467b97STreehugger Robot     return;
525*16467b97STreehugger Robot }
526*16467b97STreehugger Robot ///
527*16467b97STreehugger Robot ///
528*16467b97STreehugger Robot static ANTLR3_BOOLEAN
mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,ANTLR3_UINT32 ttype)529*16467b97STreehugger Robot mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
530*16467b97STreehugger Robot {
531*16467b97STreehugger Robot 	ANTLR3_UINT32 nextt;
532*16467b97STreehugger Robot 
533*16467b97STreehugger Robot 	nextt = is->_LA(is, 2);
534*16467b97STreehugger Robot 
535*16467b97STreehugger Robot 	if	(nextt == ttype)
536*16467b97STreehugger Robot 	{
537*16467b97STreehugger Robot 		if	(recognizer->state->exception != NULL)
538*16467b97STreehugger Robot 		{
539*16467b97STreehugger Robot 			recognizer->state->exception->expecting = nextt;
540*16467b97STreehugger Robot 		}
541*16467b97STreehugger Robot 		return ANTLR3_TRUE;		// This token is unknown, but the next one is the one we wanted
542*16467b97STreehugger Robot 	}
543*16467b97STreehugger Robot 	else
544*16467b97STreehugger Robot 	{
545*16467b97STreehugger Robot 		return ANTLR3_FALSE;	// Neither this token, nor the one following is the one we wanted
546*16467b97STreehugger Robot 	}
547*16467b97STreehugger Robot }
548*16467b97STreehugger Robot 
549*16467b97STreehugger Robot ///
550*16467b97STreehugger Robot ///
551*16467b97STreehugger Robot static ANTLR3_BOOLEAN
mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_BITSET_LIST follow)552*16467b97STreehugger Robot mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
553*16467b97STreehugger Robot {
554*16467b97STreehugger Robot 	ANTLR3_BOOLEAN	retcode;
555*16467b97STreehugger Robot 	pANTLR3_BITSET	followClone;
556*16467b97STreehugger Robot 	pANTLR3_BITSET	viableTokensFollowingThisRule;
557*16467b97STreehugger Robot 
558*16467b97STreehugger Robot 	if	(follow == NULL)
559*16467b97STreehugger Robot 	{
560*16467b97STreehugger Robot 		// There is no information about the tokens that can follow the last one
561*16467b97STreehugger Robot 		// hence we must say that the current one we found is not a member of the
562*16467b97STreehugger Robot 		// follow set and does not indicate a missing token. We will just consume this
563*16467b97STreehugger Robot 		// single token and see if the parser works it out from there.
564*16467b97STreehugger Robot 		//
565*16467b97STreehugger Robot 		return	ANTLR3_FALSE;
566*16467b97STreehugger Robot 	}
567*16467b97STreehugger Robot 
568*16467b97STreehugger Robot 	followClone						= NULL;
569*16467b97STreehugger Robot 	viableTokensFollowingThisRule	= NULL;
570*16467b97STreehugger Robot 
571*16467b97STreehugger Robot 	// The C bitset maps are laid down at compile time by the
572*16467b97STreehugger Robot 	// C code generation. Hence we cannot remove things from them
573*16467b97STreehugger Robot 	// and so on. So, in order to remove EOR (if we need to) then
574*16467b97STreehugger Robot 	// we clone the static bitset.
575*16467b97STreehugger Robot 	//
576*16467b97STreehugger Robot 	followClone = antlr3BitsetLoad(follow);
577*16467b97STreehugger Robot 	if	(followClone == NULL)
578*16467b97STreehugger Robot 	{
579*16467b97STreehugger Robot 		return ANTLR3_FALSE;
580*16467b97STreehugger Robot 	}
581*16467b97STreehugger Robot 
582*16467b97STreehugger Robot 	// Compute what can follow this grammar reference
583*16467b97STreehugger Robot 	//
584*16467b97STreehugger Robot 	if	(followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
585*16467b97STreehugger Robot 	{
586*16467b97STreehugger Robot 		// EOR can follow, but if we are not the start symbol, we
587*16467b97STreehugger Robot 		// need to remove it.
588*16467b97STreehugger Robot 		//
589*16467b97STreehugger Robot 		//if	(recognizer->state->following->vector->count >= 0) ml: always true
590*16467b97STreehugger Robot 		{
591*16467b97STreehugger Robot 			followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
592*16467b97STreehugger Robot 		}
593*16467b97STreehugger Robot 
594*16467b97STreehugger Robot 		// Now compute the visiable tokens that can follow this rule, according to context
595*16467b97STreehugger Robot 		// and make them part of the follow set.
596*16467b97STreehugger Robot 		//
597*16467b97STreehugger Robot 		viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598*16467b97STreehugger Robot 		followClone->borInPlace(followClone, viableTokensFollowingThisRule);
599*16467b97STreehugger Robot 	}
600*16467b97STreehugger Robot 
601*16467b97STreehugger Robot 	/// if current token is consistent with what could come after set
602*16467b97STreehugger Robot 	/// then we know we're missing a token; error recovery is free to
603*16467b97STreehugger Robot 	/// "insert" the missing token
604*16467b97STreehugger Robot 	///
605*16467b97STreehugger Robot 	/// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606*16467b97STreehugger Robot 	/// in follow set to indicate that the fall of the start symbol is
607*16467b97STreehugger Robot 	/// in the set (EOF can follow).
608*16467b97STreehugger Robot 	///
609*16467b97STreehugger Robot 	if	(		followClone->isMember(followClone, is->_LA(is, 1))
610*16467b97STreehugger Robot 			||	followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
611*16467b97STreehugger Robot 		)
612*16467b97STreehugger Robot 	{
613*16467b97STreehugger Robot 		retcode = ANTLR3_TRUE;
614*16467b97STreehugger Robot 	}
615*16467b97STreehugger Robot 	else
616*16467b97STreehugger Robot 	{
617*16467b97STreehugger Robot 		retcode	= ANTLR3_FALSE;
618*16467b97STreehugger Robot 	}
619*16467b97STreehugger Robot 
620*16467b97STreehugger Robot 	if	(viableTokensFollowingThisRule != NULL)
621*16467b97STreehugger Robot 	{
622*16467b97STreehugger Robot 		viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
623*16467b97STreehugger Robot 	}
624*16467b97STreehugger Robot 	if	(followClone != NULL)
625*16467b97STreehugger Robot 	{
626*16467b97STreehugger Robot 		followClone->free(followClone);
627*16467b97STreehugger Robot 	}
628*16467b97STreehugger Robot 
629*16467b97STreehugger Robot 	return retcode;
630*16467b97STreehugger Robot 
631*16467b97STreehugger Robot }
632*16467b97STreehugger Robot 
633*16467b97STreehugger Robot /// Factor out what to do upon token mismatch so tree parsers can behave
634*16467b97STreehugger Robot /// differently.  Override and call mismatchRecover(input, ttype, follow)
635*16467b97STreehugger Robot /// to get single token insertion and deletion.  Use this to turn off
636*16467b97STreehugger Robot /// single token insertion and deletion. Override mismatchRecover
637*16467b97STreehugger Robot /// to call this instead.
638*16467b97STreehugger Robot ///
639*16467b97STreehugger Robot /// \remark mismatch only works for parsers and must be overridden for anything else.
640*16467b97STreehugger Robot ///
641*16467b97STreehugger Robot static	void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)642*16467b97STreehugger Robot mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
643*16467b97STreehugger Robot {
644*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
645*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
646*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
647*16467b97STreehugger Robot 
648*16467b97STreehugger Robot     // Install a mismatched token exception in the exception stack
649*16467b97STreehugger Robot     //
650*16467b97STreehugger Robot     antlr3MTExceptionNew(recognizer);
651*16467b97STreehugger Robot     recognizer->state->exception->expecting    = ttype;
652*16467b97STreehugger Robot 
653*16467b97STreehugger Robot     switch	(recognizer->type)
654*16467b97STreehugger Robot     {
655*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
656*16467b97STreehugger Robot 
657*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
658*16467b97STreehugger Robot 			tparser	= NULL;
659*16467b97STreehugger Robot 			is	= parser->tstream->istream;
660*16467b97STreehugger Robot 
661*16467b97STreehugger Robot 			break;
662*16467b97STreehugger Robot 
663*16467b97STreehugger Robot 		default:
664*16467b97STreehugger Robot 
665*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
666*16467b97STreehugger Robot 			return;
667*16467b97STreehugger Robot 
668*16467b97STreehugger Robot 			break;
669*16467b97STreehugger Robot     }
670*16467b97STreehugger Robot 
671*16467b97STreehugger Robot 	if	(mismatchIsUnwantedToken(recognizer, is, ttype))
672*16467b97STreehugger Robot 	{
673*16467b97STreehugger Robot 		// Create a basic recognition exception structure
674*16467b97STreehugger Robot 		//
675*16467b97STreehugger Robot 	    antlr3RecognitionExceptionNew(recognizer);
676*16467b97STreehugger Robot 
677*16467b97STreehugger Robot 		// Now update it to indicate this is an unwanted token exception
678*16467b97STreehugger Robot 		//
679*16467b97STreehugger Robot 		recognizer->state->exception->name		= ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680*16467b97STreehugger Robot 		recognizer->state->exception->type		= ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681*16467b97STreehugger Robot 
682*16467b97STreehugger Robot 		return;
683*16467b97STreehugger Robot 	}
684*16467b97STreehugger Robot 
685*16467b97STreehugger Robot 	if	(mismatchIsMissingToken(recognizer, is, follow))
686*16467b97STreehugger Robot 	{
687*16467b97STreehugger Robot 		// Create a basic recognition exception structure
688*16467b97STreehugger Robot 		//
689*16467b97STreehugger Robot 	    antlr3RecognitionExceptionNew(recognizer);
690*16467b97STreehugger Robot 
691*16467b97STreehugger Robot 		// Now update it to indicate this is an unwanted token exception
692*16467b97STreehugger Robot 		//
693*16467b97STreehugger Robot 		recognizer->state->exception->name		= ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694*16467b97STreehugger Robot 		recognizer->state->exception->type		= ANTLR3_MISSING_TOKEN_EXCEPTION;
695*16467b97STreehugger Robot 
696*16467b97STreehugger Robot 		return;
697*16467b97STreehugger Robot 	}
698*16467b97STreehugger Robot 
699*16467b97STreehugger Robot 	// Just a mismatched token is all we can dtermine
700*16467b97STreehugger Robot 	//
701*16467b97STreehugger Robot 	antlr3MTExceptionNew(recognizer);
702*16467b97STreehugger Robot 
703*16467b97STreehugger Robot 	return;
704*16467b97STreehugger Robot }
705*16467b97STreehugger Robot /// Report a recognition problem.
706*16467b97STreehugger Robot ///
707*16467b97STreehugger Robot /// This method sets errorRecovery to indicate the parser is recovering
708*16467b97STreehugger Robot /// not parsing.  Once in recovery mode, no errors are generated.
709*16467b97STreehugger Robot /// To get out of recovery mode, the parser must successfully match
710*16467b97STreehugger Robot /// a token (after a resync).  So it will go:
711*16467b97STreehugger Robot ///
712*16467b97STreehugger Robot ///		1. error occurs
713*16467b97STreehugger Robot ///		2. enter recovery mode, report error
714*16467b97STreehugger Robot ///		3. consume until token found in resynch set
715*16467b97STreehugger Robot ///		4. try to resume parsing
716*16467b97STreehugger Robot ///		5. next match() will reset errorRecovery mode
717*16467b97STreehugger Robot ///
718*16467b97STreehugger Robot /// If you override, make sure to update errorCount if you care about that.
719*16467b97STreehugger Robot ///
720*16467b97STreehugger Robot static void
reportError(pANTLR3_BASE_RECOGNIZER recognizer)721*16467b97STreehugger Robot reportError		    (pANTLR3_BASE_RECOGNIZER recognizer)
722*16467b97STreehugger Robot {
723*16467b97STreehugger Robot     	// Invoke the debugger event if there is a debugger listening to us
724*16467b97STreehugger Robot 	//
725*16467b97STreehugger Robot 	if	(recognizer->debugger != NULL)
726*16467b97STreehugger Robot 	{
727*16467b97STreehugger Robot 		recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
728*16467b97STreehugger Robot 	}
729*16467b97STreehugger Robot 
730*16467b97STreehugger Robot     if	(recognizer->state->errorRecovery == ANTLR3_TRUE)
731*16467b97STreehugger Robot     {
732*16467b97STreehugger Robot 		// Already in error recovery so don't display another error while doing so
733*16467b97STreehugger Robot 		//
734*16467b97STreehugger Robot 		return;
735*16467b97STreehugger Robot     }
736*16467b97STreehugger Robot 
737*16467b97STreehugger Robot     // Signal we are in error recovery now
738*16467b97STreehugger Robot     //
739*16467b97STreehugger Robot     recognizer->state->errorRecovery = ANTLR3_TRUE;
740*16467b97STreehugger Robot 
741*16467b97STreehugger Robot 	// Indicate this recognizer had an error while processing.
742*16467b97STreehugger Robot 	//
743*16467b97STreehugger Robot 	recognizer->state->errorCount++;
744*16467b97STreehugger Robot 
745*16467b97STreehugger Robot 	// Call the error display routine
746*16467b97STreehugger Robot 	//
747*16467b97STreehugger Robot     recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
748*16467b97STreehugger Robot }
749*16467b97STreehugger Robot 
750*16467b97STreehugger Robot static void
beginBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level)751*16467b97STreehugger Robot beginBacktrack		(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
752*16467b97STreehugger Robot {
753*16467b97STreehugger Robot 	if	(recognizer->debugger != NULL)
754*16467b97STreehugger Robot 	{
755*16467b97STreehugger Robot 		recognizer->debugger->beginBacktrack(recognizer->debugger, level);
756*16467b97STreehugger Robot 	}
757*16467b97STreehugger Robot }
758*16467b97STreehugger Robot 
759*16467b97STreehugger Robot static void
endBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level,ANTLR3_BOOLEAN successful)760*16467b97STreehugger Robot endBacktrack		(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
761*16467b97STreehugger Robot {
762*16467b97STreehugger Robot 	if	(recognizer->debugger != NULL)
763*16467b97STreehugger Robot 	{
764*16467b97STreehugger Robot 		recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
765*16467b97STreehugger Robot 	}
766*16467b97STreehugger Robot }
767*16467b97STreehugger Robot static void
beginResync(pANTLR3_BASE_RECOGNIZER recognizer)768*16467b97STreehugger Robot beginResync		    (pANTLR3_BASE_RECOGNIZER recognizer)
769*16467b97STreehugger Robot {
770*16467b97STreehugger Robot 	if	(recognizer->debugger != NULL)
771*16467b97STreehugger Robot 	{
772*16467b97STreehugger Robot 		recognizer->debugger->beginResync(recognizer->debugger);
773*16467b97STreehugger Robot 	}
774*16467b97STreehugger Robot }
775*16467b97STreehugger Robot 
776*16467b97STreehugger Robot static void
endResync(pANTLR3_BASE_RECOGNIZER recognizer)777*16467b97STreehugger Robot endResync		    (pANTLR3_BASE_RECOGNIZER recognizer)
778*16467b97STreehugger Robot {
779*16467b97STreehugger Robot 	if	(recognizer->debugger != NULL)
780*16467b97STreehugger Robot 	{
781*16467b97STreehugger Robot 		recognizer->debugger->endResync(recognizer->debugger);
782*16467b97STreehugger Robot 	}
783*16467b97STreehugger Robot }
784*16467b97STreehugger Robot 
785*16467b97STreehugger Robot /// Compute the error recovery set for the current rule.
786*16467b97STreehugger Robot /// Documentation below is from the Java implementation.
787*16467b97STreehugger Robot ///
788*16467b97STreehugger Robot /// During rule invocation, the parser pushes the set of tokens that can
789*16467b97STreehugger Robot /// follow that rule reference on the stack; this amounts to
790*16467b97STreehugger Robot /// computing FIRST of what follows the rule reference in the
791*16467b97STreehugger Robot /// enclosing rule. This local follow set only includes tokens
792*16467b97STreehugger Robot /// from within the rule; i.e., the FIRST computation done by
793*16467b97STreehugger Robot /// ANTLR stops at the end of a rule.
///
/// EXAMPLE
///
797*16467b97STreehugger Robot /// When you find a "no viable alt exception", the input is not
798*16467b97STreehugger Robot /// consistent with any of the alternatives for rule r.  The best
799*16467b97STreehugger Robot /// thing to do is to consume tokens until you see something that
800*16467b97STreehugger Robot /// can legally follow a call to r *or* any rule that called r.
801*16467b97STreehugger Robot /// You don't want the exact set of viable next tokens because the
802*16467b97STreehugger Robot /// input might just be missing a token--you might consume the
803*16467b97STreehugger Robot /// rest of the input looking for one of the missing tokens.
804*16467b97STreehugger Robot ///
805*16467b97STreehugger Robot /// Consider grammar:
806*16467b97STreehugger Robot ///
807*16467b97STreehugger Robot /// a : '[' b ']'
808*16467b97STreehugger Robot ///   | '(' b ')'
809*16467b97STreehugger Robot ///   ;
810*16467b97STreehugger Robot /// b : c '^' INT ;
811*16467b97STreehugger Robot /// c : ID
812*16467b97STreehugger Robot ///   | INT
813*16467b97STreehugger Robot ///   ;
814*16467b97STreehugger Robot ///
815*16467b97STreehugger Robot /// At each rule invocation, the set of tokens that could follow
816*16467b97STreehugger Robot /// that rule is pushed on a stack.  Here are the various "local"
817*16467b97STreehugger Robot /// follow sets:
818*16467b97STreehugger Robot ///
819*16467b97STreehugger Robot /// FOLLOW(b1_in_a) = FIRST(']') = ']'
820*16467b97STreehugger Robot /// FOLLOW(b2_in_a) = FIRST(')') = ')'
821*16467b97STreehugger Robot /// FOLLOW(c_in_b) = FIRST('^') = '^'
822*16467b97STreehugger Robot ///
823*16467b97STreehugger Robot /// Upon erroneous input "[]", the call chain is
824*16467b97STreehugger Robot ///
825*16467b97STreehugger Robot /// a -> b -> c
826*16467b97STreehugger Robot ///
827*16467b97STreehugger Robot /// and, hence, the follow context stack is:
828*16467b97STreehugger Robot ///
829*16467b97STreehugger Robot /// depth  local follow set     after call to rule
830*16467b97STreehugger Robot ///   0         <EOF>                    a (from main())
831*16467b97STreehugger Robot ///   1          ']'                     b
///   2          '^'                     c
833*16467b97STreehugger Robot ///
834*16467b97STreehugger Robot /// Notice that ')' is not included, because b would have to have
835*16467b97STreehugger Robot /// been called from a different context in rule a for ')' to be
836*16467b97STreehugger Robot /// included.
837*16467b97STreehugger Robot ///
838*16467b97STreehugger Robot /// For error recovery, we cannot consider FOLLOW(c)
839*16467b97STreehugger Robot /// (context-sensitive or otherwise).  We need the combined set of
840*16467b97STreehugger Robot /// all context-sensitive FOLLOW sets--the set of all tokens that
841*16467b97STreehugger Robot /// could follow any reference in the call chain.  We need to
842*16467b97STreehugger Robot /// resync to one of those tokens.  Note that FOLLOW(c)='^' and if
843*16467b97STreehugger Robot /// we resync'd to that token, we'd consume until EOF.  We need to
844*16467b97STreehugger Robot /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
845*16467b97STreehugger Robot /// In this case, for input "[]", LA(1) is in this set so we would
846*16467b97STreehugger Robot /// not consume anything and after printing an error rule c would
847*16467b97STreehugger Robot /// return normally.  It would not find the required '^' though.
848*16467b97STreehugger Robot /// At this point, it gets a mismatched token error and throws an
849*16467b97STreehugger Robot /// exception (since LA(1) is not in the viable following token
850*16467b97STreehugger Robot /// set).  The rule exception handler tries to recover, but finds
851*16467b97STreehugger Robot /// the same recovery set and doesn't consume anything.  Rule b
852*16467b97STreehugger Robot /// exits normally returning to rule a.  Now it finds the ']' (and
853*16467b97STreehugger Robot /// with the successful match exits errorRecovery mode).
854*16467b97STreehugger Robot ///
855*16467b97STreehugger Robot /// So, you can see that the parser walks up call chain looking
856*16467b97STreehugger Robot /// for the token that was a member of the recovery set.
857*16467b97STreehugger Robot ///
858*16467b97STreehugger Robot /// Errors are not generated in errorRecovery mode.
859*16467b97STreehugger Robot ///
860*16467b97STreehugger Robot /// ANTLR's error recovery mechanism is based upon original ideas:
861*16467b97STreehugger Robot ///
862*16467b97STreehugger Robot /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
863*16467b97STreehugger Robot ///
864*16467b97STreehugger Robot /// and
865*16467b97STreehugger Robot ///
866*16467b97STreehugger Robot /// "A note on error recovery in recursive descent parsers":
867*16467b97STreehugger Robot /// http://portal.acm.org/citation.cfm?id=947902.947905
868*16467b97STreehugger Robot ///
869*16467b97STreehugger Robot /// Later, Josef Grosch had some good ideas:
870*16467b97STreehugger Robot ///
871*16467b97STreehugger Robot /// "Efficient and Comfortable Error Recovery in Recursive Descent
872*16467b97STreehugger Robot /// Parsers":
873*16467b97STreehugger Robot /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
874*16467b97STreehugger Robot ///
875*16467b97STreehugger Robot /// Like Grosch I implemented local FOLLOW sets that are combined
876*16467b97STreehugger Robot /// at run-time upon error to avoid overhead during parsing.
877*16467b97STreehugger Robot ///
878*16467b97STreehugger Robot static pANTLR3_BITSET
computeErrorRecoverySet(pANTLR3_BASE_RECOGNIZER recognizer)879*16467b97STreehugger Robot computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer)
880*16467b97STreehugger Robot {
881*16467b97STreehugger Robot     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
882*16467b97STreehugger Robot }
883*16467b97STreehugger Robot 
884*16467b97STreehugger Robot /// Compute the context-sensitive FOLLOW set for current rule.
885*16467b97STreehugger Robot /// Documentation below is from the Java runtime.
886*16467b97STreehugger Robot ///
887*16467b97STreehugger Robot /// This is the set of token types that can follow a specific rule
888*16467b97STreehugger Robot /// reference given a specific call chain.  You get the set of
889*16467b97STreehugger Robot /// viable tokens that can possibly come next (look ahead depth 1)
890*16467b97STreehugger Robot /// given the current call chain.  Contrast this with the
891*16467b97STreehugger Robot /// definition of plain FOLLOW for rule r:
892*16467b97STreehugger Robot ///
893*16467b97STreehugger Robot ///  FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
894*16467b97STreehugger Robot ///
895*16467b97STreehugger Robot /// where x in T* and alpha, beta in V*; T is set of terminals and
896*16467b97STreehugger Robot /// V is the set of terminals and non terminals.  In other words,
897*16467b97STreehugger Robot /// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context).  At
899*16467b97STreehugger Robot /// runtime, however, we know precisely which context applies as
900*16467b97STreehugger Robot /// we have the call chain.  We may compute the exact (rather
901*16467b97STreehugger Robot /// than covering superset) set of following tokens.
902*16467b97STreehugger Robot ///
903*16467b97STreehugger Robot /// For example, consider grammar:
904*16467b97STreehugger Robot ///
905*16467b97STreehugger Robot /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
906*16467b97STreehugger Robot ///      | "return" expr '.'
907*16467b97STreehugger Robot ///      ;
908*16467b97STreehugger Robot /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
909*16467b97STreehugger Robot /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
910*16467b97STreehugger Robot ///      | '(' expr ')'
911*16467b97STreehugger Robot ///      ;
912*16467b97STreehugger Robot ///
913*16467b97STreehugger Robot /// The FOLLOW sets are all inclusive whereas context-sensitive
914*16467b97STreehugger Robot /// FOLLOW sets are precisely what could follow a rule reference.
/// For input "i=(3);", here is the derivation:
916*16467b97STreehugger Robot ///
917*16467b97STreehugger Robot /// stat => ID '=' expr ';'
918*16467b97STreehugger Robot ///      => ID '=' atom ('+' atom)* ';'
919*16467b97STreehugger Robot ///      => ID '=' '(' expr ')' ('+' atom)* ';'
920*16467b97STreehugger Robot ///      => ID '=' '(' atom ')' ('+' atom)* ';'
921*16467b97STreehugger Robot ///      => ID '=' '(' INT ')' ('+' atom)* ';'
922*16467b97STreehugger Robot ///      => ID '=' '(' INT ')' ';'
923*16467b97STreehugger Robot ///
924*16467b97STreehugger Robot /// At the "3" token, you'd have a call chain of
925*16467b97STreehugger Robot ///
926*16467b97STreehugger Robot ///   stat -> expr -> atom -> expr -> atom
927*16467b97STreehugger Robot ///
928*16467b97STreehugger Robot /// What can follow that specific nested ref to atom?  Exactly ')'
929*16467b97STreehugger Robot /// as you can see by looking at the derivation of this specific
930*16467b97STreehugger Robot /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
931*16467b97STreehugger Robot ///
932*16467b97STreehugger Robot /// You want the exact viable token set when recovering from a
933*16467b97STreehugger Robot /// token mismatch.  Upon token mismatch, if LA(1) is member of
934*16467b97STreehugger Robot /// the viable next token set, then you know there is most likely
935*16467b97STreehugger Robot /// a missing token in the input stream.  "Insert" one by just not
936*16467b97STreehugger Robot /// throwing an exception.
937*16467b97STreehugger Robot ///
938*16467b97STreehugger Robot static pANTLR3_BITSET
computeCSRuleFollow(pANTLR3_BASE_RECOGNIZER recognizer)939*16467b97STreehugger Robot computeCSRuleFollow	    (pANTLR3_BASE_RECOGNIZER recognizer)
940*16467b97STreehugger Robot {
941*16467b97STreehugger Robot     return   recognizer->combineFollows(recognizer, ANTLR3_FALSE);
942*16467b97STreehugger Robot }
943*16467b97STreehugger Robot 
944*16467b97STreehugger Robot /// Compute the current followset for the input stream.
945*16467b97STreehugger Robot ///
946*16467b97STreehugger Robot static pANTLR3_BITSET
combineFollows(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_BOOLEAN exact)947*16467b97STreehugger Robot combineFollows		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
948*16467b97STreehugger Robot {
949*16467b97STreehugger Robot     pANTLR3_BITSET	followSet;
950*16467b97STreehugger Robot     pANTLR3_BITSET	localFollowSet;
951*16467b97STreehugger Robot     ANTLR3_UINT32	top;
952*16467b97STreehugger Robot     ANTLR3_UINT32	i;
953*16467b97STreehugger Robot 
954*16467b97STreehugger Robot     top	= recognizer->state->following->size(recognizer->state->following);
955*16467b97STreehugger Robot 
956*16467b97STreehugger Robot     followSet	    = antlr3BitsetNew(0);
957*16467b97STreehugger Robot 	localFollowSet	= NULL;
958*16467b97STreehugger Robot 
959*16467b97STreehugger Robot     for (i = top; i>0; i--)
960*16467b97STreehugger Robot     {
961*16467b97STreehugger Robot 		localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
962*16467b97STreehugger Robot 
963*16467b97STreehugger Robot 		if  (localFollowSet != NULL)
964*16467b97STreehugger Robot 		{
965*16467b97STreehugger Robot 			followSet->borInPlace(followSet, localFollowSet);
966*16467b97STreehugger Robot 
967*16467b97STreehugger Robot 			if	(exact == ANTLR3_TRUE)
968*16467b97STreehugger Robot 			{
969*16467b97STreehugger Robot 				if	(localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
970*16467b97STreehugger Robot 				{
971*16467b97STreehugger Robot 					// Only leave EOR in the set if at top (start rule); this lets us know
972*16467b97STreehugger Robot 					// if we have to include the follow(start rule); I.E., EOF
973*16467b97STreehugger Robot 					//
974*16467b97STreehugger Robot 					if	(i>1)
975*16467b97STreehugger Robot 					{
976*16467b97STreehugger Robot 						followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
977*16467b97STreehugger Robot 					}
978*16467b97STreehugger Robot 				}
979*16467b97STreehugger Robot 				else
980*16467b97STreehugger Robot 				{
981*16467b97STreehugger Robot 					break;	// Cannot see End Of Rule from here, just drop out
982*16467b97STreehugger Robot 				}
983*16467b97STreehugger Robot 			}
984*16467b97STreehugger Robot 			localFollowSet->free(localFollowSet);
985*16467b97STreehugger Robot 			localFollowSet = NULL;
986*16467b97STreehugger Robot 		}
987*16467b97STreehugger Robot     }
988*16467b97STreehugger Robot 
989*16467b97STreehugger Robot 	if	(localFollowSet != NULL)
990*16467b97STreehugger Robot 	{
991*16467b97STreehugger Robot 		localFollowSet->free(localFollowSet);
992*16467b97STreehugger Robot 	}
993*16467b97STreehugger Robot     return  followSet;
994*16467b97STreehugger Robot }
995*16467b97STreehugger Robot 
996*16467b97STreehugger Robot /// Standard/Example error display method.
/// No generic error message display function could possibly do everything correctly
998*16467b97STreehugger Robot /// for all possible parsers. Hence you are provided with this example routine, which
999*16467b97STreehugger Robot /// you should override in your parser/tree parser to do as you will.
1000*16467b97STreehugger Robot ///
1001*16467b97STreehugger Robot /// Here we depart somewhat from the Java runtime as that has now split up a lot
1002*16467b97STreehugger Robot /// of the error display routines into spearate units. However, ther is little advantage
1003*16467b97STreehugger Robot /// to this in the C version as you will probably implement all such routines as a
1004*16467b97STreehugger Robot /// separate translation unit, rather than install them all as pointers to functions
1005*16467b97STreehugger Robot /// in the base recognizer.
1006*16467b97STreehugger Robot ///
1007*16467b97STreehugger Robot static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)1008*16467b97STreehugger Robot displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1009*16467b97STreehugger Robot {
1010*16467b97STreehugger Robot 	pANTLR3_PARSER			parser;
1011*16467b97STreehugger Robot 	pANTLR3_TREE_PARSER	    tparser;
1012*16467b97STreehugger Robot 	pANTLR3_INT_STREAM	    is;
1013*16467b97STreehugger Robot 	pANTLR3_STRING			ttext;
1014*16467b97STreehugger Robot 	pANTLR3_STRING			ftext;
1015*16467b97STreehugger Robot 	pANTLR3_EXCEPTION	    ex;
1016*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN    theToken;
1017*16467b97STreehugger Robot 	pANTLR3_BASE_TREE	    theBaseTree;
1018*16467b97STreehugger Robot 	pANTLR3_COMMON_TREE	    theCommonTree;
1019*16467b97STreehugger Robot 
1020*16467b97STreehugger Robot 	// Retrieve some info for easy reading.
1021*16467b97STreehugger Robot 	//
1022*16467b97STreehugger Robot 	ex	    =		recognizer->state->exception;
1023*16467b97STreehugger Robot 	ttext   =		NULL;
1024*16467b97STreehugger Robot 
1025*16467b97STreehugger Robot 	// See if there is a 'filename' we can use
1026*16467b97STreehugger Robot 	//
1027*16467b97STreehugger Robot 	if	(ex->streamName == NULL)
1028*16467b97STreehugger Robot 	{
1029*16467b97STreehugger Robot 		if	(((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1030*16467b97STreehugger Robot 		{
1031*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "-end of input-(");
1032*16467b97STreehugger Robot 		}
1033*16467b97STreehugger Robot 		else
1034*16467b97STreehugger Robot 		{
1035*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "-unknown source-(");
1036*16467b97STreehugger Robot 		}
1037*16467b97STreehugger Robot 	}
1038*16467b97STreehugger Robot 	else
1039*16467b97STreehugger Robot 	{
1040*16467b97STreehugger Robot 		ftext = ex->streamName->to8(ex->streamName);
1041*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1042*16467b97STreehugger Robot 	}
1043*16467b97STreehugger Robot 
1044*16467b97STreehugger Robot 	// Next comes the line number
1045*16467b97STreehugger Robot 	//
1046*16467b97STreehugger Robot 
1047*16467b97STreehugger Robot 	ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1048*16467b97STreehugger Robot 	ANTLR3_FPRINTF(stderr, " : error %d : %s",
1049*16467b97STreehugger Robot 										recognizer->state->exception->type,
1050*16467b97STreehugger Robot 					(pANTLR3_UINT8)	   (recognizer->state->exception->message));
1051*16467b97STreehugger Robot 
1052*16467b97STreehugger Robot 
1053*16467b97STreehugger Robot 	// How we determine the next piece is dependent on which thing raised the
1054*16467b97STreehugger Robot 	// error.
1055*16467b97STreehugger Robot 	//
1056*16467b97STreehugger Robot 	switch	(recognizer->type)
1057*16467b97STreehugger Robot 	{
1058*16467b97STreehugger Robot 	case	ANTLR3_TYPE_PARSER:
1059*16467b97STreehugger Robot 
1060*16467b97STreehugger Robot 		// Prepare the knowledge we know we have
1061*16467b97STreehugger Robot 		//
1062*16467b97STreehugger Robot 		parser	    = (pANTLR3_PARSER) (recognizer->super);
1063*16467b97STreehugger Robot 		tparser	    = NULL;
1064*16467b97STreehugger Robot 		is			= parser->tstream->istream;
1065*16467b97STreehugger Robot 		theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1066*16467b97STreehugger Robot 		ttext	    = theToken->toString(theToken);
1067*16467b97STreehugger Robot 
1068*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1069*16467b97STreehugger Robot 		if  (theToken != NULL)
1070*16467b97STreehugger Robot 		{
1071*16467b97STreehugger Robot 			if (theToken->type == ANTLR3_TOKEN_EOF)
1072*16467b97STreehugger Robot 			{
1073*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, ", at <EOF>");
1074*16467b97STreehugger Robot 			}
1075*16467b97STreehugger Robot 			else
1076*16467b97STreehugger Robot 			{
1077*16467b97STreehugger Robot 				// Guard against null text in a token
1078*16467b97STreehugger Robot 				//
1079*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "\n    near %s\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1080*16467b97STreehugger Robot 			}
1081*16467b97STreehugger Robot 		}
1082*16467b97STreehugger Robot 		break;
1083*16467b97STreehugger Robot 
1084*16467b97STreehugger Robot 	case	ANTLR3_TYPE_TREE_PARSER:
1085*16467b97STreehugger Robot 
1086*16467b97STreehugger Robot 		tparser		= (pANTLR3_TREE_PARSER) (recognizer->super);
1087*16467b97STreehugger Robot 		parser		= NULL;
1088*16467b97STreehugger Robot 		is			= tparser->ctnstream->tnstream->istream;
1089*16467b97STreehugger Robot 		theBaseTree	= (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1090*16467b97STreehugger Robot 		ttext		= theBaseTree->toStringTree(theBaseTree);
1091*16467b97STreehugger Robot 
1092*16467b97STreehugger Robot 		if  (theBaseTree != NULL)
1093*16467b97STreehugger Robot 		{
1094*16467b97STreehugger Robot 			theCommonTree	= (pANTLR3_COMMON_TREE)	    theBaseTree->super;
1095*16467b97STreehugger Robot 
1096*16467b97STreehugger Robot 			if	(theCommonTree != NULL)
1097*16467b97STreehugger Robot 			{
1098*16467b97STreehugger Robot 				theToken	= (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
1099*16467b97STreehugger Robot 			}
1100*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1101*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1102*16467b97STreehugger Robot 		}
1103*16467b97STreehugger Robot 		break;
1104*16467b97STreehugger Robot 
1105*16467b97STreehugger Robot 	default:
1106*16467b97STreehugger Robot 
1107*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1108*16467b97STreehugger Robot 		return;
1109*16467b97STreehugger Robot 		break;
1110*16467b97STreehugger Robot 	}
1111*16467b97STreehugger Robot 
1112*16467b97STreehugger Robot 	// Although this function should generally be provided by the implementation, this one
1113*16467b97STreehugger Robot 	// should be as helpful as possible for grammar developers and serve as an example
1114*16467b97STreehugger Robot 	// of what you can do with each exception type. In general, when you make up your
1115*16467b97STreehugger Robot 	// 'real' handler, you should debug the routine with all possible errors you expect
1116*16467b97STreehugger Robot 	// which will then let you be as specific as possible about all circumstances.
1117*16467b97STreehugger Robot 	//
1118*16467b97STreehugger Robot 	// Note that in the general case, errors thrown by tree parsers indicate a problem
1119*16467b97STreehugger Robot 	// with the output of the parser or with the tree grammar itself. The job of the parser
1120*16467b97STreehugger Robot 	// is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1121*16467b97STreehugger Robot 	// at that stage should really be semantic errors that your own code determines and handles
1122*16467b97STreehugger Robot 	// in whatever way is appropriate.
1123*16467b97STreehugger Robot 	//
1124*16467b97STreehugger Robot 	switch  (ex->type)
1125*16467b97STreehugger Robot 	{
1126*16467b97STreehugger Robot 	case	ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1127*16467b97STreehugger Robot 
1128*16467b97STreehugger Robot 		// Indicates that the recognizer was fed a token which seesm to be
1129*16467b97STreehugger Robot 		// spurious input. We can detect this when the token that follows
1130*16467b97STreehugger Robot 		// this unwanted token would normally be part of the syntactically
1131*16467b97STreehugger Robot 		// correct stream. Then we can see that the token we are looking at
1132*16467b97STreehugger Robot 		// is just something that should not be there and throw this exception.
1133*16467b97STreehugger Robot 		//
1134*16467b97STreehugger Robot 		if	(tokenNames == NULL)
1135*16467b97STreehugger Robot 		{
1136*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1137*16467b97STreehugger Robot 		}
1138*16467b97STreehugger Robot 		else
1139*16467b97STreehugger Robot 		{
1140*16467b97STreehugger Robot 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1141*16467b97STreehugger Robot 			{
1142*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1143*16467b97STreehugger Robot 			}
1144*16467b97STreehugger Robot 			else
1145*16467b97STreehugger Robot 			{
1146*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1147*16467b97STreehugger Robot 			}
1148*16467b97STreehugger Robot 		}
1149*16467b97STreehugger Robot 		break;
1150*16467b97STreehugger Robot 
1151*16467b97STreehugger Robot 	case	ANTLR3_MISSING_TOKEN_EXCEPTION:
1152*16467b97STreehugger Robot 
1153*16467b97STreehugger Robot 		// Indicates that the recognizer detected that the token we just
1154*16467b97STreehugger Robot 		// hit would be valid syntactically if preceeded by a particular
1155*16467b97STreehugger Robot 		// token. Perhaps a missing ';' at line end or a missing ',' in an
1156*16467b97STreehugger Robot 		// expression list, and such like.
1157*16467b97STreehugger Robot 		//
1158*16467b97STreehugger Robot 		if	(tokenNames == NULL)
1159*16467b97STreehugger Robot 		{
1160*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1161*16467b97STreehugger Robot 		}
1162*16467b97STreehugger Robot 		else
1163*16467b97STreehugger Robot 		{
1164*16467b97STreehugger Robot 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1165*16467b97STreehugger Robot 			{
1166*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1167*16467b97STreehugger Robot 			}
1168*16467b97STreehugger Robot 			else
1169*16467b97STreehugger Robot 			{
1170*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1171*16467b97STreehugger Robot 			}
1172*16467b97STreehugger Robot 		}
1173*16467b97STreehugger Robot 		break;
1174*16467b97STreehugger Robot 
1175*16467b97STreehugger Robot 	case	ANTLR3_RECOGNITION_EXCEPTION:
1176*16467b97STreehugger Robot 
1177*16467b97STreehugger Robot 		// Indicates that the recognizer received a token
1178*16467b97STreehugger Robot 		// in the input that was not predicted. This is the basic exception type
1179*16467b97STreehugger Robot 		// from which all others are derived. So we assume it was a syntax error.
1180*16467b97STreehugger Robot 		// You may get this if there are not more tokens and more are needed
1181*16467b97STreehugger Robot 		// to complete a parse for instance.
1182*16467b97STreehugger Robot 		//
1183*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1184*16467b97STreehugger Robot 		break;
1185*16467b97STreehugger Robot 
1186*16467b97STreehugger Robot 	case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1187*16467b97STreehugger Robot 
1188*16467b97STreehugger Robot 		// We were expecting to see one thing and got another. This is the
1189*16467b97STreehugger Robot 		// most common error if we coudl not detect a missing or unwanted token.
1190*16467b97STreehugger Robot 		// Here you can spend your efforts to
1191*16467b97STreehugger Robot 		// derive more useful error messages based on the expected
1192*16467b97STreehugger Robot 		// token set and the last token and so on. The error following
1193*16467b97STreehugger Robot 		// bitmaps do a good job of reducing the set that we were looking
1194*16467b97STreehugger Robot 		// for down to something small. Knowing what you are parsing may be
1195*16467b97STreehugger Robot 		// able to allow you to be even more specific about an error.
1196*16467b97STreehugger Robot 		//
1197*16467b97STreehugger Robot 		if	(tokenNames == NULL)
1198*16467b97STreehugger Robot 		{
1199*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1200*16467b97STreehugger Robot 		}
1201*16467b97STreehugger Robot 		else
1202*16467b97STreehugger Robot 		{
1203*16467b97STreehugger Robot 			if	(ex->expecting == ANTLR3_TOKEN_EOF)
1204*16467b97STreehugger Robot 			{
1205*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1206*16467b97STreehugger Robot 			}
1207*16467b97STreehugger Robot 			else
1208*16467b97STreehugger Robot 			{
1209*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1210*16467b97STreehugger Robot 			}
1211*16467b97STreehugger Robot 		}
1212*16467b97STreehugger Robot 		break;
1213*16467b97STreehugger Robot 
1214*16467b97STreehugger Robot 	case	ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1215*16467b97STreehugger Robot 
1216*16467b97STreehugger Robot 		// We could not pick any alt decision from the input given
1217*16467b97STreehugger Robot 		// so god knows what happened - however when you examine your grammar,
1218*16467b97STreehugger Robot 		// you should. It means that at the point where the current token occurred
1219*16467b97STreehugger Robot 		// that the DFA indicates nowhere to go from here.
1220*16467b97STreehugger Robot 		//
1221*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1222*16467b97STreehugger Robot 
1223*16467b97STreehugger Robot 		break;
1224*16467b97STreehugger Robot 
1225*16467b97STreehugger Robot 	case	ANTLR3_MISMATCHED_SET_EXCEPTION:
1226*16467b97STreehugger Robot 
1227*16467b97STreehugger Robot 		{
1228*16467b97STreehugger Robot 			ANTLR3_UINT32	  count;
1229*16467b97STreehugger Robot 			ANTLR3_UINT32	  bit;
1230*16467b97STreehugger Robot 			ANTLR3_UINT32	  size;
1231*16467b97STreehugger Robot 			ANTLR3_UINT32	  numbits;
1232*16467b97STreehugger Robot 			pANTLR3_BITSET	  errBits;
1233*16467b97STreehugger Robot 
1234*16467b97STreehugger Robot 			// This means we were able to deal with one of a set of
1235*16467b97STreehugger Robot 			// possible tokens at this point, but we did not see any
1236*16467b97STreehugger Robot 			// member of that set.
1237*16467b97STreehugger Robot 			//
1238*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
1239*16467b97STreehugger Robot 
1240*16467b97STreehugger Robot 			// What tokens could we have accepted at this point in the
1241*16467b97STreehugger Robot 			// parse?
1242*16467b97STreehugger Robot 			//
1243*16467b97STreehugger Robot 			count   = 0;
1244*16467b97STreehugger Robot 			errBits = antlr3BitsetLoad		(ex->expectingSet);
1245*16467b97STreehugger Robot 			numbits = errBits->numBits		(errBits);
1246*16467b97STreehugger Robot 			size    = errBits->size			(errBits);
1247*16467b97STreehugger Robot 
1248*16467b97STreehugger Robot 			if  (size > 0)
1249*16467b97STreehugger Robot 			{
1250*16467b97STreehugger Robot 				// However many tokens we could have dealt with here, it is usually
1251*16467b97STreehugger Robot 				// not useful to print ALL of the set here. I arbitrarily chose 8
1252*16467b97STreehugger Robot 				// here, but you should do whatever makes sense for you of course.
1253*16467b97STreehugger Robot 				// No token number 0, so look for bit 1 and on.
1254*16467b97STreehugger Robot 				//
1255*16467b97STreehugger Robot 				for	(bit = 1; bit < numbits && count < 8 && count < size; bit++)
1256*16467b97STreehugger Robot 				{
1257*16467b97STreehugger Robot 					// TODO: This doesn;t look right - should be asking if the bit is set!!
1258*16467b97STreehugger Robot 					//
1259*16467b97STreehugger Robot 					if  (tokenNames[bit])
1260*16467b97STreehugger Robot 					{
1261*16467b97STreehugger Robot 						ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1262*16467b97STreehugger Robot 						count++;
1263*16467b97STreehugger Robot 					}
1264*16467b97STreehugger Robot 				}
1265*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "\n");
1266*16467b97STreehugger Robot 			}
1267*16467b97STreehugger Robot 			else
1268*16467b97STreehugger Robot 			{
1269*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1270*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1271*16467b97STreehugger Robot 			}
1272*16467b97STreehugger Robot 		}
1273*16467b97STreehugger Robot 		break;
1274*16467b97STreehugger Robot 
1275*16467b97STreehugger Robot 	case	ANTLR3_EARLY_EXIT_EXCEPTION:
1276*16467b97STreehugger Robot 
1277*16467b97STreehugger Robot 		// We entered a loop requiring a number of token sequences
1278*16467b97STreehugger Robot 		// but found a token that ended that sequence earlier than
1279*16467b97STreehugger Robot 		// we should have done.
1280*16467b97STreehugger Robot 		//
1281*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1282*16467b97STreehugger Robot 		break;
1283*16467b97STreehugger Robot 
1284*16467b97STreehugger Robot 	default:
1285*16467b97STreehugger Robot 
1286*16467b97STreehugger Robot 		// We don't handle any other exceptions here, but you can
1287*16467b97STreehugger Robot 		// if you wish. If we get an exception that hits this point
1288*16467b97STreehugger Robot 		// then we are just going to report what we know about the
1289*16467b97STreehugger Robot 		// token.
1290*16467b97STreehugger Robot 		//
1291*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1292*16467b97STreehugger Robot 		break;
1293*16467b97STreehugger Robot 	}
1294*16467b97STreehugger Robot 
1295*16467b97STreehugger Robot 	// Here you have the token that was in error which if this is
1296*16467b97STreehugger Robot 	// the standard implementation will tell you the line and offset
1297*16467b97STreehugger Robot 	// and also record the address of the start of the line in the
1298*16467b97STreehugger Robot 	// input stream. You could therefore print the source line and so on.
1299*16467b97STreehugger Robot 	// Generally though, I would expect that your lexer/parser will keep
1300*16467b97STreehugger Robot 	// its own map of lines and source pointers or whatever as there
1301*16467b97STreehugger Robot 	// are a lot of specific things you need to know about the input
1302*16467b97STreehugger Robot 	// to do something like that.
1303*16467b97STreehugger Robot 	// Here is where you do it though :-).
1304*16467b97STreehugger Robot 	//
1305*16467b97STreehugger Robot }
1306*16467b97STreehugger Robot 
1307*16467b97STreehugger Robot /// Return how many syntax errors were detected by this recognizer
1308*16467b97STreehugger Robot ///
1309*16467b97STreehugger Robot static ANTLR3_UINT32
getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)1310*16467b97STreehugger Robot getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1311*16467b97STreehugger Robot {
1312*16467b97STreehugger Robot 	return	recognizer->state->errorCount;
1313*16467b97STreehugger Robot }
1314*16467b97STreehugger Robot 
1315*16467b97STreehugger Robot /// Recover from an error found on the input stream.  Mostly this is
1316*16467b97STreehugger Robot /// NoViableAlt exceptions, but could be a mismatched token that
1317*16467b97STreehugger Robot /// the match() routine could not recover from.
1318*16467b97STreehugger Robot ///
1319*16467b97STreehugger Robot static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)1320*16467b97STreehugger Robot recover			    (pANTLR3_BASE_RECOGNIZER recognizer)
1321*16467b97STreehugger Robot {
1322*16467b97STreehugger Robot     // Used to compute the follow set of tokens
1323*16467b97STreehugger Robot     //
1324*16467b97STreehugger Robot     pANTLR3_BITSET			followSet;
1325*16467b97STreehugger Robot     pANTLR3_PARSER			parser;
1326*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1327*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1328*16467b97STreehugger Robot 
1329*16467b97STreehugger Robot     switch	(recognizer->type)
1330*16467b97STreehugger Robot     {
1331*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
1332*16467b97STreehugger Robot 
1333*16467b97STreehugger Robot 		parser  = (pANTLR3_PARSER) (recognizer->super);
1334*16467b97STreehugger Robot 		tparser	= NULL;
1335*16467b97STreehugger Robot 		is		= parser->tstream->istream;
1336*16467b97STreehugger Robot 
1337*16467b97STreehugger Robot 	break;
1338*16467b97STreehugger Robot 
1339*16467b97STreehugger Robot     case	ANTLR3_TYPE_TREE_PARSER:
1340*16467b97STreehugger Robot 
1341*16467b97STreehugger Robot 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1342*16467b97STreehugger Robot 		parser	= NULL;
1343*16467b97STreehugger Robot 		is		= tparser->ctnstream->tnstream->istream;
1344*16467b97STreehugger Robot 
1345*16467b97STreehugger Robot 	break;
1346*16467b97STreehugger Robot 
1347*16467b97STreehugger Robot     default:
1348*16467b97STreehugger Robot 
1349*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1350*16467b97STreehugger Robot 		return;
1351*16467b97STreehugger Robot 
1352*16467b97STreehugger Robot 	break;
1353*16467b97STreehugger Robot     }
1354*16467b97STreehugger Robot 
1355*16467b97STreehugger Robot 	// Are we about to repeat the same error?
1356*16467b97STreehugger Robot 	//
1357*16467b97STreehugger Robot     if	(recognizer->state->lastErrorIndex == is->index(is))
1358*16467b97STreehugger Robot     {
1359*16467b97STreehugger Robot 		// The last error was at the same token index point. This must be a case
1360*16467b97STreehugger Robot 		// where LT(1) is in the recovery token set so nothing is
1361*16467b97STreehugger Robot 		// consumed. Consume a single token so at least to prevent
1362*16467b97STreehugger Robot 		// an infinite loop; this is a failsafe.
1363*16467b97STreehugger Robot 		//
1364*16467b97STreehugger Robot 		is->consume(is);
1365*16467b97STreehugger Robot     }
1366*16467b97STreehugger Robot 
1367*16467b97STreehugger Robot     // Record error index position
1368*16467b97STreehugger Robot     //
1369*16467b97STreehugger Robot     recognizer->state->lastErrorIndex	 = is->index(is);
1370*16467b97STreehugger Robot 
1371*16467b97STreehugger Robot     // Work out the follows set for error recovery
1372*16467b97STreehugger Robot     //
1373*16467b97STreehugger Robot     followSet	= recognizer->computeErrorRecoverySet(recognizer);
1374*16467b97STreehugger Robot 
1375*16467b97STreehugger Robot     // Call resync hook (for debuggers and so on)
1376*16467b97STreehugger Robot     //
1377*16467b97STreehugger Robot     recognizer->beginResync(recognizer);
1378*16467b97STreehugger Robot 
1379*16467b97STreehugger Robot     // Consume tokens until we have resynced to something in the follows set
1380*16467b97STreehugger Robot     //
1381*16467b97STreehugger Robot     recognizer->consumeUntilSet(recognizer, followSet);
1382*16467b97STreehugger Robot 
1383*16467b97STreehugger Robot     // End resync hook
1384*16467b97STreehugger Robot     //
1385*16467b97STreehugger Robot     recognizer->endResync(recognizer);
1386*16467b97STreehugger Robot 
1387*16467b97STreehugger Robot     // Destroy the temporary bitset we produced.
1388*16467b97STreehugger Robot     //
1389*16467b97STreehugger Robot     followSet->free(followSet);
1390*16467b97STreehugger Robot 
1391*16467b97STreehugger Robot     // Reset the inError flag so we don't re-report the exception
1392*16467b97STreehugger Robot     //
1393*16467b97STreehugger Robot     recognizer->state->error	= ANTLR3_FALSE;
1394*16467b97STreehugger Robot     recognizer->state->failed	= ANTLR3_FALSE;
1395*16467b97STreehugger Robot }
1396*16467b97STreehugger Robot 
1397*16467b97STreehugger Robot 
1398*16467b97STreehugger Robot /// Attempt to recover from a single missing or extra token.
1399*16467b97STreehugger Robot ///
1400*16467b97STreehugger Robot /// EXTRA TOKEN
1401*16467b97STreehugger Robot ///
1402*16467b97STreehugger Robot /// LA(1) is not what we are looking for.  If LA(2) has the right token,
1403*16467b97STreehugger Robot /// however, then assume LA(1) is some extra spurious token.  Delete it
1404*16467b97STreehugger Robot /// and LA(2) as if we were doing a normal match(), which advances the
1405*16467b97STreehugger Robot /// input.
1406*16467b97STreehugger Robot ///
1407*16467b97STreehugger Robot /// MISSING TOKEN
1408*16467b97STreehugger Robot ///
1409*16467b97STreehugger Robot /// If current token is consistent with what could come after
1410*16467b97STreehugger Robot /// ttype then it is ok to "insert" the missing token, else throw
1411*16467b97STreehugger Robot /// exception For example, Input "i=(3;" is clearly missing the
1412*16467b97STreehugger Robot /// ')'.  When the parser returns from the nested call to expr, it
1413*16467b97STreehugger Robot /// will have call chain:
1414*16467b97STreehugger Robot ///
1415*16467b97STreehugger Robot ///    stat -> expr -> atom
1416*16467b97STreehugger Robot ///
1417*16467b97STreehugger Robot /// and it will be trying to match the ')' at this point in the
1418*16467b97STreehugger Robot /// derivation:
1419*16467b97STreehugger Robot ///
1420*16467b97STreehugger Robot ///       => ID '=' '(' INT ')' ('+' atom)* ';'
1421*16467b97STreehugger Robot ///                          ^
1422*16467b97STreehugger Robot /// match() will see that ';' doesn't match ')' and report a
1423*16467b97STreehugger Robot /// mismatched token error.  To recover, it sees that LA(1)==';'
1424*16467b97STreehugger Robot /// is in the set of tokens that can follow the ')' token
1425*16467b97STreehugger Robot /// reference in rule atom.  It can assume that you forgot the ')'.
1426*16467b97STreehugger Robot ///
1427*16467b97STreehugger Robot /// The exception that was passed in, in the java implementation is
1428*16467b97STreehugger Robot /// stored in the recognizer exception stack in the C version. To 'throw' it we set the
1429*16467b97STreehugger Robot /// error flag and rules cascade back when this is set.
1430*16467b97STreehugger Robot ///
1431*16467b97STreehugger Robot static void *
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)1432*16467b97STreehugger Robot recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1433*16467b97STreehugger Robot {
1434*16467b97STreehugger Robot 	pANTLR3_PARSER			  parser;
1435*16467b97STreehugger Robot 	pANTLR3_TREE_PARSER	      tparser;
1436*16467b97STreehugger Robot 	pANTLR3_INT_STREAM	      is;
1437*16467b97STreehugger Robot 	void					* matchedSymbol;
1438*16467b97STreehugger Robot 
1439*16467b97STreehugger Robot 
1440*16467b97STreehugger Robot 
1441*16467b97STreehugger Robot 	switch	(recognizer->type)
1442*16467b97STreehugger Robot 	{
1443*16467b97STreehugger Robot 	case	ANTLR3_TYPE_PARSER:
1444*16467b97STreehugger Robot 
1445*16467b97STreehugger Robot 		parser  = (pANTLR3_PARSER) (recognizer->super);
1446*16467b97STreehugger Robot 		tparser	= NULL;
1447*16467b97STreehugger Robot 		is	= parser->tstream->istream;
1448*16467b97STreehugger Robot 
1449*16467b97STreehugger Robot 		break;
1450*16467b97STreehugger Robot 
1451*16467b97STreehugger Robot 	case	ANTLR3_TYPE_TREE_PARSER:
1452*16467b97STreehugger Robot 
1453*16467b97STreehugger Robot 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1454*16467b97STreehugger Robot 		parser	= NULL;
1455*16467b97STreehugger Robot 		is	= tparser->ctnstream->tnstream->istream;
1456*16467b97STreehugger Robot 
1457*16467b97STreehugger Robot 		break;
1458*16467b97STreehugger Robot 
1459*16467b97STreehugger Robot 	default:
1460*16467b97STreehugger Robot 
1461*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1462*16467b97STreehugger Robot 		return NULL;
1463*16467b97STreehugger Robot 
1464*16467b97STreehugger Robot 		break;
1465*16467b97STreehugger Robot 	}
1466*16467b97STreehugger Robot 
1467*16467b97STreehugger Robot 	// Create an exception if we need one
1468*16467b97STreehugger Robot 	//
1469*16467b97STreehugger Robot 	if	(recognizer->state->exception == NULL)
1470*16467b97STreehugger Robot 	{
1471*16467b97STreehugger Robot 		antlr3RecognitionExceptionNew(recognizer);
1472*16467b97STreehugger Robot 	}
1473*16467b97STreehugger Robot 
1474*16467b97STreehugger Robot 	// If the next token after the one we are looking at in the input stream
1475*16467b97STreehugger Robot 	// is what we are looking for then we remove the one we have discovered
1476*16467b97STreehugger Robot 	// from the stream by consuming it, then consume this next one along too as
1477*16467b97STreehugger Robot 	// if nothing had happened.
1478*16467b97STreehugger Robot 	//
1479*16467b97STreehugger Robot 	if	( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1480*16467b97STreehugger Robot 	{
1481*16467b97STreehugger Robot 		recognizer->state->exception->type		= ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1482*16467b97STreehugger Robot 		recognizer->state->exception->message	= ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1483*16467b97STreehugger Robot 
1484*16467b97STreehugger Robot 		// Call resync hook (for debuggers and so on)
1485*16467b97STreehugger Robot 		//
1486*16467b97STreehugger Robot 		if	(recognizer->debugger != NULL)
1487*16467b97STreehugger Robot 		{
1488*16467b97STreehugger Robot 			recognizer->debugger->beginResync(recognizer->debugger);
1489*16467b97STreehugger Robot 		}
1490*16467b97STreehugger Robot 
1491*16467b97STreehugger Robot 		// "delete" the extra token
1492*16467b97STreehugger Robot 		//
1493*16467b97STreehugger Robot 		recognizer->beginResync(recognizer);
1494*16467b97STreehugger Robot 		is->consume(is);
1495*16467b97STreehugger Robot 		recognizer->endResync(recognizer);
1496*16467b97STreehugger Robot 		// End resync hook
1497*16467b97STreehugger Robot 		//
1498*16467b97STreehugger Robot 		if	(recognizer->debugger != NULL)
1499*16467b97STreehugger Robot 		{
1500*16467b97STreehugger Robot 			recognizer->debugger->endResync(recognizer->debugger);
1501*16467b97STreehugger Robot 		}
1502*16467b97STreehugger Robot 
1503*16467b97STreehugger Robot 		// Print out the error after we consume so that ANTLRWorks sees the
1504*16467b97STreehugger Robot 		// token in the exception.
1505*16467b97STreehugger Robot 		//
1506*16467b97STreehugger Robot 		recognizer->reportError(recognizer);
1507*16467b97STreehugger Robot 
1508*16467b97STreehugger Robot 		// Return the token we are actually matching
1509*16467b97STreehugger Robot 		//
1510*16467b97STreehugger Robot 		matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511*16467b97STreehugger Robot 
1512*16467b97STreehugger Robot 		// Consume the token that the rule actually expected to get as if everything
1513*16467b97STreehugger Robot 		// was hunky dory.
1514*16467b97STreehugger Robot 		//
1515*16467b97STreehugger Robot 		is->consume(is);
1516*16467b97STreehugger Robot 
1517*16467b97STreehugger Robot 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1518*16467b97STreehugger Robot 
1519*16467b97STreehugger Robot 		return	matchedSymbol;
1520*16467b97STreehugger Robot 	}
1521*16467b97STreehugger Robot 
1522*16467b97STreehugger Robot 	// Single token deletion (Unwanted above) did not work
1523*16467b97STreehugger Robot 	// so we see if we can insert a token instead by calculating which
1524*16467b97STreehugger Robot 	// token would be missing
1525*16467b97STreehugger Robot 	//
1526*16467b97STreehugger Robot 	if	(mismatchIsMissingToken(recognizer, is, follow))
1527*16467b97STreehugger Robot 	{
1528*16467b97STreehugger Robot 		// We can fake the missing token and proceed
1529*16467b97STreehugger Robot 		//
1530*16467b97STreehugger Robot 		matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531*16467b97STreehugger Robot 		recognizer->state->exception->type		= ANTLR3_MISSING_TOKEN_EXCEPTION;
1532*16467b97STreehugger Robot 		recognizer->state->exception->message	= ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533*16467b97STreehugger Robot 		recognizer->state->exception->token		= matchedSymbol;
1534*16467b97STreehugger Robot 		recognizer->state->exception->expecting	= ttype;
1535*16467b97STreehugger Robot 
1536*16467b97STreehugger Robot 		// Print out the error after we insert so that ANTLRWorks sees the
1537*16467b97STreehugger Robot 		// token in the exception.
1538*16467b97STreehugger Robot 		//
1539*16467b97STreehugger Robot 		recognizer->reportError(recognizer);
1540*16467b97STreehugger Robot 
1541*16467b97STreehugger Robot 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1542*16467b97STreehugger Robot 
1543*16467b97STreehugger Robot 		return	matchedSymbol;
1544*16467b97STreehugger Robot 	}
1545*16467b97STreehugger Robot 
1546*16467b97STreehugger Robot 
1547*16467b97STreehugger Robot 	// Neither deleting nor inserting tokens allows recovery
1548*16467b97STreehugger Robot 	// must just report the exception.
1549*16467b97STreehugger Robot 	//
1550*16467b97STreehugger Robot 	recognizer->state->error	    = ANTLR3_TRUE;
1551*16467b97STreehugger Robot 	return NULL;
1552*16467b97STreehugger Robot }
1553*16467b97STreehugger Robot 
1554*16467b97STreehugger Robot static void *
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST follow)1555*16467b97STreehugger Robot recoverFromMismatchedSet	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556*16467b97STreehugger Robot {
1557*16467b97STreehugger Robot     pANTLR3_PARSER			parser;
1558*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1559*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1560*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN	matchedSymbol;
1561*16467b97STreehugger Robot 
1562*16467b97STreehugger Robot     switch	(recognizer->type)
1563*16467b97STreehugger Robot     {
1564*16467b97STreehugger Robot     case	ANTLR3_TYPE_PARSER:
1565*16467b97STreehugger Robot 
1566*16467b97STreehugger Robot 		parser  = (pANTLR3_PARSER) (recognizer->super);
1567*16467b97STreehugger Robot 		tparser	= NULL;
1568*16467b97STreehugger Robot 		is	= parser->tstream->istream;
1569*16467b97STreehugger Robot 
1570*16467b97STreehugger Robot 	break;
1571*16467b97STreehugger Robot 
1572*16467b97STreehugger Robot     case	ANTLR3_TYPE_TREE_PARSER:
1573*16467b97STreehugger Robot 
1574*16467b97STreehugger Robot 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575*16467b97STreehugger Robot 		parser	= NULL;
1576*16467b97STreehugger Robot 		is	= tparser->ctnstream->tnstream->istream;
1577*16467b97STreehugger Robot 
1578*16467b97STreehugger Robot 	break;
1579*16467b97STreehugger Robot 
1580*16467b97STreehugger Robot     default:
1581*16467b97STreehugger Robot 
1582*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583*16467b97STreehugger Robot 		return NULL;
1584*16467b97STreehugger Robot 
1585*16467b97STreehugger Robot 	break;
1586*16467b97STreehugger Robot     }
1587*16467b97STreehugger Robot 
1588*16467b97STreehugger Robot 	if	(recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589*16467b97STreehugger Robot 	{
1590*16467b97STreehugger Robot 		// We can fake the missing token and proceed
1591*16467b97STreehugger Robot 		//
1592*16467b97STreehugger Robot 		matchedSymbol = (pANTLR3_COMMON_TOKEN)recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593*16467b97STreehugger Robot 		recognizer->state->exception->type	= ANTLR3_MISSING_TOKEN_EXCEPTION;
1594*16467b97STreehugger Robot 		recognizer->state->exception->token	= matchedSymbol;
1595*16467b97STreehugger Robot 
1596*16467b97STreehugger Robot 		// Print out the error after we insert so that ANTLRWorks sees the
1597*16467b97STreehugger Robot 		// token in the exception.
1598*16467b97STreehugger Robot 		//
1599*16467b97STreehugger Robot 		recognizer->reportError(recognizer);
1600*16467b97STreehugger Robot 
1601*16467b97STreehugger Robot 		recognizer->state->error  = ANTLR3_FALSE;	// Exception is not outstanding any more
1602*16467b97STreehugger Robot 
1603*16467b97STreehugger Robot 		return	matchedSymbol;
1604*16467b97STreehugger Robot 	}
1605*16467b97STreehugger Robot 
1606*16467b97STreehugger Robot     // TODO - Single token deletion like in recoverFromMismatchedToken()
1607*16467b97STreehugger Robot     //
1608*16467b97STreehugger Robot     recognizer->state->error	= ANTLR3_TRUE;
1609*16467b97STreehugger Robot 	recognizer->state->failed	= ANTLR3_TRUE;
1610*16467b97STreehugger Robot 	return NULL;
1611*16467b97STreehugger Robot }
1612*16467b97STreehugger Robot 
1613*16467b97STreehugger Robot /// This code is factored out from mismatched token and mismatched set
1614*16467b97STreehugger Robot ///  recovery.  It handles "single token insertion" error recovery for
1615*16467b97STreehugger Robot /// both.  No tokens are consumed to recover from insertions.  Return
1616*16467b97STreehugger Robot /// true if recovery was possible else return false.
1617*16467b97STreehugger Robot ///
1618*16467b97STreehugger Robot static ANTLR3_BOOLEAN
recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST followBits)1619*16467b97STreehugger Robot recoverFromMismatchedElement	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620*16467b97STreehugger Robot {
1621*16467b97STreehugger Robot     pANTLR3_BITSET	    viableToksFollowingRule;
1622*16467b97STreehugger Robot     pANTLR3_BITSET	    follow;
1623*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
1624*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1625*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1626*16467b97STreehugger Robot 
1627*16467b97STreehugger Robot     switch	(recognizer->type)
1628*16467b97STreehugger Robot     {
1629*16467b97STreehugger Robot     case	ANTLR3_TYPE_PARSER:
1630*16467b97STreehugger Robot 
1631*16467b97STreehugger Robot 		parser  = (pANTLR3_PARSER) (recognizer->super);
1632*16467b97STreehugger Robot 		tparser	= NULL;
1633*16467b97STreehugger Robot 		is	= parser->tstream->istream;
1634*16467b97STreehugger Robot 
1635*16467b97STreehugger Robot 	break;
1636*16467b97STreehugger Robot 
1637*16467b97STreehugger Robot     case	ANTLR3_TYPE_TREE_PARSER:
1638*16467b97STreehugger Robot 
1639*16467b97STreehugger Robot 		tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640*16467b97STreehugger Robot 		parser	= NULL;
1641*16467b97STreehugger Robot 		is	= tparser->ctnstream->tnstream->istream;
1642*16467b97STreehugger Robot 
1643*16467b97STreehugger Robot 	break;
1644*16467b97STreehugger Robot 
1645*16467b97STreehugger Robot     default:
1646*16467b97STreehugger Robot 
1647*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648*16467b97STreehugger Robot 		return ANTLR3_FALSE;
1649*16467b97STreehugger Robot 
1650*16467b97STreehugger Robot 	break;
1651*16467b97STreehugger Robot     }
1652*16467b97STreehugger Robot 
1653*16467b97STreehugger Robot     follow	= antlr3BitsetLoad(followBits);
1654*16467b97STreehugger Robot 
1655*16467b97STreehugger Robot     if	(follow == NULL)
1656*16467b97STreehugger Robot     {
1657*16467b97STreehugger Robot 		/* The follow set is NULL, which means we don't know what can come
1658*16467b97STreehugger Robot 		 * next, so we "hit and hope" by just signifying that we cannot
1659*16467b97STreehugger Robot 		 * recover, which will just cause the next token to be consumed,
1660*16467b97STreehugger Robot 		 * which might dig us out.
1661*16467b97STreehugger Robot 		 */
1662*16467b97STreehugger Robot 		return	ANTLR3_FALSE;
1663*16467b97STreehugger Robot     }
1664*16467b97STreehugger Robot 
1665*16467b97STreehugger Robot     /* We have a bitmap for the follow set, hence we can compute
1666*16467b97STreehugger Robot      * what can follow this grammar element reference.
1667*16467b97STreehugger Robot      */
1668*16467b97STreehugger Robot     if	(follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669*16467b97STreehugger Robot     {
1670*16467b97STreehugger Robot 		/* First we need to know which of the available tokens are viable
1671*16467b97STreehugger Robot 		 * to follow this reference.
1672*16467b97STreehugger Robot 		 */
1673*16467b97STreehugger Robot 		viableToksFollowingRule	= recognizer->computeCSRuleFollow(recognizer);
1674*16467b97STreehugger Robot 
1675*16467b97STreehugger Robot 		/* Remove the EOR token, which we do not wish to compute with
1676*16467b97STreehugger Robot 		 */
1677*16467b97STreehugger Robot 		follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678*16467b97STreehugger Robot 		viableToksFollowingRule->free(viableToksFollowingRule);
1679*16467b97STreehugger Robot 		/* We now have the computed set of what can follow the current token
1680*16467b97STreehugger Robot 		 */
1681*16467b97STreehugger Robot     }
1682*16467b97STreehugger Robot 
1683*16467b97STreehugger Robot     /* We can now see if the current token works with the set of tokens
1684*16467b97STreehugger Robot      * that could follow the current grammar reference. If it looks like it
1685*16467b97STreehugger Robot      * is consistent, then we can "insert" that token by not throwing
1686*16467b97STreehugger Robot      * an exception and assuming that we saw it.
1687*16467b97STreehugger Robot      */
1688*16467b97STreehugger Robot     if	( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689*16467b97STreehugger Robot     {
1690*16467b97STreehugger Robot 		/* report the error, but don't cause any rules to abort and stuff
1691*16467b97STreehugger Robot 		 */
1692*16467b97STreehugger Robot 		recognizer->reportError(recognizer);
1693*16467b97STreehugger Robot 		if	(follow != NULL)
1694*16467b97STreehugger Robot 		{
1695*16467b97STreehugger Robot 			follow->free(follow);
1696*16467b97STreehugger Robot 		}
1697*16467b97STreehugger Robot 		recognizer->state->error			= ANTLR3_FALSE;
1698*16467b97STreehugger Robot 		recognizer->state->failed			= ANTLR3_FALSE;
1699*16467b97STreehugger Robot 		return ANTLR3_TRUE;	/* Success in recovery	*/
1700*16467b97STreehugger Robot     }
1701*16467b97STreehugger Robot 
1702*16467b97STreehugger Robot     if	(follow != NULL)
1703*16467b97STreehugger Robot     {
1704*16467b97STreehugger Robot 		follow->free(follow);
1705*16467b97STreehugger Robot     }
1706*16467b97STreehugger Robot 
1707*16467b97STreehugger Robot     /* We could not find anything viable to do, so this is going to
1708*16467b97STreehugger Robot      * cause an exception.
1709*16467b97STreehugger Robot      */
1710*16467b97STreehugger Robot     return  ANTLR3_FALSE;
1711*16467b97STreehugger Robot }
1712*16467b97STreehugger Robot 
1713*16467b97STreehugger Robot /// Eat tokens from the input stream until we get one of JUST the right type
1714*16467b97STreehugger Robot ///
1715*16467b97STreehugger Robot static void
consumeUntil(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 tokenType)1716*16467b97STreehugger Robot consumeUntil	(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717*16467b97STreehugger Robot {
1718*16467b97STreehugger Robot     ANTLR3_UINT32			ttype;
1719*16467b97STreehugger Robot     pANTLR3_PARSER			parser;
1720*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1721*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1722*16467b97STreehugger Robot 
1723*16467b97STreehugger Robot     switch	(recognizer->type)
1724*16467b97STreehugger Robot     {
1725*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
1726*16467b97STreehugger Robot 
1727*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
1728*16467b97STreehugger Robot 			tparser	= NULL;
1729*16467b97STreehugger Robot 			is	= parser->tstream->istream;
1730*16467b97STreehugger Robot 
1731*16467b97STreehugger Robot 			break;
1732*16467b97STreehugger Robot 
1733*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
1734*16467b97STreehugger Robot 
1735*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736*16467b97STreehugger Robot 			parser	= NULL;
1737*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
1738*16467b97STreehugger Robot 
1739*16467b97STreehugger Robot 			break;
1740*16467b97STreehugger Robot 
1741*16467b97STreehugger Robot 		default:
1742*16467b97STreehugger Robot 
1743*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744*16467b97STreehugger Robot 			return;
1745*16467b97STreehugger Robot 
1746*16467b97STreehugger Robot 			break;
1747*16467b97STreehugger Robot     }
1748*16467b97STreehugger Robot 
1749*16467b97STreehugger Robot     // What do have at the moment?
1750*16467b97STreehugger Robot     //
1751*16467b97STreehugger Robot     ttype	= is->_LA(is, 1);
1752*16467b97STreehugger Robot 
1753*16467b97STreehugger Robot     // Start eating tokens until we get to the one we want.
1754*16467b97STreehugger Robot     //
1755*16467b97STreehugger Robot     while   (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756*16467b97STreehugger Robot     {
1757*16467b97STreehugger Robot 		is->consume(is);
1758*16467b97STreehugger Robot 		ttype	= is->_LA(is, 1);
1759*16467b97STreehugger Robot     }
1760*16467b97STreehugger Robot }
1761*16467b97STreehugger Robot 
1762*16467b97STreehugger Robot /// Eat tokens from the input stream until we find one that
1763*16467b97STreehugger Robot /// belongs to the supplied set.
1764*16467b97STreehugger Robot ///
1765*16467b97STreehugger Robot static void
consumeUntilSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET set)1766*16467b97STreehugger Robot consumeUntilSet			    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767*16467b97STreehugger Robot {
1768*16467b97STreehugger Robot     ANTLR3_UINT32	    ttype;
1769*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
1770*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1771*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1772*16467b97STreehugger Robot 
1773*16467b97STreehugger Robot     switch	(recognizer->type)
1774*16467b97STreehugger Robot     {
1775*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
1776*16467b97STreehugger Robot 
1777*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
1778*16467b97STreehugger Robot 			tparser	= NULL;
1779*16467b97STreehugger Robot 			is	= parser->tstream->istream;
1780*16467b97STreehugger Robot 
1781*16467b97STreehugger Robot 			break;
1782*16467b97STreehugger Robot 
1783*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
1784*16467b97STreehugger Robot 
1785*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786*16467b97STreehugger Robot 			parser	= NULL;
1787*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
1788*16467b97STreehugger Robot 
1789*16467b97STreehugger Robot 			break;
1790*16467b97STreehugger Robot 
1791*16467b97STreehugger Robot 		default:
1792*16467b97STreehugger Robot 
1793*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794*16467b97STreehugger Robot 			return;
1795*16467b97STreehugger Robot 
1796*16467b97STreehugger Robot 			break;
1797*16467b97STreehugger Robot     }
1798*16467b97STreehugger Robot 
1799*16467b97STreehugger Robot     // What do have at the moment?
1800*16467b97STreehugger Robot     //
1801*16467b97STreehugger Robot     ttype	= is->_LA(is, 1);
1802*16467b97STreehugger Robot 
1803*16467b97STreehugger Robot     // Start eating tokens until we get to one we want.
1804*16467b97STreehugger Robot     //
1805*16467b97STreehugger Robot     while   (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806*16467b97STreehugger Robot     {
1807*16467b97STreehugger Robot 		is->consume(is);
1808*16467b97STreehugger Robot 		ttype	= is->_LA(is, 1);
1809*16467b97STreehugger Robot     }
1810*16467b97STreehugger Robot }
1811*16467b97STreehugger Robot 
1812*16467b97STreehugger Robot /** Return the rule invocation stack (how we got here in the parse.
1813*16467b97STreehugger Robot  *  In the java version Ter just asks the JVM for all the information
1814*16467b97STreehugger Robot  *  but in C we don't get this information, so I am going to do nothing
1815*16467b97STreehugger Robot  *  right now.
1816*16467b97STreehugger Robot  */
1817*16467b97STreehugger Robot static pANTLR3_STACK
getRuleInvocationStack(pANTLR3_BASE_RECOGNIZER recognizer)1818*16467b97STreehugger Robot getRuleInvocationStack		    (pANTLR3_BASE_RECOGNIZER recognizer)
1819*16467b97STreehugger Robot {
1820*16467b97STreehugger Robot     return NULL;
1821*16467b97STreehugger Robot }
1822*16467b97STreehugger Robot 
1823*16467b97STreehugger Robot static pANTLR3_STACK
getRuleInvocationStackNamed(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 name)1824*16467b97STreehugger Robot getRuleInvocationStackNamed	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825*16467b97STreehugger Robot {
1826*16467b97STreehugger Robot     return NULL;
1827*16467b97STreehugger Robot }
1828*16467b97STreehugger Robot 
1829*16467b97STreehugger Robot /** Convenience method for template rewrites - NYI.
1830*16467b97STreehugger Robot  */
1831*16467b97STreehugger Robot static pANTLR3_HASH_TABLE
toStrings(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_HASH_TABLE tokens)1832*16467b97STreehugger Robot toStrings			    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833*16467b97STreehugger Robot {
1834*16467b97STreehugger Robot     return NULL;
1835*16467b97STreehugger Robot }
1836*16467b97STreehugger Robot 
1837*16467b97STreehugger Robot static	void ANTLR3_CDECL
freeIntTrie(void * trie)1838*16467b97STreehugger Robot freeIntTrie    (void * trie)
1839*16467b97STreehugger Robot {
1840*16467b97STreehugger Robot     ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841*16467b97STreehugger Robot }
1842*16467b97STreehugger Robot 
1843*16467b97STreehugger Robot 
1844*16467b97STreehugger Robot /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845*16467b97STreehugger Robot  *  start index before. If the rule has not parsed input starting from the supplied start index,
1846*16467b97STreehugger Robot  *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847*16467b97STreehugger Robot  *  then it will return the point where it last stopped parsing after that start point.
1848*16467b97STreehugger Robot  *
1849*16467b97STreehugger Robot  * \remark
1850*16467b97STreehugger Robot  * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851*16467b97STreehugger Robot  * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852*16467b97STreehugger Robot  * version of the table.
1853*16467b97STreehugger Robot  */
1854*16467b97STreehugger Robot static ANTLR3_MARKER
getRuleMemoization(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_INTKEY ruleIndex,ANTLR3_MARKER ruleParseStart)1855*16467b97STreehugger Robot getRuleMemoization		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856*16467b97STreehugger Robot {
1857*16467b97STreehugger Robot     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858*16467b97STreehugger Robot      */
1859*16467b97STreehugger Robot     pANTLR3_INT_TRIE	ruleList;
1860*16467b97STreehugger Robot     ANTLR3_MARKER	stopIndex;
1861*16467b97STreehugger Robot     pANTLR3_TRIE_ENTRY	entry;
1862*16467b97STreehugger Robot 
1863*16467b97STreehugger Robot     /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864*16467b97STreehugger Robot      * as we will need it eventually if we are being asked for the memo here.
1865*16467b97STreehugger Robot      */
1866*16467b97STreehugger Robot     entry	= recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867*16467b97STreehugger Robot 
1868*16467b97STreehugger Robot     if	(entry == NULL)
1869*16467b97STreehugger Robot     {
1870*16467b97STreehugger Robot 		/* Did not find it, so create a new one for it, with a bit depth based on the
1871*16467b97STreehugger Robot 		 * size of the input stream. We need the bit depth to incorporate the number if
1872*16467b97STreehugger Robot 		 * bits required to represent the largest possible stop index in the input, which is the
1873*16467b97STreehugger Robot 		 * last character. An int stream is free to return the largest 64 bit offset if it has
1874*16467b97STreehugger Robot 		 * no idea of the size, but you should remember that this will cause the leftmost
1875*16467b97STreehugger Robot 		 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876*16467b97STreehugger Robot 		 */
1877*16467b97STreehugger Robot 		ruleList    = antlr3IntTrieNew(63);	/* Depth is theoretically 64 bits, but probably not ;-)	*/
1878*16467b97STreehugger Robot 
1879*16467b97STreehugger Robot 		if (ruleList != NULL)
1880*16467b97STreehugger Robot 		{
1881*16467b97STreehugger Robot 			recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882*16467b97STreehugger Robot 		}
1883*16467b97STreehugger Robot 
1884*16467b97STreehugger Robot 		/* We cannot have a stopIndex in a trie we have just created of course
1885*16467b97STreehugger Robot 		 */
1886*16467b97STreehugger Robot 		return	MEMO_RULE_UNKNOWN;
1887*16467b97STreehugger Robot     }
1888*16467b97STreehugger Robot 
1889*16467b97STreehugger Robot     ruleList	= (pANTLR3_INT_TRIE) (entry->data.ptr);
1890*16467b97STreehugger Robot 
1891*16467b97STreehugger Robot     /* See if there is a stop index associated with the supplied start index.
1892*16467b97STreehugger Robot      */
1893*16467b97STreehugger Robot     stopIndex	= 0;
1894*16467b97STreehugger Robot 
1895*16467b97STreehugger Robot     entry = ruleList->get(ruleList, ruleParseStart);
1896*16467b97STreehugger Robot     if (entry != NULL)
1897*16467b97STreehugger Robot     {
1898*16467b97STreehugger Robot 		stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899*16467b97STreehugger Robot     }
1900*16467b97STreehugger Robot 
1901*16467b97STreehugger Robot     if	(stopIndex == 0)
1902*16467b97STreehugger Robot     {
1903*16467b97STreehugger Robot 		return MEMO_RULE_UNKNOWN;
1904*16467b97STreehugger Robot     }
1905*16467b97STreehugger Robot 
1906*16467b97STreehugger Robot     return  stopIndex;
1907*16467b97STreehugger Robot }
1908*16467b97STreehugger Robot 
1909*16467b97STreehugger Robot /** Has this rule already parsed input at the current index in the
1910*16467b97STreehugger Robot  *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911*16467b97STreehugger Robot  *  if we have not.
1912*16467b97STreehugger Robot  *
1913*16467b97STreehugger Robot  *  This method has a side-effect: if we have seen this input for
1914*16467b97STreehugger Robot  *  this rule and successfully parsed before, then seek ahead to
1915*16467b97STreehugger Robot  *  1 past the stop token matched for this rule last time.
1916*16467b97STreehugger Robot  */
1917*16467b97STreehugger Robot static ANTLR3_BOOLEAN
alreadyParsedRule(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex)1918*16467b97STreehugger Robot alreadyParsedRule		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919*16467b97STreehugger Robot {
1920*16467b97STreehugger Robot     ANTLR3_MARKER			stopIndex;
1921*16467b97STreehugger Robot     pANTLR3_LEXER			lexer;
1922*16467b97STreehugger Robot     pANTLR3_PARSER			parser;
1923*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1924*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
1925*16467b97STreehugger Robot 
1926*16467b97STreehugger Robot     switch	(recognizer->type)
1927*16467b97STreehugger Robot     {
1928*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
1929*16467b97STreehugger Robot 
1930*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
1931*16467b97STreehugger Robot 			tparser	= NULL;
1932*16467b97STreehugger Robot 			lexer	= NULL;
1933*16467b97STreehugger Robot 			is	= parser->tstream->istream;
1934*16467b97STreehugger Robot 
1935*16467b97STreehugger Robot 			break;
1936*16467b97STreehugger Robot 
1937*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
1938*16467b97STreehugger Robot 
1939*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940*16467b97STreehugger Robot 			parser	= NULL;
1941*16467b97STreehugger Robot 			lexer	= NULL;
1942*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
1943*16467b97STreehugger Robot 
1944*16467b97STreehugger Robot 			break;
1945*16467b97STreehugger Robot 
1946*16467b97STreehugger Robot 		case	ANTLR3_TYPE_LEXER:
1947*16467b97STreehugger Robot 
1948*16467b97STreehugger Robot 			lexer	= (pANTLR3_LEXER)   (recognizer->super);
1949*16467b97STreehugger Robot 			parser	= NULL;
1950*16467b97STreehugger Robot 			tparser	= NULL;
1951*16467b97STreehugger Robot 			is	= lexer->input->istream;
1952*16467b97STreehugger Robot 			break;
1953*16467b97STreehugger Robot 
1954*16467b97STreehugger Robot 		default:
1955*16467b97STreehugger Robot 
1956*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957*16467b97STreehugger Robot 			return ANTLR3_FALSE;
1958*16467b97STreehugger Robot 
1959*16467b97STreehugger Robot 			break;
1960*16467b97STreehugger Robot     }
1961*16467b97STreehugger Robot 
1962*16467b97STreehugger Robot     /* See if we have a memo marker for this.
1963*16467b97STreehugger Robot      */
1964*16467b97STreehugger Robot     stopIndex	    = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965*16467b97STreehugger Robot 
1966*16467b97STreehugger Robot     if	(stopIndex  == MEMO_RULE_UNKNOWN)
1967*16467b97STreehugger Robot     {
1968*16467b97STreehugger Robot 		return ANTLR3_FALSE;
1969*16467b97STreehugger Robot     }
1970*16467b97STreehugger Robot 
1971*16467b97STreehugger Robot     if	(stopIndex == MEMO_RULE_FAILED)
1972*16467b97STreehugger Robot     {
1973*16467b97STreehugger Robot 		recognizer->state->failed = ANTLR3_TRUE;
1974*16467b97STreehugger Robot     }
1975*16467b97STreehugger Robot     else
1976*16467b97STreehugger Robot     {
1977*16467b97STreehugger Robot 		is->seek(is, stopIndex+1);
1978*16467b97STreehugger Robot     }
1979*16467b97STreehugger Robot 
1980*16467b97STreehugger Robot     /* If here then the rule was executed for this input already
1981*16467b97STreehugger Robot      */
1982*16467b97STreehugger Robot     return  ANTLR3_TRUE;
1983*16467b97STreehugger Robot }
1984*16467b97STreehugger Robot 
1985*16467b97STreehugger Robot /** Record whether or not this rule parsed the input at this position
1986*16467b97STreehugger Robot  *  successfully.
1987*16467b97STreehugger Robot  */
1988*16467b97STreehugger Robot static void
memoize(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex,ANTLR3_MARKER ruleParseStart)1989*16467b97STreehugger Robot memoize	(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990*16467b97STreehugger Robot {
1991*16467b97STreehugger Robot     /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992*16467b97STreehugger Robot      */
1993*16467b97STreehugger Robot     pANTLR3_INT_TRIE	    ruleList;
1994*16467b97STreehugger Robot     pANTLR3_TRIE_ENTRY	    entry;
1995*16467b97STreehugger Robot     ANTLR3_MARKER	    stopIndex;
1996*16467b97STreehugger Robot     pANTLR3_LEXER	    lexer;
1997*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
1998*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
1999*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
2000*16467b97STreehugger Robot 
2001*16467b97STreehugger Robot     switch	(recognizer->type)
2002*16467b97STreehugger Robot     {
2003*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
2004*16467b97STreehugger Robot 
2005*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
2006*16467b97STreehugger Robot 			tparser	= NULL;
2007*16467b97STreehugger Robot 			is	= parser->tstream->istream;
2008*16467b97STreehugger Robot 
2009*16467b97STreehugger Robot 			break;
2010*16467b97STreehugger Robot 
2011*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
2012*16467b97STreehugger Robot 
2013*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014*16467b97STreehugger Robot 			parser	= NULL;
2015*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
2016*16467b97STreehugger Robot 
2017*16467b97STreehugger Robot 			break;
2018*16467b97STreehugger Robot 
2019*16467b97STreehugger Robot 		case	ANTLR3_TYPE_LEXER:
2020*16467b97STreehugger Robot 
2021*16467b97STreehugger Robot 			lexer	= (pANTLR3_LEXER)   (recognizer->super);
2022*16467b97STreehugger Robot 			parser	= NULL;
2023*16467b97STreehugger Robot 			tparser	= NULL;
2024*16467b97STreehugger Robot 			is		= lexer->input->istream;
2025*16467b97STreehugger Robot 			break;
2026*16467b97STreehugger Robot 
2027*16467b97STreehugger Robot 		default:
2028*16467b97STreehugger Robot 
2029*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030*16467b97STreehugger Robot 			return;
2031*16467b97STreehugger Robot 
2032*16467b97STreehugger Robot 			break;
2033*16467b97STreehugger Robot     }
2034*16467b97STreehugger Robot 
2035*16467b97STreehugger Robot     stopIndex	= recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036*16467b97STreehugger Robot 
2037*16467b97STreehugger Robot     entry	= recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038*16467b97STreehugger Robot 
2039*16467b97STreehugger Robot     if	(entry != NULL)
2040*16467b97STreehugger Robot     {
2041*16467b97STreehugger Robot 		ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042*16467b97STreehugger Robot 
2043*16467b97STreehugger Robot 		/* If we don't already have this entry, append it. The memoize trie does not
2044*16467b97STreehugger Robot 		 * accept duplicates so it won't add it if already there and we just ignore the
2045*16467b97STreehugger Robot 		 * return code as we don't care if it is there already.
2046*16467b97STreehugger Robot 		 */
2047*16467b97STreehugger Robot 		ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048*16467b97STreehugger Robot     }
2049*16467b97STreehugger Robot }
2050*16467b97STreehugger Robot /** A syntactic predicate.  Returns true/false depending on whether
2051*16467b97STreehugger Robot  *  the specified grammar fragment matches the current input stream.
2052*16467b97STreehugger Robot  *  This resets the failed instance var afterwards.
2053*16467b97STreehugger Robot  */
2054*16467b97STreehugger Robot static ANTLR3_BOOLEAN
synpred(pANTLR3_BASE_RECOGNIZER recognizer,void * ctx,void (* predicate)(void * ctx))2055*16467b97STreehugger Robot synpred	(pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056*16467b97STreehugger Robot {
2057*16467b97STreehugger Robot     ANTLR3_MARKER   start;
2058*16467b97STreehugger Robot     pANTLR3_PARSER	    parser;
2059*16467b97STreehugger Robot     pANTLR3_TREE_PARSER	    tparser;
2060*16467b97STreehugger Robot     pANTLR3_INT_STREAM	    is;
2061*16467b97STreehugger Robot 
2062*16467b97STreehugger Robot     switch	(recognizer->type)
2063*16467b97STreehugger Robot     {
2064*16467b97STreehugger Robot 		case	ANTLR3_TYPE_PARSER:
2065*16467b97STreehugger Robot 
2066*16467b97STreehugger Robot 			parser  = (pANTLR3_PARSER) (recognizer->super);
2067*16467b97STreehugger Robot 			tparser	= NULL;
2068*16467b97STreehugger Robot 			is	= parser->tstream->istream;
2069*16467b97STreehugger Robot 
2070*16467b97STreehugger Robot 			break;
2071*16467b97STreehugger Robot 
2072*16467b97STreehugger Robot 		case	ANTLR3_TYPE_TREE_PARSER:
2073*16467b97STreehugger Robot 
2074*16467b97STreehugger Robot 			tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075*16467b97STreehugger Robot 			parser	= NULL;
2076*16467b97STreehugger Robot 			is	= tparser->ctnstream->tnstream->istream;
2077*16467b97STreehugger Robot 
2078*16467b97STreehugger Robot 			break;
2079*16467b97STreehugger Robot 
2080*16467b97STreehugger Robot 		default:
2081*16467b97STreehugger Robot 
2082*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083*16467b97STreehugger Robot 			return ANTLR3_FALSE;
2084*16467b97STreehugger Robot 
2085*16467b97STreehugger Robot 			break;
2086*16467b97STreehugger Robot     }
2087*16467b97STreehugger Robot 
2088*16467b97STreehugger Robot     /* Begin backtracking so we can get back to where we started after trying out
2089*16467b97STreehugger Robot      * the syntactic predicate.
2090*16467b97STreehugger Robot      */
2091*16467b97STreehugger Robot     start   = is->mark(is);
2092*16467b97STreehugger Robot     recognizer->state->backtracking++;
2093*16467b97STreehugger Robot 
2094*16467b97STreehugger Robot     /* Try the syntactical predicate
2095*16467b97STreehugger Robot      */
2096*16467b97STreehugger Robot     predicate(ctx);
2097*16467b97STreehugger Robot 
2098*16467b97STreehugger Robot     /* Reset
2099*16467b97STreehugger Robot      */
2100*16467b97STreehugger Robot     is->rewind(is, start);
2101*16467b97STreehugger Robot     recognizer->state->backtracking--;
2102*16467b97STreehugger Robot 
2103*16467b97STreehugger Robot     if	(recognizer->state->failed == ANTLR3_TRUE)
2104*16467b97STreehugger Robot     {
2105*16467b97STreehugger Robot 		/* Predicate failed
2106*16467b97STreehugger Robot 		 */
2107*16467b97STreehugger Robot 		recognizer->state->failed = ANTLR3_FALSE;
2108*16467b97STreehugger Robot 		return	ANTLR3_FALSE;
2109*16467b97STreehugger Robot     }
2110*16467b97STreehugger Robot     else
2111*16467b97STreehugger Robot     {
2112*16467b97STreehugger Robot 		/* Predicate was successful
2113*16467b97STreehugger Robot 		 */
2114*16467b97STreehugger Robot 		recognizer->state->failed	= ANTLR3_FALSE;
2115*16467b97STreehugger Robot 		return	ANTLR3_TRUE;
2116*16467b97STreehugger Robot     }
2117*16467b97STreehugger Robot }
2118*16467b97STreehugger Robot 
2119*16467b97STreehugger Robot static void
reset(pANTLR3_BASE_RECOGNIZER recognizer)2120*16467b97STreehugger Robot reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121*16467b97STreehugger Robot {
2122*16467b97STreehugger Robot     if	(recognizer->state->following != NULL)
2123*16467b97STreehugger Robot     {
2124*16467b97STreehugger Robot 		recognizer->state->following->free(recognizer->state->following);
2125*16467b97STreehugger Robot     }
2126*16467b97STreehugger Robot 
2127*16467b97STreehugger Robot 	// Reset the state flags
2128*16467b97STreehugger Robot 	//
2129*16467b97STreehugger Robot 	recognizer->state->errorRecovery	= ANTLR3_FALSE;
2130*16467b97STreehugger Robot 	recognizer->state->lastErrorIndex	= -1;
2131*16467b97STreehugger Robot 	recognizer->state->failed			= ANTLR3_FALSE;
2132*16467b97STreehugger Robot 	recognizer->state->errorCount		= 0;
2133*16467b97STreehugger Robot 	recognizer->state->backtracking		= 0;
2134*16467b97STreehugger Robot 	recognizer->state->following		= NULL;
2135*16467b97STreehugger Robot 
2136*16467b97STreehugger Robot 	if	(recognizer->state != NULL)
2137*16467b97STreehugger Robot 	{
2138*16467b97STreehugger Robot 		if	(recognizer->state->ruleMemo != NULL)
2139*16467b97STreehugger Robot 		{
2140*16467b97STreehugger Robot 			recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141*16467b97STreehugger Robot 			recognizer->state->ruleMemo = antlr3IntTrieNew(15);	/* 16 bit depth is enough for 32768 rules! */
2142*16467b97STreehugger Robot 		}
2143*16467b97STreehugger Robot 	}
2144*16467b97STreehugger Robot 
2145*16467b97STreehugger Robot   // ml: 2013-11-05, added reset of old exceptions.
2146*16467b97STreehugger Robot   pANTLR3_EXCEPTION thisE = recognizer->state->exception;
2147*16467b97STreehugger Robot   if	(thisE != NULL)
2148*16467b97STreehugger Robot   {
2149*16467b97STreehugger Robot     thisE->freeEx(thisE);
2150*16467b97STreehugger Robot     recognizer->state->exception = NULL;
2151*16467b97STreehugger Robot   }
2152*16467b97STreehugger Robot 
2153*16467b97STreehugger Robot     // Install a new following set
2154*16467b97STreehugger Robot     //
2155*16467b97STreehugger Robot     recognizer->state->following   = antlr3StackNew(8);
2156*16467b97STreehugger Robot 
2157*16467b97STreehugger Robot }
2158*16467b97STreehugger Robot 
2159*16467b97STreehugger Robot // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2160*16467b97STreehugger Robot // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2161*16467b97STreehugger Robot //
2162*16467b97STreehugger Robot static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)2163*16467b97STreehugger Robot getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2164*16467b97STreehugger Robot {
2165*16467b97STreehugger Robot 	return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2166*16467b97STreehugger Robot }
2167*16467b97STreehugger Robot 
2168*16467b97STreehugger Robot // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2169*16467b97STreehugger Robot // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2170*16467b97STreehugger Robot //
2171*16467b97STreehugger Robot static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)2172*16467b97STreehugger Robot getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
2173*16467b97STreehugger Robot 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2174*16467b97STreehugger Robot {
2175*16467b97STreehugger Robot 	pANTLR3_TOKEN_STREAM			ts;
2176*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN_STREAM		cts;
2177*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN			token;
2178*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN			current;
2179*16467b97STreehugger Robot 	pANTLR3_STRING					text;
2180*16467b97STreehugger Robot 
2181*16467b97STreehugger Robot 	// Dereference the standard pointers
2182*16467b97STreehugger Robot 	//
2183*16467b97STreehugger Robot 	ts		= (pANTLR3_TOKEN_STREAM)istream->super;
2184*16467b97STreehugger Robot 	cts		= (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2185*16467b97STreehugger Robot 
2186*16467b97STreehugger Robot 	// Work out what to use as the current symbol to make a line and offset etc
2187*16467b97STreehugger Robot 	// If we are at EOF, we use the token before EOF
2188*16467b97STreehugger Robot 	//
2189*16467b97STreehugger Robot 	current	= ts->_LT(ts, 1);
2190*16467b97STreehugger Robot 	if	(current->getType(current) == ANTLR3_TOKEN_EOF)
2191*16467b97STreehugger Robot 	{
2192*16467b97STreehugger Robot 		current = ts->_LT(ts, -1);
2193*16467b97STreehugger Robot 	}
2194*16467b97STreehugger Robot 
2195*16467b97STreehugger Robot 	// Create a new empty token
2196*16467b97STreehugger Robot 	//
2197*16467b97STreehugger Robot 	if	(recognizer->state->tokFactory == NULL)
2198*16467b97STreehugger Robot 	{
2199*16467b97STreehugger Robot 		// We don't yet have a token factory for making tokens
2200*16467b97STreehugger Robot 		// we just need a fake one using the input stream of the current
2201*16467b97STreehugger Robot 		// token.
2202*16467b97STreehugger Robot 		//
2203*16467b97STreehugger Robot 		recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2204*16467b97STreehugger Robot 	}
2205*16467b97STreehugger Robot 	token	= recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2206*16467b97STreehugger Robot 	if (token == NULL) { return NULL; }
2207*16467b97STreehugger Robot 
2208*16467b97STreehugger Robot 	// Set some of the token properties based on the current token
2209*16467b97STreehugger Robot 	//
2210*16467b97STreehugger Robot 	token->setLine					(token, current->getLine(current));
2211*16467b97STreehugger Robot 	token->setCharPositionInLine	(token, current->getCharPositionInLine(current));
2212*16467b97STreehugger Robot 	token->setChannel				(token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2213*16467b97STreehugger Robot 	token->setType					(token, expectedTokenType);
2214*16467b97STreehugger Robot     token->user1                    = current->user1;
2215*16467b97STreehugger Robot     token->user2                    = current->user2;
2216*16467b97STreehugger Robot     token->user3                    = current->user3;
2217*16467b97STreehugger Robot     token->custom                   = current->custom;
2218*16467b97STreehugger Robot     token->lineStart                = current->lineStart;
2219*16467b97STreehugger Robot 
2220*16467b97STreehugger Robot 	// Create the token text that shows it has been inserted
2221*16467b97STreehugger Robot 	//
2222*16467b97STreehugger Robot 	token->setText8(token, (pANTLR3_UINT8)"<missing ");
2223*16467b97STreehugger Robot 	text = token->getText(token);
2224*16467b97STreehugger Robot 
2225*16467b97STreehugger Robot 	if	(text != NULL)
2226*16467b97STreehugger Robot 	{
2227*16467b97STreehugger Robot 		text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2228*16467b97STreehugger Robot 		text->append8(text, (const char *)">");
2229*16467b97STreehugger Robot 	}
2230*16467b97STreehugger Robot 
2231*16467b97STreehugger Robot 	// Finally return the pointer to our new token
2232*16467b97STreehugger Robot 	//
2233*16467b97STreehugger Robot 	return	token;
2234*16467b97STreehugger Robot }
2235*16467b97STreehugger Robot 
2236*16467b97STreehugger Robot 
2237*16467b97STreehugger Robot #ifdef	ANTLR3_WINDOWS
2238*16467b97STreehugger Robot #pragma warning( default : 4100 )
2239*16467b97STreehugger Robot #endif
2240*16467b97STreehugger Robot 
2241*16467b97STreehugger Robot /// @}
2242*16467b97STreehugger Robot ///
2243*16467b97STreehugger Robot 
2244