1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot * Contains the base functions that all recognizers require.
3*16467b97STreehugger Robot * Any function can be overridden by a lexer/parser/tree parser or by the
4*16467b97STreehugger Robot * ANTLR3 programmer.
5*16467b97STreehugger Robot *
6*16467b97STreehugger Robot * \addtogroup pANTLR3_BASE_RECOGNIZER
7*16467b97STreehugger Robot * @{
8*16467b97STreehugger Robot */
9*16467b97STreehugger Robot #include <antlr3baserecognizer.h>
10*16467b97STreehugger Robot
11*16467b97STreehugger Robot // [The "BSD licence"]
12*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13*16467b97STreehugger Robot // http://www.temporal-wave.com
14*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
15*16467b97STreehugger Robot //
16*16467b97STreehugger Robot // All rights reserved.
17*16467b97STreehugger Robot //
18*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
19*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
20*16467b97STreehugger Robot // are met:
21*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
22*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer.
23*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
24*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the
25*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution.
26*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
27*16467b97STreehugger Robot // derived from this software without specific prior written permission.
28*16467b97STreehugger Robot //
29*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39*16467b97STreehugger Robot
40*16467b97STreehugger Robot #ifdef ANTLR3_WINDOWS
41*16467b97STreehugger Robot #pragma warning( disable : 4100 )
42*16467b97STreehugger Robot #endif
43*16467b97STreehugger Robot
44*16467b97STreehugger Robot /* Interface functions -standard implementations cover parser and treeparser
45*16467b97STreehugger Robot * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46*16467b97STreehugger Robot * most of these functions.
47*16467b97STreehugger Robot */
48*16467b97STreehugger Robot static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
49*16467b97STreehugger Robot static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
50*16467b97STreehugger Robot static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
51*16467b97STreehugger Robot static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52*16467b97STreehugger Robot static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
53*16467b97STreehugger Robot
54*16467b97STreehugger Robot static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55*16467b97STreehugger Robot static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
56*16467b97STreehugger Robot static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57*16467b97STreehugger Robot static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58*16467b97STreehugger Robot static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59*16467b97STreehugger Robot static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
60*16467b97STreehugger Robot static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
61*16467b97STreehugger Robot static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62*16467b97STreehugger Robot static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63*16467b97STreehugger Robot static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
64*16467b97STreehugger Robot static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65*16467b97STreehugger Robot static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66*16467b97STreehugger Robot static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67*16467b97STreehugger Robot static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68*16467b97STreehugger Robot static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69*16467b97STreehugger Robot static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
70*16467b97STreehugger Robot static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71*16467b97STreehugger Robot static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72*16467b97STreehugger Robot static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73*16467b97STreehugger Robot static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74*16467b97STreehugger Robot static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75*16467b97STreehugger Robot static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76*16467b97STreehugger Robot static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
77*16467b97STreehugger Robot static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
78*16467b97STreehugger Robot static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79*16467b97STreehugger Robot static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
80*16467b97STreehugger Robot ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81*16467b97STreehugger Robot static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
82*16467b97STreehugger Robot
83*16467b97STreehugger Robot ANTLR3_API pANTLR3_BASE_RECOGNIZER
antlr3BaseRecognizerNew(ANTLR3_UINT32 type,ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)84*16467b97STreehugger Robot antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
85*16467b97STreehugger Robot {
86*16467b97STreehugger Robot pANTLR3_BASE_RECOGNIZER recognizer;
87*16467b97STreehugger Robot
88*16467b97STreehugger Robot // Allocate memory for the structure
89*16467b97STreehugger Robot //
90*16467b97STreehugger Robot recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
91*16467b97STreehugger Robot
92*16467b97STreehugger Robot if (recognizer == NULL)
93*16467b97STreehugger Robot {
94*16467b97STreehugger Robot // Allocation failed
95*16467b97STreehugger Robot //
96*16467b97STreehugger Robot return NULL;
97*16467b97STreehugger Robot }
98*16467b97STreehugger Robot
99*16467b97STreehugger Robot
100*16467b97STreehugger Robot // If we have been supplied with a pre-existing recognizer state
101*16467b97STreehugger Robot // then we just install it, otherwise we must create one from scratch
102*16467b97STreehugger Robot //
103*16467b97STreehugger Robot if (state == NULL)
104*16467b97STreehugger Robot {
105*16467b97STreehugger Robot recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot if (recognizer->state == NULL)
108*16467b97STreehugger Robot {
109*16467b97STreehugger Robot ANTLR3_FREE(recognizer);
110*16467b97STreehugger Robot return NULL;
111*16467b97STreehugger Robot }
112*16467b97STreehugger Robot
113*16467b97STreehugger Robot // Initialize any new recognizer state
114*16467b97STreehugger Robot //
115*16467b97STreehugger Robot recognizer->state->errorRecovery = ANTLR3_FALSE;
116*16467b97STreehugger Robot recognizer->state->lastErrorIndex = -1;
117*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
118*16467b97STreehugger Robot recognizer->state->errorCount = 0;
119*16467b97STreehugger Robot recognizer->state->backtracking = 0;
120*16467b97STreehugger Robot recognizer->state->following = NULL;
121*16467b97STreehugger Robot recognizer->state->ruleMemo = NULL;
122*16467b97STreehugger Robot recognizer->state->tokenNames = NULL;
123*16467b97STreehugger Robot recognizer->state->sizeHint = sizeHint;
124*16467b97STreehugger Robot recognizer->state->tokSource = NULL;
125*16467b97STreehugger Robot recognizer->state->tokFactory = NULL;
126*16467b97STreehugger Robot
127*16467b97STreehugger Robot // Rather than check to see if we must initialize
128*16467b97STreehugger Robot // the stack every time we are asked for an new rewrite stream
129*16467b97STreehugger Robot // we just always create an empty stack and then just
130*16467b97STreehugger Robot // free it when the base recognizer is freed.
131*16467b97STreehugger Robot //
132*16467b97STreehugger Robot recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
133*16467b97STreehugger Robot
134*16467b97STreehugger Robot if (recognizer->state->rStreams == NULL)
135*16467b97STreehugger Robot {
136*16467b97STreehugger Robot // Out of memory
137*16467b97STreehugger Robot //
138*16467b97STreehugger Robot ANTLR3_FREE(recognizer->state);
139*16467b97STreehugger Robot ANTLR3_FREE(recognizer);
140*16467b97STreehugger Robot return NULL;
141*16467b97STreehugger Robot }
142*16467b97STreehugger Robot }
143*16467b97STreehugger Robot else
144*16467b97STreehugger Robot {
145*16467b97STreehugger Robot // Install the one we were given, and do not reset it here
146*16467b97STreehugger Robot // as it will either already have been initialized or will
147*16467b97STreehugger Robot // be in a state that needs to be preserved.
148*16467b97STreehugger Robot //
149*16467b97STreehugger Robot recognizer->state = state;
150*16467b97STreehugger Robot }
151*16467b97STreehugger Robot
152*16467b97STreehugger Robot // Install the BR API
153*16467b97STreehugger Robot //
154*16467b97STreehugger Robot recognizer->alreadyParsedRule = alreadyParsedRule;
155*16467b97STreehugger Robot recognizer->beginResync = beginResync;
156*16467b97STreehugger Robot recognizer->combineFollows = combineFollows;
157*16467b97STreehugger Robot recognizer->beginBacktrack = beginBacktrack;
158*16467b97STreehugger Robot recognizer->endBacktrack = endBacktrack;
159*16467b97STreehugger Robot recognizer->computeCSRuleFollow = computeCSRuleFollow;
160*16467b97STreehugger Robot recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
161*16467b97STreehugger Robot recognizer->consumeUntil = consumeUntil;
162*16467b97STreehugger Robot recognizer->consumeUntilSet = consumeUntilSet;
163*16467b97STreehugger Robot recognizer->displayRecognitionError = displayRecognitionError;
164*16467b97STreehugger Robot recognizer->endResync = endResync;
165*16467b97STreehugger Robot recognizer->exConstruct = antlr3MTExceptionNew;
166*16467b97STreehugger Robot recognizer->getRuleInvocationStack = getRuleInvocationStack;
167*16467b97STreehugger Robot recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168*16467b97STreehugger Robot recognizer->getRuleMemoization = getRuleMemoization;
169*16467b97STreehugger Robot recognizer->match = match;
170*16467b97STreehugger Robot recognizer->matchAny = matchAny;
171*16467b97STreehugger Robot recognizer->memoize = memoize;
172*16467b97STreehugger Robot recognizer->mismatch = mismatch;
173*16467b97STreehugger Robot recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
174*16467b97STreehugger Robot recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
175*16467b97STreehugger Robot recognizer->recover = recover;
176*16467b97STreehugger Robot recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177*16467b97STreehugger Robot recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
178*16467b97STreehugger Robot recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
179*16467b97STreehugger Robot recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
180*16467b97STreehugger Robot recognizer->reportError = reportError;
181*16467b97STreehugger Robot recognizer->reset = reset;
182*16467b97STreehugger Robot recognizer->synpred = synpred;
183*16467b97STreehugger Robot recognizer->toStrings = toStrings;
184*16467b97STreehugger Robot recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
185*16467b97STreehugger Robot recognizer->getMissingSymbol = getMissingSymbol;
186*16467b97STreehugger Robot recognizer->debugger = NULL;
187*16467b97STreehugger Robot
188*16467b97STreehugger Robot recognizer->free = freeBR;
189*16467b97STreehugger Robot
190*16467b97STreehugger Robot /* Initialize variables
191*16467b97STreehugger Robot */
192*16467b97STreehugger Robot recognizer->type = type;
193*16467b97STreehugger Robot
194*16467b97STreehugger Robot
195*16467b97STreehugger Robot return recognizer;
196*16467b97STreehugger Robot }
197*16467b97STreehugger Robot static void
freeBR(pANTLR3_BASE_RECOGNIZER recognizer)198*16467b97STreehugger Robot freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
199*16467b97STreehugger Robot {
200*16467b97STreehugger Robot pANTLR3_EXCEPTION thisE;
201*16467b97STreehugger Robot
202*16467b97STreehugger Robot // Did we have a state allocated?
203*16467b97STreehugger Robot //
204*16467b97STreehugger Robot if (recognizer->state != NULL)
205*16467b97STreehugger Robot {
206*16467b97STreehugger Robot // Free any rule memoization we set up
207*16467b97STreehugger Robot //
208*16467b97STreehugger Robot if (recognizer->state->ruleMemo != NULL)
209*16467b97STreehugger Robot {
210*16467b97STreehugger Robot recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211*16467b97STreehugger Robot recognizer->state->ruleMemo = NULL;
212*16467b97STreehugger Robot }
213*16467b97STreehugger Robot
214*16467b97STreehugger Robot // Free any exception space we have left around
215*16467b97STreehugger Robot //
216*16467b97STreehugger Robot thisE = recognizer->state->exception;
217*16467b97STreehugger Robot if (thisE != NULL)
218*16467b97STreehugger Robot {
219*16467b97STreehugger Robot thisE->freeEx(thisE);
220*16467b97STreehugger Robot }
221*16467b97STreehugger Robot
222*16467b97STreehugger Robot // Free any rewrite streams we have allocated
223*16467b97STreehugger Robot //
224*16467b97STreehugger Robot if (recognizer->state->rStreams != NULL)
225*16467b97STreehugger Robot {
226*16467b97STreehugger Robot recognizer->state->rStreams->free(recognizer->state->rStreams);
227*16467b97STreehugger Robot }
228*16467b97STreehugger Robot
229*16467b97STreehugger Robot // Free up any token factory we created (error recovery for instance)
230*16467b97STreehugger Robot //
231*16467b97STreehugger Robot if (recognizer->state->tokFactory != NULL)
232*16467b97STreehugger Robot {
233*16467b97STreehugger Robot recognizer->state->tokFactory->close(recognizer->state->tokFactory);
234*16467b97STreehugger Robot }
235*16467b97STreehugger Robot // Free the shared state memory
236*16467b97STreehugger Robot //
237*16467b97STreehugger Robot ANTLR3_FREE(recognizer->state);
238*16467b97STreehugger Robot }
239*16467b97STreehugger Robot
240*16467b97STreehugger Robot // Free the actual recognizer space
241*16467b97STreehugger Robot //
242*16467b97STreehugger Robot ANTLR3_FREE(recognizer);
243*16467b97STreehugger Robot }
244*16467b97STreehugger Robot
245*16467b97STreehugger Robot /**
246*16467b97STreehugger Robot * Creates a new Mismatched Token Exception and inserts in the recognizer
247*16467b97STreehugger Robot * exception stack.
248*16467b97STreehugger Robot *
249*16467b97STreehugger Robot * \param recognizer
250*16467b97STreehugger Robot * Context pointer for this recognizer
251*16467b97STreehugger Robot *
252*16467b97STreehugger Robot */
253*16467b97STreehugger Robot ANTLR3_API void
antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)254*16467b97STreehugger Robot antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
255*16467b97STreehugger Robot {
256*16467b97STreehugger Robot /* Create a basic recognition exception structure
257*16467b97STreehugger Robot */
258*16467b97STreehugger Robot antlr3RecognitionExceptionNew(recognizer);
259*16467b97STreehugger Robot
260*16467b97STreehugger Robot /* Now update it to indicate this is a Mismatched token exception
261*16467b97STreehugger Robot */
262*16467b97STreehugger Robot recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
263*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
264*16467b97STreehugger Robot
265*16467b97STreehugger Robot return;
266*16467b97STreehugger Robot }
267*16467b97STreehugger Robot
268*16467b97STreehugger Robot ANTLR3_API void
antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)269*16467b97STreehugger Robot antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
270*16467b97STreehugger Robot {
271*16467b97STreehugger Robot pANTLR3_EXCEPTION ex;
272*16467b97STreehugger Robot pANTLR3_LEXER lexer;
273*16467b97STreehugger Robot pANTLR3_PARSER parser;
274*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
275*16467b97STreehugger Robot
276*16467b97STreehugger Robot pANTLR3_INPUT_STREAM ins;
277*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
278*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN_STREAM cts;
279*16467b97STreehugger Robot pANTLR3_TREE_NODE_STREAM tns;
280*16467b97STreehugger Robot
281*16467b97STreehugger Robot ins = NULL;
282*16467b97STreehugger Robot cts = NULL;
283*16467b97STreehugger Robot tns = NULL;
284*16467b97STreehugger Robot is = NULL;
285*16467b97STreehugger Robot lexer = NULL;
286*16467b97STreehugger Robot parser = NULL;
287*16467b97STreehugger Robot tparser = NULL;
288*16467b97STreehugger Robot
289*16467b97STreehugger Robot switch (recognizer->type)
290*16467b97STreehugger Robot {
291*16467b97STreehugger Robot case ANTLR3_TYPE_LEXER:
292*16467b97STreehugger Robot
293*16467b97STreehugger Robot lexer = (pANTLR3_LEXER) (recognizer->super);
294*16467b97STreehugger Robot ins = lexer->input;
295*16467b97STreehugger Robot is = ins->istream;
296*16467b97STreehugger Robot
297*16467b97STreehugger Robot break;
298*16467b97STreehugger Robot
299*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
300*16467b97STreehugger Robot
301*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
302*16467b97STreehugger Robot cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303*16467b97STreehugger Robot is = parser->tstream->istream;
304*16467b97STreehugger Robot
305*16467b97STreehugger Robot break;
306*16467b97STreehugger Robot
307*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
308*16467b97STreehugger Robot
309*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310*16467b97STreehugger Robot tns = tparser->ctnstream->tnstream;
311*16467b97STreehugger Robot is = tns->istream;
312*16467b97STreehugger Robot
313*16467b97STreehugger Robot break;
314*16467b97STreehugger Robot
315*16467b97STreehugger Robot default:
316*16467b97STreehugger Robot
317*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
318*16467b97STreehugger Robot return;
319*16467b97STreehugger Robot
320*16467b97STreehugger Robot break;
321*16467b97STreehugger Robot }
322*16467b97STreehugger Robot
323*16467b97STreehugger Robot /* Create a basic exception structure
324*16467b97STreehugger Robot */
325*16467b97STreehugger Robot ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326*16467b97STreehugger Robot (void *)ANTLR3_RECOGNITION_EX_NAME,
327*16467b97STreehugger Robot NULL,
328*16467b97STreehugger Robot ANTLR3_FALSE);
329*16467b97STreehugger Robot
330*16467b97STreehugger Robot /* Rest of information depends on the base type of the
331*16467b97STreehugger Robot * input stream.
332*16467b97STreehugger Robot */
333*16467b97STreehugger Robot switch (is->type & ANTLR3_INPUT_MASK)
334*16467b97STreehugger Robot {
335*16467b97STreehugger Robot case ANTLR3_CHARSTREAM:
336*16467b97STreehugger Robot
337*16467b97STreehugger Robot ex->c = is->_LA (is, 1); /* Current input character */
338*16467b97STreehugger Robot ex->line = ins->getLine (ins); /* Line number comes from stream */
339*16467b97STreehugger Robot ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
340*16467b97STreehugger Robot ex->index = is->index (is);
341*16467b97STreehugger Robot ex->streamName = ins->fileName;
342*16467b97STreehugger Robot ex->message = "Unexpected character";
343*16467b97STreehugger Robot break;
344*16467b97STreehugger Robot
345*16467b97STreehugger Robot case ANTLR3_TOKENSTREAM:
346*16467b97STreehugger Robot
347*16467b97STreehugger Robot ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */
348*16467b97STreehugger Robot ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine ((pANTLR3_COMMON_TOKEN)(ex->token));
349*16467b97STreehugger Robot ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine ((pANTLR3_COMMON_TOKEN)(ex->token));
350*16467b97STreehugger Robot ex->index = cts->tstream->istream->index (cts->tstream->istream);
351*16467b97STreehugger Robot if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
352*16467b97STreehugger Robot {
353*16467b97STreehugger Robot ex->streamName = NULL;
354*16467b97STreehugger Robot }
355*16467b97STreehugger Robot else
356*16467b97STreehugger Robot {
357*16467b97STreehugger Robot ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
358*16467b97STreehugger Robot }
359*16467b97STreehugger Robot ex->message = "Unexpected token";
360*16467b97STreehugger Robot break;
361*16467b97STreehugger Robot
362*16467b97STreehugger Robot case ANTLR3_COMMONTREENODE:
363*16467b97STreehugger Robot
364*16467b97STreehugger Robot ex->token = tns->_LT (tns, 1); /* Current input tree node */
365*16467b97STreehugger Robot ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine ((pANTLR3_BASE_TREE)(ex->token));
366*16467b97STreehugger Robot ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine ((pANTLR3_BASE_TREE)(ex->token));
367*16467b97STreehugger Robot ex->index = tns->istream->index (tns->istream);
368*16467b97STreehugger Robot
369*16467b97STreehugger Robot // Are you ready for this? Deep breath now...
370*16467b97STreehugger Robot //
371*16467b97STreehugger Robot {
372*16467b97STreehugger Robot pANTLR3_COMMON_TREE tnode;
373*16467b97STreehugger Robot
374*16467b97STreehugger Robot tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
375*16467b97STreehugger Robot
376*16467b97STreehugger Robot if (tnode->token == NULL)
377*16467b97STreehugger Robot {
378*16467b97STreehugger Robot ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
379*16467b97STreehugger Robot }
380*16467b97STreehugger Robot else
381*16467b97STreehugger Robot {
382*16467b97STreehugger Robot if (tnode->token->input == NULL)
383*16467b97STreehugger Robot {
384*16467b97STreehugger Robot ex->streamName = NULL;
385*16467b97STreehugger Robot }
386*16467b97STreehugger Robot else
387*16467b97STreehugger Robot {
388*16467b97STreehugger Robot ex->streamName = tnode->token->input->fileName;
389*16467b97STreehugger Robot }
390*16467b97STreehugger Robot }
391*16467b97STreehugger Robot ex->message = "Unexpected node";
392*16467b97STreehugger Robot }
393*16467b97STreehugger Robot break;
394*16467b97STreehugger Robot }
395*16467b97STreehugger Robot
396*16467b97STreehugger Robot ex->input = is;
397*16467b97STreehugger Robot ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
398*16467b97STreehugger Robot recognizer->state->exception = ex;
399*16467b97STreehugger Robot recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
400*16467b97STreehugger Robot
401*16467b97STreehugger Robot return;
402*16467b97STreehugger Robot }
403*16467b97STreehugger Robot
404*16467b97STreehugger Robot
405*16467b97STreehugger Robot /// Match current input symbol against ttype. Upon error, do one token
406*16467b97STreehugger Robot /// insertion or deletion if possible.
407*16467b97STreehugger Robot /// To turn off single token insertion or deletion error
408*16467b97STreehugger Robot /// recovery, override mismatchRecover() and have it call
409*16467b97STreehugger Robot /// plain mismatch(), which does not recover. Then any error
410*16467b97STreehugger Robot /// in a rule will cause an exception and immediate exit from
411*16467b97STreehugger Robot /// rule. Rule would recover by resynchronizing to the set of
412*16467b97STreehugger Robot /// symbols that can follow rule ref.
413*16467b97STreehugger Robot ///
414*16467b97STreehugger Robot static void *
match(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)415*16467b97STreehugger Robot match( pANTLR3_BASE_RECOGNIZER recognizer,
416*16467b97STreehugger Robot ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
417*16467b97STreehugger Robot {
418*16467b97STreehugger Robot pANTLR3_PARSER parser;
419*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
420*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
421*16467b97STreehugger Robot void * matchedSymbol;
422*16467b97STreehugger Robot
423*16467b97STreehugger Robot switch (recognizer->type)
424*16467b97STreehugger Robot {
425*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
426*16467b97STreehugger Robot
427*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
428*16467b97STreehugger Robot tparser = NULL;
429*16467b97STreehugger Robot is = parser->tstream->istream;
430*16467b97STreehugger Robot
431*16467b97STreehugger Robot break;
432*16467b97STreehugger Robot
433*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
434*16467b97STreehugger Robot
435*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
436*16467b97STreehugger Robot parser = NULL;
437*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
438*16467b97STreehugger Robot
439*16467b97STreehugger Robot break;
440*16467b97STreehugger Robot
441*16467b97STreehugger Robot default:
442*16467b97STreehugger Robot
443*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
444*16467b97STreehugger Robot return ANTLR3_FALSE;
445*16467b97STreehugger Robot
446*16467b97STreehugger Robot break;
447*16467b97STreehugger Robot }
448*16467b97STreehugger Robot
449*16467b97STreehugger Robot // Pick up the current input token/node for assignment to labels
450*16467b97STreehugger Robot //
451*16467b97STreehugger Robot matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
452*16467b97STreehugger Robot
453*16467b97STreehugger Robot if (is->_LA(is, 1) == ttype)
454*16467b97STreehugger Robot {
455*16467b97STreehugger Robot // The token was the one we were told to expect
456*16467b97STreehugger Robot //
457*16467b97STreehugger Robot is->consume(is); // Consume that token from the stream
458*16467b97STreehugger Robot recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
459*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE; // The match was a success
460*16467b97STreehugger Robot return matchedSymbol; // We are done
461*16467b97STreehugger Robot }
462*16467b97STreehugger Robot
463*16467b97STreehugger Robot // We did not find the expected token type, if we are backtracking then
464*16467b97STreehugger Robot // we just set the failed flag and return.
465*16467b97STreehugger Robot //
466*16467b97STreehugger Robot if (recognizer->state->backtracking > 0)
467*16467b97STreehugger Robot {
468*16467b97STreehugger Robot // Backtracking is going on
469*16467b97STreehugger Robot //
470*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_TRUE;
471*16467b97STreehugger Robot return matchedSymbol;
472*16467b97STreehugger Robot }
473*16467b97STreehugger Robot
474*16467b97STreehugger Robot // We did not find the expected token and there is no backtracking
475*16467b97STreehugger Robot // going on, so we mismatch, which creates an exception in the recognizer exception
476*16467b97STreehugger Robot // stack.
477*16467b97STreehugger Robot //
478*16467b97STreehugger Robot matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479*16467b97STreehugger Robot return matchedSymbol;
480*16467b97STreehugger Robot }
481*16467b97STreehugger Robot
482*16467b97STreehugger Robot /// Consumes the next token, whatever it is, and resets the recognizer state
483*16467b97STreehugger Robot /// so that it is not in error.
484*16467b97STreehugger Robot ///
485*16467b97STreehugger Robot /// \param recognizer
486*16467b97STreehugger Robot /// Recognizer context pointer
487*16467b97STreehugger Robot ///
488*16467b97STreehugger Robot static void
matchAny(pANTLR3_BASE_RECOGNIZER recognizer)489*16467b97STreehugger Robot matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
490*16467b97STreehugger Robot {
491*16467b97STreehugger Robot pANTLR3_PARSER parser;
492*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
493*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
494*16467b97STreehugger Robot
495*16467b97STreehugger Robot switch (recognizer->type)
496*16467b97STreehugger Robot {
497*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
498*16467b97STreehugger Robot
499*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
500*16467b97STreehugger Robot tparser = NULL;
501*16467b97STreehugger Robot is = parser->tstream->istream;
502*16467b97STreehugger Robot
503*16467b97STreehugger Robot break;
504*16467b97STreehugger Robot
505*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
506*16467b97STreehugger Robot
507*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
508*16467b97STreehugger Robot parser = NULL;
509*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
510*16467b97STreehugger Robot
511*16467b97STreehugger Robot break;
512*16467b97STreehugger Robot
513*16467b97STreehugger Robot default:
514*16467b97STreehugger Robot
515*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
516*16467b97STreehugger Robot return;
517*16467b97STreehugger Robot
518*16467b97STreehugger Robot break;
519*16467b97STreehugger Robot }
520*16467b97STreehugger Robot recognizer->state->errorRecovery = ANTLR3_FALSE;
521*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
522*16467b97STreehugger Robot is->consume(is);
523*16467b97STreehugger Robot
524*16467b97STreehugger Robot return;
525*16467b97STreehugger Robot }
526*16467b97STreehugger Robot ///
527*16467b97STreehugger Robot ///
528*16467b97STreehugger Robot static ANTLR3_BOOLEAN
mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,ANTLR3_UINT32 ttype)529*16467b97STreehugger Robot mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
530*16467b97STreehugger Robot {
531*16467b97STreehugger Robot ANTLR3_UINT32 nextt;
532*16467b97STreehugger Robot
533*16467b97STreehugger Robot nextt = is->_LA(is, 2);
534*16467b97STreehugger Robot
535*16467b97STreehugger Robot if (nextt == ttype)
536*16467b97STreehugger Robot {
537*16467b97STreehugger Robot if (recognizer->state->exception != NULL)
538*16467b97STreehugger Robot {
539*16467b97STreehugger Robot recognizer->state->exception->expecting = nextt;
540*16467b97STreehugger Robot }
541*16467b97STreehugger Robot return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
542*16467b97STreehugger Robot }
543*16467b97STreehugger Robot else
544*16467b97STreehugger Robot {
545*16467b97STreehugger Robot return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
546*16467b97STreehugger Robot }
547*16467b97STreehugger Robot }
548*16467b97STreehugger Robot
549*16467b97STreehugger Robot ///
550*16467b97STreehugger Robot ///
551*16467b97STreehugger Robot static ANTLR3_BOOLEAN
mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_BITSET_LIST follow)552*16467b97STreehugger Robot mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
553*16467b97STreehugger Robot {
554*16467b97STreehugger Robot ANTLR3_BOOLEAN retcode;
555*16467b97STreehugger Robot pANTLR3_BITSET followClone;
556*16467b97STreehugger Robot pANTLR3_BITSET viableTokensFollowingThisRule;
557*16467b97STreehugger Robot
558*16467b97STreehugger Robot if (follow == NULL)
559*16467b97STreehugger Robot {
560*16467b97STreehugger Robot // There is no information about the tokens that can follow the last one
561*16467b97STreehugger Robot // hence we must say that the current one we found is not a member of the
562*16467b97STreehugger Robot // follow set and does not indicate a missing token. We will just consume this
563*16467b97STreehugger Robot // single token and see if the parser works it out from there.
564*16467b97STreehugger Robot //
565*16467b97STreehugger Robot return ANTLR3_FALSE;
566*16467b97STreehugger Robot }
567*16467b97STreehugger Robot
568*16467b97STreehugger Robot followClone = NULL;
569*16467b97STreehugger Robot viableTokensFollowingThisRule = NULL;
570*16467b97STreehugger Robot
571*16467b97STreehugger Robot // The C bitset maps are laid down at compile time by the
572*16467b97STreehugger Robot // C code generation. Hence we cannot remove things from them
573*16467b97STreehugger Robot // and so on. So, in order to remove EOR (if we need to) then
574*16467b97STreehugger Robot // we clone the static bitset.
575*16467b97STreehugger Robot //
576*16467b97STreehugger Robot followClone = antlr3BitsetLoad(follow);
577*16467b97STreehugger Robot if (followClone == NULL)
578*16467b97STreehugger Robot {
579*16467b97STreehugger Robot return ANTLR3_FALSE;
580*16467b97STreehugger Robot }
581*16467b97STreehugger Robot
582*16467b97STreehugger Robot // Compute what can follow this grammar reference
583*16467b97STreehugger Robot //
584*16467b97STreehugger Robot if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
585*16467b97STreehugger Robot {
586*16467b97STreehugger Robot // EOR can follow, but if we are not the start symbol, we
587*16467b97STreehugger Robot // need to remove it.
588*16467b97STreehugger Robot //
589*16467b97STreehugger Robot //if (recognizer->state->following->vector->count >= 0) ml: always true
590*16467b97STreehugger Robot {
591*16467b97STreehugger Robot followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
592*16467b97STreehugger Robot }
593*16467b97STreehugger Robot
594*16467b97STreehugger Robot // Now compute the visiable tokens that can follow this rule, according to context
595*16467b97STreehugger Robot // and make them part of the follow set.
596*16467b97STreehugger Robot //
597*16467b97STreehugger Robot viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598*16467b97STreehugger Robot followClone->borInPlace(followClone, viableTokensFollowingThisRule);
599*16467b97STreehugger Robot }
600*16467b97STreehugger Robot
601*16467b97STreehugger Robot /// if current token is consistent with what could come after set
602*16467b97STreehugger Robot /// then we know we're missing a token; error recovery is free to
603*16467b97STreehugger Robot /// "insert" the missing token
604*16467b97STreehugger Robot ///
605*16467b97STreehugger Robot /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606*16467b97STreehugger Robot /// in follow set to indicate that the fall of the start symbol is
607*16467b97STreehugger Robot /// in the set (EOF can follow).
608*16467b97STreehugger Robot ///
609*16467b97STreehugger Robot if ( followClone->isMember(followClone, is->_LA(is, 1))
610*16467b97STreehugger Robot || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
611*16467b97STreehugger Robot )
612*16467b97STreehugger Robot {
613*16467b97STreehugger Robot retcode = ANTLR3_TRUE;
614*16467b97STreehugger Robot }
615*16467b97STreehugger Robot else
616*16467b97STreehugger Robot {
617*16467b97STreehugger Robot retcode = ANTLR3_FALSE;
618*16467b97STreehugger Robot }
619*16467b97STreehugger Robot
620*16467b97STreehugger Robot if (viableTokensFollowingThisRule != NULL)
621*16467b97STreehugger Robot {
622*16467b97STreehugger Robot viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
623*16467b97STreehugger Robot }
624*16467b97STreehugger Robot if (followClone != NULL)
625*16467b97STreehugger Robot {
626*16467b97STreehugger Robot followClone->free(followClone);
627*16467b97STreehugger Robot }
628*16467b97STreehugger Robot
629*16467b97STreehugger Robot return retcode;
630*16467b97STreehugger Robot
631*16467b97STreehugger Robot }
632*16467b97STreehugger Robot
633*16467b97STreehugger Robot /// Factor out what to do upon token mismatch so tree parsers can behave
634*16467b97STreehugger Robot /// differently. Override and call mismatchRecover(input, ttype, follow)
635*16467b97STreehugger Robot /// to get single token insertion and deletion. Use this to turn off
636*16467b97STreehugger Robot /// single token insertion and deletion. Override mismatchRecover
637*16467b97STreehugger Robot /// to call this instead.
638*16467b97STreehugger Robot ///
639*16467b97STreehugger Robot /// \remark mismatch only works for parsers and must be overridden for anything else.
640*16467b97STreehugger Robot ///
641*16467b97STreehugger Robot static void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)642*16467b97STreehugger Robot mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
643*16467b97STreehugger Robot {
644*16467b97STreehugger Robot pANTLR3_PARSER parser;
645*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
646*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
647*16467b97STreehugger Robot
648*16467b97STreehugger Robot // Install a mismatched token exception in the exception stack
649*16467b97STreehugger Robot //
650*16467b97STreehugger Robot antlr3MTExceptionNew(recognizer);
651*16467b97STreehugger Robot recognizer->state->exception->expecting = ttype;
652*16467b97STreehugger Robot
653*16467b97STreehugger Robot switch (recognizer->type)
654*16467b97STreehugger Robot {
655*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
656*16467b97STreehugger Robot
657*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
658*16467b97STreehugger Robot tparser = NULL;
659*16467b97STreehugger Robot is = parser->tstream->istream;
660*16467b97STreehugger Robot
661*16467b97STreehugger Robot break;
662*16467b97STreehugger Robot
663*16467b97STreehugger Robot default:
664*16467b97STreehugger Robot
665*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
666*16467b97STreehugger Robot return;
667*16467b97STreehugger Robot
668*16467b97STreehugger Robot break;
669*16467b97STreehugger Robot }
670*16467b97STreehugger Robot
671*16467b97STreehugger Robot if (mismatchIsUnwantedToken(recognizer, is, ttype))
672*16467b97STreehugger Robot {
673*16467b97STreehugger Robot // Create a basic recognition exception structure
674*16467b97STreehugger Robot //
675*16467b97STreehugger Robot antlr3RecognitionExceptionNew(recognizer);
676*16467b97STreehugger Robot
677*16467b97STreehugger Robot // Now update it to indicate this is an unwanted token exception
678*16467b97STreehugger Robot //
679*16467b97STreehugger Robot recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
681*16467b97STreehugger Robot
682*16467b97STreehugger Robot return;
683*16467b97STreehugger Robot }
684*16467b97STreehugger Robot
685*16467b97STreehugger Robot if (mismatchIsMissingToken(recognizer, is, follow))
686*16467b97STreehugger Robot {
687*16467b97STreehugger Robot // Create a basic recognition exception structure
688*16467b97STreehugger Robot //
689*16467b97STreehugger Robot antlr3RecognitionExceptionNew(recognizer);
690*16467b97STreehugger Robot
691*16467b97STreehugger Robot // Now update it to indicate this is an unwanted token exception
692*16467b97STreehugger Robot //
693*16467b97STreehugger Robot recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
695*16467b97STreehugger Robot
696*16467b97STreehugger Robot return;
697*16467b97STreehugger Robot }
698*16467b97STreehugger Robot
699*16467b97STreehugger Robot // Just a mismatched token is all we can dtermine
700*16467b97STreehugger Robot //
701*16467b97STreehugger Robot antlr3MTExceptionNew(recognizer);
702*16467b97STreehugger Robot
703*16467b97STreehugger Robot return;
704*16467b97STreehugger Robot }
705*16467b97STreehugger Robot /// Report a recognition problem.
706*16467b97STreehugger Robot ///
707*16467b97STreehugger Robot /// This method sets errorRecovery to indicate the parser is recovering
708*16467b97STreehugger Robot /// not parsing. Once in recovery mode, no errors are generated.
709*16467b97STreehugger Robot /// To get out of recovery mode, the parser must successfully match
710*16467b97STreehugger Robot /// a token (after a resync). So it will go:
711*16467b97STreehugger Robot ///
712*16467b97STreehugger Robot /// 1. error occurs
713*16467b97STreehugger Robot /// 2. enter recovery mode, report error
714*16467b97STreehugger Robot /// 3. consume until token found in resynch set
715*16467b97STreehugger Robot /// 4. try to resume parsing
716*16467b97STreehugger Robot /// 5. next match() will reset errorRecovery mode
717*16467b97STreehugger Robot ///
718*16467b97STreehugger Robot /// If you override, make sure to update errorCount if you care about that.
719*16467b97STreehugger Robot ///
720*16467b97STreehugger Robot static void
reportError(pANTLR3_BASE_RECOGNIZER recognizer)721*16467b97STreehugger Robot reportError (pANTLR3_BASE_RECOGNIZER recognizer)
722*16467b97STreehugger Robot {
723*16467b97STreehugger Robot // Invoke the debugger event if there is a debugger listening to us
724*16467b97STreehugger Robot //
725*16467b97STreehugger Robot if (recognizer->debugger != NULL)
726*16467b97STreehugger Robot {
727*16467b97STreehugger Robot recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
728*16467b97STreehugger Robot }
729*16467b97STreehugger Robot
730*16467b97STreehugger Robot if (recognizer->state->errorRecovery == ANTLR3_TRUE)
731*16467b97STreehugger Robot {
732*16467b97STreehugger Robot // Already in error recovery so don't display another error while doing so
733*16467b97STreehugger Robot //
734*16467b97STreehugger Robot return;
735*16467b97STreehugger Robot }
736*16467b97STreehugger Robot
737*16467b97STreehugger Robot // Signal we are in error recovery now
738*16467b97STreehugger Robot //
739*16467b97STreehugger Robot recognizer->state->errorRecovery = ANTLR3_TRUE;
740*16467b97STreehugger Robot
741*16467b97STreehugger Robot // Indicate this recognizer had an error while processing.
742*16467b97STreehugger Robot //
743*16467b97STreehugger Robot recognizer->state->errorCount++;
744*16467b97STreehugger Robot
745*16467b97STreehugger Robot // Call the error display routine
746*16467b97STreehugger Robot //
747*16467b97STreehugger Robot recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
748*16467b97STreehugger Robot }
749*16467b97STreehugger Robot
750*16467b97STreehugger Robot static void
beginBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level)751*16467b97STreehugger Robot beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
752*16467b97STreehugger Robot {
753*16467b97STreehugger Robot if (recognizer->debugger != NULL)
754*16467b97STreehugger Robot {
755*16467b97STreehugger Robot recognizer->debugger->beginBacktrack(recognizer->debugger, level);
756*16467b97STreehugger Robot }
757*16467b97STreehugger Robot }
758*16467b97STreehugger Robot
759*16467b97STreehugger Robot static void
endBacktrack(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 level,ANTLR3_BOOLEAN successful)760*16467b97STreehugger Robot endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
761*16467b97STreehugger Robot {
762*16467b97STreehugger Robot if (recognizer->debugger != NULL)
763*16467b97STreehugger Robot {
764*16467b97STreehugger Robot recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
765*16467b97STreehugger Robot }
766*16467b97STreehugger Robot }
767*16467b97STreehugger Robot static void
beginResync(pANTLR3_BASE_RECOGNIZER recognizer)768*16467b97STreehugger Robot beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
769*16467b97STreehugger Robot {
770*16467b97STreehugger Robot if (recognizer->debugger != NULL)
771*16467b97STreehugger Robot {
772*16467b97STreehugger Robot recognizer->debugger->beginResync(recognizer->debugger);
773*16467b97STreehugger Robot }
774*16467b97STreehugger Robot }
775*16467b97STreehugger Robot
776*16467b97STreehugger Robot static void
endResync(pANTLR3_BASE_RECOGNIZER recognizer)777*16467b97STreehugger Robot endResync (pANTLR3_BASE_RECOGNIZER recognizer)
778*16467b97STreehugger Robot {
779*16467b97STreehugger Robot if (recognizer->debugger != NULL)
780*16467b97STreehugger Robot {
781*16467b97STreehugger Robot recognizer->debugger->endResync(recognizer->debugger);
782*16467b97STreehugger Robot }
783*16467b97STreehugger Robot }
784*16467b97STreehugger Robot
785*16467b97STreehugger Robot /// Compute the error recovery set for the current rule.
786*16467b97STreehugger Robot /// Documentation below is from the Java implementation.
787*16467b97STreehugger Robot ///
788*16467b97STreehugger Robot /// During rule invocation, the parser pushes the set of tokens that can
789*16467b97STreehugger Robot /// follow that rule reference on the stack; this amounts to
790*16467b97STreehugger Robot /// computing FIRST of what follows the rule reference in the
791*16467b97STreehugger Robot /// enclosing rule. This local follow set only includes tokens
792*16467b97STreehugger Robot /// from within the rule; i.e., the FIRST computation done by
793*16467b97STreehugger Robot /// ANTLR stops at the end of a rule.
794*16467b97STreehugger Robot //
795*16467b97STreehugger Robot /// EXAMPLE
796*16467b97STreehugger Robot //
797*16467b97STreehugger Robot /// When you find a "no viable alt exception", the input is not
798*16467b97STreehugger Robot /// consistent with any of the alternatives for rule r. The best
799*16467b97STreehugger Robot /// thing to do is to consume tokens until you see something that
800*16467b97STreehugger Robot /// can legally follow a call to r *or* any rule that called r.
801*16467b97STreehugger Robot /// You don't want the exact set of viable next tokens because the
802*16467b97STreehugger Robot /// input might just be missing a token--you might consume the
803*16467b97STreehugger Robot /// rest of the input looking for one of the missing tokens.
804*16467b97STreehugger Robot ///
805*16467b97STreehugger Robot /// Consider grammar:
806*16467b97STreehugger Robot ///
807*16467b97STreehugger Robot /// a : '[' b ']'
808*16467b97STreehugger Robot /// | '(' b ')'
809*16467b97STreehugger Robot /// ;
810*16467b97STreehugger Robot /// b : c '^' INT ;
811*16467b97STreehugger Robot /// c : ID
812*16467b97STreehugger Robot /// | INT
813*16467b97STreehugger Robot /// ;
814*16467b97STreehugger Robot ///
815*16467b97STreehugger Robot /// At each rule invocation, the set of tokens that could follow
816*16467b97STreehugger Robot /// that rule is pushed on a stack. Here are the various "local"
817*16467b97STreehugger Robot /// follow sets:
818*16467b97STreehugger Robot ///
819*16467b97STreehugger Robot /// FOLLOW(b1_in_a) = FIRST(']') = ']'
820*16467b97STreehugger Robot /// FOLLOW(b2_in_a) = FIRST(')') = ')'
821*16467b97STreehugger Robot /// FOLLOW(c_in_b) = FIRST('^') = '^'
822*16467b97STreehugger Robot ///
823*16467b97STreehugger Robot /// Upon erroneous input "[]", the call chain is
824*16467b97STreehugger Robot ///
825*16467b97STreehugger Robot /// a -> b -> c
826*16467b97STreehugger Robot ///
827*16467b97STreehugger Robot /// and, hence, the follow context stack is:
828*16467b97STreehugger Robot ///
829*16467b97STreehugger Robot /// depth local follow set after call to rule
830*16467b97STreehugger Robot /// 0 <EOF> a (from main())
831*16467b97STreehugger Robot /// 1 ']' b
///    2         '^'                 c
833*16467b97STreehugger Robot ///
834*16467b97STreehugger Robot /// Notice that ')' is not included, because b would have to have
835*16467b97STreehugger Robot /// been called from a different context in rule a for ')' to be
836*16467b97STreehugger Robot /// included.
837*16467b97STreehugger Robot ///
838*16467b97STreehugger Robot /// For error recovery, we cannot consider FOLLOW(c)
839*16467b97STreehugger Robot /// (context-sensitive or otherwise). We need the combined set of
840*16467b97STreehugger Robot /// all context-sensitive FOLLOW sets--the set of all tokens that
841*16467b97STreehugger Robot /// could follow any reference in the call chain. We need to
842*16467b97STreehugger Robot /// resync to one of those tokens. Note that FOLLOW(c)='^' and if
843*16467b97STreehugger Robot /// we resync'd to that token, we'd consume until EOF. We need to
844*16467b97STreehugger Robot /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
845*16467b97STreehugger Robot /// In this case, for input "[]", LA(1) is in this set so we would
846*16467b97STreehugger Robot /// not consume anything and after printing an error rule c would
847*16467b97STreehugger Robot /// return normally. It would not find the required '^' though.
848*16467b97STreehugger Robot /// At this point, it gets a mismatched token error and throws an
849*16467b97STreehugger Robot /// exception (since LA(1) is not in the viable following token
850*16467b97STreehugger Robot /// set). The rule exception handler tries to recover, but finds
851*16467b97STreehugger Robot /// the same recovery set and doesn't consume anything. Rule b
852*16467b97STreehugger Robot /// exits normally returning to rule a. Now it finds the ']' (and
853*16467b97STreehugger Robot /// with the successful match exits errorRecovery mode).
854*16467b97STreehugger Robot ///
855*16467b97STreehugger Robot /// So, you can see that the parser walks up call chain looking
856*16467b97STreehugger Robot /// for the token that was a member of the recovery set.
857*16467b97STreehugger Robot ///
858*16467b97STreehugger Robot /// Errors are not generated in errorRecovery mode.
859*16467b97STreehugger Robot ///
860*16467b97STreehugger Robot /// ANTLR's error recovery mechanism is based upon original ideas:
861*16467b97STreehugger Robot ///
862*16467b97STreehugger Robot /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
863*16467b97STreehugger Robot ///
864*16467b97STreehugger Robot /// and
865*16467b97STreehugger Robot ///
866*16467b97STreehugger Robot /// "A note on error recovery in recursive descent parsers":
867*16467b97STreehugger Robot /// http://portal.acm.org/citation.cfm?id=947902.947905
868*16467b97STreehugger Robot ///
869*16467b97STreehugger Robot /// Later, Josef Grosch had some good ideas:
870*16467b97STreehugger Robot ///
871*16467b97STreehugger Robot /// "Efficient and Comfortable Error Recovery in Recursive Descent
872*16467b97STreehugger Robot /// Parsers":
873*16467b97STreehugger Robot /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
874*16467b97STreehugger Robot ///
875*16467b97STreehugger Robot /// Like Grosch I implemented local FOLLOW sets that are combined
876*16467b97STreehugger Robot /// at run-time upon error to avoid overhead during parsing.
877*16467b97STreehugger Robot ///
878*16467b97STreehugger Robot static pANTLR3_BITSET
computeErrorRecoverySet(pANTLR3_BASE_RECOGNIZER recognizer)879*16467b97STreehugger Robot computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
880*16467b97STreehugger Robot {
881*16467b97STreehugger Robot return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
882*16467b97STreehugger Robot }
883*16467b97STreehugger Robot
884*16467b97STreehugger Robot /// Compute the context-sensitive FOLLOW set for current rule.
885*16467b97STreehugger Robot /// Documentation below is from the Java runtime.
886*16467b97STreehugger Robot ///
887*16467b97STreehugger Robot /// This is the set of token types that can follow a specific rule
888*16467b97STreehugger Robot /// reference given a specific call chain. You get the set of
889*16467b97STreehugger Robot /// viable tokens that can possibly come next (look ahead depth 1)
890*16467b97STreehugger Robot /// given the current call chain. Contrast this with the
891*16467b97STreehugger Robot /// definition of plain FOLLOW for rule r:
892*16467b97STreehugger Robot ///
893*16467b97STreehugger Robot /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
894*16467b97STreehugger Robot ///
895*16467b97STreehugger Robot /// where x in T* and alpha, beta in V*; T is set of terminals and
896*16467b97STreehugger Robot /// V is the set of terminals and non terminals. In other words,
897*16467b97STreehugger Robot /// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context).  At
899*16467b97STreehugger Robot /// runtime, however, we know precisely which context applies as
900*16467b97STreehugger Robot /// we have the call chain. We may compute the exact (rather
901*16467b97STreehugger Robot /// than covering superset) set of following tokens.
902*16467b97STreehugger Robot ///
903*16467b97STreehugger Robot /// For example, consider grammar:
904*16467b97STreehugger Robot ///
905*16467b97STreehugger Robot /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
906*16467b97STreehugger Robot /// | "return" expr '.'
907*16467b97STreehugger Robot /// ;
908*16467b97STreehugger Robot /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
909*16467b97STreehugger Robot /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
910*16467b97STreehugger Robot /// | '(' expr ')'
911*16467b97STreehugger Robot /// ;
912*16467b97STreehugger Robot ///
913*16467b97STreehugger Robot /// The FOLLOW sets are all inclusive whereas context-sensitive
914*16467b97STreehugger Robot /// FOLLOW sets are precisely what could follow a rule reference.
915*16467b97STreehugger Robot /// For input input "i=(3);", here is the derivation:
916*16467b97STreehugger Robot ///
917*16467b97STreehugger Robot /// stat => ID '=' expr ';'
918*16467b97STreehugger Robot /// => ID '=' atom ('+' atom)* ';'
919*16467b97STreehugger Robot /// => ID '=' '(' expr ')' ('+' atom)* ';'
920*16467b97STreehugger Robot /// => ID '=' '(' atom ')' ('+' atom)* ';'
921*16467b97STreehugger Robot /// => ID '=' '(' INT ')' ('+' atom)* ';'
922*16467b97STreehugger Robot /// => ID '=' '(' INT ')' ';'
923*16467b97STreehugger Robot ///
924*16467b97STreehugger Robot /// At the "3" token, you'd have a call chain of
925*16467b97STreehugger Robot ///
926*16467b97STreehugger Robot /// stat -> expr -> atom -> expr -> atom
927*16467b97STreehugger Robot ///
928*16467b97STreehugger Robot /// What can follow that specific nested ref to atom? Exactly ')'
929*16467b97STreehugger Robot /// as you can see by looking at the derivation of this specific
930*16467b97STreehugger Robot /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
931*16467b97STreehugger Robot ///
932*16467b97STreehugger Robot /// You want the exact viable token set when recovering from a
933*16467b97STreehugger Robot /// token mismatch. Upon token mismatch, if LA(1) is member of
934*16467b97STreehugger Robot /// the viable next token set, then you know there is most likely
935*16467b97STreehugger Robot /// a missing token in the input stream. "Insert" one by just not
936*16467b97STreehugger Robot /// throwing an exception.
937*16467b97STreehugger Robot ///
938*16467b97STreehugger Robot static pANTLR3_BITSET
computeCSRuleFollow(pANTLR3_BASE_RECOGNIZER recognizer)939*16467b97STreehugger Robot computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
940*16467b97STreehugger Robot {
941*16467b97STreehugger Robot return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
942*16467b97STreehugger Robot }
943*16467b97STreehugger Robot
944*16467b97STreehugger Robot /// Compute the current followset for the input stream.
945*16467b97STreehugger Robot ///
946*16467b97STreehugger Robot static pANTLR3_BITSET
combineFollows(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_BOOLEAN exact)947*16467b97STreehugger Robot combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
948*16467b97STreehugger Robot {
949*16467b97STreehugger Robot pANTLR3_BITSET followSet;
950*16467b97STreehugger Robot pANTLR3_BITSET localFollowSet;
951*16467b97STreehugger Robot ANTLR3_UINT32 top;
952*16467b97STreehugger Robot ANTLR3_UINT32 i;
953*16467b97STreehugger Robot
954*16467b97STreehugger Robot top = recognizer->state->following->size(recognizer->state->following);
955*16467b97STreehugger Robot
956*16467b97STreehugger Robot followSet = antlr3BitsetNew(0);
957*16467b97STreehugger Robot localFollowSet = NULL;
958*16467b97STreehugger Robot
959*16467b97STreehugger Robot for (i = top; i>0; i--)
960*16467b97STreehugger Robot {
961*16467b97STreehugger Robot localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
962*16467b97STreehugger Robot
963*16467b97STreehugger Robot if (localFollowSet != NULL)
964*16467b97STreehugger Robot {
965*16467b97STreehugger Robot followSet->borInPlace(followSet, localFollowSet);
966*16467b97STreehugger Robot
967*16467b97STreehugger Robot if (exact == ANTLR3_TRUE)
968*16467b97STreehugger Robot {
969*16467b97STreehugger Robot if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
970*16467b97STreehugger Robot {
971*16467b97STreehugger Robot // Only leave EOR in the set if at top (start rule); this lets us know
972*16467b97STreehugger Robot // if we have to include the follow(start rule); I.E., EOF
973*16467b97STreehugger Robot //
974*16467b97STreehugger Robot if (i>1)
975*16467b97STreehugger Robot {
976*16467b97STreehugger Robot followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
977*16467b97STreehugger Robot }
978*16467b97STreehugger Robot }
979*16467b97STreehugger Robot else
980*16467b97STreehugger Robot {
981*16467b97STreehugger Robot break; // Cannot see End Of Rule from here, just drop out
982*16467b97STreehugger Robot }
983*16467b97STreehugger Robot }
984*16467b97STreehugger Robot localFollowSet->free(localFollowSet);
985*16467b97STreehugger Robot localFollowSet = NULL;
986*16467b97STreehugger Robot }
987*16467b97STreehugger Robot }
988*16467b97STreehugger Robot
989*16467b97STreehugger Robot if (localFollowSet != NULL)
990*16467b97STreehugger Robot {
991*16467b97STreehugger Robot localFollowSet->free(localFollowSet);
992*16467b97STreehugger Robot }
993*16467b97STreehugger Robot return followSet;
994*16467b97STreehugger Robot }
995*16467b97STreehugger Robot
996*16467b97STreehugger Robot /// Standard/Example error display method.
/// No generic error message display function could possibly do everything correctly
998*16467b97STreehugger Robot /// for all possible parsers. Hence you are provided with this example routine, which
999*16467b97STreehugger Robot /// you should override in your parser/tree parser to do as you will.
1000*16467b97STreehugger Robot ///
1001*16467b97STreehugger Robot /// Here we depart somewhat from the Java runtime as that has now split up a lot
/// of the error display routines into separate units. However, there is little advantage
1003*16467b97STreehugger Robot /// to this in the C version as you will probably implement all such routines as a
1004*16467b97STreehugger Robot /// separate translation unit, rather than install them all as pointers to functions
1005*16467b97STreehugger Robot /// in the base recognizer.
1006*16467b97STreehugger Robot ///
1007*16467b97STreehugger Robot static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)1008*16467b97STreehugger Robot displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1009*16467b97STreehugger Robot {
1010*16467b97STreehugger Robot pANTLR3_PARSER parser;
1011*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1012*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1013*16467b97STreehugger Robot pANTLR3_STRING ttext;
1014*16467b97STreehugger Robot pANTLR3_STRING ftext;
1015*16467b97STreehugger Robot pANTLR3_EXCEPTION ex;
1016*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN theToken;
1017*16467b97STreehugger Robot pANTLR3_BASE_TREE theBaseTree;
1018*16467b97STreehugger Robot pANTLR3_COMMON_TREE theCommonTree;
1019*16467b97STreehugger Robot
1020*16467b97STreehugger Robot // Retrieve some info for easy reading.
1021*16467b97STreehugger Robot //
1022*16467b97STreehugger Robot ex = recognizer->state->exception;
1023*16467b97STreehugger Robot ttext = NULL;
1024*16467b97STreehugger Robot
1025*16467b97STreehugger Robot // See if there is a 'filename' we can use
1026*16467b97STreehugger Robot //
1027*16467b97STreehugger Robot if (ex->streamName == NULL)
1028*16467b97STreehugger Robot {
1029*16467b97STreehugger Robot if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1030*16467b97STreehugger Robot {
1031*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "-end of input-(");
1032*16467b97STreehugger Robot }
1033*16467b97STreehugger Robot else
1034*16467b97STreehugger Robot {
1035*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "-unknown source-(");
1036*16467b97STreehugger Robot }
1037*16467b97STreehugger Robot }
1038*16467b97STreehugger Robot else
1039*16467b97STreehugger Robot {
1040*16467b97STreehugger Robot ftext = ex->streamName->to8(ex->streamName);
1041*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1042*16467b97STreehugger Robot }
1043*16467b97STreehugger Robot
1044*16467b97STreehugger Robot // Next comes the line number
1045*16467b97STreehugger Robot //
1046*16467b97STreehugger Robot
1047*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1048*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : error %d : %s",
1049*16467b97STreehugger Robot recognizer->state->exception->type,
1050*16467b97STreehugger Robot (pANTLR3_UINT8) (recognizer->state->exception->message));
1051*16467b97STreehugger Robot
1052*16467b97STreehugger Robot
1053*16467b97STreehugger Robot // How we determine the next piece is dependent on which thing raised the
1054*16467b97STreehugger Robot // error.
1055*16467b97STreehugger Robot //
1056*16467b97STreehugger Robot switch (recognizer->type)
1057*16467b97STreehugger Robot {
1058*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1059*16467b97STreehugger Robot
1060*16467b97STreehugger Robot // Prepare the knowledge we know we have
1061*16467b97STreehugger Robot //
1062*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1063*16467b97STreehugger Robot tparser = NULL;
1064*16467b97STreehugger Robot is = parser->tstream->istream;
1065*16467b97STreehugger Robot theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1066*16467b97STreehugger Robot ttext = theToken->toString(theToken);
1067*16467b97STreehugger Robot
1068*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1069*16467b97STreehugger Robot if (theToken != NULL)
1070*16467b97STreehugger Robot {
1071*16467b97STreehugger Robot if (theToken->type == ANTLR3_TOKEN_EOF)
1072*16467b97STreehugger Robot {
1073*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, ", at <EOF>");
1074*16467b97STreehugger Robot }
1075*16467b97STreehugger Robot else
1076*16467b97STreehugger Robot {
1077*16467b97STreehugger Robot // Guard against null text in a token
1078*16467b97STreehugger Robot //
1079*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1080*16467b97STreehugger Robot }
1081*16467b97STreehugger Robot }
1082*16467b97STreehugger Robot break;
1083*16467b97STreehugger Robot
1084*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1085*16467b97STreehugger Robot
1086*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1087*16467b97STreehugger Robot parser = NULL;
1088*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1089*16467b97STreehugger Robot theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1090*16467b97STreehugger Robot ttext = theBaseTree->toStringTree(theBaseTree);
1091*16467b97STreehugger Robot
1092*16467b97STreehugger Robot if (theBaseTree != NULL)
1093*16467b97STreehugger Robot {
1094*16467b97STreehugger Robot theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
1095*16467b97STreehugger Robot
1096*16467b97STreehugger Robot if (theCommonTree != NULL)
1097*16467b97STreehugger Robot {
1098*16467b97STreehugger Robot theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
1099*16467b97STreehugger Robot }
1100*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1101*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1102*16467b97STreehugger Robot }
1103*16467b97STreehugger Robot break;
1104*16467b97STreehugger Robot
1105*16467b97STreehugger Robot default:
1106*16467b97STreehugger Robot
1107*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1108*16467b97STreehugger Robot return;
1109*16467b97STreehugger Robot break;
1110*16467b97STreehugger Robot }
1111*16467b97STreehugger Robot
1112*16467b97STreehugger Robot // Although this function should generally be provided by the implementation, this one
1113*16467b97STreehugger Robot // should be as helpful as possible for grammar developers and serve as an example
1114*16467b97STreehugger Robot // of what you can do with each exception type. In general, when you make up your
1115*16467b97STreehugger Robot // 'real' handler, you should debug the routine with all possible errors you expect
1116*16467b97STreehugger Robot // which will then let you be as specific as possible about all circumstances.
1117*16467b97STreehugger Robot //
1118*16467b97STreehugger Robot // Note that in the general case, errors thrown by tree parsers indicate a problem
1119*16467b97STreehugger Robot // with the output of the parser or with the tree grammar itself. The job of the parser
1120*16467b97STreehugger Robot // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1121*16467b97STreehugger Robot // at that stage should really be semantic errors that your own code determines and handles
1122*16467b97STreehugger Robot // in whatever way is appropriate.
1123*16467b97STreehugger Robot //
1124*16467b97STreehugger Robot switch (ex->type)
1125*16467b97STreehugger Robot {
1126*16467b97STreehugger Robot case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1127*16467b97STreehugger Robot
1128*16467b97STreehugger Robot // Indicates that the recognizer was fed a token which seesm to be
1129*16467b97STreehugger Robot // spurious input. We can detect this when the token that follows
1130*16467b97STreehugger Robot // this unwanted token would normally be part of the syntactically
1131*16467b97STreehugger Robot // correct stream. Then we can see that the token we are looking at
1132*16467b97STreehugger Robot // is just something that should not be there and throw this exception.
1133*16467b97STreehugger Robot //
1134*16467b97STreehugger Robot if (tokenNames == NULL)
1135*16467b97STreehugger Robot {
1136*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1137*16467b97STreehugger Robot }
1138*16467b97STreehugger Robot else
1139*16467b97STreehugger Robot {
1140*16467b97STreehugger Robot if (ex->expecting == ANTLR3_TOKEN_EOF)
1141*16467b97STreehugger Robot {
1142*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1143*16467b97STreehugger Robot }
1144*16467b97STreehugger Robot else
1145*16467b97STreehugger Robot {
1146*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1147*16467b97STreehugger Robot }
1148*16467b97STreehugger Robot }
1149*16467b97STreehugger Robot break;
1150*16467b97STreehugger Robot
1151*16467b97STreehugger Robot case ANTLR3_MISSING_TOKEN_EXCEPTION:
1152*16467b97STreehugger Robot
1153*16467b97STreehugger Robot // Indicates that the recognizer detected that the token we just
1154*16467b97STreehugger Robot // hit would be valid syntactically if preceeded by a particular
1155*16467b97STreehugger Robot // token. Perhaps a missing ';' at line end or a missing ',' in an
1156*16467b97STreehugger Robot // expression list, and such like.
1157*16467b97STreehugger Robot //
1158*16467b97STreehugger Robot if (tokenNames == NULL)
1159*16467b97STreehugger Robot {
1160*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1161*16467b97STreehugger Robot }
1162*16467b97STreehugger Robot else
1163*16467b97STreehugger Robot {
1164*16467b97STreehugger Robot if (ex->expecting == ANTLR3_TOKEN_EOF)
1165*16467b97STreehugger Robot {
1166*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1167*16467b97STreehugger Robot }
1168*16467b97STreehugger Robot else
1169*16467b97STreehugger Robot {
1170*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1171*16467b97STreehugger Robot }
1172*16467b97STreehugger Robot }
1173*16467b97STreehugger Robot break;
1174*16467b97STreehugger Robot
1175*16467b97STreehugger Robot case ANTLR3_RECOGNITION_EXCEPTION:
1176*16467b97STreehugger Robot
1177*16467b97STreehugger Robot // Indicates that the recognizer received a token
1178*16467b97STreehugger Robot // in the input that was not predicted. This is the basic exception type
1179*16467b97STreehugger Robot // from which all others are derived. So we assume it was a syntax error.
1180*16467b97STreehugger Robot // You may get this if there are not more tokens and more are needed
1181*16467b97STreehugger Robot // to complete a parse for instance.
1182*16467b97STreehugger Robot //
1183*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1184*16467b97STreehugger Robot break;
1185*16467b97STreehugger Robot
1186*16467b97STreehugger Robot case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1187*16467b97STreehugger Robot
1188*16467b97STreehugger Robot // We were expecting to see one thing and got another. This is the
1189*16467b97STreehugger Robot // most common error if we coudl not detect a missing or unwanted token.
1190*16467b97STreehugger Robot // Here you can spend your efforts to
1191*16467b97STreehugger Robot // derive more useful error messages based on the expected
1192*16467b97STreehugger Robot // token set and the last token and so on. The error following
1193*16467b97STreehugger Robot // bitmaps do a good job of reducing the set that we were looking
1194*16467b97STreehugger Robot // for down to something small. Knowing what you are parsing may be
1195*16467b97STreehugger Robot // able to allow you to be even more specific about an error.
1196*16467b97STreehugger Robot //
1197*16467b97STreehugger Robot if (tokenNames == NULL)
1198*16467b97STreehugger Robot {
1199*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1200*16467b97STreehugger Robot }
1201*16467b97STreehugger Robot else
1202*16467b97STreehugger Robot {
1203*16467b97STreehugger Robot if (ex->expecting == ANTLR3_TOKEN_EOF)
1204*16467b97STreehugger Robot {
1205*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1206*16467b97STreehugger Robot }
1207*16467b97STreehugger Robot else
1208*16467b97STreehugger Robot {
1209*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1210*16467b97STreehugger Robot }
1211*16467b97STreehugger Robot }
1212*16467b97STreehugger Robot break;
1213*16467b97STreehugger Robot
1214*16467b97STreehugger Robot case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1215*16467b97STreehugger Robot
1216*16467b97STreehugger Robot // We could not pick any alt decision from the input given
1217*16467b97STreehugger Robot // so god knows what happened - however when you examine your grammar,
1218*16467b97STreehugger Robot // you should. It means that at the point where the current token occurred
1219*16467b97STreehugger Robot // that the DFA indicates nowhere to go from here.
1220*16467b97STreehugger Robot //
1221*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1222*16467b97STreehugger Robot
1223*16467b97STreehugger Robot break;
1224*16467b97STreehugger Robot
1225*16467b97STreehugger Robot case ANTLR3_MISMATCHED_SET_EXCEPTION:
1226*16467b97STreehugger Robot
1227*16467b97STreehugger Robot {
1228*16467b97STreehugger Robot ANTLR3_UINT32 count;
1229*16467b97STreehugger Robot ANTLR3_UINT32 bit;
1230*16467b97STreehugger Robot ANTLR3_UINT32 size;
1231*16467b97STreehugger Robot ANTLR3_UINT32 numbits;
1232*16467b97STreehugger Robot pANTLR3_BITSET errBits;
1233*16467b97STreehugger Robot
1234*16467b97STreehugger Robot // This means we were able to deal with one of a set of
1235*16467b97STreehugger Robot // possible tokens at this point, but we did not see any
1236*16467b97STreehugger Robot // member of that set.
1237*16467b97STreehugger Robot //
1238*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
1239*16467b97STreehugger Robot
1240*16467b97STreehugger Robot // What tokens could we have accepted at this point in the
1241*16467b97STreehugger Robot // parse?
1242*16467b97STreehugger Robot //
1243*16467b97STreehugger Robot count = 0;
1244*16467b97STreehugger Robot errBits = antlr3BitsetLoad (ex->expectingSet);
1245*16467b97STreehugger Robot numbits = errBits->numBits (errBits);
1246*16467b97STreehugger Robot size = errBits->size (errBits);
1247*16467b97STreehugger Robot
1248*16467b97STreehugger Robot if (size > 0)
1249*16467b97STreehugger Robot {
1250*16467b97STreehugger Robot // However many tokens we could have dealt with here, it is usually
1251*16467b97STreehugger Robot // not useful to print ALL of the set here. I arbitrarily chose 8
1252*16467b97STreehugger Robot // here, but you should do whatever makes sense for you of course.
1253*16467b97STreehugger Robot // No token number 0, so look for bit 1 and on.
1254*16467b97STreehugger Robot //
1255*16467b97STreehugger Robot for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1256*16467b97STreehugger Robot {
1257*16467b97STreehugger Robot // TODO: This doesn;t look right - should be asking if the bit is set!!
1258*16467b97STreehugger Robot //
1259*16467b97STreehugger Robot if (tokenNames[bit])
1260*16467b97STreehugger Robot {
1261*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1262*16467b97STreehugger Robot count++;
1263*16467b97STreehugger Robot }
1264*16467b97STreehugger Robot }
1265*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "\n");
1266*16467b97STreehugger Robot }
1267*16467b97STreehugger Robot else
1268*16467b97STreehugger Robot {
1269*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1270*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1271*16467b97STreehugger Robot }
1272*16467b97STreehugger Robot }
1273*16467b97STreehugger Robot break;
1274*16467b97STreehugger Robot
1275*16467b97STreehugger Robot case ANTLR3_EARLY_EXIT_EXCEPTION:
1276*16467b97STreehugger Robot
1277*16467b97STreehugger Robot // We entered a loop requiring a number of token sequences
1278*16467b97STreehugger Robot // but found a token that ended that sequence earlier than
1279*16467b97STreehugger Robot // we should have done.
1280*16467b97STreehugger Robot //
1281*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1282*16467b97STreehugger Robot break;
1283*16467b97STreehugger Robot
1284*16467b97STreehugger Robot default:
1285*16467b97STreehugger Robot
1286*16467b97STreehugger Robot // We don't handle any other exceptions here, but you can
1287*16467b97STreehugger Robot // if you wish. If we get an exception that hits this point
1288*16467b97STreehugger Robot // then we are just going to report what we know about the
1289*16467b97STreehugger Robot // token.
1290*16467b97STreehugger Robot //
1291*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1292*16467b97STreehugger Robot break;
1293*16467b97STreehugger Robot }
1294*16467b97STreehugger Robot
1295*16467b97STreehugger Robot // Here you have the token that was in error which if this is
1296*16467b97STreehugger Robot // the standard implementation will tell you the line and offset
1297*16467b97STreehugger Robot // and also record the address of the start of the line in the
1298*16467b97STreehugger Robot // input stream. You could therefore print the source line and so on.
1299*16467b97STreehugger Robot // Generally though, I would expect that your lexer/parser will keep
1300*16467b97STreehugger Robot // its own map of lines and source pointers or whatever as there
1301*16467b97STreehugger Robot // are a lot of specific things you need to know about the input
1302*16467b97STreehugger Robot // to do something like that.
1303*16467b97STreehugger Robot // Here is where you do it though :-).
1304*16467b97STreehugger Robot //
1305*16467b97STreehugger Robot }
1306*16467b97STreehugger Robot
1307*16467b97STreehugger Robot /// Return how many syntax errors were detected by this recognizer
1308*16467b97STreehugger Robot ///
1309*16467b97STreehugger Robot static ANTLR3_UINT32
getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)1310*16467b97STreehugger Robot getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1311*16467b97STreehugger Robot {
1312*16467b97STreehugger Robot return recognizer->state->errorCount;
1313*16467b97STreehugger Robot }
1314*16467b97STreehugger Robot
1315*16467b97STreehugger Robot /// Recover from an error found on the input stream. Mostly this is
1316*16467b97STreehugger Robot /// NoViableAlt exceptions, but could be a mismatched token that
1317*16467b97STreehugger Robot /// the match() routine could not recover from.
1318*16467b97STreehugger Robot ///
1319*16467b97STreehugger Robot static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)1320*16467b97STreehugger Robot recover (pANTLR3_BASE_RECOGNIZER recognizer)
1321*16467b97STreehugger Robot {
1322*16467b97STreehugger Robot // Used to compute the follow set of tokens
1323*16467b97STreehugger Robot //
1324*16467b97STreehugger Robot pANTLR3_BITSET followSet;
1325*16467b97STreehugger Robot pANTLR3_PARSER parser;
1326*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1327*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1328*16467b97STreehugger Robot
1329*16467b97STreehugger Robot switch (recognizer->type)
1330*16467b97STreehugger Robot {
1331*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1332*16467b97STreehugger Robot
1333*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1334*16467b97STreehugger Robot tparser = NULL;
1335*16467b97STreehugger Robot is = parser->tstream->istream;
1336*16467b97STreehugger Robot
1337*16467b97STreehugger Robot break;
1338*16467b97STreehugger Robot
1339*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1340*16467b97STreehugger Robot
1341*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1342*16467b97STreehugger Robot parser = NULL;
1343*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1344*16467b97STreehugger Robot
1345*16467b97STreehugger Robot break;
1346*16467b97STreehugger Robot
1347*16467b97STreehugger Robot default:
1348*16467b97STreehugger Robot
1349*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1350*16467b97STreehugger Robot return;
1351*16467b97STreehugger Robot
1352*16467b97STreehugger Robot break;
1353*16467b97STreehugger Robot }
1354*16467b97STreehugger Robot
1355*16467b97STreehugger Robot // Are we about to repeat the same error?
1356*16467b97STreehugger Robot //
1357*16467b97STreehugger Robot if (recognizer->state->lastErrorIndex == is->index(is))
1358*16467b97STreehugger Robot {
1359*16467b97STreehugger Robot // The last error was at the same token index point. This must be a case
1360*16467b97STreehugger Robot // where LT(1) is in the recovery token set so nothing is
1361*16467b97STreehugger Robot // consumed. Consume a single token so at least to prevent
1362*16467b97STreehugger Robot // an infinite loop; this is a failsafe.
1363*16467b97STreehugger Robot //
1364*16467b97STreehugger Robot is->consume(is);
1365*16467b97STreehugger Robot }
1366*16467b97STreehugger Robot
1367*16467b97STreehugger Robot // Record error index position
1368*16467b97STreehugger Robot //
1369*16467b97STreehugger Robot recognizer->state->lastErrorIndex = is->index(is);
1370*16467b97STreehugger Robot
1371*16467b97STreehugger Robot // Work out the follows set for error recovery
1372*16467b97STreehugger Robot //
1373*16467b97STreehugger Robot followSet = recognizer->computeErrorRecoverySet(recognizer);
1374*16467b97STreehugger Robot
1375*16467b97STreehugger Robot // Call resync hook (for debuggers and so on)
1376*16467b97STreehugger Robot //
1377*16467b97STreehugger Robot recognizer->beginResync(recognizer);
1378*16467b97STreehugger Robot
1379*16467b97STreehugger Robot // Consume tokens until we have resynced to something in the follows set
1380*16467b97STreehugger Robot //
1381*16467b97STreehugger Robot recognizer->consumeUntilSet(recognizer, followSet);
1382*16467b97STreehugger Robot
1383*16467b97STreehugger Robot // End resync hook
1384*16467b97STreehugger Robot //
1385*16467b97STreehugger Robot recognizer->endResync(recognizer);
1386*16467b97STreehugger Robot
1387*16467b97STreehugger Robot // Destroy the temporary bitset we produced.
1388*16467b97STreehugger Robot //
1389*16467b97STreehugger Robot followSet->free(followSet);
1390*16467b97STreehugger Robot
1391*16467b97STreehugger Robot // Reset the inError flag so we don't re-report the exception
1392*16467b97STreehugger Robot //
1393*16467b97STreehugger Robot recognizer->state->error = ANTLR3_FALSE;
1394*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
1395*16467b97STreehugger Robot }
1396*16467b97STreehugger Robot
1397*16467b97STreehugger Robot
1398*16467b97STreehugger Robot /// Attempt to recover from a single missing or extra token.
1399*16467b97STreehugger Robot ///
1400*16467b97STreehugger Robot /// EXTRA TOKEN
1401*16467b97STreehugger Robot ///
1402*16467b97STreehugger Robot /// LA(1) is not what we are looking for. If LA(2) has the right token,
1403*16467b97STreehugger Robot /// however, then assume LA(1) is some extra spurious token. Delete it
1404*16467b97STreehugger Robot /// and LA(2) as if we were doing a normal match(), which advances the
1405*16467b97STreehugger Robot /// input.
1406*16467b97STreehugger Robot ///
1407*16467b97STreehugger Robot /// MISSING TOKEN
1408*16467b97STreehugger Robot ///
1409*16467b97STreehugger Robot /// If current token is consistent with what could come after
1410*16467b97STreehugger Robot /// ttype then it is ok to "insert" the missing token, else throw
1411*16467b97STreehugger Robot /// exception For example, Input "i=(3;" is clearly missing the
1412*16467b97STreehugger Robot /// ')'. When the parser returns from the nested call to expr, it
1413*16467b97STreehugger Robot /// will have call chain:
1414*16467b97STreehugger Robot ///
1415*16467b97STreehugger Robot /// stat -> expr -> atom
1416*16467b97STreehugger Robot ///
1417*16467b97STreehugger Robot /// and it will be trying to match the ')' at this point in the
1418*16467b97STreehugger Robot /// derivation:
1419*16467b97STreehugger Robot ///
1420*16467b97STreehugger Robot /// => ID '=' '(' INT ')' ('+' atom)* ';'
1421*16467b97STreehugger Robot /// ^
1422*16467b97STreehugger Robot /// match() will see that ';' doesn't match ')' and report a
1423*16467b97STreehugger Robot /// mismatched token error. To recover, it sees that LA(1)==';'
1424*16467b97STreehugger Robot /// is in the set of tokens that can follow the ')' token
1425*16467b97STreehugger Robot /// reference in rule atom. It can assume that you forgot the ')'.
1426*16467b97STreehugger Robot ///
1427*16467b97STreehugger Robot /// The exception that was passed in, in the java implementation is
1428*16467b97STreehugger Robot /// sorted in the recognizer exception stack in the C version. To 'throw' it we set the
1429*16467b97STreehugger Robot /// error flag and rules cascade back when this is set.
1430*16467b97STreehugger Robot ///
1431*16467b97STreehugger Robot static void *
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 ttype,pANTLR3_BITSET_LIST follow)1432*16467b97STreehugger Robot recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1433*16467b97STreehugger Robot {
1434*16467b97STreehugger Robot pANTLR3_PARSER parser;
1435*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1436*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1437*16467b97STreehugger Robot void * matchedSymbol;
1438*16467b97STreehugger Robot
1439*16467b97STreehugger Robot
1440*16467b97STreehugger Robot
1441*16467b97STreehugger Robot switch (recognizer->type)
1442*16467b97STreehugger Robot {
1443*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1444*16467b97STreehugger Robot
1445*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1446*16467b97STreehugger Robot tparser = NULL;
1447*16467b97STreehugger Robot is = parser->tstream->istream;
1448*16467b97STreehugger Robot
1449*16467b97STreehugger Robot break;
1450*16467b97STreehugger Robot
1451*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1452*16467b97STreehugger Robot
1453*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1454*16467b97STreehugger Robot parser = NULL;
1455*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1456*16467b97STreehugger Robot
1457*16467b97STreehugger Robot break;
1458*16467b97STreehugger Robot
1459*16467b97STreehugger Robot default:
1460*16467b97STreehugger Robot
1461*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1462*16467b97STreehugger Robot return NULL;
1463*16467b97STreehugger Robot
1464*16467b97STreehugger Robot break;
1465*16467b97STreehugger Robot }
1466*16467b97STreehugger Robot
1467*16467b97STreehugger Robot // Create an exception if we need one
1468*16467b97STreehugger Robot //
1469*16467b97STreehugger Robot if (recognizer->state->exception == NULL)
1470*16467b97STreehugger Robot {
1471*16467b97STreehugger Robot antlr3RecognitionExceptionNew(recognizer);
1472*16467b97STreehugger Robot }
1473*16467b97STreehugger Robot
1474*16467b97STreehugger Robot // If the next token after the one we are looking at in the input stream
1475*16467b97STreehugger Robot // is what we are looking for then we remove the one we have discovered
1476*16467b97STreehugger Robot // from the stream by consuming it, then consume this next one along too as
1477*16467b97STreehugger Robot // if nothing had happened.
1478*16467b97STreehugger Robot //
1479*16467b97STreehugger Robot if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1480*16467b97STreehugger Robot {
1481*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1482*16467b97STreehugger Robot recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1483*16467b97STreehugger Robot
1484*16467b97STreehugger Robot // Call resync hook (for debuggers and so on)
1485*16467b97STreehugger Robot //
1486*16467b97STreehugger Robot if (recognizer->debugger != NULL)
1487*16467b97STreehugger Robot {
1488*16467b97STreehugger Robot recognizer->debugger->beginResync(recognizer->debugger);
1489*16467b97STreehugger Robot }
1490*16467b97STreehugger Robot
1491*16467b97STreehugger Robot // "delete" the extra token
1492*16467b97STreehugger Robot //
1493*16467b97STreehugger Robot recognizer->beginResync(recognizer);
1494*16467b97STreehugger Robot is->consume(is);
1495*16467b97STreehugger Robot recognizer->endResync(recognizer);
1496*16467b97STreehugger Robot // End resync hook
1497*16467b97STreehugger Robot //
1498*16467b97STreehugger Robot if (recognizer->debugger != NULL)
1499*16467b97STreehugger Robot {
1500*16467b97STreehugger Robot recognizer->debugger->endResync(recognizer->debugger);
1501*16467b97STreehugger Robot }
1502*16467b97STreehugger Robot
1503*16467b97STreehugger Robot // Print out the error after we consume so that ANTLRWorks sees the
1504*16467b97STreehugger Robot // token in the exception.
1505*16467b97STreehugger Robot //
1506*16467b97STreehugger Robot recognizer->reportError(recognizer);
1507*16467b97STreehugger Robot
1508*16467b97STreehugger Robot // Return the token we are actually matching
1509*16467b97STreehugger Robot //
1510*16467b97STreehugger Robot matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1511*16467b97STreehugger Robot
1512*16467b97STreehugger Robot // Consume the token that the rule actually expected to get as if everything
1513*16467b97STreehugger Robot // was hunky dory.
1514*16467b97STreehugger Robot //
1515*16467b97STreehugger Robot is->consume(is);
1516*16467b97STreehugger Robot
1517*16467b97STreehugger Robot recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1518*16467b97STreehugger Robot
1519*16467b97STreehugger Robot return matchedSymbol;
1520*16467b97STreehugger Robot }
1521*16467b97STreehugger Robot
1522*16467b97STreehugger Robot // Single token deletion (Unwanted above) did not work
1523*16467b97STreehugger Robot // so we see if we can insert a token instead by calculating which
1524*16467b97STreehugger Robot // token would be missing
1525*16467b97STreehugger Robot //
1526*16467b97STreehugger Robot if (mismatchIsMissingToken(recognizer, is, follow))
1527*16467b97STreehugger Robot {
1528*16467b97STreehugger Robot // We can fake the missing token and proceed
1529*16467b97STreehugger Robot //
1530*16467b97STreehugger Robot matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1532*16467b97STreehugger Robot recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533*16467b97STreehugger Robot recognizer->state->exception->token = matchedSymbol;
1534*16467b97STreehugger Robot recognizer->state->exception->expecting = ttype;
1535*16467b97STreehugger Robot
1536*16467b97STreehugger Robot // Print out the error after we insert so that ANTLRWorks sees the
1537*16467b97STreehugger Robot // token in the exception.
1538*16467b97STreehugger Robot //
1539*16467b97STreehugger Robot recognizer->reportError(recognizer);
1540*16467b97STreehugger Robot
1541*16467b97STreehugger Robot recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1542*16467b97STreehugger Robot
1543*16467b97STreehugger Robot return matchedSymbol;
1544*16467b97STreehugger Robot }
1545*16467b97STreehugger Robot
1546*16467b97STreehugger Robot
1547*16467b97STreehugger Robot // Neither deleting nor inserting tokens allows recovery
1548*16467b97STreehugger Robot // must just report the exception.
1549*16467b97STreehugger Robot //
1550*16467b97STreehugger Robot recognizer->state->error = ANTLR3_TRUE;
1551*16467b97STreehugger Robot return NULL;
1552*16467b97STreehugger Robot }
1553*16467b97STreehugger Robot
1554*16467b97STreehugger Robot static void *
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST follow)1555*16467b97STreehugger Robot recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1556*16467b97STreehugger Robot {
1557*16467b97STreehugger Robot pANTLR3_PARSER parser;
1558*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1559*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1560*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN matchedSymbol;
1561*16467b97STreehugger Robot
1562*16467b97STreehugger Robot switch (recognizer->type)
1563*16467b97STreehugger Robot {
1564*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1565*16467b97STreehugger Robot
1566*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1567*16467b97STreehugger Robot tparser = NULL;
1568*16467b97STreehugger Robot is = parser->tstream->istream;
1569*16467b97STreehugger Robot
1570*16467b97STreehugger Robot break;
1571*16467b97STreehugger Robot
1572*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1573*16467b97STreehugger Robot
1574*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1575*16467b97STreehugger Robot parser = NULL;
1576*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1577*16467b97STreehugger Robot
1578*16467b97STreehugger Robot break;
1579*16467b97STreehugger Robot
1580*16467b97STreehugger Robot default:
1581*16467b97STreehugger Robot
1582*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1583*16467b97STreehugger Robot return NULL;
1584*16467b97STreehugger Robot
1585*16467b97STreehugger Robot break;
1586*16467b97STreehugger Robot }
1587*16467b97STreehugger Robot
1588*16467b97STreehugger Robot if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1589*16467b97STreehugger Robot {
1590*16467b97STreehugger Robot // We can fake the missing token and proceed
1591*16467b97STreehugger Robot //
1592*16467b97STreehugger Robot matchedSymbol = (pANTLR3_COMMON_TOKEN)recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593*16467b97STreehugger Robot recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1594*16467b97STreehugger Robot recognizer->state->exception->token = matchedSymbol;
1595*16467b97STreehugger Robot
1596*16467b97STreehugger Robot // Print out the error after we insert so that ANTLRWorks sees the
1597*16467b97STreehugger Robot // token in the exception.
1598*16467b97STreehugger Robot //
1599*16467b97STreehugger Robot recognizer->reportError(recognizer);
1600*16467b97STreehugger Robot
1601*16467b97STreehugger Robot recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1602*16467b97STreehugger Robot
1603*16467b97STreehugger Robot return matchedSymbol;
1604*16467b97STreehugger Robot }
1605*16467b97STreehugger Robot
1606*16467b97STreehugger Robot // TODO - Single token deletion like in recoverFromMismatchedToken()
1607*16467b97STreehugger Robot //
1608*16467b97STreehugger Robot recognizer->state->error = ANTLR3_TRUE;
1609*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_TRUE;
1610*16467b97STreehugger Robot return NULL;
1611*16467b97STreehugger Robot }
1612*16467b97STreehugger Robot
1613*16467b97STreehugger Robot /// This code is factored out from mismatched token and mismatched set
1614*16467b97STreehugger Robot /// recovery. It handles "single token insertion" error recovery for
1615*16467b97STreehugger Robot /// both. No tokens are consumed to recover from insertions. Return
1616*16467b97STreehugger Robot /// true if recovery was possible else return false.
1617*16467b97STreehugger Robot ///
1618*16467b97STreehugger Robot static ANTLR3_BOOLEAN
recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET_LIST followBits)1619*16467b97STreehugger Robot recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1620*16467b97STreehugger Robot {
1621*16467b97STreehugger Robot pANTLR3_BITSET viableToksFollowingRule;
1622*16467b97STreehugger Robot pANTLR3_BITSET follow;
1623*16467b97STreehugger Robot pANTLR3_PARSER parser;
1624*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1625*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1626*16467b97STreehugger Robot
1627*16467b97STreehugger Robot switch (recognizer->type)
1628*16467b97STreehugger Robot {
1629*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1630*16467b97STreehugger Robot
1631*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1632*16467b97STreehugger Robot tparser = NULL;
1633*16467b97STreehugger Robot is = parser->tstream->istream;
1634*16467b97STreehugger Robot
1635*16467b97STreehugger Robot break;
1636*16467b97STreehugger Robot
1637*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1638*16467b97STreehugger Robot
1639*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1640*16467b97STreehugger Robot parser = NULL;
1641*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1642*16467b97STreehugger Robot
1643*16467b97STreehugger Robot break;
1644*16467b97STreehugger Robot
1645*16467b97STreehugger Robot default:
1646*16467b97STreehugger Robot
1647*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648*16467b97STreehugger Robot return ANTLR3_FALSE;
1649*16467b97STreehugger Robot
1650*16467b97STreehugger Robot break;
1651*16467b97STreehugger Robot }
1652*16467b97STreehugger Robot
1653*16467b97STreehugger Robot follow = antlr3BitsetLoad(followBits);
1654*16467b97STreehugger Robot
1655*16467b97STreehugger Robot if (follow == NULL)
1656*16467b97STreehugger Robot {
1657*16467b97STreehugger Robot /* The follow set is NULL, which means we don't know what can come
1658*16467b97STreehugger Robot * next, so we "hit and hope" by just signifying that we cannot
1659*16467b97STreehugger Robot * recover, which will just cause the next token to be consumed,
1660*16467b97STreehugger Robot * which might dig us out.
1661*16467b97STreehugger Robot */
1662*16467b97STreehugger Robot return ANTLR3_FALSE;
1663*16467b97STreehugger Robot }
1664*16467b97STreehugger Robot
1665*16467b97STreehugger Robot /* We have a bitmap for the follow set, hence we can compute
1666*16467b97STreehugger Robot * what can follow this grammar element reference.
1667*16467b97STreehugger Robot */
1668*16467b97STreehugger Robot if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1669*16467b97STreehugger Robot {
1670*16467b97STreehugger Robot /* First we need to know which of the available tokens are viable
1671*16467b97STreehugger Robot * to follow this reference.
1672*16467b97STreehugger Robot */
1673*16467b97STreehugger Robot viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1674*16467b97STreehugger Robot
1675*16467b97STreehugger Robot /* Remove the EOR token, which we do not wish to compute with
1676*16467b97STreehugger Robot */
1677*16467b97STreehugger Robot follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678*16467b97STreehugger Robot viableToksFollowingRule->free(viableToksFollowingRule);
1679*16467b97STreehugger Robot /* We now have the computed set of what can follow the current token
1680*16467b97STreehugger Robot */
1681*16467b97STreehugger Robot }
1682*16467b97STreehugger Robot
1683*16467b97STreehugger Robot /* We can now see if the current token works with the set of tokens
1684*16467b97STreehugger Robot * that could follow the current grammar reference. If it looks like it
1685*16467b97STreehugger Robot * is consistent, then we can "insert" that token by not throwing
1686*16467b97STreehugger Robot * an exception and assuming that we saw it.
1687*16467b97STreehugger Robot */
1688*16467b97STreehugger Robot if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1689*16467b97STreehugger Robot {
1690*16467b97STreehugger Robot /* report the error, but don't cause any rules to abort and stuff
1691*16467b97STreehugger Robot */
1692*16467b97STreehugger Robot recognizer->reportError(recognizer);
1693*16467b97STreehugger Robot if (follow != NULL)
1694*16467b97STreehugger Robot {
1695*16467b97STreehugger Robot follow->free(follow);
1696*16467b97STreehugger Robot }
1697*16467b97STreehugger Robot recognizer->state->error = ANTLR3_FALSE;
1698*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
1699*16467b97STreehugger Robot return ANTLR3_TRUE; /* Success in recovery */
1700*16467b97STreehugger Robot }
1701*16467b97STreehugger Robot
1702*16467b97STreehugger Robot if (follow != NULL)
1703*16467b97STreehugger Robot {
1704*16467b97STreehugger Robot follow->free(follow);
1705*16467b97STreehugger Robot }
1706*16467b97STreehugger Robot
1707*16467b97STreehugger Robot /* We could not find anything viable to do, so this is going to
1708*16467b97STreehugger Robot * cause an exception.
1709*16467b97STreehugger Robot */
1710*16467b97STreehugger Robot return ANTLR3_FALSE;
1711*16467b97STreehugger Robot }
1712*16467b97STreehugger Robot
1713*16467b97STreehugger Robot /// Eat tokens from the input stream until we get one of JUST the right type
1714*16467b97STreehugger Robot ///
1715*16467b97STreehugger Robot static void
consumeUntil(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_UINT32 tokenType)1716*16467b97STreehugger Robot consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1717*16467b97STreehugger Robot {
1718*16467b97STreehugger Robot ANTLR3_UINT32 ttype;
1719*16467b97STreehugger Robot pANTLR3_PARSER parser;
1720*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1721*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1722*16467b97STreehugger Robot
1723*16467b97STreehugger Robot switch (recognizer->type)
1724*16467b97STreehugger Robot {
1725*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1726*16467b97STreehugger Robot
1727*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1728*16467b97STreehugger Robot tparser = NULL;
1729*16467b97STreehugger Robot is = parser->tstream->istream;
1730*16467b97STreehugger Robot
1731*16467b97STreehugger Robot break;
1732*16467b97STreehugger Robot
1733*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1734*16467b97STreehugger Robot
1735*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1736*16467b97STreehugger Robot parser = NULL;
1737*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1738*16467b97STreehugger Robot
1739*16467b97STreehugger Robot break;
1740*16467b97STreehugger Robot
1741*16467b97STreehugger Robot default:
1742*16467b97STreehugger Robot
1743*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1744*16467b97STreehugger Robot return;
1745*16467b97STreehugger Robot
1746*16467b97STreehugger Robot break;
1747*16467b97STreehugger Robot }
1748*16467b97STreehugger Robot
1749*16467b97STreehugger Robot // What do have at the moment?
1750*16467b97STreehugger Robot //
1751*16467b97STreehugger Robot ttype = is->_LA(is, 1);
1752*16467b97STreehugger Robot
1753*16467b97STreehugger Robot // Start eating tokens until we get to the one we want.
1754*16467b97STreehugger Robot //
1755*16467b97STreehugger Robot while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1756*16467b97STreehugger Robot {
1757*16467b97STreehugger Robot is->consume(is);
1758*16467b97STreehugger Robot ttype = is->_LA(is, 1);
1759*16467b97STreehugger Robot }
1760*16467b97STreehugger Robot }
1761*16467b97STreehugger Robot
1762*16467b97STreehugger Robot /// Eat tokens from the input stream until we find one that
1763*16467b97STreehugger Robot /// belongs to the supplied set.
1764*16467b97STreehugger Robot ///
1765*16467b97STreehugger Robot static void
consumeUntilSet(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_BITSET set)1766*16467b97STreehugger Robot consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1767*16467b97STreehugger Robot {
1768*16467b97STreehugger Robot ANTLR3_UINT32 ttype;
1769*16467b97STreehugger Robot pANTLR3_PARSER parser;
1770*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1771*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1772*16467b97STreehugger Robot
1773*16467b97STreehugger Robot switch (recognizer->type)
1774*16467b97STreehugger Robot {
1775*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1776*16467b97STreehugger Robot
1777*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1778*16467b97STreehugger Robot tparser = NULL;
1779*16467b97STreehugger Robot is = parser->tstream->istream;
1780*16467b97STreehugger Robot
1781*16467b97STreehugger Robot break;
1782*16467b97STreehugger Robot
1783*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1784*16467b97STreehugger Robot
1785*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1786*16467b97STreehugger Robot parser = NULL;
1787*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1788*16467b97STreehugger Robot
1789*16467b97STreehugger Robot break;
1790*16467b97STreehugger Robot
1791*16467b97STreehugger Robot default:
1792*16467b97STreehugger Robot
1793*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1794*16467b97STreehugger Robot return;
1795*16467b97STreehugger Robot
1796*16467b97STreehugger Robot break;
1797*16467b97STreehugger Robot }
1798*16467b97STreehugger Robot
1799*16467b97STreehugger Robot // What do have at the moment?
1800*16467b97STreehugger Robot //
1801*16467b97STreehugger Robot ttype = is->_LA(is, 1);
1802*16467b97STreehugger Robot
1803*16467b97STreehugger Robot // Start eating tokens until we get to one we want.
1804*16467b97STreehugger Robot //
1805*16467b97STreehugger Robot while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1806*16467b97STreehugger Robot {
1807*16467b97STreehugger Robot is->consume(is);
1808*16467b97STreehugger Robot ttype = is->_LA(is, 1);
1809*16467b97STreehugger Robot }
1810*16467b97STreehugger Robot }
1811*16467b97STreehugger Robot
1812*16467b97STreehugger Robot /** Return the rule invocation stack (how we got here in the parse.
1813*16467b97STreehugger Robot * In the java version Ter just asks the JVM for all the information
1814*16467b97STreehugger Robot * but in C we don't get this information, so I am going to do nothing
1815*16467b97STreehugger Robot * right now.
1816*16467b97STreehugger Robot */
1817*16467b97STreehugger Robot static pANTLR3_STACK
getRuleInvocationStack(pANTLR3_BASE_RECOGNIZER recognizer)1818*16467b97STreehugger Robot getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
1819*16467b97STreehugger Robot {
1820*16467b97STreehugger Robot return NULL;
1821*16467b97STreehugger Robot }
1822*16467b97STreehugger Robot
1823*16467b97STreehugger Robot static pANTLR3_STACK
getRuleInvocationStackNamed(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 name)1824*16467b97STreehugger Robot getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1825*16467b97STreehugger Robot {
1826*16467b97STreehugger Robot return NULL;
1827*16467b97STreehugger Robot }
1828*16467b97STreehugger Robot
1829*16467b97STreehugger Robot /** Convenience method for template rewrites - NYI.
1830*16467b97STreehugger Robot */
1831*16467b97STreehugger Robot static pANTLR3_HASH_TABLE
toStrings(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_HASH_TABLE tokens)1832*16467b97STreehugger Robot toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
1833*16467b97STreehugger Robot {
1834*16467b97STreehugger Robot return NULL;
1835*16467b97STreehugger Robot }
1836*16467b97STreehugger Robot
1837*16467b97STreehugger Robot static void ANTLR3_CDECL
freeIntTrie(void * trie)1838*16467b97STreehugger Robot freeIntTrie (void * trie)
1839*16467b97STreehugger Robot {
1840*16467b97STreehugger Robot ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1841*16467b97STreehugger Robot }
1842*16467b97STreehugger Robot
1843*16467b97STreehugger Robot
1844*16467b97STreehugger Robot /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845*16467b97STreehugger Robot * start index before. If the rule has not parsed input starting from the supplied start index,
1846*16467b97STreehugger Robot * then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
1847*16467b97STreehugger Robot * then it will return the point where it last stopped parsing after that start point.
1848*16467b97STreehugger Robot *
1849*16467b97STreehugger Robot * \remark
1850*16467b97STreehugger Robot * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851*16467b97STreehugger Robot * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852*16467b97STreehugger Robot * version of the table.
1853*16467b97STreehugger Robot */
1854*16467b97STreehugger Robot static ANTLR3_MARKER
getRuleMemoization(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_INTKEY ruleIndex,ANTLR3_MARKER ruleParseStart)1855*16467b97STreehugger Robot getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1856*16467b97STreehugger Robot {
1857*16467b97STreehugger Robot /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1858*16467b97STreehugger Robot */
1859*16467b97STreehugger Robot pANTLR3_INT_TRIE ruleList;
1860*16467b97STreehugger Robot ANTLR3_MARKER stopIndex;
1861*16467b97STreehugger Robot pANTLR3_TRIE_ENTRY entry;
1862*16467b97STreehugger Robot
1863*16467b97STreehugger Robot /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864*16467b97STreehugger Robot * as we will need it eventually if we are being asked for the memo here.
1865*16467b97STreehugger Robot */
1866*16467b97STreehugger Robot entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1867*16467b97STreehugger Robot
1868*16467b97STreehugger Robot if (entry == NULL)
1869*16467b97STreehugger Robot {
1870*16467b97STreehugger Robot /* Did not find it, so create a new one for it, with a bit depth based on the
1871*16467b97STreehugger Robot * size of the input stream. We need the bit depth to incorporate the number if
1872*16467b97STreehugger Robot * bits required to represent the largest possible stop index in the input, which is the
1873*16467b97STreehugger Robot * last character. An int stream is free to return the largest 64 bit offset if it has
1874*16467b97STreehugger Robot * no idea of the size, but you should remember that this will cause the leftmost
1875*16467b97STreehugger Robot * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1876*16467b97STreehugger Robot */
1877*16467b97STreehugger Robot ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
1878*16467b97STreehugger Robot
1879*16467b97STreehugger Robot if (ruleList != NULL)
1880*16467b97STreehugger Robot {
1881*16467b97STreehugger Robot recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1882*16467b97STreehugger Robot }
1883*16467b97STreehugger Robot
1884*16467b97STreehugger Robot /* We cannot have a stopIndex in a trie we have just created of course
1885*16467b97STreehugger Robot */
1886*16467b97STreehugger Robot return MEMO_RULE_UNKNOWN;
1887*16467b97STreehugger Robot }
1888*16467b97STreehugger Robot
1889*16467b97STreehugger Robot ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
1890*16467b97STreehugger Robot
1891*16467b97STreehugger Robot /* See if there is a stop index associated with the supplied start index.
1892*16467b97STreehugger Robot */
1893*16467b97STreehugger Robot stopIndex = 0;
1894*16467b97STreehugger Robot
1895*16467b97STreehugger Robot entry = ruleList->get(ruleList, ruleParseStart);
1896*16467b97STreehugger Robot if (entry != NULL)
1897*16467b97STreehugger Robot {
1898*16467b97STreehugger Robot stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1899*16467b97STreehugger Robot }
1900*16467b97STreehugger Robot
1901*16467b97STreehugger Robot if (stopIndex == 0)
1902*16467b97STreehugger Robot {
1903*16467b97STreehugger Robot return MEMO_RULE_UNKNOWN;
1904*16467b97STreehugger Robot }
1905*16467b97STreehugger Robot
1906*16467b97STreehugger Robot return stopIndex;
1907*16467b97STreehugger Robot }
1908*16467b97STreehugger Robot
1909*16467b97STreehugger Robot /** Has this rule already parsed input at the current index in the
1910*16467b97STreehugger Robot * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1911*16467b97STreehugger Robot * if we have not.
1912*16467b97STreehugger Robot *
1913*16467b97STreehugger Robot * This method has a side-effect: if we have seen this input for
1914*16467b97STreehugger Robot * this rule and successfully parsed before, then seek ahead to
1915*16467b97STreehugger Robot * 1 past the stop token matched for this rule last time.
1916*16467b97STreehugger Robot */
1917*16467b97STreehugger Robot static ANTLR3_BOOLEAN
alreadyParsedRule(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex)1918*16467b97STreehugger Robot alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1919*16467b97STreehugger Robot {
1920*16467b97STreehugger Robot ANTLR3_MARKER stopIndex;
1921*16467b97STreehugger Robot pANTLR3_LEXER lexer;
1922*16467b97STreehugger Robot pANTLR3_PARSER parser;
1923*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1924*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
1925*16467b97STreehugger Robot
1926*16467b97STreehugger Robot switch (recognizer->type)
1927*16467b97STreehugger Robot {
1928*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
1929*16467b97STreehugger Robot
1930*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
1931*16467b97STreehugger Robot tparser = NULL;
1932*16467b97STreehugger Robot lexer = NULL;
1933*16467b97STreehugger Robot is = parser->tstream->istream;
1934*16467b97STreehugger Robot
1935*16467b97STreehugger Robot break;
1936*16467b97STreehugger Robot
1937*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
1938*16467b97STreehugger Robot
1939*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1940*16467b97STreehugger Robot parser = NULL;
1941*16467b97STreehugger Robot lexer = NULL;
1942*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
1943*16467b97STreehugger Robot
1944*16467b97STreehugger Robot break;
1945*16467b97STreehugger Robot
1946*16467b97STreehugger Robot case ANTLR3_TYPE_LEXER:
1947*16467b97STreehugger Robot
1948*16467b97STreehugger Robot lexer = (pANTLR3_LEXER) (recognizer->super);
1949*16467b97STreehugger Robot parser = NULL;
1950*16467b97STreehugger Robot tparser = NULL;
1951*16467b97STreehugger Robot is = lexer->input->istream;
1952*16467b97STreehugger Robot break;
1953*16467b97STreehugger Robot
1954*16467b97STreehugger Robot default:
1955*16467b97STreehugger Robot
1956*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957*16467b97STreehugger Robot return ANTLR3_FALSE;
1958*16467b97STreehugger Robot
1959*16467b97STreehugger Robot break;
1960*16467b97STreehugger Robot }
1961*16467b97STreehugger Robot
1962*16467b97STreehugger Robot /* See if we have a memo marker for this.
1963*16467b97STreehugger Robot */
1964*16467b97STreehugger Robot stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
1965*16467b97STreehugger Robot
1966*16467b97STreehugger Robot if (stopIndex == MEMO_RULE_UNKNOWN)
1967*16467b97STreehugger Robot {
1968*16467b97STreehugger Robot return ANTLR3_FALSE;
1969*16467b97STreehugger Robot }
1970*16467b97STreehugger Robot
1971*16467b97STreehugger Robot if (stopIndex == MEMO_RULE_FAILED)
1972*16467b97STreehugger Robot {
1973*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_TRUE;
1974*16467b97STreehugger Robot }
1975*16467b97STreehugger Robot else
1976*16467b97STreehugger Robot {
1977*16467b97STreehugger Robot is->seek(is, stopIndex+1);
1978*16467b97STreehugger Robot }
1979*16467b97STreehugger Robot
1980*16467b97STreehugger Robot /* If here then the rule was executed for this input already
1981*16467b97STreehugger Robot */
1982*16467b97STreehugger Robot return ANTLR3_TRUE;
1983*16467b97STreehugger Robot }
1984*16467b97STreehugger Robot
1985*16467b97STreehugger Robot /** Record whether or not this rule parsed the input at this position
1986*16467b97STreehugger Robot * successfully.
1987*16467b97STreehugger Robot */
1988*16467b97STreehugger Robot static void
memoize(pANTLR3_BASE_RECOGNIZER recognizer,ANTLR3_MARKER ruleIndex,ANTLR3_MARKER ruleParseStart)1989*16467b97STreehugger Robot memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1990*16467b97STreehugger Robot {
1991*16467b97STreehugger Robot /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1992*16467b97STreehugger Robot */
1993*16467b97STreehugger Robot pANTLR3_INT_TRIE ruleList;
1994*16467b97STreehugger Robot pANTLR3_TRIE_ENTRY entry;
1995*16467b97STreehugger Robot ANTLR3_MARKER stopIndex;
1996*16467b97STreehugger Robot pANTLR3_LEXER lexer;
1997*16467b97STreehugger Robot pANTLR3_PARSER parser;
1998*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
1999*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
2000*16467b97STreehugger Robot
2001*16467b97STreehugger Robot switch (recognizer->type)
2002*16467b97STreehugger Robot {
2003*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
2004*16467b97STreehugger Robot
2005*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
2006*16467b97STreehugger Robot tparser = NULL;
2007*16467b97STreehugger Robot is = parser->tstream->istream;
2008*16467b97STreehugger Robot
2009*16467b97STreehugger Robot break;
2010*16467b97STreehugger Robot
2011*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
2012*16467b97STreehugger Robot
2013*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2014*16467b97STreehugger Robot parser = NULL;
2015*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
2016*16467b97STreehugger Robot
2017*16467b97STreehugger Robot break;
2018*16467b97STreehugger Robot
2019*16467b97STreehugger Robot case ANTLR3_TYPE_LEXER:
2020*16467b97STreehugger Robot
2021*16467b97STreehugger Robot lexer = (pANTLR3_LEXER) (recognizer->super);
2022*16467b97STreehugger Robot parser = NULL;
2023*16467b97STreehugger Robot tparser = NULL;
2024*16467b97STreehugger Robot is = lexer->input->istream;
2025*16467b97STreehugger Robot break;
2026*16467b97STreehugger Robot
2027*16467b97STreehugger Robot default:
2028*16467b97STreehugger Robot
2029*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2030*16467b97STreehugger Robot return;
2031*16467b97STreehugger Robot
2032*16467b97STreehugger Robot break;
2033*16467b97STreehugger Robot }
2034*16467b97STreehugger Robot
2035*16467b97STreehugger Robot stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2036*16467b97STreehugger Robot
2037*16467b97STreehugger Robot entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2038*16467b97STreehugger Robot
2039*16467b97STreehugger Robot if (entry != NULL)
2040*16467b97STreehugger Robot {
2041*16467b97STreehugger Robot ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2042*16467b97STreehugger Robot
2043*16467b97STreehugger Robot /* If we don't already have this entry, append it. The memoize trie does not
2044*16467b97STreehugger Robot * accept duplicates so it won't add it if already there and we just ignore the
2045*16467b97STreehugger Robot * return code as we don't care if it is there already.
2046*16467b97STreehugger Robot */
2047*16467b97STreehugger Robot ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2048*16467b97STreehugger Robot }
2049*16467b97STreehugger Robot }
2050*16467b97STreehugger Robot /** A syntactic predicate. Returns true/false depending on whether
2051*16467b97STreehugger Robot * the specified grammar fragment matches the current input stream.
2052*16467b97STreehugger Robot * This resets the failed instance var afterwards.
2053*16467b97STreehugger Robot */
2054*16467b97STreehugger Robot static ANTLR3_BOOLEAN
synpred(pANTLR3_BASE_RECOGNIZER recognizer,void * ctx,void (* predicate)(void * ctx))2055*16467b97STreehugger Robot synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2056*16467b97STreehugger Robot {
2057*16467b97STreehugger Robot ANTLR3_MARKER start;
2058*16467b97STreehugger Robot pANTLR3_PARSER parser;
2059*16467b97STreehugger Robot pANTLR3_TREE_PARSER tparser;
2060*16467b97STreehugger Robot pANTLR3_INT_STREAM is;
2061*16467b97STreehugger Robot
2062*16467b97STreehugger Robot switch (recognizer->type)
2063*16467b97STreehugger Robot {
2064*16467b97STreehugger Robot case ANTLR3_TYPE_PARSER:
2065*16467b97STreehugger Robot
2066*16467b97STreehugger Robot parser = (pANTLR3_PARSER) (recognizer->super);
2067*16467b97STreehugger Robot tparser = NULL;
2068*16467b97STreehugger Robot is = parser->tstream->istream;
2069*16467b97STreehugger Robot
2070*16467b97STreehugger Robot break;
2071*16467b97STreehugger Robot
2072*16467b97STreehugger Robot case ANTLR3_TYPE_TREE_PARSER:
2073*16467b97STreehugger Robot
2074*16467b97STreehugger Robot tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2075*16467b97STreehugger Robot parser = NULL;
2076*16467b97STreehugger Robot is = tparser->ctnstream->tnstream->istream;
2077*16467b97STreehugger Robot
2078*16467b97STreehugger Robot break;
2079*16467b97STreehugger Robot
2080*16467b97STreehugger Robot default:
2081*16467b97STreehugger Robot
2082*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083*16467b97STreehugger Robot return ANTLR3_FALSE;
2084*16467b97STreehugger Robot
2085*16467b97STreehugger Robot break;
2086*16467b97STreehugger Robot }
2087*16467b97STreehugger Robot
2088*16467b97STreehugger Robot /* Begin backtracking so we can get back to where we started after trying out
2089*16467b97STreehugger Robot * the syntactic predicate.
2090*16467b97STreehugger Robot */
2091*16467b97STreehugger Robot start = is->mark(is);
2092*16467b97STreehugger Robot recognizer->state->backtracking++;
2093*16467b97STreehugger Robot
2094*16467b97STreehugger Robot /* Try the syntactical predicate
2095*16467b97STreehugger Robot */
2096*16467b97STreehugger Robot predicate(ctx);
2097*16467b97STreehugger Robot
2098*16467b97STreehugger Robot /* Reset
2099*16467b97STreehugger Robot */
2100*16467b97STreehugger Robot is->rewind(is, start);
2101*16467b97STreehugger Robot recognizer->state->backtracking--;
2102*16467b97STreehugger Robot
2103*16467b97STreehugger Robot if (recognizer->state->failed == ANTLR3_TRUE)
2104*16467b97STreehugger Robot {
2105*16467b97STreehugger Robot /* Predicate failed
2106*16467b97STreehugger Robot */
2107*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
2108*16467b97STreehugger Robot return ANTLR3_FALSE;
2109*16467b97STreehugger Robot }
2110*16467b97STreehugger Robot else
2111*16467b97STreehugger Robot {
2112*16467b97STreehugger Robot /* Predicate was successful
2113*16467b97STreehugger Robot */
2114*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
2115*16467b97STreehugger Robot return ANTLR3_TRUE;
2116*16467b97STreehugger Robot }
2117*16467b97STreehugger Robot }
2118*16467b97STreehugger Robot
2119*16467b97STreehugger Robot static void
reset(pANTLR3_BASE_RECOGNIZER recognizer)2120*16467b97STreehugger Robot reset(pANTLR3_BASE_RECOGNIZER recognizer)
2121*16467b97STreehugger Robot {
2122*16467b97STreehugger Robot if (recognizer->state->following != NULL)
2123*16467b97STreehugger Robot {
2124*16467b97STreehugger Robot recognizer->state->following->free(recognizer->state->following);
2125*16467b97STreehugger Robot }
2126*16467b97STreehugger Robot
2127*16467b97STreehugger Robot // Reset the state flags
2128*16467b97STreehugger Robot //
2129*16467b97STreehugger Robot recognizer->state->errorRecovery = ANTLR3_FALSE;
2130*16467b97STreehugger Robot recognizer->state->lastErrorIndex = -1;
2131*16467b97STreehugger Robot recognizer->state->failed = ANTLR3_FALSE;
2132*16467b97STreehugger Robot recognizer->state->errorCount = 0;
2133*16467b97STreehugger Robot recognizer->state->backtracking = 0;
2134*16467b97STreehugger Robot recognizer->state->following = NULL;
2135*16467b97STreehugger Robot
2136*16467b97STreehugger Robot if (recognizer->state != NULL)
2137*16467b97STreehugger Robot {
2138*16467b97STreehugger Robot if (recognizer->state->ruleMemo != NULL)
2139*16467b97STreehugger Robot {
2140*16467b97STreehugger Robot recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141*16467b97STreehugger Robot recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
2142*16467b97STreehugger Robot }
2143*16467b97STreehugger Robot }
2144*16467b97STreehugger Robot
2145*16467b97STreehugger Robot // ml: 2013-11-05, added reset of old exceptions.
2146*16467b97STreehugger Robot pANTLR3_EXCEPTION thisE = recognizer->state->exception;
2147*16467b97STreehugger Robot if (thisE != NULL)
2148*16467b97STreehugger Robot {
2149*16467b97STreehugger Robot thisE->freeEx(thisE);
2150*16467b97STreehugger Robot recognizer->state->exception = NULL;
2151*16467b97STreehugger Robot }
2152*16467b97STreehugger Robot
2153*16467b97STreehugger Robot // Install a new following set
2154*16467b97STreehugger Robot //
2155*16467b97STreehugger Robot recognizer->state->following = antlr3StackNew(8);
2156*16467b97STreehugger Robot
2157*16467b97STreehugger Robot }
2158*16467b97STreehugger Robot
2159*16467b97STreehugger Robot // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2160*16467b97STreehugger Robot // You MAY need override this function if the standard TOKEN_STREAM is not what you are using.
2161*16467b97STreehugger Robot //
2162*16467b97STreehugger Robot static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)2163*16467b97STreehugger Robot getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2164*16467b97STreehugger Robot {
2165*16467b97STreehugger Robot return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2166*16467b97STreehugger Robot }
2167*16467b97STreehugger Robot
2168*16467b97STreehugger Robot // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2169*16467b97STreehugger Robot // You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
2170*16467b97STreehugger Robot //
2171*16467b97STreehugger Robot static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)2172*16467b97STreehugger Robot getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
2173*16467b97STreehugger Robot ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2174*16467b97STreehugger Robot {
2175*16467b97STreehugger Robot pANTLR3_TOKEN_STREAM ts;
2176*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN_STREAM cts;
2177*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN token;
2178*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN current;
2179*16467b97STreehugger Robot pANTLR3_STRING text;
2180*16467b97STreehugger Robot
2181*16467b97STreehugger Robot // Dereference the standard pointers
2182*16467b97STreehugger Robot //
2183*16467b97STreehugger Robot ts = (pANTLR3_TOKEN_STREAM)istream->super;
2184*16467b97STreehugger Robot cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2185*16467b97STreehugger Robot
2186*16467b97STreehugger Robot // Work out what to use as the current symbol to make a line and offset etc
2187*16467b97STreehugger Robot // If we are at EOF, we use the token before EOF
2188*16467b97STreehugger Robot //
2189*16467b97STreehugger Robot current = ts->_LT(ts, 1);
2190*16467b97STreehugger Robot if (current->getType(current) == ANTLR3_TOKEN_EOF)
2191*16467b97STreehugger Robot {
2192*16467b97STreehugger Robot current = ts->_LT(ts, -1);
2193*16467b97STreehugger Robot }
2194*16467b97STreehugger Robot
2195*16467b97STreehugger Robot // Create a new empty token
2196*16467b97STreehugger Robot //
2197*16467b97STreehugger Robot if (recognizer->state->tokFactory == NULL)
2198*16467b97STreehugger Robot {
2199*16467b97STreehugger Robot // We don't yet have a token factory for making tokens
2200*16467b97STreehugger Robot // we just need a fake one using the input stream of the current
2201*16467b97STreehugger Robot // token.
2202*16467b97STreehugger Robot //
2203*16467b97STreehugger Robot recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2204*16467b97STreehugger Robot }
2205*16467b97STreehugger Robot token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2206*16467b97STreehugger Robot if (token == NULL) { return NULL; }
2207*16467b97STreehugger Robot
2208*16467b97STreehugger Robot // Set some of the token properties based on the current token
2209*16467b97STreehugger Robot //
2210*16467b97STreehugger Robot token->setLine (token, current->getLine(current));
2211*16467b97STreehugger Robot token->setCharPositionInLine (token, current->getCharPositionInLine(current));
2212*16467b97STreehugger Robot token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2213*16467b97STreehugger Robot token->setType (token, expectedTokenType);
2214*16467b97STreehugger Robot token->user1 = current->user1;
2215*16467b97STreehugger Robot token->user2 = current->user2;
2216*16467b97STreehugger Robot token->user3 = current->user3;
2217*16467b97STreehugger Robot token->custom = current->custom;
2218*16467b97STreehugger Robot token->lineStart = current->lineStart;
2219*16467b97STreehugger Robot
2220*16467b97STreehugger Robot // Create the token text that shows it has been inserted
2221*16467b97STreehugger Robot //
2222*16467b97STreehugger Robot token->setText8(token, (pANTLR3_UINT8)"<missing ");
2223*16467b97STreehugger Robot text = token->getText(token);
2224*16467b97STreehugger Robot
2225*16467b97STreehugger Robot if (text != NULL)
2226*16467b97STreehugger Robot {
2227*16467b97STreehugger Robot text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2228*16467b97STreehugger Robot text->append8(text, (const char *)">");
2229*16467b97STreehugger Robot }
2230*16467b97STreehugger Robot
2231*16467b97STreehugger Robot // Finally return the pointer to our new token
2232*16467b97STreehugger Robot //
2233*16467b97STreehugger Robot return token;
2234*16467b97STreehugger Robot }
2235*16467b97STreehugger Robot
2236*16467b97STreehugger Robot
2237*16467b97STreehugger Robot #ifdef ANTLR3_WINDOWS
2238*16467b97STreehugger Robot #pragma warning( default : 4100 )
2239*16467b97STreehugger Robot #endif
2240*16467b97STreehugger Robot
2241*16467b97STreehugger Robot /// @}
2242*16467b97STreehugger Robot ///
2243*16467b97STreehugger Robot
2244