xref: /aosp_15_r20/external/antlr/runtime/C/src/antlr3lexer.c (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  *
3*16467b97STreehugger Robot  * Base implementation of an antlr 3 lexer.
4*16467b97STreehugger Robot  *
5*16467b97STreehugger Robot  * An ANTLR3 lexer implements a base recognizer, a token source and
6*16467b97STreehugger Robot  * a lexer interface. It constructs a base recognizer with default
7*16467b97STreehugger Robot  * functions, then overrides any of these that are parser specific (the
8*16467b97STreehugger Robot  * usual default implementation of the base recognizer).
9*16467b97STreehugger Robot  */
10*16467b97STreehugger Robot 
11*16467b97STreehugger Robot // [The "BSD licence"]
12*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13*16467b97STreehugger Robot // http://www.temporal-wave.com
14*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
15*16467b97STreehugger Robot //
16*16467b97STreehugger Robot // All rights reserved.
17*16467b97STreehugger Robot //
18*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
19*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
20*16467b97STreehugger Robot // are met:
21*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
22*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
23*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
24*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
25*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
26*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
27*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
28*16467b97STreehugger Robot //
29*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39*16467b97STreehugger Robot 
40*16467b97STreehugger Robot #include    <antlr3lexer.h>
41*16467b97STreehugger Robot 
42*16467b97STreehugger Robot static void					mTokens						(pANTLR3_LEXER lexer);
43*16467b97STreehugger Robot static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
44*16467b97STreehugger Robot static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
45*16467b97STreehugger Robot static void					popCharStream				(pANTLR3_LEXER lexer);
46*16467b97STreehugger Robot 
47*16467b97STreehugger Robot static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
48*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
49*16467b97STreehugger Robot static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
50*16467b97STreehugger Robot static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
51*16467b97STreehugger Robot static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
52*16467b97STreehugger Robot static void					matchAny					(pANTLR3_LEXER lexer);
53*16467b97STreehugger Robot static void					recover						(pANTLR3_LEXER lexer);
54*16467b97STreehugger Robot static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
55*16467b97STreehugger Robot static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
56*16467b97STreehugger Robot static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
57*16467b97STreehugger Robot static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
58*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);
59*16467b97STreehugger Robot 
60*16467b97STreehugger Robot static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
61*16467b97STreehugger Robot static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
62*16467b97STreehugger Robot static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
63*16467b97STreehugger Robot static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
64*16467b97STreehugger Robot 															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
65*16467b97STreehugger Robot 
66*16467b97STreehugger Robot static void					reset						(pANTLR3_BASE_RECOGNIZER rec);
67*16467b97STreehugger Robot 
68*16467b97STreehugger Robot static void					freeLexer					(pANTLR3_LEXER lexer);
69*16467b97STreehugger Robot 
70*16467b97STreehugger Robot 
71*16467b97STreehugger Robot ANTLR3_API pANTLR3_LEXER
antlr3LexerNew(ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)72*16467b97STreehugger Robot antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73*16467b97STreehugger Robot {
74*16467b97STreehugger Robot     pANTLR3_LEXER   lexer;
75*16467b97STreehugger Robot     pANTLR3_COMMON_TOKEN	specialT;
76*16467b97STreehugger Robot 
77*16467b97STreehugger Robot 	/* Allocate memory
78*16467b97STreehugger Robot 	*/
79*16467b97STreehugger Robot 	lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80*16467b97STreehugger Robot 
81*16467b97STreehugger Robot 	if	(lexer == NULL)
82*16467b97STreehugger Robot 	{
83*16467b97STreehugger Robot 		return	NULL;
84*16467b97STreehugger Robot 	}
85*16467b97STreehugger Robot 
86*16467b97STreehugger Robot 	/* Now we need to create the base recognizer
87*16467b97STreehugger Robot 	*/
88*16467b97STreehugger Robot 	lexer->rec	    =  antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89*16467b97STreehugger Robot 
90*16467b97STreehugger Robot 	if	(lexer->rec == NULL)
91*16467b97STreehugger Robot 	{
92*16467b97STreehugger Robot 		lexer->free(lexer);
93*16467b97STreehugger Robot 		return	NULL;
94*16467b97STreehugger Robot 	}
95*16467b97STreehugger Robot 	lexer->rec->super  =  lexer;
96*16467b97STreehugger Robot 
97*16467b97STreehugger Robot 	lexer->rec->displayRecognitionError	    = displayRecognitionError;
98*16467b97STreehugger Robot 	lexer->rec->reportError					= reportError;
99*16467b97STreehugger Robot 	lexer->rec->reset						= reset;
100*16467b97STreehugger Robot 	lexer->rec->getCurrentInputSymbol		= getCurrentInputSymbol;
101*16467b97STreehugger Robot 	lexer->rec->getMissingSymbol			= getMissingSymbol;
102*16467b97STreehugger Robot 
103*16467b97STreehugger Robot 	/* Now install the token source interface
104*16467b97STreehugger Robot 	*/
105*16467b97STreehugger Robot 	if	(lexer->rec->state->tokSource == NULL)
106*16467b97STreehugger Robot 	{
107*16467b97STreehugger Robot 		lexer->rec->state->tokSource	= (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
108*16467b97STreehugger Robot 
109*16467b97STreehugger Robot 		if	(lexer->rec->state->tokSource == NULL)
110*16467b97STreehugger Robot 		{
111*16467b97STreehugger Robot 			lexer->rec->free(lexer->rec);
112*16467b97STreehugger Robot 			lexer->free(lexer);
113*16467b97STreehugger Robot 
114*16467b97STreehugger Robot 			return	NULL;
115*16467b97STreehugger Robot 		}
116*16467b97STreehugger Robot 		lexer->rec->state->tokSource->super    =  lexer;
117*16467b97STreehugger Robot 
118*16467b97STreehugger Robot 		/* Install the default nextToken() method, which may be overridden
119*16467b97STreehugger Robot 		 * by generated code, or by anything else in fact.
120*16467b97STreehugger Robot 		 */
121*16467b97STreehugger Robot 		lexer->rec->state->tokSource->nextToken	    =  nextToken;
122*16467b97STreehugger Robot 		lexer->rec->state->tokSource->strFactory    = NULL;
123*16467b97STreehugger Robot 
124*16467b97STreehugger Robot 		lexer->rec->state->tokFactory				= NULL;
125*16467b97STreehugger Robot 	}
126*16467b97STreehugger Robot 
127*16467b97STreehugger Robot     /* Install the lexer API
128*16467b97STreehugger Robot      */
129*16467b97STreehugger Robot     lexer->setCharStream			=  setCharStream;
130*16467b97STreehugger Robot     lexer->mTokens					= (void (*)(void *))(mTokens);
131*16467b97STreehugger Robot     lexer->setCharStream			=  setCharStream;
132*16467b97STreehugger Robot     lexer->pushCharStream			=  pushCharStream;
133*16467b97STreehugger Robot     lexer->popCharStream			=  popCharStream;
134*16467b97STreehugger Robot     lexer->emit						=  emit;
135*16467b97STreehugger Robot     lexer->emitNew					=  emitNew;
136*16467b97STreehugger Robot     lexer->matchs					=  matchs;
137*16467b97STreehugger Robot     lexer->matchc					=  matchc;
138*16467b97STreehugger Robot     lexer->matchRange				=  matchRange;
139*16467b97STreehugger Robot     lexer->matchAny					=  matchAny;
140*16467b97STreehugger Robot     lexer->recover					=  recover;
141*16467b97STreehugger Robot     lexer->getLine					=  getLine;
142*16467b97STreehugger Robot     lexer->getCharIndex				=  getCharIndex;
143*16467b97STreehugger Robot     lexer->getCharPositionInLine    =  getCharPositionInLine;
144*16467b97STreehugger Robot     lexer->getText					=  getText;
145*16467b97STreehugger Robot     lexer->free						=  freeLexer;
146*16467b97STreehugger Robot 
147*16467b97STreehugger Robot     /* Initialise the eof token
148*16467b97STreehugger Robot      */
149*16467b97STreehugger Robot     specialT					= &(lexer->rec->state->tokSource->eofToken);
150*16467b97STreehugger Robot     antlr3SetTokenAPI	  (specialT);
151*16467b97STreehugger Robot     specialT->setType	  (specialT, ANTLR3_TOKEN_EOF);
152*16467b97STreehugger Robot     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
153*16467b97STreehugger Robot     specialT->strFactory        = NULL;
154*16467b97STreehugger Robot 	specialT->textState			= ANTLR3_TEXT_NONE;
155*16467b97STreehugger Robot 	specialT->custom			= NULL;
156*16467b97STreehugger Robot 	specialT->user1				= 0;
157*16467b97STreehugger Robot 	specialT->user2				= 0;
158*16467b97STreehugger Robot 	specialT->user3				= 0;
159*16467b97STreehugger Robot 
160*16467b97STreehugger Robot 	// Initialize the skip token.
161*16467b97STreehugger Robot 	//
162*16467b97STreehugger Robot     specialT					= &(lexer->rec->state->tokSource->skipToken);
163*16467b97STreehugger Robot     antlr3SetTokenAPI	  (specialT);
164*16467b97STreehugger Robot     specialT->setType	  (specialT, ANTLR3_TOKEN_INVALID);
165*16467b97STreehugger Robot     specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
166*16467b97STreehugger Robot     specialT->strFactory        = NULL;
167*16467b97STreehugger Robot 	specialT->custom			= NULL;
168*16467b97STreehugger Robot 	specialT->user1				= 0;
169*16467b97STreehugger Robot 	specialT->user2				= 0;
170*16467b97STreehugger Robot 	specialT->user3				= 0;
171*16467b97STreehugger Robot     return  lexer;
172*16467b97STreehugger Robot }
173*16467b97STreehugger Robot 
174*16467b97STreehugger Robot static void
reset(pANTLR3_BASE_RECOGNIZER rec)175*16467b97STreehugger Robot reset	(pANTLR3_BASE_RECOGNIZER rec)
176*16467b97STreehugger Robot {
177*16467b97STreehugger Robot     pANTLR3_LEXER   lexer;
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot     lexer   = (pANTLR3_LEXER)rec->super;
180*16467b97STreehugger Robot 
181*16467b97STreehugger Robot     lexer->rec->state->token			    = NULL;
182*16467b97STreehugger Robot     lexer->rec->state->type			    = ANTLR3_TOKEN_INVALID;
183*16467b97STreehugger Robot     lexer->rec->state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
184*16467b97STreehugger Robot     lexer->rec->state->tokenStartCharIndex	    = -1;
185*16467b97STreehugger Robot     lexer->rec->state->tokenStartCharPositionInLine = -1;
186*16467b97STreehugger Robot     lexer->rec->state->tokenStartLine		    = -1;
187*16467b97STreehugger Robot 
188*16467b97STreehugger Robot     lexer->rec->state->text	                    = NULL;
189*16467b97STreehugger Robot 
190*16467b97STreehugger Robot     // OK - that's all hunky dory, but we may well have had
191*16467b97STreehugger Robot     // a token factory that needs a reset. Do that here
192*16467b97STreehugger Robot     //
193*16467b97STreehugger Robot     if  (lexer->rec->state->tokFactory != NULL)
194*16467b97STreehugger Robot     {
195*16467b97STreehugger Robot         lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
196*16467b97STreehugger Robot     }
197*16467b97STreehugger Robot }
198*16467b97STreehugger Robot 
199*16467b97STreehugger Robot ///
200*16467b97STreehugger Robot /// \brief
201*16467b97STreehugger Robot /// Returns the next available token from the current input stream.
202*16467b97STreehugger Robot ///
203*16467b97STreehugger Robot /// \param toksource
204*16467b97STreehugger Robot /// Points to the implementation of a token source. The lexer is
205*16467b97STreehugger Robot /// addressed by the super structure pointer.
206*16467b97STreehugger Robot ///
207*16467b97STreehugger Robot /// \returns
208*16467b97STreehugger Robot /// The next token in the current input stream or the EOF token
209*16467b97STreehugger Robot /// if there are no more tokens.
210*16467b97STreehugger Robot ///
211*16467b97STreehugger Robot /// \remarks
212*16467b97STreehugger Robot /// Write remarks for nextToken here.
213*16467b97STreehugger Robot ///
214*16467b97STreehugger Robot /// \see nextToken
215*16467b97STreehugger Robot ///
216*16467b97STreehugger Robot ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
nextTokenStr(pANTLR3_TOKEN_SOURCE toksource)217*16467b97STreehugger Robot nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
218*16467b97STreehugger Robot {
219*16467b97STreehugger Robot     pANTLR3_LEXER                   lexer;
220*16467b97STreehugger Robot     pANTLR3_RECOGNIZER_SHARED_STATE state;
221*16467b97STreehugger Robot     pANTLR3_INPUT_STREAM            input;
222*16467b97STreehugger Robot     pANTLR3_INT_STREAM              istream;
223*16467b97STreehugger Robot 
224*16467b97STreehugger Robot     lexer   = (pANTLR3_LEXER)(toksource->super);
225*16467b97STreehugger Robot     state   = lexer->rec->state;
226*16467b97STreehugger Robot     input   = lexer->input;
227*16467b97STreehugger Robot     istream = input->istream;
228*16467b97STreehugger Robot 
229*16467b97STreehugger Robot     /// Loop until we get a non skipped token or EOF
230*16467b97STreehugger Robot     ///
231*16467b97STreehugger Robot     for	(;;)
232*16467b97STreehugger Robot     {
233*16467b97STreehugger Robot         // Get rid of any previous token (token factory takes care of
234*16467b97STreehugger Robot         // any de-allocation when this token is finally used up.
235*16467b97STreehugger Robot         //
236*16467b97STreehugger Robot         state->token		    = NULL;
237*16467b97STreehugger Robot         state->error		    = ANTLR3_FALSE;	    // Start out without an exception
238*16467b97STreehugger Robot         state->failed		    = ANTLR3_FALSE;
239*16467b97STreehugger Robot 
240*16467b97STreehugger Robot         // Now call the matching rules and see if we can generate a new token
241*16467b97STreehugger Robot         //
242*16467b97STreehugger Robot         for	(;;)
243*16467b97STreehugger Robot         {
244*16467b97STreehugger Robot             // Record the start of the token in our input stream.
245*16467b97STreehugger Robot             //
246*16467b97STreehugger Robot             state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
247*16467b97STreehugger Robot             state->tokenStartCharIndex	            = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
248*16467b97STreehugger Robot             state->tokenStartCharPositionInLine     = input->charPositionInLine;
249*16467b97STreehugger Robot             state->tokenStartLine		    = input->line;
250*16467b97STreehugger Robot             state->text			            = NULL;
251*16467b97STreehugger Robot             state->custom                           = NULL;
252*16467b97STreehugger Robot             state->user1                            = 0;
253*16467b97STreehugger Robot             state->user2                            = 0;
254*16467b97STreehugger Robot             state->user3                            = 0;
255*16467b97STreehugger Robot 
256*16467b97STreehugger Robot             if  (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
257*16467b97STreehugger Robot             {
258*16467b97STreehugger Robot                 // Reached the end of the current stream, nothing more to do if this is
259*16467b97STreehugger Robot                 // the last in the stack.
260*16467b97STreehugger Robot                 //
261*16467b97STreehugger Robot                 pANTLR3_COMMON_TOKEN    teof = &(toksource->eofToken);
262*16467b97STreehugger Robot 
263*16467b97STreehugger Robot                 teof->setStartIndex (teof, lexer->getCharIndex(lexer));
264*16467b97STreehugger Robot                 teof->setStopIndex  (teof, lexer->getCharIndex(lexer));
265*16467b97STreehugger Robot                 teof->setLine	    (teof, lexer->getLine(lexer));
266*16467b97STreehugger Robot                 teof->factoryMade = ANTLR3_TRUE;	// This isn't really manufactured but it stops things from trying to free it
267*16467b97STreehugger Robot                 return  teof;
268*16467b97STreehugger Robot             }
269*16467b97STreehugger Robot 
270*16467b97STreehugger Robot             state->token		= NULL;
271*16467b97STreehugger Robot             state->error		= ANTLR3_FALSE;	    // Start out without an exception
272*16467b97STreehugger Robot             state->failed		= ANTLR3_FALSE;
273*16467b97STreehugger Robot 
274*16467b97STreehugger Robot             // Call the generated lexer, see if it can get a new token together.
275*16467b97STreehugger Robot             //
276*16467b97STreehugger Robot             lexer->mTokens(lexer->ctx);
277*16467b97STreehugger Robot 
278*16467b97STreehugger Robot             if  (state->error  == ANTLR3_TRUE)
279*16467b97STreehugger Robot             {
280*16467b97STreehugger Robot                 // Recognition exception, report it and try to recover.
281*16467b97STreehugger Robot                 //
282*16467b97STreehugger Robot                 state->failed	    = ANTLR3_TRUE;
283*16467b97STreehugger Robot                 lexer->rec->reportError(lexer->rec);
284*16467b97STreehugger Robot                 lexer->recover(lexer);
285*16467b97STreehugger Robot             }
286*16467b97STreehugger Robot             else
287*16467b97STreehugger Robot             {
288*16467b97STreehugger Robot                 if (state->token == NULL)
289*16467b97STreehugger Robot                 {
290*16467b97STreehugger Robot                     // Emit the real token, which adds it in to the token stream basically
291*16467b97STreehugger Robot                     //
292*16467b97STreehugger Robot                     emit(lexer);
293*16467b97STreehugger Robot                 }
294*16467b97STreehugger Robot                 else if	(state->token ==  &(toksource->skipToken))
295*16467b97STreehugger Robot                 {
296*16467b97STreehugger Robot                     // A real token could have been generated, but "Computer say's naaaaah" and it
297*16467b97STreehugger Robot                     // it is just something we need to skip altogether.
298*16467b97STreehugger Robot                     //
299*16467b97STreehugger Robot                     continue;
300*16467b97STreehugger Robot                 }
301*16467b97STreehugger Robot 
302*16467b97STreehugger Robot                 // Good token, not skipped, not EOF token
303*16467b97STreehugger Robot                 //
304*16467b97STreehugger Robot                 return  state->token;
305*16467b97STreehugger Robot             }
306*16467b97STreehugger Robot         }
307*16467b97STreehugger Robot     }
308*16467b97STreehugger Robot }
309*16467b97STreehugger Robot 
310*16467b97STreehugger Robot /**
311*16467b97STreehugger Robot  * \brief
312*16467b97STreehugger Robot  * Default implementation of the nextToken() call for a lexer.
313*16467b97STreehugger Robot  *
314*16467b97STreehugger Robot  * \param toksource
315*16467b97STreehugger Robot  * Points to the implementation of a token source. The lexer is
316*16467b97STreehugger Robot  * addressed by the super structure pointer.
317*16467b97STreehugger Robot  *
318*16467b97STreehugger Robot  * \returns
319*16467b97STreehugger Robot  * The next token in the current input stream or the EOF token
320*16467b97STreehugger Robot  * if there are no more tokens in any input stream in the stack.
321*16467b97STreehugger Robot  *
322*16467b97STreehugger Robot  * Write detailed description for nextToken here.
323*16467b97STreehugger Robot  *
324*16467b97STreehugger Robot  * \remarks
325*16467b97STreehugger Robot  * Write remarks for nextToken here.
326*16467b97STreehugger Robot  *
327*16467b97STreehugger Robot  * \see nextTokenStr
328*16467b97STreehugger Robot  */
329*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN
nextToken(pANTLR3_TOKEN_SOURCE toksource)330*16467b97STreehugger Robot nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
331*16467b97STreehugger Robot {
332*16467b97STreehugger Robot 	pANTLR3_COMMON_TOKEN tok;
333*16467b97STreehugger Robot 
334*16467b97STreehugger Robot 	// Find the next token in the current stream
335*16467b97STreehugger Robot 	//
336*16467b97STreehugger Robot 	tok = nextTokenStr(toksource);
337*16467b97STreehugger Robot 
338*16467b97STreehugger Robot 	// If we got to the EOF token then switch to the previous
339*16467b97STreehugger Robot 	// input stream if there were any and just return the
340*16467b97STreehugger Robot 	// EOF if there are none. We must check the next token
341*16467b97STreehugger Robot 	// in any outstanding input stream we pop into the active
342*16467b97STreehugger Robot 	// role to see if it was sitting at EOF after PUSHing the
343*16467b97STreehugger Robot 	// stream we just consumed, otherwise we will return EOF
344*16467b97STreehugger Robot 	// on the reinstalled input stream, when in actual fact
345*16467b97STreehugger Robot 	// there might be more input streams to POP before the
346*16467b97STreehugger Robot 	// real EOF of the whole logical input stream. Hence we
347*16467b97STreehugger Robot 	// use a while loop here until we find something in the stream
348*16467b97STreehugger Robot 	// that isn't EOF or we reach the actual end of the last input
349*16467b97STreehugger Robot 	// stream on the stack.
350*16467b97STreehugger Robot 	//
351*16467b97STreehugger Robot 	while	((tok != NULL) && (tok->type == ANTLR3_TOKEN_EOF))
352*16467b97STreehugger Robot 	{
353*16467b97STreehugger Robot 		pANTLR3_LEXER   lexer;
354*16467b97STreehugger Robot 
355*16467b97STreehugger Robot 		lexer   = (pANTLR3_LEXER)(toksource->super);
356*16467b97STreehugger Robot 
357*16467b97STreehugger Robot 		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
358*16467b97STreehugger Robot 		{
359*16467b97STreehugger Robot 			// We have another input stream in the stack so we
360*16467b97STreehugger Robot 			// need to revert to it, then resume the loop to check
361*16467b97STreehugger Robot 			// it wasn't sitting at EOF itself.
362*16467b97STreehugger Robot 			//
363*16467b97STreehugger Robot 			lexer->popCharStream(lexer);
364*16467b97STreehugger Robot 			tok = nextTokenStr(toksource);
365*16467b97STreehugger Robot 		}
366*16467b97STreehugger Robot 		else
367*16467b97STreehugger Robot 		{
368*16467b97STreehugger Robot 			// There were no more streams on the input stack
369*16467b97STreehugger Robot 			// so this EOF is the 'real' logical EOF for
370*16467b97STreehugger Robot 			// the input stream. So we just exit the loop and
371*16467b97STreehugger Robot 			// return the EOF we have found.
372*16467b97STreehugger Robot 			//
373*16467b97STreehugger Robot 			break;
374*16467b97STreehugger Robot 		}
375*16467b97STreehugger Robot 
376*16467b97STreehugger Robot 	}
377*16467b97STreehugger Robot 
378*16467b97STreehugger Robot 	// return whatever token we have, which may be EOF
379*16467b97STreehugger Robot 	//
380*16467b97STreehugger Robot 	return  tok;
381*16467b97STreehugger Robot }
382*16467b97STreehugger Robot 
383*16467b97STreehugger Robot ANTLR3_API pANTLR3_LEXER
antlr3LexerNewStream(ANTLR3_UINT32 sizeHint,pANTLR3_INPUT_STREAM input,pANTLR3_RECOGNIZER_SHARED_STATE state)384*16467b97STreehugger Robot antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
385*16467b97STreehugger Robot {
386*16467b97STreehugger Robot     pANTLR3_LEXER   lexer;
387*16467b97STreehugger Robot 
388*16467b97STreehugger Robot     // Create a basic lexer first
389*16467b97STreehugger Robot     //
390*16467b97STreehugger Robot     lexer   = antlr3LexerNew(sizeHint, state);
391*16467b97STreehugger Robot 
392*16467b97STreehugger Robot     if	(lexer != NULL)
393*16467b97STreehugger Robot     {
394*16467b97STreehugger Robot 		// Install the input stream and reset the lexer
395*16467b97STreehugger Robot 		//
396*16467b97STreehugger Robot 		setCharStream(lexer, input);
397*16467b97STreehugger Robot     }
398*16467b97STreehugger Robot 
399*16467b97STreehugger Robot     return  lexer;
400*16467b97STreehugger Robot }
401*16467b97STreehugger Robot 
/**
 * \brief
 * Placeholder mTokens() installed by antlr3LexerNew().
 *
 * It matches nothing; for a non-NULL argument it only reports on stderr
 * that no lexer rules have been added yet.
 *
 * \param lexer
 * Only tested against NULL, which also keeps the compiler from warning
 * about an unused parameter.
 */
static void
mTokens(pANTLR3_LEXER lexer)
{
    if  (lexer == NULL)
    {
        return;
    }

    ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
}
409*16467b97STreehugger Robot 
410*16467b97STreehugger Robot static void
reportError(pANTLR3_BASE_RECOGNIZER rec)411*16467b97STreehugger Robot reportError		    (pANTLR3_BASE_RECOGNIZER rec)
412*16467b97STreehugger Robot {
413*16467b97STreehugger Robot     // Indicate this recognizer had an error while processing.
414*16467b97STreehugger Robot 	//
415*16467b97STreehugger Robot 	rec->state->errorCount++;
416*16467b97STreehugger Robot 
417*16467b97STreehugger Robot     rec->displayRecognitionError(rec, rec->state->tokenNames);
418*16467b97STreehugger Robot }
419*16467b97STreehugger Robot 
420*16467b97STreehugger Robot #ifdef	ANTLR3_WINDOWS
421*16467b97STreehugger Robot #pragma warning( disable : 4100 )
422*16467b97STreehugger Robot #endif
423*16467b97STreehugger Robot 
424*16467b97STreehugger Robot /** Default lexer error handler (works for 8 bit streams only!!!)
425*16467b97STreehugger Robot  */
426*16467b97STreehugger Robot static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)427*16467b97STreehugger Robot displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
428*16467b97STreehugger Robot {
429*16467b97STreehugger Robot     pANTLR3_LEXER			lexer;
430*16467b97STreehugger Robot 	pANTLR3_EXCEPTION	    ex;
431*16467b97STreehugger Robot 	pANTLR3_STRING			ftext;
432*16467b97STreehugger Robot 
433*16467b97STreehugger Robot     lexer   = (pANTLR3_LEXER)(recognizer->super);
434*16467b97STreehugger Robot 	ex		= lexer->rec->state->exception;
435*16467b97STreehugger Robot 
436*16467b97STreehugger Robot 	// See if there is a 'filename' we can use
437*16467b97STreehugger Robot     //
438*16467b97STreehugger Robot     if	(ex->name == NULL)
439*16467b97STreehugger Robot     {
440*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "-unknown source-(");
441*16467b97STreehugger Robot     }
442*16467b97STreehugger Robot     else
443*16467b97STreehugger Robot     {
444*16467b97STreehugger Robot 		ftext = ex->streamName->to8(ex->streamName);
445*16467b97STreehugger Robot 		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
446*16467b97STreehugger Robot     }
447*16467b97STreehugger Robot 
448*16467b97STreehugger Robot     ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
449*16467b97STreehugger Robot     ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
450*16467b97STreehugger Robot 						ex->type,
451*16467b97STreehugger Robot 						(pANTLR3_UINT8)	   (ex->message),
452*16467b97STreehugger Robot 					    ex->charPositionInLine+1
453*16467b97STreehugger Robot 		    );
454*16467b97STreehugger Robot 	{
455*16467b97STreehugger Robot 		ANTLR3_INT32	width;
456*16467b97STreehugger Robot 
457*16467b97STreehugger Robot 		width	= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
458*16467b97STreehugger Robot 
459*16467b97STreehugger Robot 		if	(width >= 1)
460*16467b97STreehugger Robot 		{
461*16467b97STreehugger Robot 			if	(isprint(ex->c))
462*16467b97STreehugger Robot 			{
463*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
464*16467b97STreehugger Robot 			}
465*16467b97STreehugger Robot 			else
466*16467b97STreehugger Robot 			{
467*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
468*16467b97STreehugger Robot 			}
469*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
470*16467b97STreehugger Robot 		}
471*16467b97STreehugger Robot 		else
472*16467b97STreehugger Robot 		{
473*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
474*16467b97STreehugger Robot 			ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
475*16467b97STreehugger Robot 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
476*16467b97STreehugger Robot 								(ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
477*16467b97STreehugger Robot 								);
478*16467b97STreehugger Robot 			width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
479*16467b97STreehugger Robot 
480*16467b97STreehugger Robot 			if	(width >= 1)
481*16467b97STreehugger Robot 			{
482*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
483*16467b97STreehugger Robot 			}
484*16467b97STreehugger Robot 			else
485*16467b97STreehugger Robot 			{
486*16467b97STreehugger Robot 				ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
487*16467b97STreehugger Robot 			}
488*16467b97STreehugger Robot 		}
489*16467b97STreehugger Robot 	}
490*16467b97STreehugger Robot }
491*16467b97STreehugger Robot 
/**
 * Install a character stream as the lexer's current input and reset the
 * per-token state so that nothing refers to a previously installed stream.
 *
 * \param lexer Lexer whose input stream is being set.
 * \param input Stream to read from; it is dereferenced unconditionally.
 */
static void setCharStream   (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
{
	lexer->input = input;

	/* An existing token factory is never destroyed here (only when the lexer
	 * is freed) because tokens it already produced may still be in use.
	 * TODO: a dup() method on tokens would let them detach from the factory.
	 */
	if (lexer->rec->state->tokFactory != NULL)
	{
		/* Input is being switched on the fly: re-point the existing factory
		 * at the new stream so that new tokens take their text from it.
		 */
		lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
	}
	else
	{
		/* First stream for this lexer: create the factory now. */
		lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input);
	}

	/* Adopt the stream's string factory, but only if the token source does
	 * not already have one; this keeps the encoding of the input stream.
	 */
	if (lexer->rec->state->tokSource->strFactory == NULL)
	{
		lexer->rec->state->tokSource->strFactory = input->strFactory;

		/* The pre-made EOF token needs the same string factory. */
		if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
		{
			lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
		}
	}

	/* Lexers build their exceptions with the recognition exception creator. */
	lexer->rec->exConstruct = antlr3RecognitionExceptionNew;

	/* No token is in progress after a stream change. */
	lexer->rec->state->token               = NULL;
	lexer->rec->state->text                = NULL;
	lexer->rec->state->tokenStartCharIndex = -1;

	/* The token source reports the file name of the stream it reads. */
	lexer->rec->state->tokSource->fileName = input->fileName;
}
548*16467b97STreehugger Robot 
549*16467b97STreehugger Robot /*!
550*16467b97STreehugger Robot  * \brief
551*16467b97STreehugger Robot  * Change to a new input stream, remembering the old one.
552*16467b97STreehugger Robot  *
553*16467b97STreehugger Robot  * \param lexer
554*16467b97STreehugger Robot  * Pointer to the lexer instance to switch input streams for.
555*16467b97STreehugger Robot  *
556*16467b97STreehugger Robot  * \param input
557*16467b97STreehugger Robot  * New input stream to install as the current one.
558*16467b97STreehugger Robot  *
559*16467b97STreehugger Robot  * Switches the current character input stream to
560*16467b97STreehugger Robot  * a new one, saving the old one, which we will revert to at the end of this
561*16467b97STreehugger Robot  * new one.
562*16467b97STreehugger Robot  */
563*16467b97STreehugger Robot static void
pushCharStream(pANTLR3_LEXER lexer,pANTLR3_INPUT_STREAM input)564*16467b97STreehugger Robot pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
565*16467b97STreehugger Robot {
566*16467b97STreehugger Robot 	// Do we need a new input stream stack?
567*16467b97STreehugger Robot 	//
568*16467b97STreehugger Robot 	if	(lexer->rec->state->streams == NULL)
569*16467b97STreehugger Robot 	{
570*16467b97STreehugger Robot 		// This is the first call to stack a new
571*16467b97STreehugger Robot 		// stream and so we must create the stack first.
572*16467b97STreehugger Robot 		//
573*16467b97STreehugger Robot 		lexer->rec->state->streams = antlr3StackNew(0);
574*16467b97STreehugger Robot 
575*16467b97STreehugger Robot 		if  (lexer->rec->state->streams == NULL)
576*16467b97STreehugger Robot 		{
577*16467b97STreehugger Robot 			// Could not do this, we just fail to push it.
578*16467b97STreehugger Robot 			// TODO: Consider if this is what we want to do, but then
579*16467b97STreehugger Robot 			//       any programmer can override this method to do something else.
580*16467b97STreehugger Robot 			return;
581*16467b97STreehugger Robot 		}
582*16467b97STreehugger Robot 	}
583*16467b97STreehugger Robot 
584*16467b97STreehugger Robot 	// We have a stack, so we can save the current input stream
585*16467b97STreehugger Robot 	// into it.
586*16467b97STreehugger Robot 	//
587*16467b97STreehugger Robot 	lexer->input->istream->mark(lexer->input->istream);
588*16467b97STreehugger Robot 	lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
589*16467b97STreehugger Robot 
590*16467b97STreehugger Robot 	// And now we can install this new one
591*16467b97STreehugger Robot 	//
592*16467b97STreehugger Robot 	lexer->setCharStream(lexer, input);
593*16467b97STreehugger Robot }
594*16467b97STreehugger Robot 
595*16467b97STreehugger Robot /*!
596*16467b97STreehugger Robot  * \brief
597*16467b97STreehugger Robot  * Stops using the current input stream and reverts to any prior
598*16467b97STreehugger Robot  * input stream on the stack.
599*16467b97STreehugger Robot  *
600*16467b97STreehugger Robot  * \param lexer
601*16467b97STreehugger Robot  * Description of parameter lexer.
602*16467b97STreehugger Robot  *
603*16467b97STreehugger Robot  * Pointer to a function that abandons the current input stream, whether it
604*16467b97STreehugger Robot  * is empty or not and reverts to the previous stacked input stream.
605*16467b97STreehugger Robot  *
606*16467b97STreehugger Robot  * \remark
607*16467b97STreehugger Robot  * The function fails silently if there are no prior input streams.
608*16467b97STreehugger Robot  */
609*16467b97STreehugger Robot static void
popCharStream(pANTLR3_LEXER lexer)610*16467b97STreehugger Robot popCharStream   (pANTLR3_LEXER lexer)
611*16467b97STreehugger Robot {
612*16467b97STreehugger Robot     pANTLR3_INPUT_STREAM input;
613*16467b97STreehugger Robot 
614*16467b97STreehugger Robot     // If we do not have a stream stack or we are already at the
615*16467b97STreehugger Robot     // stack bottom, then do nothing.
616*16467b97STreehugger Robot     //
617*16467b97STreehugger Robot     if	(lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
618*16467b97STreehugger Robot     {
619*16467b97STreehugger Robot 	// We just leave the current stream to its fate, we do not close
620*16467b97STreehugger Robot 	// it or anything as we do not know what the programmer intended
621*16467b97STreehugger Robot 	// for it. This method can always be overridden of course.
622*16467b97STreehugger Robot 	// So just find out what was currently saved on the stack and use
623*16467b97STreehugger Robot 	// that now, then pop it from the stack.
624*16467b97STreehugger Robot 	//
625*16467b97STreehugger Robot 	input	= (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
626*16467b97STreehugger Robot 	lexer->rec->state->streams->pop(lexer->rec->state->streams);
627*16467b97STreehugger Robot 
628*16467b97STreehugger Robot 	// Now install the stream as the current one.
629*16467b97STreehugger Robot 	//
630*16467b97STreehugger Robot 	lexer->setCharStream(lexer, input);
631*16467b97STreehugger Robot 	lexer->input->istream->rewindLast(lexer->input->istream);
632*16467b97STreehugger Robot     }
633*16467b97STreehugger Robot     return;
634*16467b97STreehugger Robot }
635*16467b97STreehugger Robot 
/**
 * Record a caller-supplied token as the lexer's current token.
 *
 * \param lexer Lexer whose shared state receives the token.
 * \param token Token to store; it is stored as given, without inspection.
 */
static void
emitNew(pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token)
{
	lexer->rec->state->token = token;
}
640*16467b97STreehugger Robot 
641*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN
emit(pANTLR3_LEXER lexer)642*16467b97STreehugger Robot emit	    (pANTLR3_LEXER lexer)
643*16467b97STreehugger Robot {
644*16467b97STreehugger Robot     pANTLR3_COMMON_TOKEN	token;
645*16467b97STreehugger Robot 
646*16467b97STreehugger Robot     /* We could check pointers to token factories and so on, but
647*16467b97STreehugger Robot     * we are in code that we want to run as fast as possible
648*16467b97STreehugger Robot     * so we are not checking any errors. So make sure you have installed an input stream before
649*16467b97STreehugger Robot     * trying to emit a new token.
650*16467b97STreehugger Robot     */
651*16467b97STreehugger Robot     token   = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
652*16467b97STreehugger Robot 	if (token == NULL) { return NULL; }
653*16467b97STreehugger Robot 
654*16467b97STreehugger Robot     /* Install the supplied information, and some other bits we already know
655*16467b97STreehugger Robot     * get added automatically, such as the input stream it is associated with
656*16467b97STreehugger Robot     * (though it can all be overridden of course)
657*16467b97STreehugger Robot     */
658*16467b97STreehugger Robot     token->type		    = lexer->rec->state->type;
659*16467b97STreehugger Robot     token->channel	    = lexer->rec->state->channel;
660*16467b97STreehugger Robot     token->start	    = lexer->rec->state->tokenStartCharIndex;
661*16467b97STreehugger Robot     token->stop		    = lexer->getCharIndex(lexer) - 1;
662*16467b97STreehugger Robot     token->line		    = lexer->rec->state->tokenStartLine;
663*16467b97STreehugger Robot     token->charPosition	= lexer->rec->state->tokenStartCharPositionInLine;
664*16467b97STreehugger Robot 
665*16467b97STreehugger Robot     if	(lexer->rec->state->text != NULL)
666*16467b97STreehugger Robot     {
667*16467b97STreehugger Robot         token->textState	    = ANTLR3_TEXT_STRING;
668*16467b97STreehugger Robot         token->tokText.text	    = lexer->rec->state->text;
669*16467b97STreehugger Robot     }
670*16467b97STreehugger Robot     else
671*16467b97STreehugger Robot     {
672*16467b97STreehugger Robot         token->textState	= ANTLR3_TEXT_NONE;
673*16467b97STreehugger Robot     }
674*16467b97STreehugger Robot     token->lineStart	= lexer->input->currentLine;
675*16467b97STreehugger Robot     token->user1	= lexer->rec->state->user1;
676*16467b97STreehugger Robot     token->user2	= lexer->rec->state->user2;
677*16467b97STreehugger Robot     token->user3	= lexer->rec->state->user3;
678*16467b97STreehugger Robot     token->custom	= lexer->rec->state->custom;
679*16467b97STreehugger Robot 
680*16467b97STreehugger Robot     lexer->rec->state->token	    = token;
681*16467b97STreehugger Robot 
682*16467b97STreehugger Robot     return  token;
683*16467b97STreehugger Robot }
684*16467b97STreehugger Robot 
685*16467b97STreehugger Robot /**
686*16467b97STreehugger Robot  * Free the resources allocated by a lexer
687*16467b97STreehugger Robot  */
688*16467b97STreehugger Robot static void
freeLexer(pANTLR3_LEXER lexer)689*16467b97STreehugger Robot freeLexer    (pANTLR3_LEXER lexer)
690*16467b97STreehugger Robot {
691*16467b97STreehugger Robot 	// This may have ben a delegate or delegator lexer, in which case the
692*16467b97STreehugger Robot 	// state may already have been freed (and set to NULL therefore)
693*16467b97STreehugger Robot 	// so we ignore the state if we don't have it.
694*16467b97STreehugger Robot 	//
695*16467b97STreehugger Robot 	if	(lexer->rec->state != NULL)
696*16467b97STreehugger Robot 	{
697*16467b97STreehugger Robot 		if	(lexer->rec->state->streams != NULL)
698*16467b97STreehugger Robot 		{
699*16467b97STreehugger Robot 			lexer->rec->state->streams->free(lexer->rec->state->streams);
700*16467b97STreehugger Robot 		}
701*16467b97STreehugger Robot 		if	(lexer->rec->state->tokFactory != NULL)
702*16467b97STreehugger Robot 		{
703*16467b97STreehugger Robot 			lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
704*16467b97STreehugger Robot 			lexer->rec->state->tokFactory = NULL;
705*16467b97STreehugger Robot 		}
706*16467b97STreehugger Robot 		if	(lexer->rec->state->tokSource != NULL)
707*16467b97STreehugger Robot 		{
708*16467b97STreehugger Robot 			ANTLR3_FREE(lexer->rec->state->tokSource);
709*16467b97STreehugger Robot 			lexer->rec->state->tokSource = NULL;
710*16467b97STreehugger Robot 		}
711*16467b97STreehugger Robot 	}
712*16467b97STreehugger Robot 	if	(lexer->rec != NULL)
713*16467b97STreehugger Robot 	{
714*16467b97STreehugger Robot 		lexer->rec->free(lexer->rec);
715*16467b97STreehugger Robot 		lexer->rec = NULL;
716*16467b97STreehugger Robot 	}
717*16467b97STreehugger Robot 	ANTLR3_FREE(lexer);
718*16467b97STreehugger Robot }
719*16467b97STreehugger Robot 
720*16467b97STreehugger Robot /** Implementation of matchs for the lexer, overrides any
721*16467b97STreehugger Robot  *  base implementation in the base recognizer.
722*16467b97STreehugger Robot  *
723*16467b97STreehugger Robot  *  \remark
724*16467b97STreehugger Robot  *  Note that the generated code lays down arrays of ints for constant
725*16467b97STreehugger Robot  *  strings so that they are int UTF32 form!
726*16467b97STreehugger Robot  */
727*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchs(pANTLR3_LEXER lexer,ANTLR3_UCHAR * string)728*16467b97STreehugger Robot matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
729*16467b97STreehugger Robot {
730*16467b97STreehugger Robot 	while   (*string != ANTLR3_STRING_TERMINATOR)
731*16467b97STreehugger Robot 	{
732*16467b97STreehugger Robot 		if  (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
733*16467b97STreehugger Robot 		{
734*16467b97STreehugger Robot 			if	(lexer->rec->state->backtracking > 0)
735*16467b97STreehugger Robot 			{
736*16467b97STreehugger Robot 				lexer->rec->state->failed = ANTLR3_TRUE;
737*16467b97STreehugger Robot 				return ANTLR3_FALSE;
738*16467b97STreehugger Robot 			}
739*16467b97STreehugger Robot 
740*16467b97STreehugger Robot 			lexer->rec->exConstruct(lexer->rec);
741*16467b97STreehugger Robot 			lexer->rec->state->failed	 = ANTLR3_TRUE;
742*16467b97STreehugger Robot 
743*16467b97STreehugger Robot 			/* TODO: Implement exception creation more fully perhaps
744*16467b97STreehugger Robot 			 */
745*16467b97STreehugger Robot 			lexer->recover(lexer);
746*16467b97STreehugger Robot 			return  ANTLR3_FALSE;
747*16467b97STreehugger Robot 		}
748*16467b97STreehugger Robot 
749*16467b97STreehugger Robot 		/* Matched correctly, do consume it
750*16467b97STreehugger Robot 		 */
751*16467b97STreehugger Robot 		lexer->input->istream->consume(lexer->input->istream);
752*16467b97STreehugger Robot 		string++;
753*16467b97STreehugger Robot 
754*16467b97STreehugger Robot 		/* Reset any failed indicator
755*16467b97STreehugger Robot 		 */
756*16467b97STreehugger Robot 		lexer->rec->state->failed = ANTLR3_FALSE;
757*16467b97STreehugger Robot 	}
758*16467b97STreehugger Robot 
759*16467b97STreehugger Robot 
760*16467b97STreehugger Robot 	return  ANTLR3_TRUE;
761*16467b97STreehugger Robot }
762*16467b97STreehugger Robot 
763*16467b97STreehugger Robot /** Implementation of matchc for the lexer, overrides any
764*16467b97STreehugger Robot  *  base implementation in the base recognizer.
765*16467b97STreehugger Robot  *
766*16467b97STreehugger Robot  *  \remark
767*16467b97STreehugger Robot  *  Note that the generated code lays down arrays of ints for constant
768*16467b97STreehugger Robot  *  strings so that they are int UTF32 form!
769*16467b97STreehugger Robot  */
770*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchc(pANTLR3_LEXER lexer,ANTLR3_UCHAR c)771*16467b97STreehugger Robot matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
772*16467b97STreehugger Robot {
773*16467b97STreehugger Robot 	if	(lexer->input->istream->_LA(lexer->input->istream, 1) == c)
774*16467b97STreehugger Robot 	{
775*16467b97STreehugger Robot 		/* Matched correctly, do consume it
776*16467b97STreehugger Robot 		 */
777*16467b97STreehugger Robot 		lexer->input->istream->consume(lexer->input->istream);
778*16467b97STreehugger Robot 
779*16467b97STreehugger Robot 		/* Reset any failed indicator
780*16467b97STreehugger Robot 		 */
781*16467b97STreehugger Robot 		lexer->rec->state->failed = ANTLR3_FALSE;
782*16467b97STreehugger Robot 
783*16467b97STreehugger Robot 		return	ANTLR3_TRUE;
784*16467b97STreehugger Robot 	}
785*16467b97STreehugger Robot 
786*16467b97STreehugger Robot 	/* Failed to match, exception and recovery time.
787*16467b97STreehugger Robot 	 */
788*16467b97STreehugger Robot 	if	(lexer->rec->state->backtracking > 0)
789*16467b97STreehugger Robot 	{
790*16467b97STreehugger Robot 		lexer->rec->state->failed  = ANTLR3_TRUE;
791*16467b97STreehugger Robot 		return	ANTLR3_FALSE;
792*16467b97STreehugger Robot 	}
793*16467b97STreehugger Robot 
794*16467b97STreehugger Robot 	lexer->rec->exConstruct(lexer->rec);
795*16467b97STreehugger Robot 
796*16467b97STreehugger Robot 	/* TODO: Implement exception creation more fully perhaps
797*16467b97STreehugger Robot 	 */
798*16467b97STreehugger Robot 	lexer->recover(lexer);
799*16467b97STreehugger Robot 
800*16467b97STreehugger Robot 	return  ANTLR3_FALSE;
801*16467b97STreehugger Robot }
802*16467b97STreehugger Robot 
803*16467b97STreehugger Robot /** Implementation of match range for the lexer, overrides any
804*16467b97STreehugger Robot  *  base implementation in the base recognizer.
805*16467b97STreehugger Robot  *
806*16467b97STreehugger Robot  *  \remark
807*16467b97STreehugger Robot  *  Note that the generated code lays down arrays of ints for constant
808*16467b97STreehugger Robot  *  strings so that they are int UTF32 form!
809*16467b97STreehugger Robot  */
810*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchRange(pANTLR3_LEXER lexer,ANTLR3_UCHAR low,ANTLR3_UCHAR high)811*16467b97STreehugger Robot matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
812*16467b97STreehugger Robot {
813*16467b97STreehugger Robot     ANTLR3_UCHAR    c;
814*16467b97STreehugger Robot 
815*16467b97STreehugger Robot     /* What is in the stream at the moment?
816*16467b97STreehugger Robot      */
817*16467b97STreehugger Robot     c	= lexer->input->istream->_LA(lexer->input->istream, 1);
818*16467b97STreehugger Robot     if	( c >= low && c <= high)
819*16467b97STreehugger Robot     {
820*16467b97STreehugger Robot 	/* Matched correctly, consume it
821*16467b97STreehugger Robot 	 */
822*16467b97STreehugger Robot 	lexer->input->istream->consume(lexer->input->istream);
823*16467b97STreehugger Robot 
824*16467b97STreehugger Robot 	/* Reset any failed indicator
825*16467b97STreehugger Robot 	 */
826*16467b97STreehugger Robot 	lexer->rec->state->failed = ANTLR3_FALSE;
827*16467b97STreehugger Robot 
828*16467b97STreehugger Robot 	return	ANTLR3_TRUE;
829*16467b97STreehugger Robot     }
830*16467b97STreehugger Robot 
831*16467b97STreehugger Robot     /* Failed to match, execption and recovery time.
832*16467b97STreehugger Robot      */
833*16467b97STreehugger Robot 
834*16467b97STreehugger Robot     if	(lexer->rec->state->backtracking > 0)
835*16467b97STreehugger Robot     {
836*16467b97STreehugger Robot 	lexer->rec->state->failed  = ANTLR3_TRUE;
837*16467b97STreehugger Robot 	return	ANTLR3_FALSE;
838*16467b97STreehugger Robot     }
839*16467b97STreehugger Robot 
840*16467b97STreehugger Robot     lexer->rec->exConstruct(lexer->rec);
841*16467b97STreehugger Robot 
842*16467b97STreehugger Robot     /* TODO: Implement exception creation more fully
843*16467b97STreehugger Robot      */
844*16467b97STreehugger Robot     lexer->recover(lexer);
845*16467b97STreehugger Robot 
846*16467b97STreehugger Robot     return  ANTLR3_FALSE;
847*16467b97STreehugger Robot }
848*16467b97STreehugger Robot 
849*16467b97STreehugger Robot static void
matchAny(pANTLR3_LEXER lexer)850*16467b97STreehugger Robot matchAny	    (pANTLR3_LEXER lexer)
851*16467b97STreehugger Robot {
852*16467b97STreehugger Robot     lexer->input->istream->consume(lexer->input->istream);
853*16467b97STreehugger Robot }
854*16467b97STreehugger Robot 
855*16467b97STreehugger Robot static void
recover(pANTLR3_LEXER lexer)856*16467b97STreehugger Robot recover	    (pANTLR3_LEXER lexer)
857*16467b97STreehugger Robot {
858*16467b97STreehugger Robot     lexer->input->istream->consume(lexer->input->istream);
859*16467b97STreehugger Robot }
860*16467b97STreehugger Robot 
861*16467b97STreehugger Robot static ANTLR3_UINT32
getLine(pANTLR3_LEXER lexer)862*16467b97STreehugger Robot getLine	    (pANTLR3_LEXER lexer)
863*16467b97STreehugger Robot {
864*16467b97STreehugger Robot     return  lexer->input->getLine(lexer->input);
865*16467b97STreehugger Robot }
866*16467b97STreehugger Robot 
867*16467b97STreehugger Robot static ANTLR3_UINT32
getCharPositionInLine(pANTLR3_LEXER lexer)868*16467b97STreehugger Robot getCharPositionInLine	(pANTLR3_LEXER lexer)
869*16467b97STreehugger Robot {
870*16467b97STreehugger Robot     return  lexer->input->charPositionInLine;
871*16467b97STreehugger Robot }
872*16467b97STreehugger Robot 
getCharIndex(pANTLR3_LEXER lexer)873*16467b97STreehugger Robot static ANTLR3_MARKER	getCharIndex	    (pANTLR3_LEXER lexer)
874*16467b97STreehugger Robot {
875*16467b97STreehugger Robot     return lexer->input->istream->index(lexer->input->istream);
876*16467b97STreehugger Robot }
877*16467b97STreehugger Robot 
878*16467b97STreehugger Robot static pANTLR3_STRING
getText(pANTLR3_LEXER lexer)879*16467b97STreehugger Robot getText	    (pANTLR3_LEXER lexer)
880*16467b97STreehugger Robot {
881*16467b97STreehugger Robot 	if (lexer->rec->state->text)
882*16467b97STreehugger Robot 	{
883*16467b97STreehugger Robot 		return	lexer->rec->state->text;
884*16467b97STreehugger Robot 
885*16467b97STreehugger Robot 	}
886*16467b97STreehugger Robot 	return  lexer->input->substr(
887*16467b97STreehugger Robot 									lexer->input,
888*16467b97STreehugger Robot 									lexer->rec->state->tokenStartCharIndex,
889*16467b97STreehugger Robot 									lexer->getCharIndex(lexer) - lexer->input->charByteSize
890*16467b97STreehugger Robot 							);
891*16467b97STreehugger Robot 
892*16467b97STreehugger Robot }
893*16467b97STreehugger Robot 
894*16467b97STreehugger Robot static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)895*16467b97STreehugger Robot getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
896*16467b97STreehugger Robot {
897*16467b97STreehugger Robot 	return NULL;
898*16467b97STreehugger Robot }
899*16467b97STreehugger Robot 
900*16467b97STreehugger Robot static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)901*16467b97STreehugger Robot getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
902*16467b97STreehugger Robot 									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
903*16467b97STreehugger Robot {
904*16467b97STreehugger Robot 	return NULL;
905*16467b97STreehugger Robot }
906