/** \file
 *
 * Base implementation of an antlr 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are lexer specific (the
 * rest keep the usual default implementation of the base recognizer).
 */
10*16467b97STreehugger Robot
11*16467b97STreehugger Robot // [The "BSD licence"]
12*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13*16467b97STreehugger Robot // http://www.temporal-wave.com
14*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
15*16467b97STreehugger Robot //
16*16467b97STreehugger Robot // All rights reserved.
17*16467b97STreehugger Robot //
18*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
19*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
20*16467b97STreehugger Robot // are met:
21*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
22*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer.
23*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
24*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the
25*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution.
26*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
27*16467b97STreehugger Robot // derived from this software without specific prior written permission.
28*16467b97STreehugger Robot //
29*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39*16467b97STreehugger Robot
40*16467b97STreehugger Robot #include <antlr3lexer.h>
41*16467b97STreehugger Robot
42*16467b97STreehugger Robot static void mTokens (pANTLR3_LEXER lexer);
43*16467b97STreehugger Robot static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input);
44*16467b97STreehugger Robot static void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input);
45*16467b97STreehugger Robot static void popCharStream (pANTLR3_LEXER lexer);
46*16467b97STreehugger Robot
47*16467b97STreehugger Robot static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token);
48*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer);
49*16467b97STreehugger Robot static ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
50*16467b97STreehugger Robot static ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
51*16467b97STreehugger Robot static ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
52*16467b97STreehugger Robot static void matchAny (pANTLR3_LEXER lexer);
53*16467b97STreehugger Robot static void recover (pANTLR3_LEXER lexer);
54*16467b97STreehugger Robot static ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer);
55*16467b97STreehugger Robot static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer);
56*16467b97STreehugger Robot static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer);
57*16467b97STreehugger Robot static pANTLR3_STRING getText (pANTLR3_LEXER lexer);
58*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource);
59*16467b97STreehugger Robot
60*16467b97STreehugger Robot static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
61*16467b97STreehugger Robot static void reportError (pANTLR3_BASE_RECOGNIZER rec);
62*16467b97STreehugger Robot static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
63*16467b97STreehugger Robot static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
64*16467b97STreehugger Robot ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
65*16467b97STreehugger Robot
66*16467b97STreehugger Robot static void reset (pANTLR3_BASE_RECOGNIZER rec);
67*16467b97STreehugger Robot
68*16467b97STreehugger Robot static void freeLexer (pANTLR3_LEXER lexer);
69*16467b97STreehugger Robot
70*16467b97STreehugger Robot
71*16467b97STreehugger Robot ANTLR3_API pANTLR3_LEXER
antlr3LexerNew(ANTLR3_UINT32 sizeHint,pANTLR3_RECOGNIZER_SHARED_STATE state)72*16467b97STreehugger Robot antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73*16467b97STreehugger Robot {
74*16467b97STreehugger Robot pANTLR3_LEXER lexer;
75*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN specialT;
76*16467b97STreehugger Robot
77*16467b97STreehugger Robot /* Allocate memory
78*16467b97STreehugger Robot */
79*16467b97STreehugger Robot lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80*16467b97STreehugger Robot
81*16467b97STreehugger Robot if (lexer == NULL)
82*16467b97STreehugger Robot {
83*16467b97STreehugger Robot return NULL;
84*16467b97STreehugger Robot }
85*16467b97STreehugger Robot
86*16467b97STreehugger Robot /* Now we need to create the base recognizer
87*16467b97STreehugger Robot */
88*16467b97STreehugger Robot lexer->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89*16467b97STreehugger Robot
90*16467b97STreehugger Robot if (lexer->rec == NULL)
91*16467b97STreehugger Robot {
92*16467b97STreehugger Robot lexer->free(lexer);
93*16467b97STreehugger Robot return NULL;
94*16467b97STreehugger Robot }
95*16467b97STreehugger Robot lexer->rec->super = lexer;
96*16467b97STreehugger Robot
97*16467b97STreehugger Robot lexer->rec->displayRecognitionError = displayRecognitionError;
98*16467b97STreehugger Robot lexer->rec->reportError = reportError;
99*16467b97STreehugger Robot lexer->rec->reset = reset;
100*16467b97STreehugger Robot lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol;
101*16467b97STreehugger Robot lexer->rec->getMissingSymbol = getMissingSymbol;
102*16467b97STreehugger Robot
103*16467b97STreehugger Robot /* Now install the token source interface
104*16467b97STreehugger Robot */
105*16467b97STreehugger Robot if (lexer->rec->state->tokSource == NULL)
106*16467b97STreehugger Robot {
107*16467b97STreehugger Robot lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
108*16467b97STreehugger Robot
109*16467b97STreehugger Robot if (lexer->rec->state->tokSource == NULL)
110*16467b97STreehugger Robot {
111*16467b97STreehugger Robot lexer->rec->free(lexer->rec);
112*16467b97STreehugger Robot lexer->free(lexer);
113*16467b97STreehugger Robot
114*16467b97STreehugger Robot return NULL;
115*16467b97STreehugger Robot }
116*16467b97STreehugger Robot lexer->rec->state->tokSource->super = lexer;
117*16467b97STreehugger Robot
118*16467b97STreehugger Robot /* Install the default nextToken() method, which may be overridden
119*16467b97STreehugger Robot * by generated code, or by anything else in fact.
120*16467b97STreehugger Robot */
121*16467b97STreehugger Robot lexer->rec->state->tokSource->nextToken = nextToken;
122*16467b97STreehugger Robot lexer->rec->state->tokSource->strFactory = NULL;
123*16467b97STreehugger Robot
124*16467b97STreehugger Robot lexer->rec->state->tokFactory = NULL;
125*16467b97STreehugger Robot }
126*16467b97STreehugger Robot
127*16467b97STreehugger Robot /* Install the lexer API
128*16467b97STreehugger Robot */
129*16467b97STreehugger Robot lexer->setCharStream = setCharStream;
130*16467b97STreehugger Robot lexer->mTokens = (void (*)(void *))(mTokens);
131*16467b97STreehugger Robot lexer->setCharStream = setCharStream;
132*16467b97STreehugger Robot lexer->pushCharStream = pushCharStream;
133*16467b97STreehugger Robot lexer->popCharStream = popCharStream;
134*16467b97STreehugger Robot lexer->emit = emit;
135*16467b97STreehugger Robot lexer->emitNew = emitNew;
136*16467b97STreehugger Robot lexer->matchs = matchs;
137*16467b97STreehugger Robot lexer->matchc = matchc;
138*16467b97STreehugger Robot lexer->matchRange = matchRange;
139*16467b97STreehugger Robot lexer->matchAny = matchAny;
140*16467b97STreehugger Robot lexer->recover = recover;
141*16467b97STreehugger Robot lexer->getLine = getLine;
142*16467b97STreehugger Robot lexer->getCharIndex = getCharIndex;
143*16467b97STreehugger Robot lexer->getCharPositionInLine = getCharPositionInLine;
144*16467b97STreehugger Robot lexer->getText = getText;
145*16467b97STreehugger Robot lexer->free = freeLexer;
146*16467b97STreehugger Robot
147*16467b97STreehugger Robot /* Initialise the eof token
148*16467b97STreehugger Robot */
149*16467b97STreehugger Robot specialT = &(lexer->rec->state->tokSource->eofToken);
150*16467b97STreehugger Robot antlr3SetTokenAPI (specialT);
151*16467b97STreehugger Robot specialT->setType (specialT, ANTLR3_TOKEN_EOF);
152*16467b97STreehugger Robot specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it
153*16467b97STreehugger Robot specialT->strFactory = NULL;
154*16467b97STreehugger Robot specialT->textState = ANTLR3_TEXT_NONE;
155*16467b97STreehugger Robot specialT->custom = NULL;
156*16467b97STreehugger Robot specialT->user1 = 0;
157*16467b97STreehugger Robot specialT->user2 = 0;
158*16467b97STreehugger Robot specialT->user3 = 0;
159*16467b97STreehugger Robot
160*16467b97STreehugger Robot // Initialize the skip token.
161*16467b97STreehugger Robot //
162*16467b97STreehugger Robot specialT = &(lexer->rec->state->tokSource->skipToken);
163*16467b97STreehugger Robot antlr3SetTokenAPI (specialT);
164*16467b97STreehugger Robot specialT->setType (specialT, ANTLR3_TOKEN_INVALID);
165*16467b97STreehugger Robot specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it
166*16467b97STreehugger Robot specialT->strFactory = NULL;
167*16467b97STreehugger Robot specialT->custom = NULL;
168*16467b97STreehugger Robot specialT->user1 = 0;
169*16467b97STreehugger Robot specialT->user2 = 0;
170*16467b97STreehugger Robot specialT->user3 = 0;
171*16467b97STreehugger Robot return lexer;
172*16467b97STreehugger Robot }
173*16467b97STreehugger Robot
174*16467b97STreehugger Robot static void
reset(pANTLR3_BASE_RECOGNIZER rec)175*16467b97STreehugger Robot reset (pANTLR3_BASE_RECOGNIZER rec)
176*16467b97STreehugger Robot {
177*16467b97STreehugger Robot pANTLR3_LEXER lexer;
178*16467b97STreehugger Robot
179*16467b97STreehugger Robot lexer = (pANTLR3_LEXER)rec->super;
180*16467b97STreehugger Robot
181*16467b97STreehugger Robot lexer->rec->state->token = NULL;
182*16467b97STreehugger Robot lexer->rec->state->type = ANTLR3_TOKEN_INVALID;
183*16467b97STreehugger Robot lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
184*16467b97STreehugger Robot lexer->rec->state->tokenStartCharIndex = -1;
185*16467b97STreehugger Robot lexer->rec->state->tokenStartCharPositionInLine = -1;
186*16467b97STreehugger Robot lexer->rec->state->tokenStartLine = -1;
187*16467b97STreehugger Robot
188*16467b97STreehugger Robot lexer->rec->state->text = NULL;
189*16467b97STreehugger Robot
190*16467b97STreehugger Robot // OK - that's all hunky dory, but we may well have had
191*16467b97STreehugger Robot // a token factory that needs a reset. Do that here
192*16467b97STreehugger Robot //
193*16467b97STreehugger Robot if (lexer->rec->state->tokFactory != NULL)
194*16467b97STreehugger Robot {
195*16467b97STreehugger Robot lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
196*16467b97STreehugger Robot }
197*16467b97STreehugger Robot }
198*16467b97STreehugger Robot
199*16467b97STreehugger Robot ///
200*16467b97STreehugger Robot /// \brief
201*16467b97STreehugger Robot /// Returns the next available token from the current input stream.
202*16467b97STreehugger Robot ///
203*16467b97STreehugger Robot /// \param toksource
204*16467b97STreehugger Robot /// Points to the implementation of a token source. The lexer is
205*16467b97STreehugger Robot /// addressed by the super structure pointer.
206*16467b97STreehugger Robot ///
207*16467b97STreehugger Robot /// \returns
208*16467b97STreehugger Robot /// The next token in the current input stream or the EOF token
209*16467b97STreehugger Robot /// if there are no more tokens.
210*16467b97STreehugger Robot ///
211*16467b97STreehugger Robot /// \remarks
212*16467b97STreehugger Robot /// Write remarks for nextToken here.
213*16467b97STreehugger Robot ///
214*16467b97STreehugger Robot /// \see nextToken
215*16467b97STreehugger Robot ///
216*16467b97STreehugger Robot ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
nextTokenStr(pANTLR3_TOKEN_SOURCE toksource)217*16467b97STreehugger Robot nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
218*16467b97STreehugger Robot {
219*16467b97STreehugger Robot pANTLR3_LEXER lexer;
220*16467b97STreehugger Robot pANTLR3_RECOGNIZER_SHARED_STATE state;
221*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
222*16467b97STreehugger Robot pANTLR3_INT_STREAM istream;
223*16467b97STreehugger Robot
224*16467b97STreehugger Robot lexer = (pANTLR3_LEXER)(toksource->super);
225*16467b97STreehugger Robot state = lexer->rec->state;
226*16467b97STreehugger Robot input = lexer->input;
227*16467b97STreehugger Robot istream = input->istream;
228*16467b97STreehugger Robot
229*16467b97STreehugger Robot /// Loop until we get a non skipped token or EOF
230*16467b97STreehugger Robot ///
231*16467b97STreehugger Robot for (;;)
232*16467b97STreehugger Robot {
233*16467b97STreehugger Robot // Get rid of any previous token (token factory takes care of
234*16467b97STreehugger Robot // any de-allocation when this token is finally used up.
235*16467b97STreehugger Robot //
236*16467b97STreehugger Robot state->token = NULL;
237*16467b97STreehugger Robot state->error = ANTLR3_FALSE; // Start out without an exception
238*16467b97STreehugger Robot state->failed = ANTLR3_FALSE;
239*16467b97STreehugger Robot
240*16467b97STreehugger Robot // Now call the matching rules and see if we can generate a new token
241*16467b97STreehugger Robot //
242*16467b97STreehugger Robot for (;;)
243*16467b97STreehugger Robot {
244*16467b97STreehugger Robot // Record the start of the token in our input stream.
245*16467b97STreehugger Robot //
246*16467b97STreehugger Robot state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
247*16467b97STreehugger Robot state->tokenStartCharIndex = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
248*16467b97STreehugger Robot state->tokenStartCharPositionInLine = input->charPositionInLine;
249*16467b97STreehugger Robot state->tokenStartLine = input->line;
250*16467b97STreehugger Robot state->text = NULL;
251*16467b97STreehugger Robot state->custom = NULL;
252*16467b97STreehugger Robot state->user1 = 0;
253*16467b97STreehugger Robot state->user2 = 0;
254*16467b97STreehugger Robot state->user3 = 0;
255*16467b97STreehugger Robot
256*16467b97STreehugger Robot if (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
257*16467b97STreehugger Robot {
258*16467b97STreehugger Robot // Reached the end of the current stream, nothing more to do if this is
259*16467b97STreehugger Robot // the last in the stack.
260*16467b97STreehugger Robot //
261*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken);
262*16467b97STreehugger Robot
263*16467b97STreehugger Robot teof->setStartIndex (teof, lexer->getCharIndex(lexer));
264*16467b97STreehugger Robot teof->setStopIndex (teof, lexer->getCharIndex(lexer));
265*16467b97STreehugger Robot teof->setLine (teof, lexer->getLine(lexer));
266*16467b97STreehugger Robot teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it
267*16467b97STreehugger Robot return teof;
268*16467b97STreehugger Robot }
269*16467b97STreehugger Robot
270*16467b97STreehugger Robot state->token = NULL;
271*16467b97STreehugger Robot state->error = ANTLR3_FALSE; // Start out without an exception
272*16467b97STreehugger Robot state->failed = ANTLR3_FALSE;
273*16467b97STreehugger Robot
274*16467b97STreehugger Robot // Call the generated lexer, see if it can get a new token together.
275*16467b97STreehugger Robot //
276*16467b97STreehugger Robot lexer->mTokens(lexer->ctx);
277*16467b97STreehugger Robot
278*16467b97STreehugger Robot if (state->error == ANTLR3_TRUE)
279*16467b97STreehugger Robot {
280*16467b97STreehugger Robot // Recognition exception, report it and try to recover.
281*16467b97STreehugger Robot //
282*16467b97STreehugger Robot state->failed = ANTLR3_TRUE;
283*16467b97STreehugger Robot lexer->rec->reportError(lexer->rec);
284*16467b97STreehugger Robot lexer->recover(lexer);
285*16467b97STreehugger Robot }
286*16467b97STreehugger Robot else
287*16467b97STreehugger Robot {
288*16467b97STreehugger Robot if (state->token == NULL)
289*16467b97STreehugger Robot {
290*16467b97STreehugger Robot // Emit the real token, which adds it in to the token stream basically
291*16467b97STreehugger Robot //
292*16467b97STreehugger Robot emit(lexer);
293*16467b97STreehugger Robot }
294*16467b97STreehugger Robot else if (state->token == &(toksource->skipToken))
295*16467b97STreehugger Robot {
296*16467b97STreehugger Robot // A real token could have been generated, but "Computer say's naaaaah" and it
297*16467b97STreehugger Robot // it is just something we need to skip altogether.
298*16467b97STreehugger Robot //
299*16467b97STreehugger Robot continue;
300*16467b97STreehugger Robot }
301*16467b97STreehugger Robot
302*16467b97STreehugger Robot // Good token, not skipped, not EOF token
303*16467b97STreehugger Robot //
304*16467b97STreehugger Robot return state->token;
305*16467b97STreehugger Robot }
306*16467b97STreehugger Robot }
307*16467b97STreehugger Robot }
308*16467b97STreehugger Robot }
309*16467b97STreehugger Robot
310*16467b97STreehugger Robot /**
311*16467b97STreehugger Robot * \brief
312*16467b97STreehugger Robot * Default implementation of the nextToken() call for a lexer.
313*16467b97STreehugger Robot *
314*16467b97STreehugger Robot * \param toksource
315*16467b97STreehugger Robot * Points to the implementation of a token source. The lexer is
316*16467b97STreehugger Robot * addressed by the super structure pointer.
317*16467b97STreehugger Robot *
318*16467b97STreehugger Robot * \returns
319*16467b97STreehugger Robot * The next token in the current input stream or the EOF token
320*16467b97STreehugger Robot * if there are no more tokens in any input stream in the stack.
321*16467b97STreehugger Robot *
322*16467b97STreehugger Robot * Write detailed description for nextToken here.
323*16467b97STreehugger Robot *
324*16467b97STreehugger Robot * \remarks
325*16467b97STreehugger Robot * Write remarks for nextToken here.
326*16467b97STreehugger Robot *
327*16467b97STreehugger Robot * \see nextTokenStr
328*16467b97STreehugger Robot */
329*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN
nextToken(pANTLR3_TOKEN_SOURCE toksource)330*16467b97STreehugger Robot nextToken (pANTLR3_TOKEN_SOURCE toksource)
331*16467b97STreehugger Robot {
332*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN tok;
333*16467b97STreehugger Robot
334*16467b97STreehugger Robot // Find the next token in the current stream
335*16467b97STreehugger Robot //
336*16467b97STreehugger Robot tok = nextTokenStr(toksource);
337*16467b97STreehugger Robot
338*16467b97STreehugger Robot // If we got to the EOF token then switch to the previous
339*16467b97STreehugger Robot // input stream if there were any and just return the
340*16467b97STreehugger Robot // EOF if there are none. We must check the next token
341*16467b97STreehugger Robot // in any outstanding input stream we pop into the active
342*16467b97STreehugger Robot // role to see if it was sitting at EOF after PUSHing the
343*16467b97STreehugger Robot // stream we just consumed, otherwise we will return EOF
344*16467b97STreehugger Robot // on the reinstalled input stream, when in actual fact
345*16467b97STreehugger Robot // there might be more input streams to POP before the
346*16467b97STreehugger Robot // real EOF of the whole logical input stream. Hence we
347*16467b97STreehugger Robot // use a while loop here until we find something in the stream
348*16467b97STreehugger Robot // that isn't EOF or we reach the actual end of the last input
349*16467b97STreehugger Robot // stream on the stack.
350*16467b97STreehugger Robot //
351*16467b97STreehugger Robot while ((tok != NULL) && (tok->type == ANTLR3_TOKEN_EOF))
352*16467b97STreehugger Robot {
353*16467b97STreehugger Robot pANTLR3_LEXER lexer;
354*16467b97STreehugger Robot
355*16467b97STreehugger Robot lexer = (pANTLR3_LEXER)(toksource->super);
356*16467b97STreehugger Robot
357*16467b97STreehugger Robot if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
358*16467b97STreehugger Robot {
359*16467b97STreehugger Robot // We have another input stream in the stack so we
360*16467b97STreehugger Robot // need to revert to it, then resume the loop to check
361*16467b97STreehugger Robot // it wasn't sitting at EOF itself.
362*16467b97STreehugger Robot //
363*16467b97STreehugger Robot lexer->popCharStream(lexer);
364*16467b97STreehugger Robot tok = nextTokenStr(toksource);
365*16467b97STreehugger Robot }
366*16467b97STreehugger Robot else
367*16467b97STreehugger Robot {
368*16467b97STreehugger Robot // There were no more streams on the input stack
369*16467b97STreehugger Robot // so this EOF is the 'real' logical EOF for
370*16467b97STreehugger Robot // the input stream. So we just exit the loop and
371*16467b97STreehugger Robot // return the EOF we have found.
372*16467b97STreehugger Robot //
373*16467b97STreehugger Robot break;
374*16467b97STreehugger Robot }
375*16467b97STreehugger Robot
376*16467b97STreehugger Robot }
377*16467b97STreehugger Robot
378*16467b97STreehugger Robot // return whatever token we have, which may be EOF
379*16467b97STreehugger Robot //
380*16467b97STreehugger Robot return tok;
381*16467b97STreehugger Robot }
382*16467b97STreehugger Robot
383*16467b97STreehugger Robot ANTLR3_API pANTLR3_LEXER
antlr3LexerNewStream(ANTLR3_UINT32 sizeHint,pANTLR3_INPUT_STREAM input,pANTLR3_RECOGNIZER_SHARED_STATE state)384*16467b97STreehugger Robot antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
385*16467b97STreehugger Robot {
386*16467b97STreehugger Robot pANTLR3_LEXER lexer;
387*16467b97STreehugger Robot
388*16467b97STreehugger Robot // Create a basic lexer first
389*16467b97STreehugger Robot //
390*16467b97STreehugger Robot lexer = antlr3LexerNew(sizeHint, state);
391*16467b97STreehugger Robot
392*16467b97STreehugger Robot if (lexer != NULL)
393*16467b97STreehugger Robot {
394*16467b97STreehugger Robot // Install the input stream and reset the lexer
395*16467b97STreehugger Robot //
396*16467b97STreehugger Robot setCharStream(lexer, input);
397*16467b97STreehugger Robot }
398*16467b97STreehugger Robot
399*16467b97STreehugger Robot return lexer;
400*16467b97STreehugger Robot }
401*16467b97STreehugger Robot
/**
 * Placeholder rule entry point installed by antlr3LexerNew().
 *
 * Only reports on stderr that no lexer rules have been added. The check
 * on \p lexer exists so that the parameter counts as used.
 */
static void
mTokens(pANTLR3_LEXER lexer)
{
    if (lexer == NULL)
    {
        return;
    }

    ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
}
409*16467b97STreehugger Robot
410*16467b97STreehugger Robot static void
reportError(pANTLR3_BASE_RECOGNIZER rec)411*16467b97STreehugger Robot reportError (pANTLR3_BASE_RECOGNIZER rec)
412*16467b97STreehugger Robot {
413*16467b97STreehugger Robot // Indicate this recognizer had an error while processing.
414*16467b97STreehugger Robot //
415*16467b97STreehugger Robot rec->state->errorCount++;
416*16467b97STreehugger Robot
417*16467b97STreehugger Robot rec->displayRecognitionError(rec, rec->state->tokenNames);
418*16467b97STreehugger Robot }
419*16467b97STreehugger Robot
420*16467b97STreehugger Robot #ifdef ANTLR3_WINDOWS
421*16467b97STreehugger Robot #pragma warning( disable : 4100 )
422*16467b97STreehugger Robot #endif
423*16467b97STreehugger Robot
424*16467b97STreehugger Robot /** Default lexer error handler (works for 8 bit streams only!!!)
425*16467b97STreehugger Robot */
426*16467b97STreehugger Robot static void
displayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_UINT8 * tokenNames)427*16467b97STreehugger Robot displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
428*16467b97STreehugger Robot {
429*16467b97STreehugger Robot pANTLR3_LEXER lexer;
430*16467b97STreehugger Robot pANTLR3_EXCEPTION ex;
431*16467b97STreehugger Robot pANTLR3_STRING ftext;
432*16467b97STreehugger Robot
433*16467b97STreehugger Robot lexer = (pANTLR3_LEXER)(recognizer->super);
434*16467b97STreehugger Robot ex = lexer->rec->state->exception;
435*16467b97STreehugger Robot
436*16467b97STreehugger Robot // See if there is a 'filename' we can use
437*16467b97STreehugger Robot //
438*16467b97STreehugger Robot if (ex->name == NULL)
439*16467b97STreehugger Robot {
440*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "-unknown source-(");
441*16467b97STreehugger Robot }
442*16467b97STreehugger Robot else
443*16467b97STreehugger Robot {
444*16467b97STreehugger Robot ftext = ex->streamName->to8(ex->streamName);
445*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
446*16467b97STreehugger Robot }
447*16467b97STreehugger Robot
448*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
449*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
450*16467b97STreehugger Robot ex->type,
451*16467b97STreehugger Robot (pANTLR3_UINT8) (ex->message),
452*16467b97STreehugger Robot ex->charPositionInLine+1
453*16467b97STreehugger Robot );
454*16467b97STreehugger Robot {
455*16467b97STreehugger Robot ANTLR3_INT32 width;
456*16467b97STreehugger Robot
457*16467b97STreehugger Robot width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
458*16467b97STreehugger Robot
459*16467b97STreehugger Robot if (width >= 1)
460*16467b97STreehugger Robot {
461*16467b97STreehugger Robot if (isprint(ex->c))
462*16467b97STreehugger Robot {
463*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
464*16467b97STreehugger Robot }
465*16467b97STreehugger Robot else
466*16467b97STreehugger Robot {
467*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
468*16467b97STreehugger Robot }
469*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
470*16467b97STreehugger Robot }
471*16467b97STreehugger Robot else
472*16467b97STreehugger Robot {
473*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
474*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
475*16467b97STreehugger Robot (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
476*16467b97STreehugger Robot (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
477*16467b97STreehugger Robot );
478*16467b97STreehugger Robot width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
479*16467b97STreehugger Robot
480*16467b97STreehugger Robot if (width >= 1)
481*16467b97STreehugger Robot {
482*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
483*16467b97STreehugger Robot }
484*16467b97STreehugger Robot else
485*16467b97STreehugger Robot {
486*16467b97STreehugger Robot ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
487*16467b97STreehugger Robot }
488*16467b97STreehugger Robot }
489*16467b97STreehugger Robot }
490*16467b97STreehugger Robot }
491*16467b97STreehugger Robot
/**
 * Installs \p input as the lexer's current character stream.
 *
 * Also points the token factory at the new stream (creating the factory
 * on first use), adopts the stream's string factory if the token source
 * has none yet, installs the lexer exception constructor, clears the
 * current token state and copies the stream's file name to the token
 * source.
 *
 * \param lexer  Lexer to receive the stream.
 * \param input  Stream to install.
 */
static void
setCharStream(pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input)
{
    // Make the supplied stream the one the lexer reads from.
    //
    lexer->input = input;

    // An existing token factory is never destroyed here, as tokens it
    // already produced may still be in use; it is only told which stream
    // to adorn new tokens with, so that they can later fetch their text
    // from the right place. With no factory yet, one is created on the
    // new stream.
    //
    if (lexer->rec->state->tokFactory != NULL)
    {
        lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
    }
    else
    {
        lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input);
    }

    // Take on the stream's string factory, which preserves the encoding
    // form of the input, unless the token source already has one. The
    // pre-made EOF token gets it too if it has none of its own.
    //
    if (lexer->rec->state->tokSource->strFactory == NULL)
    {
        lexer->rec->state->tokSource->strFactory = input->strFactory;

        if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
        {
            lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
        }
    }

    // This is a lexer, so use the matching exception creator.
    //
    lexer->rec->exConstruct = antlr3RecognitionExceptionNew;

    // There is no current token on a freshly installed stream.
    //
    lexer->rec->state->token = NULL;
    lexer->rec->state->text = NULL;
    lexer->rec->state->tokenStartCharIndex = -1;

    // The token source reports the name of the char stream.
    //
    lexer->rec->state->tokSource->fileName = input->fileName;
}
548*16467b97STreehugger Robot
549*16467b97STreehugger Robot /*!
550*16467b97STreehugger Robot * \brief
551*16467b97STreehugger Robot * Change to a new input stream, remembering the old one.
552*16467b97STreehugger Robot *
553*16467b97STreehugger Robot * \param lexer
554*16467b97STreehugger Robot * Pointer to the lexer instance to switch input streams for.
555*16467b97STreehugger Robot *
556*16467b97STreehugger Robot * \param input
557*16467b97STreehugger Robot * New input stream to install as the current one.
558*16467b97STreehugger Robot *
559*16467b97STreehugger Robot * Switches the current character input stream to
560*16467b97STreehugger Robot * a new one, saving the old one, which we will revert to at the end of this
561*16467b97STreehugger Robot * new one.
562*16467b97STreehugger Robot */
563*16467b97STreehugger Robot static void
pushCharStream(pANTLR3_LEXER lexer,pANTLR3_INPUT_STREAM input)564*16467b97STreehugger Robot pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input)
565*16467b97STreehugger Robot {
566*16467b97STreehugger Robot // Do we need a new input stream stack?
567*16467b97STreehugger Robot //
568*16467b97STreehugger Robot if (lexer->rec->state->streams == NULL)
569*16467b97STreehugger Robot {
570*16467b97STreehugger Robot // This is the first call to stack a new
571*16467b97STreehugger Robot // stream and so we must create the stack first.
572*16467b97STreehugger Robot //
573*16467b97STreehugger Robot lexer->rec->state->streams = antlr3StackNew(0);
574*16467b97STreehugger Robot
575*16467b97STreehugger Robot if (lexer->rec->state->streams == NULL)
576*16467b97STreehugger Robot {
577*16467b97STreehugger Robot // Could not do this, we just fail to push it.
578*16467b97STreehugger Robot // TODO: Consider if this is what we want to do, but then
579*16467b97STreehugger Robot // any programmer can override this method to do something else.
580*16467b97STreehugger Robot return;
581*16467b97STreehugger Robot }
582*16467b97STreehugger Robot }
583*16467b97STreehugger Robot
584*16467b97STreehugger Robot // We have a stack, so we can save the current input stream
585*16467b97STreehugger Robot // into it.
586*16467b97STreehugger Robot //
587*16467b97STreehugger Robot lexer->input->istream->mark(lexer->input->istream);
588*16467b97STreehugger Robot lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
589*16467b97STreehugger Robot
590*16467b97STreehugger Robot // And now we can install this new one
591*16467b97STreehugger Robot //
592*16467b97STreehugger Robot lexer->setCharStream(lexer, input);
593*16467b97STreehugger Robot }
594*16467b97STreehugger Robot
595*16467b97STreehugger Robot /*!
596*16467b97STreehugger Robot * \brief
597*16467b97STreehugger Robot * Stops using the current input stream and reverts to any prior
598*16467b97STreehugger Robot * input stream on the stack.
599*16467b97STreehugger Robot *
600*16467b97STreehugger Robot * \param lexer
601*16467b97STreehugger Robot * Description of parameter lexer.
602*16467b97STreehugger Robot *
603*16467b97STreehugger Robot * Pointer to a function that abandons the current input stream, whether it
604*16467b97STreehugger Robot * is empty or not and reverts to the previous stacked input stream.
605*16467b97STreehugger Robot *
606*16467b97STreehugger Robot * \remark
607*16467b97STreehugger Robot * The function fails silently if there are no prior input streams.
608*16467b97STreehugger Robot */
609*16467b97STreehugger Robot static void
popCharStream(pANTLR3_LEXER lexer)610*16467b97STreehugger Robot popCharStream (pANTLR3_LEXER lexer)
611*16467b97STreehugger Robot {
612*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
613*16467b97STreehugger Robot
614*16467b97STreehugger Robot // If we do not have a stream stack or we are already at the
615*16467b97STreehugger Robot // stack bottom, then do nothing.
616*16467b97STreehugger Robot //
617*16467b97STreehugger Robot if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
618*16467b97STreehugger Robot {
619*16467b97STreehugger Robot // We just leave the current stream to its fate, we do not close
620*16467b97STreehugger Robot // it or anything as we do not know what the programmer intended
621*16467b97STreehugger Robot // for it. This method can always be overridden of course.
622*16467b97STreehugger Robot // So just find out what was currently saved on the stack and use
623*16467b97STreehugger Robot // that now, then pop it from the stack.
624*16467b97STreehugger Robot //
625*16467b97STreehugger Robot input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
626*16467b97STreehugger Robot lexer->rec->state->streams->pop(lexer->rec->state->streams);
627*16467b97STreehugger Robot
628*16467b97STreehugger Robot // Now install the stream as the current one.
629*16467b97STreehugger Robot //
630*16467b97STreehugger Robot lexer->setCharStream(lexer, input);
631*16467b97STreehugger Robot lexer->input->istream->rewindLast(lexer->input->istream);
632*16467b97STreehugger Robot }
633*16467b97STreehugger Robot return;
634*16467b97STreehugger Robot }
635*16467b97STreehugger Robot
/**
 * Record a caller-supplied token as the lexer's current token.
 *
 * \param lexer Lexer whose shared state receives the token.
 * \param token Token to store; it is stored as given, without copying.
 */
static void
emitNew(pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token)
{
    lexer->rec->state->token = token;
}
640*16467b97STreehugger Robot
641*16467b97STreehugger Robot static pANTLR3_COMMON_TOKEN
emit(pANTLR3_LEXER lexer)642*16467b97STreehugger Robot emit (pANTLR3_LEXER lexer)
643*16467b97STreehugger Robot {
644*16467b97STreehugger Robot pANTLR3_COMMON_TOKEN token;
645*16467b97STreehugger Robot
646*16467b97STreehugger Robot /* We could check pointers to token factories and so on, but
647*16467b97STreehugger Robot * we are in code that we want to run as fast as possible
648*16467b97STreehugger Robot * so we are not checking any errors. So make sure you have installed an input stream before
649*16467b97STreehugger Robot * trying to emit a new token.
650*16467b97STreehugger Robot */
651*16467b97STreehugger Robot token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
652*16467b97STreehugger Robot if (token == NULL) { return NULL; }
653*16467b97STreehugger Robot
654*16467b97STreehugger Robot /* Install the supplied information, and some other bits we already know
655*16467b97STreehugger Robot * get added automatically, such as the input stream it is associated with
656*16467b97STreehugger Robot * (though it can all be overridden of course)
657*16467b97STreehugger Robot */
658*16467b97STreehugger Robot token->type = lexer->rec->state->type;
659*16467b97STreehugger Robot token->channel = lexer->rec->state->channel;
660*16467b97STreehugger Robot token->start = lexer->rec->state->tokenStartCharIndex;
661*16467b97STreehugger Robot token->stop = lexer->getCharIndex(lexer) - 1;
662*16467b97STreehugger Robot token->line = lexer->rec->state->tokenStartLine;
663*16467b97STreehugger Robot token->charPosition = lexer->rec->state->tokenStartCharPositionInLine;
664*16467b97STreehugger Robot
665*16467b97STreehugger Robot if (lexer->rec->state->text != NULL)
666*16467b97STreehugger Robot {
667*16467b97STreehugger Robot token->textState = ANTLR3_TEXT_STRING;
668*16467b97STreehugger Robot token->tokText.text = lexer->rec->state->text;
669*16467b97STreehugger Robot }
670*16467b97STreehugger Robot else
671*16467b97STreehugger Robot {
672*16467b97STreehugger Robot token->textState = ANTLR3_TEXT_NONE;
673*16467b97STreehugger Robot }
674*16467b97STreehugger Robot token->lineStart = lexer->input->currentLine;
675*16467b97STreehugger Robot token->user1 = lexer->rec->state->user1;
676*16467b97STreehugger Robot token->user2 = lexer->rec->state->user2;
677*16467b97STreehugger Robot token->user3 = lexer->rec->state->user3;
678*16467b97STreehugger Robot token->custom = lexer->rec->state->custom;
679*16467b97STreehugger Robot
680*16467b97STreehugger Robot lexer->rec->state->token = token;
681*16467b97STreehugger Robot
682*16467b97STreehugger Robot return token;
683*16467b97STreehugger Robot }
684*16467b97STreehugger Robot
685*16467b97STreehugger Robot /**
686*16467b97STreehugger Robot * Free the resources allocated by a lexer
687*16467b97STreehugger Robot */
688*16467b97STreehugger Robot static void
freeLexer(pANTLR3_LEXER lexer)689*16467b97STreehugger Robot freeLexer (pANTLR3_LEXER lexer)
690*16467b97STreehugger Robot {
691*16467b97STreehugger Robot // This may have ben a delegate or delegator lexer, in which case the
692*16467b97STreehugger Robot // state may already have been freed (and set to NULL therefore)
693*16467b97STreehugger Robot // so we ignore the state if we don't have it.
694*16467b97STreehugger Robot //
695*16467b97STreehugger Robot if (lexer->rec->state != NULL)
696*16467b97STreehugger Robot {
697*16467b97STreehugger Robot if (lexer->rec->state->streams != NULL)
698*16467b97STreehugger Robot {
699*16467b97STreehugger Robot lexer->rec->state->streams->free(lexer->rec->state->streams);
700*16467b97STreehugger Robot }
701*16467b97STreehugger Robot if (lexer->rec->state->tokFactory != NULL)
702*16467b97STreehugger Robot {
703*16467b97STreehugger Robot lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
704*16467b97STreehugger Robot lexer->rec->state->tokFactory = NULL;
705*16467b97STreehugger Robot }
706*16467b97STreehugger Robot if (lexer->rec->state->tokSource != NULL)
707*16467b97STreehugger Robot {
708*16467b97STreehugger Robot ANTLR3_FREE(lexer->rec->state->tokSource);
709*16467b97STreehugger Robot lexer->rec->state->tokSource = NULL;
710*16467b97STreehugger Robot }
711*16467b97STreehugger Robot }
712*16467b97STreehugger Robot if (lexer->rec != NULL)
713*16467b97STreehugger Robot {
714*16467b97STreehugger Robot lexer->rec->free(lexer->rec);
715*16467b97STreehugger Robot lexer->rec = NULL;
716*16467b97STreehugger Robot }
717*16467b97STreehugger Robot ANTLR3_FREE(lexer);
718*16467b97STreehugger Robot }
719*16467b97STreehugger Robot
720*16467b97STreehugger Robot /** Implementation of matchs for the lexer, overrides any
721*16467b97STreehugger Robot * base implementation in the base recognizer.
722*16467b97STreehugger Robot *
723*16467b97STreehugger Robot * \remark
724*16467b97STreehugger Robot * Note that the generated code lays down arrays of ints for constant
725*16467b97STreehugger Robot * strings so that they are int UTF32 form!
726*16467b97STreehugger Robot */
727*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchs(pANTLR3_LEXER lexer,ANTLR3_UCHAR * string)728*16467b97STreehugger Robot matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
729*16467b97STreehugger Robot {
730*16467b97STreehugger Robot while (*string != ANTLR3_STRING_TERMINATOR)
731*16467b97STreehugger Robot {
732*16467b97STreehugger Robot if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
733*16467b97STreehugger Robot {
734*16467b97STreehugger Robot if (lexer->rec->state->backtracking > 0)
735*16467b97STreehugger Robot {
736*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_TRUE;
737*16467b97STreehugger Robot return ANTLR3_FALSE;
738*16467b97STreehugger Robot }
739*16467b97STreehugger Robot
740*16467b97STreehugger Robot lexer->rec->exConstruct(lexer->rec);
741*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_TRUE;
742*16467b97STreehugger Robot
743*16467b97STreehugger Robot /* TODO: Implement exception creation more fully perhaps
744*16467b97STreehugger Robot */
745*16467b97STreehugger Robot lexer->recover(lexer);
746*16467b97STreehugger Robot return ANTLR3_FALSE;
747*16467b97STreehugger Robot }
748*16467b97STreehugger Robot
749*16467b97STreehugger Robot /* Matched correctly, do consume it
750*16467b97STreehugger Robot */
751*16467b97STreehugger Robot lexer->input->istream->consume(lexer->input->istream);
752*16467b97STreehugger Robot string++;
753*16467b97STreehugger Robot
754*16467b97STreehugger Robot /* Reset any failed indicator
755*16467b97STreehugger Robot */
756*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_FALSE;
757*16467b97STreehugger Robot }
758*16467b97STreehugger Robot
759*16467b97STreehugger Robot
760*16467b97STreehugger Robot return ANTLR3_TRUE;
761*16467b97STreehugger Robot }
762*16467b97STreehugger Robot
763*16467b97STreehugger Robot /** Implementation of matchc for the lexer, overrides any
764*16467b97STreehugger Robot * base implementation in the base recognizer.
765*16467b97STreehugger Robot *
766*16467b97STreehugger Robot * \remark
767*16467b97STreehugger Robot * Note that the generated code lays down arrays of ints for constant
768*16467b97STreehugger Robot * strings so that they are int UTF32 form!
769*16467b97STreehugger Robot */
770*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchc(pANTLR3_LEXER lexer,ANTLR3_UCHAR c)771*16467b97STreehugger Robot matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
772*16467b97STreehugger Robot {
773*16467b97STreehugger Robot if (lexer->input->istream->_LA(lexer->input->istream, 1) == c)
774*16467b97STreehugger Robot {
775*16467b97STreehugger Robot /* Matched correctly, do consume it
776*16467b97STreehugger Robot */
777*16467b97STreehugger Robot lexer->input->istream->consume(lexer->input->istream);
778*16467b97STreehugger Robot
779*16467b97STreehugger Robot /* Reset any failed indicator
780*16467b97STreehugger Robot */
781*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_FALSE;
782*16467b97STreehugger Robot
783*16467b97STreehugger Robot return ANTLR3_TRUE;
784*16467b97STreehugger Robot }
785*16467b97STreehugger Robot
786*16467b97STreehugger Robot /* Failed to match, exception and recovery time.
787*16467b97STreehugger Robot */
788*16467b97STreehugger Robot if (lexer->rec->state->backtracking > 0)
789*16467b97STreehugger Robot {
790*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_TRUE;
791*16467b97STreehugger Robot return ANTLR3_FALSE;
792*16467b97STreehugger Robot }
793*16467b97STreehugger Robot
794*16467b97STreehugger Robot lexer->rec->exConstruct(lexer->rec);
795*16467b97STreehugger Robot
796*16467b97STreehugger Robot /* TODO: Implement exception creation more fully perhaps
797*16467b97STreehugger Robot */
798*16467b97STreehugger Robot lexer->recover(lexer);
799*16467b97STreehugger Robot
800*16467b97STreehugger Robot return ANTLR3_FALSE;
801*16467b97STreehugger Robot }
802*16467b97STreehugger Robot
803*16467b97STreehugger Robot /** Implementation of match range for the lexer, overrides any
804*16467b97STreehugger Robot * base implementation in the base recognizer.
805*16467b97STreehugger Robot *
806*16467b97STreehugger Robot * \remark
807*16467b97STreehugger Robot * Note that the generated code lays down arrays of ints for constant
808*16467b97STreehugger Robot * strings so that they are int UTF32 form!
809*16467b97STreehugger Robot */
810*16467b97STreehugger Robot static ANTLR3_BOOLEAN
matchRange(pANTLR3_LEXER lexer,ANTLR3_UCHAR low,ANTLR3_UCHAR high)811*16467b97STreehugger Robot matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
812*16467b97STreehugger Robot {
813*16467b97STreehugger Robot ANTLR3_UCHAR c;
814*16467b97STreehugger Robot
815*16467b97STreehugger Robot /* What is in the stream at the moment?
816*16467b97STreehugger Robot */
817*16467b97STreehugger Robot c = lexer->input->istream->_LA(lexer->input->istream, 1);
818*16467b97STreehugger Robot if ( c >= low && c <= high)
819*16467b97STreehugger Robot {
820*16467b97STreehugger Robot /* Matched correctly, consume it
821*16467b97STreehugger Robot */
822*16467b97STreehugger Robot lexer->input->istream->consume(lexer->input->istream);
823*16467b97STreehugger Robot
824*16467b97STreehugger Robot /* Reset any failed indicator
825*16467b97STreehugger Robot */
826*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_FALSE;
827*16467b97STreehugger Robot
828*16467b97STreehugger Robot return ANTLR3_TRUE;
829*16467b97STreehugger Robot }
830*16467b97STreehugger Robot
831*16467b97STreehugger Robot /* Failed to match, execption and recovery time.
832*16467b97STreehugger Robot */
833*16467b97STreehugger Robot
834*16467b97STreehugger Robot if (lexer->rec->state->backtracking > 0)
835*16467b97STreehugger Robot {
836*16467b97STreehugger Robot lexer->rec->state->failed = ANTLR3_TRUE;
837*16467b97STreehugger Robot return ANTLR3_FALSE;
838*16467b97STreehugger Robot }
839*16467b97STreehugger Robot
840*16467b97STreehugger Robot lexer->rec->exConstruct(lexer->rec);
841*16467b97STreehugger Robot
842*16467b97STreehugger Robot /* TODO: Implement exception creation more fully
843*16467b97STreehugger Robot */
844*16467b97STreehugger Robot lexer->recover(lexer);
845*16467b97STreehugger Robot
846*16467b97STreehugger Robot return ANTLR3_FALSE;
847*16467b97STreehugger Robot }
848*16467b97STreehugger Robot
849*16467b97STreehugger Robot static void
matchAny(pANTLR3_LEXER lexer)850*16467b97STreehugger Robot matchAny (pANTLR3_LEXER lexer)
851*16467b97STreehugger Robot {
852*16467b97STreehugger Robot lexer->input->istream->consume(lexer->input->istream);
853*16467b97STreehugger Robot }
854*16467b97STreehugger Robot
855*16467b97STreehugger Robot static void
recover(pANTLR3_LEXER lexer)856*16467b97STreehugger Robot recover (pANTLR3_LEXER lexer)
857*16467b97STreehugger Robot {
858*16467b97STreehugger Robot lexer->input->istream->consume(lexer->input->istream);
859*16467b97STreehugger Robot }
860*16467b97STreehugger Robot
861*16467b97STreehugger Robot static ANTLR3_UINT32
getLine(pANTLR3_LEXER lexer)862*16467b97STreehugger Robot getLine (pANTLR3_LEXER lexer)
863*16467b97STreehugger Robot {
864*16467b97STreehugger Robot return lexer->input->getLine(lexer->input);
865*16467b97STreehugger Robot }
866*16467b97STreehugger Robot
867*16467b97STreehugger Robot static ANTLR3_UINT32
getCharPositionInLine(pANTLR3_LEXER lexer)868*16467b97STreehugger Robot getCharPositionInLine (pANTLR3_LEXER lexer)
869*16467b97STreehugger Robot {
870*16467b97STreehugger Robot return lexer->input->charPositionInLine;
871*16467b97STreehugger Robot }
872*16467b97STreehugger Robot
getCharIndex(pANTLR3_LEXER lexer)873*16467b97STreehugger Robot static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer)
874*16467b97STreehugger Robot {
875*16467b97STreehugger Robot return lexer->input->istream->index(lexer->input->istream);
876*16467b97STreehugger Robot }
877*16467b97STreehugger Robot
878*16467b97STreehugger Robot static pANTLR3_STRING
getText(pANTLR3_LEXER lexer)879*16467b97STreehugger Robot getText (pANTLR3_LEXER lexer)
880*16467b97STreehugger Robot {
881*16467b97STreehugger Robot if (lexer->rec->state->text)
882*16467b97STreehugger Robot {
883*16467b97STreehugger Robot return lexer->rec->state->text;
884*16467b97STreehugger Robot
885*16467b97STreehugger Robot }
886*16467b97STreehugger Robot return lexer->input->substr(
887*16467b97STreehugger Robot lexer->input,
888*16467b97STreehugger Robot lexer->rec->state->tokenStartCharIndex,
889*16467b97STreehugger Robot lexer->getCharIndex(lexer) - lexer->input->charByteSize
890*16467b97STreehugger Robot );
891*16467b97STreehugger Robot
892*16467b97STreehugger Robot }
893*16467b97STreehugger Robot
894*16467b97STreehugger Robot static void *
getCurrentInputSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream)895*16467b97STreehugger Robot getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
896*16467b97STreehugger Robot {
897*16467b97STreehugger Robot return NULL;
898*16467b97STreehugger Robot }
899*16467b97STreehugger Robot
900*16467b97STreehugger Robot static void *
getMissingSymbol(pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM istream,pANTLR3_EXCEPTION e,ANTLR3_UINT32 expectedTokenType,pANTLR3_BITSET_LIST follow)901*16467b97STreehugger Robot getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
902*16467b97STreehugger Robot ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
903*16467b97STreehugger Robot {
904*16467b97STreehugger Robot return NULL;
905*16467b97STreehugger Robot }
906