xref: /aosp_15_r20/external/antlr/runtime/C/src/antlr3cyclicdfa.c (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** Support functions for traversing cyclic DFA states as laid
2*16467b97STreehugger Robot  *  out in static initialized structures by the code generator.
3*16467b97STreehugger Robot  *
4*16467b97STreehugger Robot  * A DFA implemented as a set of transition tables.
5*16467b97STreehugger Robot  *
6*16467b97STreehugger Robot  *  Any state that has a semantic predicate edge is special; those states
7*16467b97STreehugger Robot  *  are generated with if-then-else structures in a ->specialStateTransition()
8*16467b97STreehugger Robot  *  which is generated by cyclicDFA template.
9*16467b97STreehugger Robot  *
10*16467b97STreehugger Robot  *  There are at most 32767 states (16-bit signed short).
11*16467b97STreehugger Robot  *  Could get away with byte sometimes but would have to generate different
12*16467b97STreehugger Robot  *  types and the simulation code too.  For a point of reference, the Java
13*16467b97STreehugger Robot  *  lexer's Tokens rule DFA has 326 states roughly.
14*16467b97STreehugger Robot  */
15*16467b97STreehugger Robot 
16*16467b97STreehugger Robot // [The "BSD licence"]
17*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
18*16467b97STreehugger Robot // http://www.temporal-wave.com
19*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
20*16467b97STreehugger Robot //
21*16467b97STreehugger Robot // All rights reserved.
22*16467b97STreehugger Robot //
23*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
24*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
25*16467b97STreehugger Robot // are met:
26*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
27*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
28*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
29*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
30*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
31*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
32*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
33*16467b97STreehugger Robot //
34*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
35*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
36*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
37*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
38*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
39*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44*16467b97STreehugger Robot 
45*16467b97STreehugger Robot #include    <antlr3defs.h>
46*16467b97STreehugger Robot #include    <antlr3cyclicdfa.h>
47*16467b97STreehugger Robot 
48*16467b97STreehugger Robot #ifdef	ANTLR3_WINDOWS
49*16467b97STreehugger Robot #pragma warning( disable : 4100 )
50*16467b97STreehugger Robot #endif
51*16467b97STreehugger Robot 
52*16467b97STreehugger Robot static void
noViableAlt(pANTLR3_BASE_RECOGNIZER rec,pANTLR3_CYCLIC_DFA cdfa,ANTLR3_UINT32 s)53*16467b97STreehugger Robot noViableAlt(pANTLR3_BASE_RECOGNIZER rec, pANTLR3_CYCLIC_DFA cdfa, ANTLR3_UINT32	s)
54*16467b97STreehugger Robot {
55*16467b97STreehugger Robot 	// In backtracking mode, we just set the failed flag so that the
56*16467b97STreehugger Robot 	// alt can just exit right now. If we are parsing though, then
57*16467b97STreehugger Robot 	// we want the exception to be raised.
58*16467b97STreehugger Robot 	//
59*16467b97STreehugger Robot     if	(rec->state->backtracking > 0)
60*16467b97STreehugger Robot     {
61*16467b97STreehugger Robot 		rec->state->failed = ANTLR3_TRUE;
62*16467b97STreehugger Robot     }
63*16467b97STreehugger Robot 	else
64*16467b97STreehugger Robot 	{
65*16467b97STreehugger Robot 		rec->exConstruct(rec);
66*16467b97STreehugger Robot 		rec->state->exception->type         = ANTLR3_NO_VIABLE_ALT_EXCEPTION;
67*16467b97STreehugger Robot 		rec->state->exception->message      = cdfa->description;
68*16467b97STreehugger Robot 		rec->state->exception->decisionNum  = cdfa->decisionNumber;
69*16467b97STreehugger Robot 		rec->state->exception->state        = s;
70*16467b97STreehugger Robot 	}
71*16467b97STreehugger Robot }
72*16467b97STreehugger Robot 
73*16467b97STreehugger Robot /** From the input stream, predict what alternative will succeed
74*16467b97STreehugger Robot  *  using this DFA (representing the covering regular approximation
75*16467b97STreehugger Robot  *  to the underlying CFL).  Return an alternative number 1..n.  Throw
76*16467b97STreehugger Robot  *  an exception upon error.
77*16467b97STreehugger Robot  */
78*16467b97STreehugger Robot ANTLR3_API ANTLR3_INT32
antlr3dfapredict(void * ctx,pANTLR3_BASE_RECOGNIZER rec,pANTLR3_INT_STREAM is,pANTLR3_CYCLIC_DFA cdfa)79*16467b97STreehugger Robot antlr3dfapredict (void * ctx, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA cdfa)
80*16467b97STreehugger Robot {
81*16467b97STreehugger Robot     ANTLR3_MARKER	mark;
82*16467b97STreehugger Robot     ANTLR3_INT32	s;
83*16467b97STreehugger Robot     ANTLR3_INT32	specialState;
84*16467b97STreehugger Robot     ANTLR3_INT32	c;
85*16467b97STreehugger Robot 
86*16467b97STreehugger Robot     mark	= is->mark(is);	    /* Store where we are right now	*/
87*16467b97STreehugger Robot     s		= 0;		    /* Always start with state 0	*/
88*16467b97STreehugger Robot 
89*16467b97STreehugger Robot 	for (;;)
90*16467b97STreehugger Robot 	{
91*16467b97STreehugger Robot 		/* Pick out any special state entry for this state
92*16467b97STreehugger Robot 		 */
93*16467b97STreehugger Robot 		specialState	= cdfa->special[s];
94*16467b97STreehugger Robot 
95*16467b97STreehugger Robot 		/* Transition the special state and consume an input token
96*16467b97STreehugger Robot 		 */
97*16467b97STreehugger Robot 		if  (specialState >= 0)
98*16467b97STreehugger Robot 		{
99*16467b97STreehugger Robot 			s = cdfa->specialStateTransition(ctx, rec, is, cdfa, specialState);
100*16467b97STreehugger Robot 
101*16467b97STreehugger Robot 			// Error?
102*16467b97STreehugger Robot 			//
103*16467b97STreehugger Robot 			if	(s<0)
104*16467b97STreehugger Robot 			{
105*16467b97STreehugger Robot 				// If the predicate/rule raised an exception then we leave it
106*16467b97STreehugger Robot 				// in tact, else we have an NVA.
107*16467b97STreehugger Robot 				//
108*16467b97STreehugger Robot 				if	(rec->state->error != ANTLR3_TRUE)
109*16467b97STreehugger Robot 				{
110*16467b97STreehugger Robot 					noViableAlt(rec,cdfa, s);
111*16467b97STreehugger Robot 				}
112*16467b97STreehugger Robot 				is->rewind(is, mark);
113*16467b97STreehugger Robot 				return	0;
114*16467b97STreehugger Robot 			}
115*16467b97STreehugger Robot 			is->consume(is);
116*16467b97STreehugger Robot 			continue;
117*16467b97STreehugger Robot 		}
118*16467b97STreehugger Robot 
119*16467b97STreehugger Robot 		/* Accept state?
120*16467b97STreehugger Robot 		 */
121*16467b97STreehugger Robot 		if  (cdfa->accept[s] >= 1)
122*16467b97STreehugger Robot 		{
123*16467b97STreehugger Robot 			is->rewind(is, mark);
124*16467b97STreehugger Robot 			return  cdfa->accept[s];
125*16467b97STreehugger Robot 		}
126*16467b97STreehugger Robot 
127*16467b97STreehugger Robot 		/* Look for a normal transition state based upon the input token element
128*16467b97STreehugger Robot 		 */
129*16467b97STreehugger Robot 		c = is->_LA(is, 1);
130*16467b97STreehugger Robot 
131*16467b97STreehugger Robot 		/* Check against min and max for this state
132*16467b97STreehugger Robot 		 */
133*16467b97STreehugger Robot 		if  (c>= cdfa->min[s] && c <= cdfa->max[s])
134*16467b97STreehugger Robot 		{
135*16467b97STreehugger Robot 			ANTLR3_INT32   snext;
136*16467b97STreehugger Robot 
137*16467b97STreehugger Robot 			/* What is the next state?
138*16467b97STreehugger Robot 			 */
139*16467b97STreehugger Robot 			snext = cdfa->transition[s][c - cdfa->min[s]];
140*16467b97STreehugger Robot 
141*16467b97STreehugger Robot 			if	(snext < 0)
142*16467b97STreehugger Robot 			{
143*16467b97STreehugger Robot 				/* Was in range but not a normal transition
144*16467b97STreehugger Robot 				 * must check EOT, which is like the else clause.
145*16467b97STreehugger Robot 				 * eot[s]>=0 indicates that an EOT edge goes to another
146*16467b97STreehugger Robot 				 * state.
147*16467b97STreehugger Robot 				 */
148*16467b97STreehugger Robot 				if  (cdfa->eot[s] >= 0)
149*16467b97STreehugger Robot 				{
150*16467b97STreehugger Robot 					s = cdfa->eot[s];
151*16467b97STreehugger Robot 					is->consume(is);
152*16467b97STreehugger Robot 					continue;
153*16467b97STreehugger Robot 				}
154*16467b97STreehugger Robot 				noViableAlt(rec,cdfa, s);
155*16467b97STreehugger Robot 				is->rewind(is, mark);
156*16467b97STreehugger Robot 				return	0;
157*16467b97STreehugger Robot 			}
158*16467b97STreehugger Robot 
159*16467b97STreehugger Robot 			/* New current state - move to it
160*16467b97STreehugger Robot 			 */
161*16467b97STreehugger Robot 			s	= snext;
162*16467b97STreehugger Robot 			is->consume(is);
163*16467b97STreehugger Robot 			continue;
164*16467b97STreehugger Robot 		}
165*16467b97STreehugger Robot 		/* EOT Transition?
166*16467b97STreehugger Robot 		 */
167*16467b97STreehugger Robot 		if  (cdfa->eot[s] >= 0)
168*16467b97STreehugger Robot 		{
169*16467b97STreehugger Robot 			s	= cdfa->eot[s];
170*16467b97STreehugger Robot 			is->consume(is);
171*16467b97STreehugger Robot 			continue;
172*16467b97STreehugger Robot 		}
173*16467b97STreehugger Robot 		/* EOF transition to accept state?
174*16467b97STreehugger Robot 		 */
175*16467b97STreehugger Robot 		if  ( c == ANTLR3_TOKEN_EOF && cdfa->eof[s] >= 0)
176*16467b97STreehugger Robot 		{
177*16467b97STreehugger Robot 			is->rewind(is, mark);
178*16467b97STreehugger Robot 			return  cdfa->accept[cdfa->eof[s]];
179*16467b97STreehugger Robot 		}
180*16467b97STreehugger Robot 
181*16467b97STreehugger Robot 		/* No alt, so bomb
182*16467b97STreehugger Robot 		 */
183*16467b97STreehugger Robot 		noViableAlt(rec, cdfa, s);
184*16467b97STreehugger Robot 		is->rewind(is, mark);
185*16467b97STreehugger Robot 		return 0;
186*16467b97STreehugger Robot 	}
187*16467b97STreehugger Robot 
188*16467b97STreehugger Robot }
189*16467b97STreehugger Robot 
190*16467b97STreehugger Robot /** Default special state implementation
191*16467b97STreehugger Robot  */
192*16467b97STreehugger Robot ANTLR3_API ANTLR3_INT32
antlr3dfaspecialStateTransition(void * ctx,pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_CYCLIC_DFA dfa,ANTLR3_INT32 s)193*16467b97STreehugger Robot antlr3dfaspecialStateTransition   (void * ctx, pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA dfa, ANTLR3_INT32 s)
194*16467b97STreehugger Robot {
195*16467b97STreehugger Robot     return -1;
196*16467b97STreehugger Robot }
197*16467b97STreehugger Robot 
198*16467b97STreehugger Robot /* Default special transition implementation
199*16467b97STreehugger Robot  */
200*16467b97STreehugger Robot ANTLR3_API ANTLR3_INT32
antlr3dfaspecialTransition(void * ctx,pANTLR3_BASE_RECOGNIZER recognizer,pANTLR3_INT_STREAM is,pANTLR3_CYCLIC_DFA dfa,ANTLR3_INT32 s)201*16467b97STreehugger Robot antlr3dfaspecialTransition    (void * ctx, pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA dfa, ANTLR3_INT32 s)
202*16467b97STreehugger Robot {
203*16467b97STreehugger Robot     return 0;
204*16467b97STreehugger Robot }
205