xref: /aosp_15_r20/external/antlr/runtime/Cpp/include/antlr3recognizersharedstate.hpp (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  * While the C runtime does not need to model the state of
3*16467b97STreehugger Robot  * multiple lexers and parsers in the same way as the Java runtime does
4*16467b97STreehugger Robot  * it is no overhead to reflect that model. In fact the
5*16467b97STreehugger Robot  * C runtime has always been able to share recognizer state.
6*16467b97STreehugger Robot  *
7*16467b97STreehugger Robot  * This 'class' therefore defines all the elements of a recognizer
8*16467b97STreehugger Robot  * (either lexer, parser or tree parser) that are needed to
9*16467b97STreehugger Robot  * track the current recognition state. Multiple recognizers
10*16467b97STreehugger Robot  * may then share this state, for instance when one grammar
11*16467b97STreehugger Robot  * imports another.
12*16467b97STreehugger Robot  */
13*16467b97STreehugger Robot 
14*16467b97STreehugger Robot #ifndef	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP
15*16467b97STreehugger Robot #define	_ANTLR3_RECOGNIZER_SHARED_STATE_HPP
16*16467b97STreehugger Robot 
17*16467b97STreehugger Robot // [The "BSD licence"]
18*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
19*16467b97STreehugger Robot 
20*16467b97STreehugger Robot //
21*16467b97STreehugger Robot // All rights reserved.
22*16467b97STreehugger Robot //
23*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
24*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
25*16467b97STreehugger Robot // are met:
26*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
27*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
28*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
29*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
30*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
31*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
32*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
33*16467b97STreehugger Robot //
34*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
35*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
36*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
37*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
38*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
39*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
40*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
41*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
42*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
43*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44*16467b97STreehugger Robot 
45*16467b97STreehugger Robot #include "antlr3defs.hpp"
46*16467b97STreehugger Robot 
47*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE()
48*16467b97STreehugger Robot 
49*16467b97STreehugger Robot /** All the data elements required to track the current state
50*16467b97STreehugger Robot  *  of any recognizer (lexer, parser, tree parser).
51*16467b97STreehugger Robot  * May be share between multiple recognizers such that
52*16467b97STreehugger Robot  * grammar inheritance is easily supported.
53*16467b97STreehugger Robot  */
54*16467b97STreehugger Robot template<class ImplTraits, class StreamType>
55*16467b97STreehugger Robot class RecognizerSharedState  : public ImplTraits::AllocPolicyType
56*16467b97STreehugger Robot {
57*16467b97STreehugger Robot public:
58*16467b97STreehugger Robot 	typedef typename ImplTraits::AllocPolicyType AllocPolicyType;
59*16467b97STreehugger Robot 	typedef typename StreamType::UnitType TokenType;
60*16467b97STreehugger Robot 	typedef typename ImplTraits::CommonTokenType CommonTokenType;
61*16467b97STreehugger Robot 
62*16467b97STreehugger Robot 	typedef typename ComponentTypeFinder<ImplTraits, StreamType>::ComponentType  ComponentType;
63*16467b97STreehugger Robot 	typedef typename ImplTraits::template RewriteStreamType< ComponentType > RewriteStreamType;
64*16467b97STreehugger Robot 	typedef typename ImplTraits::StringType StringType;
65*16467b97STreehugger Robot 	typedef typename ImplTraits::TokenSourceType TokenSourceType;
66*16467b97STreehugger Robot 	typedef typename ImplTraits::template ExceptionBaseType<StreamType> ExceptionBaseType;
67*16467b97STreehugger Robot 	typedef typename ImplTraits::BitsetType BitsetType;
68*16467b97STreehugger Robot 	typedef typename ImplTraits::BitsetListType BitsetListType;
69*16467b97STreehugger Robot 
70*16467b97STreehugger Robot 	typedef typename AllocPolicyType::template StackType< BitsetListType > FollowingType;
71*16467b97STreehugger Robot 	typedef typename AllocPolicyType::template StackType< typename ImplTraits::InputStreamType* > InputStreamsType;
72*16467b97STreehugger Robot 	typedef InputStreamsType StreamsType;
73*16467b97STreehugger Robot 	typedef typename AllocPolicyType::template VectorType<RewriteStreamType> RewriteStreamsType;
74*16467b97STreehugger Robot 
75*16467b97STreehugger Robot 	typedef IntTrie<ImplTraits, ANTLR_MARKER> RuleListType;
76*16467b97STreehugger Robot 	typedef IntTrie<ImplTraits, RuleListType*> RuleMemoType;
77*16467b97STreehugger Robot 
78*16467b97STreehugger Robot private:
79*16467b97STreehugger Robot 	/** Points to the first in a possible chain of exceptions that the
80*16467b97STreehugger Robot      *  recognizer has discovered.
81*16467b97STreehugger Robot      */
82*16467b97STreehugger Robot     ExceptionBaseType*			m_exception;
83*16467b97STreehugger Robot 
84*16467b97STreehugger Robot 
85*16467b97STreehugger Robot     /** Track the set of token types that can follow any rule invocation.
86*16467b97STreehugger Robot      *  Stack structure, to support: List<BitSet>.
87*16467b97STreehugger Robot      */
88*16467b97STreehugger Robot     FollowingType		m_following;
89*16467b97STreehugger Robot 
90*16467b97STreehugger Robot     /** Track around a hint from the creator of the recognizer as to how big this
91*16467b97STreehugger Robot      *  thing is going to get, as the actress said to the bishop. This allows us
92*16467b97STreehugger Robot      *  to tune hash tables accordingly. This might not be the best place for this
93*16467b97STreehugger Robot      *  in the end but we will see.
94*16467b97STreehugger Robot      */
95*16467b97STreehugger Robot     ANTLR_UINT32		m_sizeHint;
96*16467b97STreehugger Robot 
97*16467b97STreehugger Robot 
98*16467b97STreehugger Robot     /** If set to true then the recognizer has an exception
99*16467b97STreehugger Robot      * condition (this is tested by the generated code for the rules of
100*16467b97STreehugger Robot      * the grammar).
101*16467b97STreehugger Robot      */
102*16467b97STreehugger Robot     bool				m_error;
103*16467b97STreehugger Robot 
104*16467b97STreehugger Robot 
105*16467b97STreehugger Robot     /** This is true when we see an error and before having successfully
106*16467b97STreehugger Robot      *  matched a token.  Prevents generation of more than one error message
107*16467b97STreehugger Robot      *  per error.
108*16467b97STreehugger Robot      */
109*16467b97STreehugger Robot     bool				m_errorRecovery;
110*16467b97STreehugger Robot 
111*16467b97STreehugger Robot 	/** In lieu of a return value, this indicates that a rule or token
112*16467b97STreehugger Robot      *  has failed to match.  Reset to false upon valid token match.
113*16467b97STreehugger Robot      */
114*16467b97STreehugger Robot     bool				m_failed;
115*16467b97STreehugger Robot 
116*16467b97STreehugger Robot 	/*
117*16467b97STreehugger Robot 	Instead of allocating CommonTokenType, we do it in the stack. hence we need a null indicator
118*16467b97STreehugger Robot 	*/
119*16467b97STreehugger Robot 	bool				m_token_present;
120*16467b97STreehugger Robot 
121*16467b97STreehugger Robot     /** The index into the input stream where the last error occurred.
122*16467b97STreehugger Robot      * 	This is used to prevent infinite loops where an error is found
123*16467b97STreehugger Robot      *  but no token is consumed during recovery...another error is found,
124*16467b97STreehugger Robot      *  ad nauseam.  This is a failsafe mechanism to guarantee that at least
125*16467b97STreehugger Robot      *  one token/tree node is consumed for two errors.
126*16467b97STreehugger Robot      */
127*16467b97STreehugger Robot     ANTLR_MARKER		m_lastErrorIndex;
128*16467b97STreehugger Robot 
129*16467b97STreehugger Robot     /** When the recognizer terminates, the error handling functions
130*16467b97STreehugger Robot      *  will have incremented this value if any error occurred (that was displayed). It can then be
131*16467b97STreehugger Robot      *  used by the grammar programmer without having to use static globals.
132*16467b97STreehugger Robot      */
133*16467b97STreehugger Robot     ANTLR_UINT32		m_errorCount;
134*16467b97STreehugger Robot 
135*16467b97STreehugger Robot     /** If 0, no backtracking is going on.  Safe to exec actions etc...
136*16467b97STreehugger Robot      *  If >0 then it's the level of backtracking.
137*16467b97STreehugger Robot      */
138*16467b97STreehugger Robot     ANTLR_INT32			m_backtracking;
139*16467b97STreehugger Robot 
140*16467b97STreehugger Robot     /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing.
141*16467b97STreehugger Robot      *  Tracks  the stop token index for each rule.  ruleMemo[ruleIndex] is
142*16467b97STreehugger Robot      *  the memoization table for ruleIndex.  For key ruleStartIndex, you
143*16467b97STreehugger Robot      *  get back the stop token for associated rule or MEMO_RULE_FAILED.
144*16467b97STreehugger Robot      *
145*16467b97STreehugger Robot      *  This is only used if rule memoization is on.
146*16467b97STreehugger Robot      */
147*16467b97STreehugger Robot     RuleMemoType*		m_ruleMemo;
148*16467b97STreehugger Robot 
149*16467b97STreehugger Robot     /** Pointer to an array of token names
150*16467b97STreehugger Robot      *  that are generally useful in error reporting. The generated parsers install
151*16467b97STreehugger Robot      *  this pointer. The table it points to is statically allocated as 8 bit ascii
152*16467b97STreehugger Robot      *  at parser compile time - grammar token names are thus restricted in character
153*16467b97STreehugger Robot      *  sets, which does not seem to terrible.
154*16467b97STreehugger Robot      */
155*16467b97STreehugger Robot     ANTLR_UINT8**		m_tokenNames;
156*16467b97STreehugger Robot 
157*16467b97STreehugger Robot     /** The goal of all lexer rules/methods is to create a token object.
158*16467b97STreehugger Robot      *  This is an instance variable as multiple rules may collaborate to
159*16467b97STreehugger Robot      *  create a single token.  For example, NUM : INT | FLOAT ;
160*16467b97STreehugger Robot      *  In this case, you want the INT or FLOAT rule to set token and not
161*16467b97STreehugger Robot      *  have it reset to a NUM token in rule NUM.
162*16467b97STreehugger Robot      */
163*16467b97STreehugger Robot     CommonTokenType		m_token;
164*16467b97STreehugger Robot 
165*16467b97STreehugger Robot     /** A lexer is a source of tokens, produced by all the generated (or
166*16467b97STreehugger Robot      *  hand crafted if you like) matching rules. As such it needs to provide
167*16467b97STreehugger Robot      *  a token source interface implementation. For others, this will become a empty class
168*16467b97STreehugger Robot      */
169*16467b97STreehugger Robot     TokenSourceType*	m_tokSource;
170*16467b97STreehugger Robot 
171*16467b97STreehugger Robot     /** The channel number for the current token
172*16467b97STreehugger Robot      */
173*16467b97STreehugger Robot     ANTLR_UINT32			m_channel;
174*16467b97STreehugger Robot 
175*16467b97STreehugger Robot     /** The token type for the current token
176*16467b97STreehugger Robot      */
177*16467b97STreehugger Robot     ANTLR_UINT32			m_type;
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot     /** The input line (where it makes sense) on which the first character of the current
180*16467b97STreehugger Robot      *  token resides.
181*16467b97STreehugger Robot      */
182*16467b97STreehugger Robot     ANTLR_INT32			m_tokenStartLine;
183*16467b97STreehugger Robot 
184*16467b97STreehugger Robot     /** The character position of the first character of the current token
185*16467b97STreehugger Robot      *  within the line specified by tokenStartLine
186*16467b97STreehugger Robot      */
187*16467b97STreehugger Robot     ANTLR_INT32		m_tokenStartCharPositionInLine;
188*16467b97STreehugger Robot 
189*16467b97STreehugger Robot     /** What character index in the stream did the current token start at?
190*16467b97STreehugger Robot      *  Needed, for example, to get the text for current token.  Set at
191*16467b97STreehugger Robot      *  the start of nextToken.
192*16467b97STreehugger Robot      */
193*16467b97STreehugger Robot     ANTLR_MARKER		m_tokenStartCharIndex;
194*16467b97STreehugger Robot 
195*16467b97STreehugger Robot     /** Text for the current token. This can be overridden by setting this
196*16467b97STreehugger Robot      *  variable directly or by using the SETTEXT() macro (preferred) in your
197*16467b97STreehugger Robot      *  lexer rules.
198*16467b97STreehugger Robot      */
199*16467b97STreehugger Robot     StringType			m_text;
200*16467b97STreehugger Robot 
201*16467b97STreehugger Robot     /** Input stream stack, which allows the C programmer to switch input streams
202*16467b97STreehugger Robot      *  easily and allow the standard nextToken() implementation to deal with it
203*16467b97STreehugger Robot      *  as this is a common requirement.
204*16467b97STreehugger Robot      */
205*16467b97STreehugger Robot     InputStreamsType	m_streams;
206*16467b97STreehugger Robot 
207*16467b97STreehugger Robot public:
208*16467b97STreehugger Robot 	RecognizerSharedState();
209*16467b97STreehugger Robot 	ExceptionBaseType* get_exception() const;
210*16467b97STreehugger Robot 	FollowingType& get_following();
211*16467b97STreehugger Robot 	ANTLR_UINT32 get_sizeHint() const;
212*16467b97STreehugger Robot 	bool get_error() const;
213*16467b97STreehugger Robot 	bool get_errorRecovery() const;
214*16467b97STreehugger Robot 	bool get_failed() const;
215*16467b97STreehugger Robot 	bool get_token_present() const;
216*16467b97STreehugger Robot 	ANTLR_MARKER get_lastErrorIndex() const;
217*16467b97STreehugger Robot 	ANTLR_UINT32 get_errorCount() const;
218*16467b97STreehugger Robot 	ANTLR_INT32 get_backtracking() const;
219*16467b97STreehugger Robot 	RuleMemoType* get_ruleMemo() const;
220*16467b97STreehugger Robot 	ANTLR_UINT8** get_tokenNames() const;
221*16467b97STreehugger Robot 	ANTLR_UINT8* get_tokenName( ANTLR_UINT32 i ) const;
222*16467b97STreehugger Robot 	CommonTokenType* get_token();
223*16467b97STreehugger Robot 	TokenSourceType* get_tokSource() const;
224*16467b97STreehugger Robot 	ANTLR_UINT32& get_channel();
225*16467b97STreehugger Robot 	ANTLR_UINT32 get_type() const;
226*16467b97STreehugger Robot 	ANTLR_INT32 get_tokenStartLine() const;
227*16467b97STreehugger Robot 	ANTLR_INT32 get_tokenStartCharPositionInLine() const;
228*16467b97STreehugger Robot 	ANTLR_MARKER get_tokenStartCharIndex() const;
229*16467b97STreehugger Robot 	StringType& get_text();
230*16467b97STreehugger Robot 	InputStreamsType& get_streams();
231*16467b97STreehugger Robot 
232*16467b97STreehugger Robot 	void  set_following( const FollowingType& following );
233*16467b97STreehugger Robot 	void  set_sizeHint( ANTLR_UINT32 sizeHint );
234*16467b97STreehugger Robot 	void  set_error( bool error );
235*16467b97STreehugger Robot 	void  set_errorRecovery( bool errorRecovery );
236*16467b97STreehugger Robot 	void  set_failed( bool failed );
237*16467b97STreehugger Robot 	void  set_token_present(bool token_present);
238*16467b97STreehugger Robot 	void  set_lastErrorIndex( ANTLR_MARKER lastErrorIndex );
239*16467b97STreehugger Robot 	void  set_errorCount( ANTLR_UINT32 errorCount );
240*16467b97STreehugger Robot 	void  set_backtracking( ANTLR_INT32 backtracking );
241*16467b97STreehugger Robot 	void  set_ruleMemo( RuleMemoType* ruleMemo );
242*16467b97STreehugger Robot 	void  set_tokenNames( ANTLR_UINT8** tokenNames );
243*16467b97STreehugger Robot 	void  set_tokSource( TokenSourceType* tokSource );
244*16467b97STreehugger Robot 	void  set_channel( ANTLR_UINT32 channel );
245*16467b97STreehugger Robot 	void  set_exception( ExceptionBaseType* exception );
246*16467b97STreehugger Robot 	void  set_type( ANTLR_UINT32 type );
247*16467b97STreehugger Robot 	void  set_token( const CommonTokenType* tok);
248*16467b97STreehugger Robot 	void  set_tokenStartLine( ANTLR_INT32 tokenStartLine );
249*16467b97STreehugger Robot 	void  set_tokenStartCharPositionInLine( ANTLR_INT32 tokenStartCharPositionInLine );
250*16467b97STreehugger Robot 	void  set_tokenStartCharIndex( ANTLR_MARKER tokenStartCharIndex );
251*16467b97STreehugger Robot 	void  set_text( const StringType& text );
252*16467b97STreehugger Robot 	void  set_streams( const InputStreamsType& streams );
253*16467b97STreehugger Robot 
254*16467b97STreehugger Robot 	void inc_errorCount();
255*16467b97STreehugger Robot 	void inc_backtracking();
256*16467b97STreehugger Robot 	void dec_backtracking();
257*16467b97STreehugger Robot };
258*16467b97STreehugger Robot 
259*16467b97STreehugger Robot ANTLR_END_NAMESPACE()
260*16467b97STreehugger Robot 
261*16467b97STreehugger Robot #include "antlr3recognizersharedstate.inl"
262*16467b97STreehugger Robot 
263*16467b97STreehugger Robot #endif
264*16467b97STreehugger Robot 
265*16467b97STreehugger Robot 
266