xref: /aosp_15_r20/external/antlr/runtime/Cpp/include/antlr3commontoken.hpp (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  * \brief Defines the interface for a common token.
3*16467b97STreehugger Robot  *
4*16467b97STreehugger Robot  * All token streams should provide their tokens using an instance
5*16467b97STreehugger Robot  * of this common token. A custom pointer is provided, where you may attach
6*16467b97STreehugger Robot  * a further structure to enhance the common token if you feel the need
7*16467b97STreehugger Robot  * to do so. The C runtime will assume that a token provides implementations
8*16467b97STreehugger Robot  * of the interface functions, but all of them may be replaced by your own
9*16467b97STreehugger Robot  * implementation if you require it.
10*16467b97STreehugger Robot  */
11*16467b97STreehugger Robot #ifndef	_ANTLR3_COMMON_TOKEN_HPP
12*16467b97STreehugger Robot #define	_ANTLR3_COMMON_TOKEN_HPP
13*16467b97STreehugger Robot 
14*16467b97STreehugger Robot // [The "BSD licence"]
15*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB
16*16467b97STreehugger Robot 
17*16467b97STreehugger Robot //
18*16467b97STreehugger Robot // All rights reserved.
19*16467b97STreehugger Robot //
20*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
21*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
22*16467b97STreehugger Robot // are met:
23*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
24*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
25*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
26*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
27*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
28*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
29*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
30*16467b97STreehugger Robot //
31*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*16467b97STreehugger Robot 
42*16467b97STreehugger Robot #include    <stdlib.h>
43*16467b97STreehugger Robot 
44*16467b97STreehugger Robot #include    "antlr3defs.hpp"
45*16467b97STreehugger Robot 
46*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE()
47*16467b97STreehugger Robot 
48*16467b97STreehugger Robot /** The ANTLR3 common token class template, which all implementations of a
49*16467b97STreehugger Robot  * token stream should provide. ImplTraits supplies the AllocPolicyType base class
50*16467b97STreehugger Robot  * and the TokenIntStreamType, StringType, InputStreamType and StreamDataType types.
51*16467b97STreehugger Robot  * NOTE(review): older text mentioned a "custom pointer element"; no such member is declared in this class.
52*16467b97STreehugger Robot  * \remark
53*16467b97STreehugger Robot  * Token streams are in essence provided by lexers or other programs that serve
54*16467b97STreehugger Robot  * as lexers.
55*16467b97STreehugger Robot  */
56*16467b97STreehugger Robot 
57*16467b97STreehugger Robot template<class ImplTraits>
58*16467b97STreehugger Robot class CommonToken : public ImplTraits::AllocPolicyType
59*16467b97STreehugger Robot {
60*16467b97STreehugger Robot public:
61*16467b97STreehugger Robot 	/* Base token types, which all lexer/parser tokens come after in sequence. Enumerators without an initialiser take the previous value plus one.
62*16467b97STreehugger Robot 	*/
63*16467b97STreehugger Robot 	enum TOKEN_TYPE
64*16467b97STreehugger Robot 	{
65*16467b97STreehugger Robot 		/** Indicator of an invalid token (value 0)
66*16467b97STreehugger Robot 		 */
67*16467b97STreehugger Robot 		TOKEN_INVALID =	0
68*16467b97STreehugger Robot 		, EOR_TOKEN_TYPE	///< Value 1. NOTE(review): presumably an "end of rule" marker - confirm against its users.
69*16467b97STreehugger Robot 		/** Imaginary token type to cause a traversal of child nodes in a tree parser (value 2)
70*16467b97STreehugger Robot 		 */
71*16467b97STreehugger Robot 		, TOKEN_DOWN
72*16467b97STreehugger Robot 		/** Imaginary token type to signal the end of a stream of child nodes (value 3)
73*16467b97STreehugger Robot 		 */
74*16467b97STreehugger Robot 		, TOKEN_UP
75*16467b97STreehugger Robot 		/** First token that can be used by users/generated code (TOKEN_UP + 1, i.e. 4)
76*16467b97STreehugger Robot 		 */
77*16467b97STreehugger Robot 		, MIN_TOKEN_TYPE =	TOKEN_UP + 1
78*16467b97STreehugger Robot 
79*16467b97STreehugger Robot 		/** End of file token: ANTLR_CHARSTREAM_EOF masked to its low 32 bits (the macro is defined outside this file)
80*16467b97STreehugger Robot 		 */
81*16467b97STreehugger Robot 		, TOKEN_EOF =	(ANTLR_CHARSTREAM_EOF & 0xFFFFFFFF)
82*16467b97STreehugger Robot 	};
83*16467b97STreehugger Robot 
84*16467b97STreehugger Robot 	typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType;	///< Traits alias; not referenced elsewhere in this class declaration.
85*16467b97STreehugger Robot 	typedef typename ImplTraits::StringType StringType;	///< String type used for token text (getText(), toString(), m_tokText).
86*16467b97STreehugger Robot 	typedef typename ImplTraits::InputStreamType InputStreamType;	///< Type of the input stream a token points back to (m_input).
87*16467b97STreehugger Robot 	typedef typename ImplTraits::StreamDataType StreamDataType;	///< Element type pointed to by m_lineStart.
88*16467b97STreehugger Robot 
89*16467b97STreehugger Robot private:
90*16467b97STreehugger Robot     /** The actual type of this token (read via get_type(), written via set_type())
91*16467b97STreehugger Robot      */
92*16467b97STreehugger Robot     ANTLR_UINT32   m_type;
93*16467b97STreehugger Robot 
94*16467b97STreehugger Robot 	/** The virtual channel that this token exists in.
95*16467b97STreehugger Robot      */
96*16467b97STreehugger Robot     ANTLR_UINT32	m_channel;
97*16467b97STreehugger Robot 
98*16467b97STreehugger Robot 	mutable StringType		m_tokText;	///< Token text; mutable, so const member functions may modify it. NOTE(review): presumably a cache filled by getText() - confirm in the .inl.
99*16467b97STreehugger Robot 
100*16467b97STreehugger Robot     /** Pointer to where the line in which this token resides starts. Declared as a
101*16467b97STreehugger Robot      *  const StreamDataType*, i.e. a pointer rather than a numeric offset (presumably into the input stream data).
102*16467b97STreehugger Robot      */
103*16467b97STreehugger Robot 	const StreamDataType*	m_lineStart;
104*16467b97STreehugger Robot 
105*16467b97STreehugger Robot 	/** The line number in the input stream where this token was derived from
106*16467b97STreehugger Robot      */
107*16467b97STreehugger Robot     ANTLR_UINT32	m_line;
108*16467b97STreehugger Robot 
109*16467b97STreehugger Robot     /** The character position in the line that this token was derived from
110*16467b97STreehugger Robot      */
111*16467b97STreehugger Robot     ANTLR_INT32		m_charPositionInLine;
112*16467b97STreehugger Robot 
113*16467b97STreehugger Robot     /** Pointer to the input stream that this token originated in.
114*16467b97STreehugger Robot      */
115*16467b97STreehugger Robot     InputStreamType*    m_input;
116*16467b97STreehugger Robot 
117*16467b97STreehugger Robot     /** The index of this token within the token stream: 0, 1, .., n-2, n-1 for n tokens
118*16467b97STreehugger Robot      */
119*16467b97STreehugger Robot     ANTLR_MARKER		m_index;
120*16467b97STreehugger Robot 
121*16467b97STreehugger Robot     /** The character offset in the input stream where the text for this token
122*16467b97STreehugger Robot      *  starts.
123*16467b97STreehugger Robot      */
124*16467b97STreehugger Robot     ANTLR_MARKER		m_startIndex;
125*16467b97STreehugger Robot 
126*16467b97STreehugger Robot     /** The character offset in the input stream where the text for this token
127*16467b97STreehugger Robot      *  stops.
128*16467b97STreehugger Robot      */
129*16467b97STreehugger Robot     ANTLR_MARKER		m_stopIndex;
130*16467b97STreehugger Robot 
131*16467b97STreehugger Robot public:
132*16467b97STreehugger Robot 	CommonToken();	///< Default constructor. Initial member values are set in the definition, which is not visible here.
133*16467b97STreehugger Robot 	CommonToken(ANTLR_UINT32 type);	///< Constructs from a numeric token type; not explicit, so ANTLR_UINT32 converts implicitly.
134*16467b97STreehugger Robot 	CommonToken(TOKEN_TYPE type);	///< Constructs from one of the base TOKEN_TYPE enumerators; also not explicit.
135*16467b97STreehugger Robot 	CommonToken( const CommonToken& ctoken );	///< Copy constructor.
136*16467b97STreehugger Robot 
137*16467b97STreehugger Robot 	CommonToken& operator=( const CommonToken& ctoken );	///< Copy assignment; returns a reference to a CommonToken (presumably *this).
138*16467b97STreehugger Robot 	bool operator==( const CommonToken& ctoken ) const;	///< Equality comparison. NOTE(review): the fields compared are decided in the .inl - confirm there.
139*16467b97STreehugger Robot 	bool operator<( const CommonToken& ctoken ) const;	///< Ordering comparison. NOTE(review): the ordering key is decided in the .inl - confirm there.
140*16467b97STreehugger Robot 
141*16467b97STreehugger Robot 	InputStreamType* get_input() const;	///< Returns an input stream pointer (presumably m_input).
142*16467b97STreehugger Robot 	ANTLR_MARKER get_index() const;	///< Returns a token index (presumably m_index; see also get_tokenIndex()).
143*16467b97STreehugger Robot 	void set_index( ANTLR_MARKER index );	///< Sets a token index (presumably m_index; see also set_tokenIndex()).
144*16467b97STreehugger Robot 	void set_input( InputStreamType* input );	///< Sets the input stream pointer (presumably m_input).
145*16467b97STreehugger Robot 
146*16467b97STreehugger Robot     /* ==============================
147*16467b97STreehugger Robot      * API
148*16467b97STreehugger Robot      */
149*16467b97STreehugger Robot 
150*16467b97STreehugger Robot     /** Returns the text of this token as a StringType, by value. Use
151*16467b97STreehugger Robot      *  toString() if you want a printable representation of the token.
152*16467b97STreehugger Robot      */
153*16467b97STreehugger Robot     StringType  getText() const;
154*16467b97STreehugger Robot 
155*16467b97STreehugger Robot     /** Sets the text associated with this token from a StringType.
156*16467b97STreehugger Robot      *  NOTE(review): the wording below appears to be inherited from the C runtime.
157*16467b97STreehugger Robot      *  Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually carry text, however many
158*16467b97STreehugger Robot      *  tokens such as ANTLR3_COMMON_TOKEN do not actually have strings associated with them but
159*16467b97STreehugger Robot      *  just point into the current input stream, and may implement this with a function that
160*16467b97STreehugger Robot      *  errors out (probably drastically). Confirm what this class really does in the .inl file.
161*16467b97STreehugger Robot      */
162*16467b97STreehugger Robot     void set_tokText( const StringType& text );
163*16467b97STreehugger Robot 
164*16467b97STreehugger Robot     /** Sets the text associated with this token from a character pointer.
165*16467b97STreehugger Robot      *  NOTE(review): the wording below appears to be inherited from the C runtime.
166*16467b97STreehugger Robot      *  Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually carry text, however many
167*16467b97STreehugger Robot      *  tokens such as ANTLR3_COMMON_TOKEN do not actually have strings associated with them but
168*16467b97STreehugger Robot      *  just point into the current input stream, and may implement this with a function that
169*16467b97STreehugger Robot      *  errors out (probably drastically). Confirm what this class really does in the .inl file.
170*16467b97STreehugger Robot      */
171*16467b97STreehugger Robot     void	setText(ANTLR_UINT8* text);
172*16467b97STreehugger Robot 	void	setText(const char* text);	///< Overload of setText() taking a const char* instead of an ANTLR_UINT8*.
173*16467b97STreehugger Robot 
174*16467b97STreehugger Robot     /** Returns the token type of this token
175*16467b97STreehugger Robot      */
176*16467b97STreehugger Robot     ANTLR_UINT32  get_type() const;
177*16467b97STreehugger Robot 	ANTLR_UINT32  getType() const;	///< Same signature as get_type(); presumably an equivalent camelCase alias - confirm in the .inl.
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot     /** Sets the type of this token
180*16467b97STreehugger Robot      */
181*16467b97STreehugger Robot     void	set_type(ANTLR_UINT32 ttype);
182*16467b97STreehugger Robot 
183*16467b97STreehugger Robot     /** Gets the 'line' number where this token resides
184*16467b97STreehugger Robot      */
185*16467b97STreehugger Robot     ANTLR_UINT32   get_line() const;
186*16467b97STreehugger Robot 
187*16467b97STreehugger Robot     /** Sets the 'line' number where this token resides
188*16467b97STreehugger Robot      */
189*16467b97STreehugger Robot     void set_line(ANTLR_UINT32 line);
190*16467b97STreehugger Robot 
191*16467b97STreehugger Robot     /** Gets the offset in the line where this token exists
192*16467b97STreehugger Robot      */
193*16467b97STreehugger Robot     ANTLR_INT32  get_charPositionInLine() const;
194*16467b97STreehugger Robot 	ANTLR_INT32  getCharPositionInLine() const;	///< Same signature as get_charPositionInLine(); presumably an equivalent camelCase alias - confirm in the .inl.
195*16467b97STreehugger Robot 
196*16467b97STreehugger Robot     /** Sets the offset in the line where this token exists
197*16467b97STreehugger Robot      */
198*16467b97STreehugger Robot     void	set_charPositionInLine(ANTLR_INT32 pos);
199*16467b97STreehugger Robot 
200*16467b97STreehugger Robot     /** Gets the channel that this token was placed in (parsers
201*16467b97STreehugger Robot      *  can 'tune' to these channels).
202*16467b97STreehugger Robot      */
203*16467b97STreehugger Robot     ANTLR_UINT32   get_channel() const;
204*16467b97STreehugger Robot 
205*16467b97STreehugger Robot     /** Sets the channel that this token should belong to
206*16467b97STreehugger Robot      */
207*16467b97STreehugger Robot     void set_channel(ANTLR_UINT32 channel);
208*16467b97STreehugger Robot 
209*16467b97STreehugger Robot     /** Returns an index 0...n-1 of the token in the token
210*16467b97STreehugger Robot      *  input stream.
211*16467b97STreehugger Robot      */
212*16467b97STreehugger Robot     ANTLR_MARKER  get_tokenIndex() const;
213*16467b97STreehugger Robot 
214*16467b97STreehugger Robot     /** Sets the token index of this token in the token
215*16467b97STreehugger Robot      *  input stream.
216*16467b97STreehugger Robot      */
217*16467b97STreehugger Robot     void	set_tokenIndex(ANTLR_MARKER tokenIndex);
218*16467b97STreehugger Robot 
219*16467b97STreehugger Robot     /** Gets the start index in the input stream for this token.
220*16467b97STreehugger Robot      */
221*16467b97STreehugger Robot     ANTLR_MARKER   get_startIndex() const;
222*16467b97STreehugger Robot 
223*16467b97STreehugger Robot     /** Sets the start index in the input stream for this token.
224*16467b97STreehugger Robot      */
225*16467b97STreehugger Robot     void	set_startIndex(ANTLR_MARKER index);
226*16467b97STreehugger Robot 
227*16467b97STreehugger Robot     /** Gets the stop index in the input stream for this token.
228*16467b97STreehugger Robot      */
229*16467b97STreehugger Robot     ANTLR_MARKER  get_stopIndex() const;
230*16467b97STreehugger Robot 
231*16467b97STreehugger Robot     /** Sets the stop index in the input stream for this token.
232*16467b97STreehugger Robot      */
233*16467b97STreehugger Robot     void	set_stopIndex(ANTLR_MARKER index);
234*16467b97STreehugger Robot 	const StreamDataType* get_lineStart() const;	///< Returns a line-start pointer (presumably m_lineStart).
235*16467b97STreehugger Robot 	void	set_lineStart( const StreamDataType* lineStart );	///< Sets the line-start pointer (presumably m_lineStart).
236*16467b97STreehugger Robot 
237*16467b97STreehugger Robot     /** Returns this token as a text representation that can be
238*16467b97STreehugger Robot      *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
239*16467b97STreehugger Robot      *  This also yields a string structure that can be used more easily than the pointer to
240*16467b97STreehugger Robot      *  the input stream in certain situations.
241*16467b97STreehugger Robot      */
242*16467b97STreehugger Robot     StringType  toString() const;
243*16467b97STreehugger Robot 
244*16467b97STreehugger Robot };
245*16467b97STreehugger Robot 
246*16467b97STreehugger Robot ANTLR_END_NAMESPACE()
247*16467b97STreehugger Robot 
248*16467b97STreehugger Robot #include "antlr3commontoken.inl"
249*16467b97STreehugger Robot 
250*16467b97STreehugger Robot #endif
251