1*16467b97STreehugger Robot /** \file 2*16467b97STreehugger Robot * \brief Defines the interface for a common token. 3*16467b97STreehugger Robot * 4*16467b97STreehugger Robot * All token streams should provide their tokens using an instance 5*16467b97STreehugger Robot * of this common token. A custom pointer is provided, wher you may attach 6*16467b97STreehugger Robot * a further structure to enhance the common token if you feel the need 7*16467b97STreehugger Robot * to do so. The C runtime will assume that a token provides implementations 8*16467b97STreehugger Robot * of the interface functions, but all of them may be rplaced by your own 9*16467b97STreehugger Robot * implementation if you require it. 10*16467b97STreehugger Robot */ 11*16467b97STreehugger Robot #ifndef _ANTLR3_COMMON_TOKEN_HPP 12*16467b97STreehugger Robot #define _ANTLR3_COMMON_TOKEN_HPP 13*16467b97STreehugger Robot 14*16467b97STreehugger Robot // [The "BSD licence"] 15*16467b97STreehugger Robot // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 16*16467b97STreehugger Robot 17*16467b97STreehugger Robot // 18*16467b97STreehugger Robot // All rights reserved. 19*16467b97STreehugger Robot // 20*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 21*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 22*16467b97STreehugger Robot // are met: 23*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 24*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 25*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 26*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 27*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 28*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 29*16467b97STreehugger Robot // derived from this software without specific prior written permission. 30*16467b97STreehugger Robot // 31*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 32*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 33*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 34*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 35*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 40*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41*16467b97STreehugger Robot 42*16467b97STreehugger Robot #include <stdlib.h> 43*16467b97STreehugger Robot 44*16467b97STreehugger Robot #include "antlr3defs.hpp" 45*16467b97STreehugger Robot 46*16467b97STreehugger Robot ANTLR_BEGIN_NAMESPACE() 47*16467b97STreehugger Robot 48*16467b97STreehugger Robot /** The definition of an ANTLR3 common token structure, which all implementations 49*16467b97STreehugger Robot * of a token stream should provide, installing any further structures in the 50*16467b97STreehugger Robot * custom pointer element of this structure. 51*16467b97STreehugger Robot * 52*16467b97STreehugger Robot * \remark 53*16467b97STreehugger Robot * Token streams are in essence provided by lexers or other programs that serve 54*16467b97STreehugger Robot * as lexers. 55*16467b97STreehugger Robot */ 56*16467b97STreehugger Robot 57*16467b97STreehugger Robot template<class ImplTraits> 58*16467b97STreehugger Robot class CommonToken : public ImplTraits::AllocPolicyType 59*16467b97STreehugger Robot { 60*16467b97STreehugger Robot public: 61*16467b97STreehugger Robot /* Base token types, which all lexer/parser tokens come after in sequence. 62*16467b97STreehugger Robot */ 63*16467b97STreehugger Robot enum TOKEN_TYPE 64*16467b97STreehugger Robot { 65*16467b97STreehugger Robot /** Indicator of an invalid token 66*16467b97STreehugger Robot */ 67*16467b97STreehugger Robot TOKEN_INVALID = 0 68*16467b97STreehugger Robot , EOR_TOKEN_TYPE 69*16467b97STreehugger Robot /** Imaginary token type to cause a traversal of child nodes in a tree parser 70*16467b97STreehugger Robot */ 71*16467b97STreehugger Robot , TOKEN_DOWN 72*16467b97STreehugger Robot /** Imaginary token type to signal the end of a stream of child nodes. 73*16467b97STreehugger Robot */ 74*16467b97STreehugger Robot , TOKEN_UP 75*16467b97STreehugger Robot /** First token that can be used by users/generated code 76*16467b97STreehugger Robot */ 77*16467b97STreehugger Robot , MIN_TOKEN_TYPE = TOKEN_UP + 1 78*16467b97STreehugger Robot 79*16467b97STreehugger Robot /** End of file token 80*16467b97STreehugger Robot */ 81*16467b97STreehugger Robot , TOKEN_EOF = (ANTLR_CHARSTREAM_EOF & 0xFFFFFFFF) 82*16467b97STreehugger Robot }; 83*16467b97STreehugger Robot 84*16467b97STreehugger Robot typedef typename ImplTraits::TokenIntStreamType TokenIntStreamType; 85*16467b97STreehugger Robot typedef typename ImplTraits::StringType StringType; 86*16467b97STreehugger Robot typedef typename ImplTraits::InputStreamType InputStreamType; 87*16467b97STreehugger Robot typedef typename ImplTraits::StreamDataType StreamDataType; 88*16467b97STreehugger Robot 89*16467b97STreehugger Robot private: 90*16467b97STreehugger Robot /** The actual type of this token 91*16467b97STreehugger Robot */ 92*16467b97STreehugger Robot ANTLR_UINT32 m_type; 93*16467b97STreehugger Robot 94*16467b97STreehugger Robot /** The virtual channel that this token exists in. 95*16467b97STreehugger Robot */ 96*16467b97STreehugger Robot ANTLR_UINT32 m_channel; 97*16467b97STreehugger Robot 98*16467b97STreehugger Robot mutable StringType m_tokText; 99*16467b97STreehugger Robot 100*16467b97STreehugger Robot /** The offset into the input stream that the line in which this 101*16467b97STreehugger Robot * token resides starts. 102*16467b97STreehugger Robot */ 103*16467b97STreehugger Robot const StreamDataType* m_lineStart; 104*16467b97STreehugger Robot 105*16467b97STreehugger Robot /** The line number in the input stream where this token was derived from 106*16467b97STreehugger Robot */ 107*16467b97STreehugger Robot ANTLR_UINT32 m_line; 108*16467b97STreehugger Robot 109*16467b97STreehugger Robot /** The character position in the line that this token was derived from 110*16467b97STreehugger Robot */ 111*16467b97STreehugger Robot ANTLR_INT32 m_charPositionInLine; 112*16467b97STreehugger Robot 113*16467b97STreehugger Robot /** Pointer to the input stream that this token originated in. 114*16467b97STreehugger Robot */ 115*16467b97STreehugger Robot InputStreamType* m_input; 116*16467b97STreehugger Robot 117*16467b97STreehugger Robot /** What the index of this token is, 0, 1, .., n-2, n-1 tokens 118*16467b97STreehugger Robot */ 119*16467b97STreehugger Robot ANTLR_MARKER m_index; 120*16467b97STreehugger Robot 121*16467b97STreehugger Robot /** The character offset in the input stream where the text for this token 122*16467b97STreehugger Robot * starts. 123*16467b97STreehugger Robot */ 124*16467b97STreehugger Robot ANTLR_MARKER m_startIndex; 125*16467b97STreehugger Robot 126*16467b97STreehugger Robot /** The character offset in the input stream where the text for this token 127*16467b97STreehugger Robot * stops. 128*16467b97STreehugger Robot */ 129*16467b97STreehugger Robot ANTLR_MARKER m_stopIndex; 130*16467b97STreehugger Robot 131*16467b97STreehugger Robot public: 132*16467b97STreehugger Robot CommonToken(); 133*16467b97STreehugger Robot CommonToken(ANTLR_UINT32 type); 134*16467b97STreehugger Robot CommonToken(TOKEN_TYPE type); 135*16467b97STreehugger Robot CommonToken( const CommonToken& ctoken ); 136*16467b97STreehugger Robot 137*16467b97STreehugger Robot CommonToken& operator=( const CommonToken& ctoken ); 138*16467b97STreehugger Robot bool operator==( const CommonToken& ctoken ) const; 139*16467b97STreehugger Robot bool operator<( const CommonToken& ctoken ) const; 140*16467b97STreehugger Robot 141*16467b97STreehugger Robot InputStreamType* get_input() const; 142*16467b97STreehugger Robot ANTLR_MARKER get_index() const; 143*16467b97STreehugger Robot void set_index( ANTLR_MARKER index ); 144*16467b97STreehugger Robot void set_input( InputStreamType* input ); 145*16467b97STreehugger Robot 146*16467b97STreehugger Robot /* ============================== 147*16467b97STreehugger Robot * API 148*16467b97STreehugger Robot */ 149*16467b97STreehugger Robot 150*16467b97STreehugger Robot /** Function that returns the text pointer of a token, use 151*16467b97STreehugger Robot * toString() if you want a pANTLR3_STRING version of the token. 152*16467b97STreehugger Robot */ 153*16467b97STreehugger Robot StringType getText() const; 154*16467b97STreehugger Robot 155*16467b97STreehugger Robot /** Pointer to a function that 'might' be able to set the text associated 156*16467b97STreehugger Robot * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 157*16467b97STreehugger Robot * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have 158*16467b97STreehugger Robot * strings associated with them but just point into the current input stream. These 159*16467b97STreehugger Robot * tokens will implement this function with a function that errors out (probably 160*16467b97STreehugger Robot * drastically. 161*16467b97STreehugger Robot */ 162*16467b97STreehugger Robot void set_tokText( const StringType& text ); 163*16467b97STreehugger Robot 164*16467b97STreehugger Robot /** Pointer to a function that 'might' be able to set the text associated 165*16467b97STreehugger Robot * with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually 166*16467b97STreehugger Robot * do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have 167*16467b97STreehugger Robot * strings associated with them but just point into the current input stream. These 168*16467b97STreehugger Robot * tokens will implement this function with a function that errors out (probably 169*16467b97STreehugger Robot * drastically. 170*16467b97STreehugger Robot */ 171*16467b97STreehugger Robot void setText(ANTLR_UINT8* text); 172*16467b97STreehugger Robot void setText(const char* text); 173*16467b97STreehugger Robot 174*16467b97STreehugger Robot /** Pointer to a function that returns the token type of this token 175*16467b97STreehugger Robot */ 176*16467b97STreehugger Robot ANTLR_UINT32 get_type() const; 177*16467b97STreehugger Robot ANTLR_UINT32 getType() const; 178*16467b97STreehugger Robot 179*16467b97STreehugger Robot /** Pointer to a function that sets the type of this token 180*16467b97STreehugger Robot */ 181*16467b97STreehugger Robot void set_type(ANTLR_UINT32 ttype); 182*16467b97STreehugger Robot 183*16467b97STreehugger Robot /** Pointer to a function that gets the 'line' number where this token resides 184*16467b97STreehugger Robot */ 185*16467b97STreehugger Robot ANTLR_UINT32 get_line() const; 186*16467b97STreehugger Robot 187*16467b97STreehugger Robot /** Pointer to a function that sets the 'line' number where this token reside 188*16467b97STreehugger Robot */ 189*16467b97STreehugger Robot void set_line(ANTLR_UINT32 line); 190*16467b97STreehugger Robot 191*16467b97STreehugger Robot /** Pointer to a function that gets the offset in the line where this token exists 192*16467b97STreehugger Robot */ 193*16467b97STreehugger Robot ANTLR_INT32 get_charPositionInLine() const; 194*16467b97STreehugger Robot ANTLR_INT32 getCharPositionInLine() const; 195*16467b97STreehugger Robot 196*16467b97STreehugger Robot /** Pointer to a function that sets the offset in the line where this token exists 197*16467b97STreehugger Robot */ 198*16467b97STreehugger Robot void set_charPositionInLine(ANTLR_INT32 pos); 199*16467b97STreehugger Robot 200*16467b97STreehugger Robot /** Pointer to a function that gets the channel that this token was placed in (parsers 201*16467b97STreehugger Robot * can 'tune' to these channels. 202*16467b97STreehugger Robot */ 203*16467b97STreehugger Robot ANTLR_UINT32 get_channel() const; 204*16467b97STreehugger Robot 205*16467b97STreehugger Robot /** Pointer to a function that sets the channel that this token should belong to 206*16467b97STreehugger Robot */ 207*16467b97STreehugger Robot void set_channel(ANTLR_UINT32 channel); 208*16467b97STreehugger Robot 209*16467b97STreehugger Robot /** Pointer to a function that returns an index 0...n-1 of the token in the token 210*16467b97STreehugger Robot * input stream. 211*16467b97STreehugger Robot */ 212*16467b97STreehugger Robot ANTLR_MARKER get_tokenIndex() const; 213*16467b97STreehugger Robot 214*16467b97STreehugger Robot /** Pointer to a function that can set the token index of this token in the token 215*16467b97STreehugger Robot * input stream. 216*16467b97STreehugger Robot */ 217*16467b97STreehugger Robot void set_tokenIndex(ANTLR_MARKER tokenIndex); 218*16467b97STreehugger Robot 219*16467b97STreehugger Robot /** Pointer to a function that gets the start index in the input stream for this token. 220*16467b97STreehugger Robot */ 221*16467b97STreehugger Robot ANTLR_MARKER get_startIndex() const; 222*16467b97STreehugger Robot 223*16467b97STreehugger Robot /** Pointer to a function that sets the start index in the input stream for this token. 224*16467b97STreehugger Robot */ 225*16467b97STreehugger Robot void set_startIndex(ANTLR_MARKER index); 226*16467b97STreehugger Robot 227*16467b97STreehugger Robot /** Pointer to a function that gets the stop index in the input stream for this token. 228*16467b97STreehugger Robot */ 229*16467b97STreehugger Robot ANTLR_MARKER get_stopIndex() const; 230*16467b97STreehugger Robot 231*16467b97STreehugger Robot /** Pointer to a function that sets the stop index in the input stream for this token. 232*16467b97STreehugger Robot */ 233*16467b97STreehugger Robot void set_stopIndex(ANTLR_MARKER index); 234*16467b97STreehugger Robot const StreamDataType* get_lineStart() const; 235*16467b97STreehugger Robot void set_lineStart( const StreamDataType* lineStart ); 236*16467b97STreehugger Robot 237*16467b97STreehugger Robot /** Pointer to a function that returns this token as a text representation that can be 238*16467b97STreehugger Robot * printed with embedded control codes such as \n replaced with the printable sequence "\\n" 239*16467b97STreehugger Robot * This also yields a string structure that can be used more easily than the pointer to 240*16467b97STreehugger Robot * the input stream in certain situations. 241*16467b97STreehugger Robot */ 242*16467b97STreehugger Robot StringType toString() const; 243*16467b97STreehugger Robot 244*16467b97STreehugger Robot }; 245*16467b97STreehugger Robot 246*16467b97STreehugger Robot ANTLR_END_NAMESPACE() 247*16467b97STreehugger Robot 248*16467b97STreehugger Robot #include "antlr3commontoken.inl" 249*16467b97STreehugger Robot 250*16467b97STreehugger Robot #endif 251