1*16467b97STreehugger RobotANTLR_BEGIN_NAMESPACE() 2*16467b97STreehugger Robot 3*16467b97STreehugger Robottemplate<class ImplTraits> 4*16467b97STreehugger RobotInputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding) 5*16467b97STreehugger Robot{ 6*16467b97STreehugger Robot // First order of business is to read the file into some buffer space 7*16467b97STreehugger Robot // as just straight 8 bit bytes. Then we will work out the encoding and 8*16467b97STreehugger Robot // byte order and adjust the API functions that are installed for the 9*16467b97STreehugger Robot // default 8Bit stream accordingly. 10*16467b97STreehugger Robot // 11*16467b97STreehugger Robot this->createFileStream(fileName); 12*16467b97STreehugger Robot 13*16467b97STreehugger Robot // We have the data in memory now so we can deal with it according to 14*16467b97STreehugger Robot // the encoding scheme we were given by the user. 15*16467b97STreehugger Robot // 16*16467b97STreehugger Robot m_encoding = encoding; 17*16467b97STreehugger Robot 18*16467b97STreehugger Robot // Now we need to work out the endian type and install any 19*16467b97STreehugger Robot // API functions that differ from 8Bit 20*16467b97STreehugger Robot // 21*16467b97STreehugger Robot this->setupInputStream(); 22*16467b97STreehugger Robot 23*16467b97STreehugger Robot // Now we can set up the file name 24*16467b97STreehugger Robot // 25*16467b97STreehugger Robot BaseType::m_streamName = (const char* )fileName; 26*16467b97STreehugger Robot m_fileName = BaseType::m_streamName; 27*16467b97STreehugger Robot} 28*16467b97STreehugger Robot 29*16467b97STreehugger Robottemplate<class ImplTraits> 30*16467b97STreehugger RobotInputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) 31*16467b97STreehugger Robot{ 32*16467b97STreehugger Robot // First order of business is to set up the stream and install the data pointer. 33*16467b97STreehugger Robot // Then we will work out the encoding and byte order and adjust the API functions that are installed for the 34*16467b97STreehugger Robot // default 8Bit stream accordingly. 35*16467b97STreehugger Robot // 36*16467b97STreehugger Robot this->createStringStream(data); 37*16467b97STreehugger Robot 38*16467b97STreehugger Robot // Size (in bytes) of the given 'string' 39*16467b97STreehugger Robot // 40*16467b97STreehugger Robot m_sizeBuf = size; 41*16467b97STreehugger Robot 42*16467b97STreehugger Robot // We have the data in memory now so we can deal with it according to 43*16467b97STreehugger Robot // the encoding scheme we were given by the user. 44*16467b97STreehugger Robot // 45*16467b97STreehugger Robot m_encoding = encoding; 46*16467b97STreehugger Robot 47*16467b97STreehugger Robot // Now we need to work out the endian type and install any 48*16467b97STreehugger Robot // API functions that differ from 8Bit 49*16467b97STreehugger Robot // 50*16467b97STreehugger Robot this->setupInputStream(); 51*16467b97STreehugger Robot 52*16467b97STreehugger Robot // Now we can set up the file name 53*16467b97STreehugger Robot // 54*16467b97STreehugger Robot BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name; 55*16467b97STreehugger Robot m_fileName = BaseType::m_streamName; 56*16467b97STreehugger Robot 57*16467b97STreehugger Robot} 58*16467b97STreehugger Robot 59*16467b97STreehugger Robottemplate<class ImplTraits> 60*16467b97STreehugger Robotvoid InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data) 61*16467b97STreehugger Robot{ 62*16467b97STreehugger Robot if (data == NULL) 63*16467b97STreehugger Robot { 64*16467b97STreehugger Robot ParseNullStringException ex; 65*16467b97STreehugger Robot throw ex; 66*16467b97STreehugger Robot } 67*16467b97STreehugger Robot 68*16467b97STreehugger Robot // Structure was allocated correctly, now we can install the pointer 69*16467b97STreehugger Robot // 70*16467b97STreehugger Robot m_data = data; 71*16467b97STreehugger Robot m_isAllocated = false; 72*16467b97STreehugger Robot 73*16467b97STreehugger Robot // Call the common 8 bit input stream handler 74*16467b97STreehugger Robot // initialization. 75*16467b97STreehugger Robot // 76*16467b97STreehugger Robot this->genericSetupStream(); 77*16467b97STreehugger Robot} 78*16467b97STreehugger Robot 79*16467b97STreehugger Robottemplate<class ImplTraits> 80*16467b97STreehugger Robotvoid InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName) 81*16467b97STreehugger Robot{ 82*16467b97STreehugger Robot if (fileName == NULL) 83*16467b97STreehugger Robot { 84*16467b97STreehugger Robot ParseFileAbsentException ex; 85*16467b97STreehugger Robot throw ex; 86*16467b97STreehugger Robot } 87*16467b97STreehugger Robot 88*16467b97STreehugger Robot // Structure was allocated correctly, now we can read the file. 89*16467b97STreehugger Robot // 90*16467b97STreehugger Robot FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName); 91*16467b97STreehugger Robot 92*16467b97STreehugger Robot // Call the common 8 bit input stream handler 93*16467b97STreehugger Robot // initialization. 94*16467b97STreehugger Robot // 95*16467b97STreehugger Robot this->genericSetupStream(); 96*16467b97STreehugger Robot} 97*16467b97STreehugger Robot 98*16467b97STreehugger Robottemplate<class ImplTraits> 99*16467b97STreehugger Robotvoid InputStream<ImplTraits>::genericSetupStream() 100*16467b97STreehugger Robot{ 101*16467b97STreehugger Robot this->set_charByteSize(1); 102*16467b97STreehugger Robot 103*16467b97STreehugger Robot /* Set up the input stream brand new 104*16467b97STreehugger Robot */ 105*16467b97STreehugger Robot this->reset(); 106*16467b97STreehugger Robot 107*16467b97STreehugger Robot /* Install default line separator character (it can be replaced 108*16467b97STreehugger Robot * by the grammar programmer later) 109*16467b97STreehugger Robot */ 110*16467b97STreehugger Robot this->set_newLineChar((ANTLR_UCHAR)'\n'); 111*16467b97STreehugger Robot} 112*16467b97STreehugger Robot 113*16467b97STreehugger Robottemplate<class ImplTraits> 114*16467b97STreehugger RobotInputStream<ImplTraits>::~InputStream() 115*16467b97STreehugger Robot{ 116*16467b97STreehugger Robot // Free the input stream buffer if we allocated it 117*16467b97STreehugger Robot // 118*16467b97STreehugger Robot if (m_isAllocated && (m_data != NULL)) 119*16467b97STreehugger Robot AllocPolicyType::free((void*)m_data); //const_cast is required 120*16467b97STreehugger Robot} 121*16467b97STreehugger Robot 122*16467b97STreehugger Robottemplate<class ImplTraits> 123*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const 124*16467b97STreehugger Robot{ 125*16467b97STreehugger Robot return m_data; 126*16467b97STreehugger Robot} 127*16467b97STreehugger Robottemplate<class ImplTraits> 128*16467b97STreehugger RobotANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const 129*16467b97STreehugger Robot{ 130*16467b97STreehugger Robot return m_isAllocated; 131*16467b97STreehugger Robot} 132*16467b97STreehugger Robottemplate<class ImplTraits> 133*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const 134*16467b97STreehugger Robot{ 135*16467b97STreehugger Robot return m_nextChar; 136*16467b97STreehugger Robot} 137*16467b97STreehugger Robottemplate<class ImplTraits> 138*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const 139*16467b97STreehugger Robot{ 140*16467b97STreehugger Robot return m_sizeBuf; 141*16467b97STreehugger Robot} 142*16467b97STreehugger Robottemplate<class ImplTraits> 143*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const 144*16467b97STreehugger Robot{ 145*16467b97STreehugger Robot return m_line; 146*16467b97STreehugger Robot} 147*16467b97STreehugger Robottemplate<class ImplTraits> 148*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const 149*16467b97STreehugger Robot{ 150*16467b97STreehugger Robot return m_currentLine; 151*16467b97STreehugger Robot} 152*16467b97STreehugger Robottemplate<class ImplTraits> 153*16467b97STreehugger RobotANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const 154*16467b97STreehugger Robot{ 155*16467b97STreehugger Robot return m_charPositionInLine; 156*16467b97STreehugger Robot} 157*16467b97STreehugger Robottemplate<class ImplTraits> 158*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const 159*16467b97STreehugger Robot{ 160*16467b97STreehugger Robot return m_markDepth; 161*16467b97STreehugger Robot} 162*16467b97STreehugger Robottemplate<class ImplTraits> 163*16467b97STreehugger RobotANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers() 164*16467b97STreehugger Robot{ 165*16467b97STreehugger Robot return m_markers; 166*16467b97STreehugger Robot} 167*16467b97STreehugger Robottemplate<class ImplTraits> 168*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const 169*16467b97STreehugger Robot{ 170*16467b97STreehugger Robot return m_fileName; 171*16467b97STreehugger Robot} 172*16467b97STreehugger Robottemplate<class ImplTraits> 173*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const 174*16467b97STreehugger Robot{ 175*16467b97STreehugger Robot return m_fileNo; 176*16467b97STreehugger Robot} 177*16467b97STreehugger Robottemplate<class ImplTraits> 178*16467b97STreehugger RobotANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const 179*16467b97STreehugger Robot{ 180*16467b97STreehugger Robot return m_newlineChar; 181*16467b97STreehugger Robot} 182*16467b97STreehugger Robottemplate<class ImplTraits> 183*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const 184*16467b97STreehugger Robot{ 185*16467b97STreehugger Robot return m_charByteSize; 186*16467b97STreehugger Robot} 187*16467b97STreehugger Robottemplate<class ImplTraits> 188*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const 189*16467b97STreehugger Robot{ 190*16467b97STreehugger Robot return m_encoding; 191*16467b97STreehugger Robot} 192*16467b97STreehugger Robottemplate<class ImplTraits> 193*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data ) 194*16467b97STreehugger Robot{ 195*16467b97STreehugger Robot m_data = data; 196*16467b97STreehugger Robot} 197*16467b97STreehugger Robottemplate<class ImplTraits> 198*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated ) 199*16467b97STreehugger Robot{ 200*16467b97STreehugger Robot m_isAllocated = isAllocated; 201*16467b97STreehugger Robot} 202*16467b97STreehugger Robottemplate<class ImplTraits> 203*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar ) 204*16467b97STreehugger Robot{ 205*16467b97STreehugger Robot m_nextChar = nextChar; 206*16467b97STreehugger Robot} 207*16467b97STreehugger Robottemplate<class ImplTraits> 208*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf ) 209*16467b97STreehugger Robot{ 210*16467b97STreehugger Robot m_sizeBuf = sizeBuf; 211*16467b97STreehugger Robot} 212*16467b97STreehugger Robottemplate<class ImplTraits> 213*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line ) 214*16467b97STreehugger Robot{ 215*16467b97STreehugger Robot m_line = line; 216*16467b97STreehugger Robot} 217*16467b97STreehugger Robottemplate<class ImplTraits> 218*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine ) 219*16467b97STreehugger Robot{ 220*16467b97STreehugger Robot m_currentLine = currentLine; 221*16467b97STreehugger Robot} 222*16467b97STreehugger Robottemplate<class ImplTraits> 223*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) 224*16467b97STreehugger Robot{ 225*16467b97STreehugger Robot m_charPositionInLine = charPositionInLine; 226*16467b97STreehugger Robot} 227*16467b97STreehugger Robottemplate<class ImplTraits> 228*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth ) 229*16467b97STreehugger Robot{ 230*16467b97STreehugger Robot m_markDepth = markDepth; 231*16467b97STreehugger Robot} 232*16467b97STreehugger Robottemplate<class ImplTraits> 233*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers ) 234*16467b97STreehugger Robot{ 235*16467b97STreehugger Robot m_markers = markers; 236*16467b97STreehugger Robot} 237*16467b97STreehugger Robottemplate<class ImplTraits> 238*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName ) 239*16467b97STreehugger Robot{ 240*16467b97STreehugger Robot m_fileName = fileName; 241*16467b97STreehugger Robot} 242*16467b97STreehugger Robottemplate<class ImplTraits> 243*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo ) 244*16467b97STreehugger Robot{ 245*16467b97STreehugger Robot m_fileNo = fileNo; 246*16467b97STreehugger Robot} 247*16467b97STreehugger Robottemplate<class ImplTraits> 248*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar ) 249*16467b97STreehugger Robot{ 250*16467b97STreehugger Robot m_newlineChar = newlineChar; 251*16467b97STreehugger Robot} 252*16467b97STreehugger Robottemplate<class ImplTraits> 253*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize ) 254*16467b97STreehugger Robot{ 255*16467b97STreehugger Robot m_charByteSize = charByteSize; 256*16467b97STreehugger Robot} 257*16467b97STreehugger Robottemplate<class ImplTraits> 258*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding ) 259*16467b97STreehugger Robot{ 260*16467b97STreehugger Robot m_encoding = encoding; 261*16467b97STreehugger Robot} 262*16467b97STreehugger Robot 263*16467b97STreehugger Robottemplate<class ImplTraits> 264*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine() 265*16467b97STreehugger Robot{ 266*16467b97STreehugger Robot ++m_charPositionInLine; 267*16467b97STreehugger Robot} 268*16467b97STreehugger Robot 269*16467b97STreehugger Robottemplate<class ImplTraits> 270*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_line() 271*16467b97STreehugger Robot{ 272*16467b97STreehugger Robot ++m_line; 273*16467b97STreehugger Robot} 274*16467b97STreehugger Robot 275*16467b97STreehugger Robottemplate<class ImplTraits> 276*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth() 277*16467b97STreehugger Robot{ 278*16467b97STreehugger Robot ++m_markDepth; 279*16467b97STreehugger Robot} 280*16467b97STreehugger Robot 281*16467b97STreehugger Robottemplate<class ImplTraits> 282*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::reset() 283*16467b97STreehugger Robot{ 284*16467b97STreehugger Robot m_nextChar = m_data; /* Input at first character */ 285*16467b97STreehugger Robot m_line = 1; /* starts at line 1 */ 286*16467b97STreehugger Robot m_charPositionInLine = 0; 287*16467b97STreehugger Robot m_currentLine = m_data; 288*16467b97STreehugger Robot m_markDepth = 0; /* Reset markers */ 289*16467b97STreehugger Robot 290*16467b97STreehugger Robot /* Clear out up the markers table if it is there 291*16467b97STreehugger Robot */ 292*16467b97STreehugger Robot m_markers.clear(); 293*16467b97STreehugger Robot} 294*16467b97STreehugger Robot 295*16467b97STreehugger Robottemplate<class ImplTraits> 296*16467b97STreehugger Robotvoid InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) 297*16467b97STreehugger Robot{ 298*16467b97STreehugger Robot m_isAllocated = false; 299*16467b97STreehugger Robot m_data = inString; 300*16467b97STreehugger Robot m_sizeBuf = size; 301*16467b97STreehugger Robot 302*16467b97STreehugger Robot // Now we can set up the file name. As we are reusing the stream, there may already 303*16467b97STreehugger Robot // be a string that we can reuse for holding the filename. 304*16467b97STreehugger Robot // 305*16467b97STreehugger Robot if ( BaseType::m_streamName.empty() ) 306*16467b97STreehugger Robot { 307*16467b97STreehugger Robot BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); 308*16467b97STreehugger Robot m_fileName = BaseType::m_streamName; 309*16467b97STreehugger Robot } 310*16467b97STreehugger Robot else 311*16467b97STreehugger Robot { 312*16467b97STreehugger Robot BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); 313*16467b97STreehugger Robot } 314*16467b97STreehugger Robot 315*16467b97STreehugger Robot this->reset(); 316*16467b97STreehugger Robot} 317*16467b97STreehugger Robot 318*16467b97STreehugger Robot/* 319*16467b97STreehugger Robottemplate<class ImplTraits> 320*16467b97STreehugger Robottypename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::_LT(ANTLR_INT32 lt) 321*16467b97STreehugger Robot{ 322*16467b97STreehugger Robot return this->_LA(lt); 323*16467b97STreehugger Robot} 324*16467b97STreehugger Robot*/ 325*16467b97STreehugger Robot 326*16467b97STreehugger Robottemplate<class ImplTraits> 327*16467b97STreehugger RobotANTLR_UINT32 InputStream<ImplTraits>::size() 328*16467b97STreehugger Robot{ 329*16467b97STreehugger Robot return m_sizeBuf; 330*16467b97STreehugger Robot} 331*16467b97STreehugger Robot 332*16467b97STreehugger Robottemplate<class ImplTraits> 333*16467b97STreehugger RobotANTLR_MARKER InputStream<ImplTraits>::index_impl() 334*16467b97STreehugger Robot{ 335*16467b97STreehugger Robot return (ANTLR_MARKER)m_nextChar; 336*16467b97STreehugger Robot} 337*16467b97STreehugger Robot 338*16467b97STreehugger Robot 339*16467b97STreehugger Robottemplate<class ImplTraits> 340*16467b97STreehugger Robottypename InputStream<ImplTraits>::StringType InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop) 341*16467b97STreehugger Robot{ 342*16467b97STreehugger Robot std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 ); 343*16467b97STreehugger Robot StringType str( (const char*)start, len ); 344*16467b97STreehugger Robot return str; 345*16467b97STreehugger Robot} 346*16467b97STreehugger Robot 347*16467b97STreehugger Robottemplate<class ImplTraits> 348*16467b97STreehugger RobotANTLR_UINT32 InputStream<ImplTraits>::get_line() 349*16467b97STreehugger Robot{ 350*16467b97STreehugger Robot return m_line; 351*16467b97STreehugger Robot} 352*16467b97STreehugger Robot 353*16467b97STreehugger Robottemplate<class ImplTraits> 354*16467b97STreehugger Robotconst typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::getLineBuf() 355*16467b97STreehugger Robot{ 356*16467b97STreehugger Robot return m_currentLine; 357*16467b97STreehugger Robot} 358*16467b97STreehugger Robot 359*16467b97STreehugger Robottemplate<class ImplTraits> 360*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_charPositionInLine() 361*16467b97STreehugger Robot{ 362*16467b97STreehugger Robot return m_charPositionInLine; 363*16467b97STreehugger Robot} 364*16467b97STreehugger Robot 365*16467b97STreehugger Robottemplate<class ImplTraits> 366*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position) 367*16467b97STreehugger Robot{ 368*16467b97STreehugger Robot m_charPositionInLine = position; 369*16467b97STreehugger Robot} 370*16467b97STreehugger Robot 371*16467b97STreehugger Robottemplate<class ImplTraits> 372*16467b97STreehugger Robotvoid InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar) 373*16467b97STreehugger Robot{ 374*16467b97STreehugger Robot m_newlineChar = newlineChar; 375*16467b97STreehugger Robot} 376*16467b97STreehugger Robot 377*16467b97STreehugger Robottemplate<class ImplTraits> 378*16467b97STreehugger RobotANTLR_INLINE LexState<ImplTraits>::LexState() 379*16467b97STreehugger Robot{ 380*16467b97STreehugger Robot m_nextChar = NULL; 381*16467b97STreehugger Robot m_line = 0; 382*16467b97STreehugger Robot m_currentLine = NULL; 383*16467b97STreehugger Robot m_charPositionInLine = 0; 384*16467b97STreehugger Robot} 385*16467b97STreehugger Robot 386*16467b97STreehugger Robottemplate<class ImplTraits> 387*16467b97STreehugger RobotANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const 388*16467b97STreehugger Robot{ 389*16467b97STreehugger Robot return m_nextChar; 390*16467b97STreehugger Robot} 391*16467b97STreehugger Robot 392*16467b97STreehugger Robottemplate<class ImplTraits> 393*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const 394*16467b97STreehugger Robot{ 395*16467b97STreehugger Robot return m_line; 396*16467b97STreehugger Robot} 397*16467b97STreehugger Robot 398*16467b97STreehugger Robottemplate<class ImplTraits> 399*16467b97STreehugger RobotANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const 400*16467b97STreehugger Robot{ 401*16467b97STreehugger Robot return m_currentLine; 402*16467b97STreehugger Robot} 403*16467b97STreehugger Robot 404*16467b97STreehugger Robottemplate<class ImplTraits> 405*16467b97STreehugger RobotANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const 406*16467b97STreehugger Robot{ 407*16467b97STreehugger Robot return m_charPositionInLine; 408*16467b97STreehugger Robot} 409*16467b97STreehugger Robot 410*16467b97STreehugger Robottemplate<class ImplTraits> 411*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar ) 412*16467b97STreehugger Robot{ 413*16467b97STreehugger Robot m_nextChar = nextChar; 414*16467b97STreehugger Robot} 415*16467b97STreehugger Robot 416*16467b97STreehugger Robottemplate<class ImplTraits> 417*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line ) 418*16467b97STreehugger Robot{ 419*16467b97STreehugger Robot m_line = line; 420*16467b97STreehugger Robot} 421*16467b97STreehugger Robot 422*16467b97STreehugger Robottemplate<class ImplTraits> 423*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine ) 424*16467b97STreehugger Robot{ 425*16467b97STreehugger Robot m_currentLine = currentLine; 426*16467b97STreehugger Robot} 427*16467b97STreehugger Robot 428*16467b97STreehugger Robottemplate<class ImplTraits> 429*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) 430*16467b97STreehugger Robot{ 431*16467b97STreehugger Robot m_charPositionInLine = charPositionInLine; 432*16467b97STreehugger Robot} 433*16467b97STreehugger Robot 434*16467b97STreehugger Robottemplate<class ImplTraits> 435*16467b97STreehugger RobotANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType* InputStream<ImplTraits>::get_istream() 436*16467b97STreehugger Robot{ 437*16467b97STreehugger Robot return this; 438*16467b97STreehugger Robot} 439*16467b97STreehugger Robot 440*16467b97STreehugger Robottemplate<class ImplTraits> 441*16467b97STreehugger Robotvoid InputStream<ImplTraits>::setupInputStream() 442*16467b97STreehugger Robot{ 443*16467b97STreehugger Robot bool isBigEndian; 444*16467b97STreehugger Robot 445*16467b97STreehugger Robot // Used to determine the endianness of the machine we are currently 446*16467b97STreehugger Robot // running on. 447*16467b97STreehugger Robot // 448*16467b97STreehugger Robot ANTLR_UINT16 bomTest = 0xFEFF; 449*16467b97STreehugger Robot 450*16467b97STreehugger Robot // What endianess is the machine we are running on? If the incoming 451*16467b97STreehugger Robot // encoding endianess is the same as this machine's natural byte order 452*16467b97STreehugger Robot // then we can use more efficient API calls. 453*16467b97STreehugger Robot // 454*16467b97STreehugger Robot if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) 455*16467b97STreehugger Robot { 456*16467b97STreehugger Robot isBigEndian = true; 457*16467b97STreehugger Robot } 458*16467b97STreehugger Robot else 459*16467b97STreehugger Robot { 460*16467b97STreehugger Robot isBigEndian = false; 461*16467b97STreehugger Robot } 462*16467b97STreehugger Robot 463*16467b97STreehugger Robot // What encoding did the user tell us {s}he thought it was? I am going 464*16467b97STreehugger Robot // to get sick of the questions on antlr-interest, I know I am. 465*16467b97STreehugger Robot // 466*16467b97STreehugger Robot switch (m_encoding) 467*16467b97STreehugger Robot { 468*16467b97STreehugger Robot case ANTLR_ENC_UTF8: 469*16467b97STreehugger Robot 470*16467b97STreehugger Robot // See if there is a BOM at the start of this UTF-8 sequence 471*16467b97STreehugger Robot // and just eat it if there is. Windows .TXT files have this for instance 472*16467b97STreehugger Robot // as it identifies UTF-8 even though it is of no consequence for byte order 473*16467b97STreehugger Robot // as UTF-8 does not have a byte order. 474*16467b97STreehugger Robot // 475*16467b97STreehugger Robot if ( (*(m_nextChar)) == 0xEF 476*16467b97STreehugger Robot && (*(m_nextChar+1)) == 0xBB 477*16467b97STreehugger Robot && (*(m_nextChar+2)) == 0xBF 478*16467b97STreehugger Robot ) 479*16467b97STreehugger Robot { 480*16467b97STreehugger Robot // The UTF8 BOM is present so skip it 481*16467b97STreehugger Robot // 482*16467b97STreehugger Robot m_nextChar += 3; 483*16467b97STreehugger Robot } 484*16467b97STreehugger Robot 485*16467b97STreehugger Robot // Install the UTF8 input routines 486*16467b97STreehugger Robot // 487*16467b97STreehugger Robot this->setupIntStream( isBigEndian, isBigEndian ); 488*16467b97STreehugger Robot this->set_charByteSize(0); 489*16467b97STreehugger Robot break; 490*16467b97STreehugger Robot 491*16467b97STreehugger Robot case ANTLR_ENC_UTF16: 492*16467b97STreehugger Robot 493*16467b97STreehugger Robot // See if there is a BOM at the start of the input. If not then 494*16467b97STreehugger Robot // we assume that the byte order is the natural order of this 495*16467b97STreehugger Robot // machine (or it is really UCS2). If there is a BOM we determine if the encoding 496*16467b97STreehugger Robot // is the same as the natural order of this machine. 497*16467b97STreehugger Robot // 498*16467b97STreehugger Robot if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE 499*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF 500*16467b97STreehugger Robot ) 501*16467b97STreehugger Robot { 502*16467b97STreehugger Robot // BOM Present, indicates Big Endian 503*16467b97STreehugger Robot // 504*16467b97STreehugger Robot m_nextChar += 1; 505*16467b97STreehugger Robot 506*16467b97STreehugger Robot this->setupIntStream( isBigEndian, true ); 507*16467b97STreehugger Robot } 508*16467b97STreehugger Robot else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF 509*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE 510*16467b97STreehugger Robot ) 511*16467b97STreehugger Robot { 512*16467b97STreehugger Robot // BOM present, indicates Little Endian 513*16467b97STreehugger Robot // 514*16467b97STreehugger Robot m_nextChar += 1; 515*16467b97STreehugger Robot 516*16467b97STreehugger Robot this->setupIntStream( isBigEndian, false ); 517*16467b97STreehugger Robot } 518*16467b97STreehugger Robot else 519*16467b97STreehugger Robot { 520*16467b97STreehugger Robot // No BOM present, assume local computer byte order 521*16467b97STreehugger Robot // 522*16467b97STreehugger Robot this->setupIntStream(isBigEndian, isBigEndian); 523*16467b97STreehugger Robot } 524*16467b97STreehugger Robot this->set_charByteSize(2); 525*16467b97STreehugger Robot break; 526*16467b97STreehugger Robot 527*16467b97STreehugger Robot case ANTLR_ENC_UTF32: 528*16467b97STreehugger Robot 529*16467b97STreehugger Robot // See if there is a BOM at the start of the input. If not then 530*16467b97STreehugger Robot // we assume that the byte order is the natural order of this 531*16467b97STreehugger Robot // machine. If there is we determine if the encoding 532*16467b97STreehugger Robot // is the same as the natural order of this machine. 533*16467b97STreehugger Robot // 534*16467b97STreehugger Robot if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 535*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 536*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE 537*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF 538*16467b97STreehugger Robot ) 539*16467b97STreehugger Robot { 540*16467b97STreehugger Robot // BOM Present, indicates Big Endian 541*16467b97STreehugger Robot // 542*16467b97STreehugger Robot m_nextChar += 1; 543*16467b97STreehugger Robot 544*16467b97STreehugger Robot this->setupIntStream(isBigEndian, true); 545*16467b97STreehugger Robot } 546*16467b97STreehugger Robot else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF 547*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE 548*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 549*16467b97STreehugger Robot && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 550*16467b97STreehugger Robot ) 551*16467b97STreehugger Robot { 552*16467b97STreehugger Robot // BOM present, indicates Little Endian 553*16467b97STreehugger Robot // 554*16467b97STreehugger Robot m_nextChar += 1; 555*16467b97STreehugger Robot 556*16467b97STreehugger Robot this->setupIntStream( isBigEndian, false ); 557*16467b97STreehugger Robot } 558*16467b97STreehugger Robot else 559*16467b97STreehugger Robot { 560*16467b97STreehugger Robot // No BOM present, assume local computer byte order 561*16467b97STreehugger Robot // 562*16467b97STreehugger Robot this->setupIntStream( isBigEndian, isBigEndian ); 563*16467b97STreehugger Robot } 564*16467b97STreehugger Robot this->set_charByteSize(4); 565*16467b97STreehugger Robot break; 566*16467b97STreehugger Robot 567*16467b97STreehugger Robot case ANTLR_ENC_UTF16BE: 568*16467b97STreehugger Robot 569*16467b97STreehugger Robot // Encoding is definately Big Endian with no BOM 570*16467b97STreehugger Robot // 571*16467b97STreehugger Robot this->setupIntStream( isBigEndian, true ); 572*16467b97STreehugger Robot this->set_charByteSize(2); 573*16467b97STreehugger Robot break; 574*16467b97STreehugger Robot 575*16467b97STreehugger Robot case ANTLR_ENC_UTF16LE: 576*16467b97STreehugger Robot 577*16467b97STreehugger Robot // Encoding is definately Little Endian with no BOM 578*16467b97STreehugger Robot // 579*16467b97STreehugger Robot this->setupIntStream( isBigEndian, false ); 580*16467b97STreehugger Robot this->set_charByteSize(2); 581*16467b97STreehugger Robot break; 582*16467b97STreehugger Robot 583*16467b97STreehugger Robot case ANTLR_ENC_UTF32BE: 584*16467b97STreehugger Robot 585*16467b97STreehugger Robot // Encoding is definately Big Endian with no BOM 586*16467b97STreehugger Robot // 587*16467b97STreehugger Robot this->setupIntStream( isBigEndian, true ); 588*16467b97STreehugger Robot this->set_charByteSize(4); 589*16467b97STreehugger Robot break; 590*16467b97STreehugger Robot 591*16467b97STreehugger Robot case ANTLR_ENC_UTF32LE: 592*16467b97STreehugger Robot 593*16467b97STreehugger Robot // Encoding is definately Little Endian with no BOM 594*16467b97STreehugger Robot // 595*16467b97STreehugger Robot this->setupIntStream( isBigEndian, false ); 596*16467b97STreehugger Robot this->set_charByteSize(4); 597*16467b97STreehugger Robot break; 598*16467b97STreehugger Robot 599*16467b97STreehugger Robot case ANTLR_ENC_EBCDIC: 600*16467b97STreehugger Robot 601*16467b97STreehugger Robot // EBCDIC is basically the same as ASCII but with an on the 602*16467b97STreehugger Robot // fly translation to ASCII 603*16467b97STreehugger Robot // 604*16467b97STreehugger Robot this->setupIntStream( isBigEndian, isBigEndian ); 605*16467b97STreehugger Robot this->set_charByteSize(1); 606*16467b97STreehugger Robot break; 607*16467b97STreehugger Robot 608*16467b97STreehugger Robot case ANTLR_ENC_8BIT: 609*16467b97STreehugger Robot default: 610*16467b97STreehugger Robot 611*16467b97STreehugger Robot // Standard 8bit/ASCII 612*16467b97STreehugger Robot // 613*16467b97STreehugger Robot this->setupIntStream( isBigEndian, isBigEndian ); 614*16467b97STreehugger Robot this->set_charByteSize(1); 615*16467b97STreehugger Robot break; 616*16467b97STreehugger Robot } 617*16467b97STreehugger Robot} 618*16467b97STreehugger Robot 619*16467b97STreehugger RobotANTLR_END_NAMESPACE() 620