1*16467b97STreehugger RobotANTLR_BEGIN_NAMESPACE() 2*16467b97STreehugger Robot 3*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 4*16467b97STreehugger RobotANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream() 5*16467b97STreehugger Robot{ 6*16467b97STreehugger Robot m_lastMarker = 0; 7*16467b97STreehugger Robot m_upper_case = false; 8*16467b97STreehugger Robot} 9*16467b97STreehugger Robot 10*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 11*16467b97STreehugger RobotANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType IntStream<ImplTraits, SuperType>::getSourceName() 12*16467b97STreehugger Robot{ 13*16467b97STreehugger Robot return m_streamName; 14*16467b97STreehugger Robot} 15*16467b97STreehugger Robot 16*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 17*16467b97STreehugger RobotANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() 18*16467b97STreehugger Robot{ 19*16467b97STreehugger Robot return m_streamName; 20*16467b97STreehugger Robot} 21*16467b97STreehugger Robot 22*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 23*16467b97STreehugger RobotANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& IntStream<ImplTraits, SuperType>::get_streamName() const 24*16467b97STreehugger Robot{ 25*16467b97STreehugger Robot return m_streamName; 26*16467b97STreehugger Robot} 27*16467b97STreehugger Robot 28*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 29*16467b97STreehugger RobotANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const 30*16467b97STreehugger Robot{ 31*16467b97STreehugger Robot return m_lastMarker; 32*16467b97STreehugger Robot} 33*16467b97STreehugger Robot 34*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 35*16467b97STreehugger RobotANTLR_INLINE void IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag) 36*16467b97STreehugger Robot{ 37*16467b97STreehugger Robot m_upper_case = flag; 38*16467b97STreehugger Robot} 39*16467b97STreehugger Robot 40*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 41*16467b97STreehugger RobotANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super() 42*16467b97STreehugger Robot{ 43*16467b97STreehugger Robot return static_cast<SuperType*>(this); 44*16467b97STreehugger Robot} 45*16467b97STreehugger Robot 46*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 47*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::consume() 48*16467b97STreehugger Robot{ 49*16467b97STreehugger Robot SuperType* input = this->get_super(); 50*16467b97STreehugger Robot 51*16467b97STreehugger Robot const ANTLR_UINT8* nextChar = input->get_nextChar(); 52*16467b97STreehugger Robot const ANTLR_UINT8* data = input->get_data(); 53*16467b97STreehugger Robot ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); 54*16467b97STreehugger Robot 55*16467b97STreehugger Robot if ( nextChar < ( data + sizeBuf ) ) 56*16467b97STreehugger Robot { 57*16467b97STreehugger Robot /* Indicate one more character in this line 58*16467b97STreehugger Robot */ 59*16467b97STreehugger Robot input->inc_charPositionInLine(); 60*16467b97STreehugger Robot 61*16467b97STreehugger Robot if ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() ) 62*16467b97STreehugger Robot { 63*16467b97STreehugger Robot /* Reset for start of a new line of input 64*16467b97STreehugger Robot */ 65*16467b97STreehugger Robot input->inc_line(); 66*16467b97STreehugger Robot input->set_charPositionInLine(0); 67*16467b97STreehugger Robot input->set_currentLine(nextChar + 1); 68*16467b97STreehugger Robot } 69*16467b97STreehugger Robot 70*16467b97STreehugger Robot /* Increment to next character position 71*16467b97STreehugger Robot */ 72*16467b97STreehugger Robot input->set_nextChar( nextChar + 1 ); 73*16467b97STreehugger Robot } 74*16467b97STreehugger Robot} 75*16467b97STreehugger Robot 76*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 77*16467b97STreehugger RobotANTLR_UINT32 IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la ) 78*16467b97STreehugger Robot{ 79*16467b97STreehugger Robot SuperType* input = this->get_super(); 80*16467b97STreehugger Robot const ANTLR_UINT8* nextChar = input->get_nextChar(); 81*16467b97STreehugger Robot const ANTLR_UINT8* data = input->get_data(); 82*16467b97STreehugger Robot ANTLR_UINT32 sizeBuf = input->get_sizeBuf(); 83*16467b97STreehugger Robot 84*16467b97STreehugger Robot if (( nextChar + la - 1) >= (data + sizeBuf)) 85*16467b97STreehugger Robot { 86*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 87*16467b97STreehugger Robot } 88*16467b97STreehugger Robot else 89*16467b97STreehugger Robot { 90*16467b97STreehugger Robot if( !m_upper_case ) 91*16467b97STreehugger Robot return (ANTLR_UCHAR)(*(nextChar + la - 1)); 92*16467b97STreehugger Robot else 93*16467b97STreehugger Robot return (ANTLR_UCHAR)toupper(*(nextChar + la - 1)); 94*16467b97STreehugger Robot } 95*16467b97STreehugger Robot} 96*16467b97STreehugger Robot 97*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 98*16467b97STreehugger RobotANTLR_MARKER IntStream<ImplTraits, SuperType>::mark() 99*16467b97STreehugger Robot{ 100*16467b97STreehugger Robot LexState<ImplTraits>* state; 101*16467b97STreehugger Robot SuperType* input = this->get_super(); 102*16467b97STreehugger Robot 103*16467b97STreehugger Robot /* New mark point 104*16467b97STreehugger Robot */ 105*16467b97STreehugger Robot input->inc_markDepth(); 106*16467b97STreehugger Robot 107*16467b97STreehugger Robot /* See if we are revisiting a mark as we can just reuse the vector 108*16467b97STreehugger Robot * entry if we are, otherwise, we need a new one 109*16467b97STreehugger Robot */ 110*16467b97STreehugger Robot if (input->get_markDepth() > input->get_markers().size() ) 111*16467b97STreehugger Robot { 112*16467b97STreehugger Robot input->get_markers().push_back( LexState<ImplTraits>() ); 113*16467b97STreehugger Robot LexState<ImplTraits>& state_r = input->get_markers().back(); 114*16467b97STreehugger Robot state = &state_r; 115*16467b97STreehugger Robot } 116*16467b97STreehugger Robot else 117*16467b97STreehugger Robot { 118*16467b97STreehugger Robot LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 ); 119*16467b97STreehugger Robot state = &state_r; 120*16467b97STreehugger Robot 121*16467b97STreehugger Robot /* Assume no errors for speed, it will just blow up if the table failed 122*16467b97STreehugger Robot * for some reasons, hence lots of unit tests on the tables ;-) 123*16467b97STreehugger Robot */ 124*16467b97STreehugger Robot } 125*16467b97STreehugger Robot 126*16467b97STreehugger Robot /* We have created or retrieved the state, so update it with the current 127*16467b97STreehugger Robot * elements of the lexer state. 128*16467b97STreehugger Robot */ 129*16467b97STreehugger Robot state->set_charPositionInLine( input->get_charPositionInLine() ); 130*16467b97STreehugger Robot state->set_currentLine( input->get_currentLine() ); 131*16467b97STreehugger Robot state->set_line( input->get_line() ); 132*16467b97STreehugger Robot state->set_nextChar( input->get_nextChar() ); 133*16467b97STreehugger Robot 134*16467b97STreehugger Robot m_lastMarker = input->get_markDepth(); 135*16467b97STreehugger Robot 136*16467b97STreehugger Robot /* And that's it 137*16467b97STreehugger Robot */ 138*16467b97STreehugger Robot return input->get_markDepth(); 139*16467b97STreehugger Robot} 140*16467b97STreehugger Robot 141*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 142*16467b97STreehugger RobotANTLR_MARKER IntStream<ImplTraits, SuperType>::index() 143*16467b97STreehugger Robot{ 144*16467b97STreehugger Robot SuperType* input = this->get_super(); 145*16467b97STreehugger Robot return input->index_impl(); 146*16467b97STreehugger Robot} 147*16467b97STreehugger Robot 148*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 149*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark) 150*16467b97STreehugger Robot{ 151*16467b97STreehugger Robot SuperType* input = this->get_super(); 152*16467b97STreehugger Robot 153*16467b97STreehugger Robot /* Perform any clean up of the marks 154*16467b97STreehugger Robot */ 155*16467b97STreehugger Robot this->release(mark); 156*16467b97STreehugger Robot 157*16467b97STreehugger Robot /* Find the supplied mark state 158*16467b97STreehugger Robot */ 159*16467b97STreehugger Robot ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 ); 160*16467b97STreehugger Robot typename ImplTraits::LexStateType& state = input->get_markers().at( idx ); 161*16467b97STreehugger Robot 162*16467b97STreehugger Robot /* Seek input pointer to the requested point (note we supply the void *pointer 163*16467b97STreehugger Robot * to whatever is implementing the int stream to seek). 164*16467b97STreehugger Robot */ 165*16467b97STreehugger Robot this->seek( (ANTLR_MARKER)state.get_nextChar() ); 166*16467b97STreehugger Robot 167*16467b97STreehugger Robot /* Reset to the reset of the information in the mark 168*16467b97STreehugger Robot */ 169*16467b97STreehugger Robot input->set_charPositionInLine( state.get_charPositionInLine() ); 170*16467b97STreehugger Robot input->set_currentLine( state.get_currentLine() ); 171*16467b97STreehugger Robot input->set_line( state.get_line() ); 172*16467b97STreehugger Robot input->set_nextChar( state.get_nextChar() ); 173*16467b97STreehugger Robot 174*16467b97STreehugger Robot /* And we are done 175*16467b97STreehugger Robot */ 176*16467b97STreehugger Robot} 177*16467b97STreehugger Robot 178*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 179*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::rewindLast() 180*16467b97STreehugger Robot{ 181*16467b97STreehugger Robot this->rewind(m_lastMarker); 182*16467b97STreehugger Robot} 183*16467b97STreehugger Robot 184*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 185*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark) 186*16467b97STreehugger Robot{ 187*16467b97STreehugger Robot SuperType* input = this->get_super(); 188*16467b97STreehugger Robot 189*16467b97STreehugger Robot /* We don't do much here in fact as we never free any higher marks in 190*16467b97STreehugger Robot * the hashtable as we just resuse any memory allocated for them. 191*16467b97STreehugger Robot */ 192*16467b97STreehugger Robot input->set_markDepth( (ANTLR_UINT32)(mark - 1) ); 193*16467b97STreehugger Robot 194*16467b97STreehugger Robot} 195*16467b97STreehugger Robot 196*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 197*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) 198*16467b97STreehugger Robot{ 199*16467b97STreehugger Robot} 200*16467b97STreehugger Robot 201*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 202*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) 203*16467b97STreehugger Robot{ 204*16467b97STreehugger Robot ANTLR_INT32 count; 205*16467b97STreehugger Robot SuperType* input = this->get_super(); 206*16467b97STreehugger Robot 207*16467b97STreehugger Robot ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar(); 208*16467b97STreehugger Robot /* If the requested seek point is less than the current 209*16467b97STreehugger Robot * input point, then we assume that we are resetting from a mark 210*16467b97STreehugger Robot * and do not need to scan, but can just set to there. 211*16467b97STreehugger Robot */ 212*16467b97STreehugger Robot if (seekPoint <= nextChar) 213*16467b97STreehugger Robot { 214*16467b97STreehugger Robot input->set_nextChar((ANTLR_UINT8*) seekPoint); 215*16467b97STreehugger Robot } 216*16467b97STreehugger Robot else 217*16467b97STreehugger Robot { 218*16467b97STreehugger Robot count = (ANTLR_UINT32)(seekPoint - nextChar); 219*16467b97STreehugger Robot 220*16467b97STreehugger Robot while (count--) 221*16467b97STreehugger Robot { 222*16467b97STreehugger Robot this->consume(); 223*16467b97STreehugger Robot } 224*16467b97STreehugger Robot } 225*16467b97STreehugger Robot} 226*16467b97STreehugger Robot 227*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 228*16467b97STreehugger RobotIntStream<ImplTraits, SuperType>::~IntStream() 229*16467b97STreehugger Robot{ 230*16467b97STreehugger Robot} 231*16467b97STreehugger Robot 232*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 233*16467b97STreehugger RobotANTLR_UINT32 EBCDIC_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la) 234*16467b97STreehugger Robot{ 235*16467b97STreehugger Robot // EBCDIC to ASCII conversion table 236*16467b97STreehugger Robot // 237*16467b97STreehugger Robot // This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX 238*16467b97STreehugger Robot // translation and the character tables are published all over the interweb. 239*16467b97STreehugger Robot // 240*16467b97STreehugger Robot const ANTLR_UCHAR e2a[256] = 241*16467b97STreehugger Robot { 242*16467b97STreehugger Robot 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f, 243*16467b97STreehugger Robot 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 244*16467b97STreehugger Robot 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97, 245*16467b97STreehugger Robot 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f, 246*16467b97STreehugger Robot 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b, 247*16467b97STreehugger Robot 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 248*16467b97STreehugger Robot 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 249*16467b97STreehugger Robot 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, 250*16467b97STreehugger Robot 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, 251*16467b97STreehugger Robot 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 252*16467b97STreehugger Robot 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 253*16467b97STreehugger Robot 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f, 254*16467b97STreehugger Robot 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 255*16467b97STreehugger Robot 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 256*16467b97STreehugger Robot 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 257*16467b97STreehugger Robot 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 258*16467b97STreehugger Robot 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 259*16467b97STreehugger Robot 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 260*16467b97STreehugger Robot 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 261*16467b97STreehugger Robot 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, 262*16467b97STreehugger Robot 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 263*16467b97STreehugger Robot 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae, 264*16467b97STreehugger Robot 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 265*16467b97STreehugger Robot 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7, 266*16467b97STreehugger Robot 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 267*16467b97STreehugger Robot 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, 268*16467b97STreehugger Robot 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 269*16467b97STreehugger Robot 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff, 270*16467b97STreehugger Robot 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 271*16467b97STreehugger Robot 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, 272*16467b97STreehugger Robot 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 273*16467b97STreehugger Robot 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e 274*16467b97STreehugger Robot }; 275*16467b97STreehugger Robot 276*16467b97STreehugger Robot SuperType* input = this->get_super(); 277*16467b97STreehugger Robot 278*16467b97STreehugger Robot if (( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() )) 279*16467b97STreehugger Robot { 280*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 281*16467b97STreehugger Robot } 282*16467b97STreehugger Robot else 283*16467b97STreehugger Robot { 284*16467b97STreehugger Robot // Translate the required character via the constant conversion table 285*16467b97STreehugger Robot // 286*16467b97STreehugger Robot return e2a[(*(input->get_nextChar() + la - 1))]; 287*16467b97STreehugger Robot } 288*16467b97STreehugger Robot} 289*16467b97STreehugger Robot 290*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 291*16467b97STreehugger Robotvoid EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream() 292*16467b97STreehugger Robot{ 293*16467b97STreehugger Robot SuperType* super = this->get_super(); 294*16467b97STreehugger Robot super->set_charByteSize(1); 295*16467b97STreehugger Robot} 296*16467b97STreehugger Robot 297*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 298*16467b97STreehugger RobotANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i) 299*16467b97STreehugger Robot{ 300*16467b97STreehugger Robot return this->_LA(i, ClassForwarder< typename ImplTraits::Endianness >() ); 301*16467b97STreehugger Robot} 302*16467b97STreehugger Robot 303*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 304*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::consume() 305*16467b97STreehugger Robot{ 306*16467b97STreehugger Robot this->consume( ClassForwarder< typename ImplTraits::Endianness >() ); 307*16467b97STreehugger Robot} 308*16467b97STreehugger Robot 309*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 310*16467b97STreehugger RobotANTLR_MARKER UTF16_IntStream<ImplTraits, SuperType>::index() 311*16467b97STreehugger Robot{ 312*16467b97STreehugger Robot SuperType* input = this->get_super(); 313*16467b97STreehugger Robot return (ANTLR_MARKER)(input->get_nextChar()); 314*16467b97STreehugger Robot} 315*16467b97STreehugger Robot 316*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 317*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) 318*16467b97STreehugger Robot{ 319*16467b97STreehugger Robot SuperType* input = this->get_super(); 320*16467b97STreehugger Robot 321*16467b97STreehugger Robot // If the requested seek point is less than the current 322*16467b97STreehugger Robot // input point, then we assume that we are resetting from a mark 323*16467b97STreehugger Robot // and do not need to scan, but can just set to there as rewind will 324*16467b97STreehugger Robot // reset line numbers and so on. 325*16467b97STreehugger Robot // 326*16467b97STreehugger Robot if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) 327*16467b97STreehugger Robot { 328*16467b97STreehugger Robot input->set_nextChar( seekPoint ); 329*16467b97STreehugger Robot } 330*16467b97STreehugger Robot else 331*16467b97STreehugger Robot { 332*16467b97STreehugger Robot // Call consume until we reach the asked for seek point or EOF 333*16467b97STreehugger Robot // 334*16467b97STreehugger Robot while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) ) 335*16467b97STreehugger Robot { 336*16467b97STreehugger Robot this->consume(); 337*16467b97STreehugger Robot } 338*16467b97STreehugger Robot } 339*16467b97STreehugger Robot} 340*16467b97STreehugger Robot 341*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 342*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian) 343*16467b97STreehugger Robot{ 344*16467b97STreehugger Robot // We must install different UTF16 routines according to whether the input 345*16467b97STreehugger Robot // is the same endianess as the machine we are executing upon or not. If it is not 346*16467b97STreehugger Robot // then we must install methods that can convert the endianess on the fly as they go 347*16467b97STreehugger Robot // 348*16467b97STreehugger Robot 349*16467b97STreehugger Robot if(machineBigEndian == true) 350*16467b97STreehugger Robot { 351*16467b97STreehugger Robot // Machine is Big Endian, if the input is also then install the 352*16467b97STreehugger Robot // methods that do not access input by bytes and reverse them. 353*16467b97STreehugger Robot // Otherwise install endian aware methods. 354*16467b97STreehugger Robot // 355*16467b97STreehugger Robot if (inputBigEndian == true) 356*16467b97STreehugger Robot { 357*16467b97STreehugger Robot // Input is machine compatible 358*16467b97STreehugger Robot // 359*16467b97STreehugger Robot m_endian_spec = 1; 360*16467b97STreehugger Robot } 361*16467b97STreehugger Robot else 362*16467b97STreehugger Robot { 363*16467b97STreehugger Robot // Need to use methods that know that the input is little endian 364*16467b97STreehugger Robot // 365*16467b97STreehugger Robot m_endian_spec = 2; 366*16467b97STreehugger Robot } 367*16467b97STreehugger Robot } 368*16467b97STreehugger Robot else 369*16467b97STreehugger Robot { 370*16467b97STreehugger Robot // Machine is Little Endian, if the input is also then install the 371*16467b97STreehugger Robot // methods that do not access input by bytes and reverse them. 372*16467b97STreehugger Robot // Otherwise install endian aware methods. 373*16467b97STreehugger Robot // 374*16467b97STreehugger Robot if (inputBigEndian == false) 375*16467b97STreehugger Robot { 376*16467b97STreehugger Robot // Input is machine compatible 377*16467b97STreehugger Robot // 378*16467b97STreehugger Robot m_endian_spec = 1; 379*16467b97STreehugger Robot } 380*16467b97STreehugger Robot else 381*16467b97STreehugger Robot { 382*16467b97STreehugger Robot // Need to use methods that know that the input is Big Endian 383*16467b97STreehugger Robot // 384*16467b97STreehugger Robot m_endian_spec = 3; 385*16467b97STreehugger Robot } 386*16467b97STreehugger Robot } 387*16467b97STreehugger Robot} 388*16467b97STreehugger Robot 389*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 390*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) 391*16467b97STreehugger Robot{ 392*16467b97STreehugger Robot SuperType* super = this->get_super(); 393*16467b97STreehugger Robot super->set_charByteSize(2); 394*16467b97STreehugger Robot 395*16467b97STreehugger Robot this->findout_endian_spec( machineBigEndian, inputBigEndian ); 396*16467b97STreehugger Robot} 397*16467b97STreehugger Robot 398*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 399*16467b97STreehugger RobotANTLR_UINT32 IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) 400*16467b97STreehugger Robot{ 401*16467b97STreehugger Robot assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); 402*16467b97STreehugger Robot switch(m_endian_spec) 403*16467b97STreehugger Robot { 404*16467b97STreehugger Robot case 1: 405*16467b97STreehugger Robot return this->_LA(i, ClassForwarder<BYTE_AGNOSTIC>() ); 406*16467b97STreehugger Robot break; 407*16467b97STreehugger Robot case 2: 408*16467b97STreehugger Robot return this->_LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() ); 409*16467b97STreehugger Robot break; 410*16467b97STreehugger Robot case 3: 411*16467b97STreehugger Robot return this->_LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() ); 412*16467b97STreehugger Robot break; 413*16467b97STreehugger Robot default: 414*16467b97STreehugger Robot break; 415*16467b97STreehugger Robot } 416*16467b97STreehugger Robot return 0; 417*16467b97STreehugger Robot} 418*16467b97STreehugger Robot 419*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 420*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> ) 421*16467b97STreehugger Robot{ 422*16467b97STreehugger Robot assert( (m_endian_spec >= 1) && (m_endian_spec <= 3)); 423*16467b97STreehugger Robot switch(m_endian_spec) 424*16467b97STreehugger Robot { 425*16467b97STreehugger Robot case 1: 426*16467b97STreehugger Robot this->consume( ClassForwarder<BYTE_AGNOSTIC>() ); 427*16467b97STreehugger Robot break; 428*16467b97STreehugger Robot case 2: 429*16467b97STreehugger Robot this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() ); 430*16467b97STreehugger Robot break; 431*16467b97STreehugger Robot case 3: 432*16467b97STreehugger Robot this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() ); 433*16467b97STreehugger Robot break; 434*16467b97STreehugger Robot default: 435*16467b97STreehugger Robot break; 436*16467b97STreehugger Robot } 437*16467b97STreehugger Robot} 438*16467b97STreehugger Robot 439*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 440*16467b97STreehugger RobotANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) 441*16467b97STreehugger Robot{ 442*16467b97STreehugger Robot SuperType* input; 443*16467b97STreehugger Robot UTF32 ch; 444*16467b97STreehugger Robot UTF32 ch2; 445*16467b97STreehugger Robot UTF16* nextChar; 446*16467b97STreehugger Robot 447*16467b97STreehugger Robot // Find the input interface and where we are currently pointing to 448*16467b97STreehugger Robot // in the input stream 449*16467b97STreehugger Robot // 450*16467b97STreehugger Robot input = this->get_super; 451*16467b97STreehugger Robot nextChar = input->get_nextChar(); 452*16467b97STreehugger Robot 453*16467b97STreehugger Robot // If a positive offset then advance forward, else retreat 454*16467b97STreehugger Robot // 455*16467b97STreehugger Robot if (la >= 0) 456*16467b97STreehugger Robot { 457*16467b97STreehugger Robot while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) 458*16467b97STreehugger Robot { 459*16467b97STreehugger Robot // Advance our copy of the input pointer 460*16467b97STreehugger Robot // 461*16467b97STreehugger Robot // Next char in natural machine byte order 462*16467b97STreehugger Robot // 463*16467b97STreehugger Robot ch = *nextChar++; 464*16467b97STreehugger Robot 465*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 466*16467b97STreehugger Robot // a following valid LO surrogate. 467*16467b97STreehugger Robot // 468*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 469*16467b97STreehugger Robot { 470*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 471*16467b97STreehugger Robot // 472*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) 473*16467b97STreehugger Robot { 474*16467b97STreehugger Robot // Next character is in natural machine byte order 475*16467b97STreehugger Robot // 476*16467b97STreehugger Robot ch2 = *nextChar; 477*16467b97STreehugger Robot 478*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 479*16467b97STreehugger Robot // 480*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 481*16467b97STreehugger Robot { 482*16467b97STreehugger Robot // We consumed one 16 bit character 483*16467b97STreehugger Robot // 484*16467b97STreehugger Robot nextChar++; 485*16467b97STreehugger Robot } 486*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 487*16467b97STreehugger Robot // it. 488*16467b97STreehugger Robot // 489*16467b97STreehugger Robot } 490*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 491*16467b97STreehugger Robot // it because the buffer ended 492*16467b97STreehugger Robot // 493*16467b97STreehugger Robot } 494*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 495*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 496*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 497*16467b97STreehugger Robot // 498*16467b97STreehugger Robot } 499*16467b97STreehugger Robot } 500*16467b97STreehugger Robot else 501*16467b97STreehugger Robot { 502*16467b97STreehugger Robot // We need to go backwards from our input point 503*16467b97STreehugger Robot // 504*16467b97STreehugger Robot while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) 505*16467b97STreehugger Robot { 506*16467b97STreehugger Robot // Get the previous 16 bit character 507*16467b97STreehugger Robot // 508*16467b97STreehugger Robot ch = *--nextChar; 509*16467b97STreehugger Robot 510*16467b97STreehugger Robot // If we found a low surrogate then go back one more character if 511*16467b97STreehugger Robot // the hi surrogate is there 512*16467b97STreehugger Robot // 513*16467b97STreehugger Robot if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 514*16467b97STreehugger Robot { 515*16467b97STreehugger Robot ch2 = *(nextChar-1); 516*16467b97STreehugger Robot if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 517*16467b97STreehugger Robot { 518*16467b97STreehugger Robot // Yes, there is a high surrogate to match it so decrement one more and point to that 519*16467b97STreehugger Robot // 520*16467b97STreehugger Robot nextChar--; 521*16467b97STreehugger Robot } 522*16467b97STreehugger Robot } 523*16467b97STreehugger Robot } 524*16467b97STreehugger Robot } 525*16467b97STreehugger Robot 526*16467b97STreehugger Robot // Our local copy of nextChar is now pointing to either the correct character or end of file 527*16467b97STreehugger Robot // 528*16467b97STreehugger Robot // Input buffer size is always in bytes 529*16467b97STreehugger Robot // 530*16467b97STreehugger Robot if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) 531*16467b97STreehugger Robot { 532*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 533*16467b97STreehugger Robot } 534*16467b97STreehugger Robot else 535*16467b97STreehugger Robot { 536*16467b97STreehugger Robot // Pick up the next 16 character (native machine byte order) 537*16467b97STreehugger Robot // 538*16467b97STreehugger Robot ch = *nextChar++; 539*16467b97STreehugger Robot 540*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 541*16467b97STreehugger Robot // a following valid LO surrogate. 542*16467b97STreehugger Robot // 543*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 544*16467b97STreehugger Robot { 545*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 546*16467b97STreehugger Robot // 547*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 548*16467b97STreehugger Robot { 549*16467b97STreehugger Robot // Next character is in natural machine byte order 550*16467b97STreehugger Robot // 551*16467b97STreehugger Robot ch2 = *nextChar; 552*16467b97STreehugger Robot 553*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 554*16467b97STreehugger Robot // 555*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 556*16467b97STreehugger Robot { 557*16467b97STreehugger Robot // Construct the UTF32 code point 558*16467b97STreehugger Robot // 559*16467b97STreehugger Robot ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 560*16467b97STreehugger Robot + (ch2 - UNI_SUR_LOW_START) + halfBase; 561*16467b97STreehugger Robot } 562*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 563*16467b97STreehugger Robot // it. 564*16467b97STreehugger Robot // 565*16467b97STreehugger Robot } 566*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 567*16467b97STreehugger Robot // it because the buffer ended 568*16467b97STreehugger Robot // 569*16467b97STreehugger Robot } 570*16467b97STreehugger Robot } 571*16467b97STreehugger Robot return ch; 572*16467b97STreehugger Robot} 573*16467b97STreehugger Robot 574*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 575*16467b97STreehugger RobotANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) 576*16467b97STreehugger Robot{ 577*16467b97STreehugger Robot SuperType* input; 578*16467b97STreehugger Robot UTF32 ch; 579*16467b97STreehugger Robot UTF32 ch2; 580*16467b97STreehugger Robot ANTLR_UCHAR* nextChar; 581*16467b97STreehugger Robot 582*16467b97STreehugger Robot // Find the input interface and where we are currently pointing to 583*16467b97STreehugger Robot // in the input stream 584*16467b97STreehugger Robot // 585*16467b97STreehugger Robot input = this->get_super(); 586*16467b97STreehugger Robot nextChar = input->get_nextChar(); 587*16467b97STreehugger Robot 588*16467b97STreehugger Robot // If a positive offset then advance forward, else retreat 589*16467b97STreehugger Robot // 590*16467b97STreehugger Robot if (la >= 0) 591*16467b97STreehugger Robot { 592*16467b97STreehugger Robot while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) 593*16467b97STreehugger Robot { 594*16467b97STreehugger Robot // Advance our copy of the input pointer 595*16467b97STreehugger Robot // 596*16467b97STreehugger Robot // Next char in Little Endian byte order 597*16467b97STreehugger Robot // 598*16467b97STreehugger Robot ch = (*nextChar) + (*(nextChar+1) << 8); 599*16467b97STreehugger Robot nextChar += 2; 600*16467b97STreehugger Robot 601*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 602*16467b97STreehugger Robot // a following valid LO surrogate. 603*16467b97STreehugger Robot // 604*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 605*16467b97STreehugger Robot { 606*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 607*16467b97STreehugger Robot // 608*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )) 609*16467b97STreehugger Robot { 610*16467b97STreehugger Robot // Next character is in little endian byte order 611*16467b97STreehugger Robot // 612*16467b97STreehugger Robot ch2 = (*nextChar) + (*(nextChar+1) << 8); 613*16467b97STreehugger Robot 614*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 615*16467b97STreehugger Robot // 616*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 617*16467b97STreehugger Robot { 618*16467b97STreehugger Robot // We consumed one 16 bit character 619*16467b97STreehugger Robot // 620*16467b97STreehugger Robot nextChar += 2; 621*16467b97STreehugger Robot } 622*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 623*16467b97STreehugger Robot // it. 624*16467b97STreehugger Robot // 625*16467b97STreehugger Robot } 626*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 627*16467b97STreehugger Robot // it because the buffer ended 628*16467b97STreehugger Robot // 629*16467b97STreehugger Robot } 630*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 631*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 632*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 633*16467b97STreehugger Robot // 634*16467b97STreehugger Robot } 635*16467b97STreehugger Robot } 636*16467b97STreehugger Robot else 637*16467b97STreehugger Robot { 638*16467b97STreehugger Robot // We need to go backwards from our input point 639*16467b97STreehugger Robot // 640*16467b97STreehugger Robot while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) 641*16467b97STreehugger Robot { 642*16467b97STreehugger Robot // Get the previous 16 bit character 643*16467b97STreehugger Robot // 644*16467b97STreehugger Robot ch = (*nextChar - 2) + ((*nextChar -1) << 8); 645*16467b97STreehugger Robot nextChar -= 2; 646*16467b97STreehugger Robot 647*16467b97STreehugger Robot // If we found a low surrogate then go back one more character if 648*16467b97STreehugger Robot // the hi surrogate is there 649*16467b97STreehugger Robot // 650*16467b97STreehugger Robot if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 651*16467b97STreehugger Robot { 652*16467b97STreehugger Robot ch2 = (*nextChar - 2) + ((*nextChar -1) << 8); 653*16467b97STreehugger Robot if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 654*16467b97STreehugger Robot { 655*16467b97STreehugger Robot // Yes, there is a high surrogate to match it so decrement one more and point to that 656*16467b97STreehugger Robot // 657*16467b97STreehugger Robot nextChar -=2; 658*16467b97STreehugger Robot } 659*16467b97STreehugger Robot } 660*16467b97STreehugger Robot } 661*16467b97STreehugger Robot } 662*16467b97STreehugger Robot 663*16467b97STreehugger Robot // Our local copy of nextChar is now pointing to either the correct character or end of file 664*16467b97STreehugger Robot // 665*16467b97STreehugger Robot // Input buffer size is always in bytes 666*16467b97STreehugger Robot // 667*16467b97STreehugger Robot if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 668*16467b97STreehugger Robot { 669*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 670*16467b97STreehugger Robot } 671*16467b97STreehugger Robot else 672*16467b97STreehugger Robot { 673*16467b97STreehugger Robot // Pick up the next 16 character (little endian byte order) 674*16467b97STreehugger Robot // 675*16467b97STreehugger Robot ch = (*nextChar) + (*(nextChar+1) << 8); 676*16467b97STreehugger Robot nextChar += 2; 677*16467b97STreehugger Robot 678*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 679*16467b97STreehugger Robot // a following valid LO surrogate. 680*16467b97STreehugger Robot // 681*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 682*16467b97STreehugger Robot { 683*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 684*16467b97STreehugger Robot // 685*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 686*16467b97STreehugger Robot { 687*16467b97STreehugger Robot // Next character is in little endian byte order 688*16467b97STreehugger Robot // 689*16467b97STreehugger Robot ch2 = (*nextChar) + (*(nextChar+1) << 8); 690*16467b97STreehugger Robot 691*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 692*16467b97STreehugger Robot // 693*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 694*16467b97STreehugger Robot { 695*16467b97STreehugger Robot // Construct the UTF32 code point 696*16467b97STreehugger Robot // 697*16467b97STreehugger Robot ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 698*16467b97STreehugger Robot + (ch2 - UNI_SUR_LOW_START) + halfBase; 699*16467b97STreehugger Robot } 700*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 701*16467b97STreehugger Robot // it. 702*16467b97STreehugger Robot // 703*16467b97STreehugger Robot } 704*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 705*16467b97STreehugger Robot // it because the buffer ended 706*16467b97STreehugger Robot // 707*16467b97STreehugger Robot } 708*16467b97STreehugger Robot } 709*16467b97STreehugger Robot return ch; 710*16467b97STreehugger Robot} 711*16467b97STreehugger Robot 712*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 713*16467b97STreehugger RobotANTLR_UINT32 UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) 714*16467b97STreehugger Robot{ 715*16467b97STreehugger Robot SuperType* input; 716*16467b97STreehugger Robot UTF32 ch; 717*16467b97STreehugger Robot UTF32 ch2; 718*16467b97STreehugger Robot ANTLR_UCHAR* nextChar; 719*16467b97STreehugger Robot 720*16467b97STreehugger Robot // Find the input interface and where we are currently pointing to 721*16467b97STreehugger Robot // in the input stream 722*16467b97STreehugger Robot // 723*16467b97STreehugger Robot input = this->get_super(); 724*16467b97STreehugger Robot nextChar = input->get_nextChar(); 725*16467b97STreehugger Robot 726*16467b97STreehugger Robot // If a positive offset then advance forward, else retreat 727*16467b97STreehugger Robot // 728*16467b97STreehugger Robot if (la >= 0) 729*16467b97STreehugger Robot { 730*16467b97STreehugger Robot while (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ) 731*16467b97STreehugger Robot { 732*16467b97STreehugger Robot // Advance our copy of the input pointer 733*16467b97STreehugger Robot // 734*16467b97STreehugger Robot // Next char in Big Endian byte order 735*16467b97STreehugger Robot // 736*16467b97STreehugger Robot ch = ((*nextChar) << 8) + *(nextChar+1); 737*16467b97STreehugger Robot nextChar += 2; 738*16467b97STreehugger Robot 739*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 740*16467b97STreehugger Robot // a following valid LO surrogate. 741*16467b97STreehugger Robot // 742*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 743*16467b97STreehugger Robot { 744*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 745*16467b97STreehugger Robot // 746*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 747*16467b97STreehugger Robot { 748*16467b97STreehugger Robot // Next character is in big endian byte order 749*16467b97STreehugger Robot // 750*16467b97STreehugger Robot ch2 = ((*nextChar) << 8) + *(nextChar+1); 751*16467b97STreehugger Robot 752*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 753*16467b97STreehugger Robot // 754*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 755*16467b97STreehugger Robot { 756*16467b97STreehugger Robot // We consumed one 16 bit character 757*16467b97STreehugger Robot // 758*16467b97STreehugger Robot nextChar += 2; 759*16467b97STreehugger Robot } 760*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 761*16467b97STreehugger Robot // it. 762*16467b97STreehugger Robot // 763*16467b97STreehugger Robot } 764*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 765*16467b97STreehugger Robot // it because the buffer ended 766*16467b97STreehugger Robot // 767*16467b97STreehugger Robot } 768*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 769*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 770*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 771*16467b97STreehugger Robot // 772*16467b97STreehugger Robot } 773*16467b97STreehugger Robot } 774*16467b97STreehugger Robot else 775*16467b97STreehugger Robot { 776*16467b97STreehugger Robot // We need to go backwards from our input point 777*16467b97STreehugger Robot // 778*16467b97STreehugger Robot while (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() ) 779*16467b97STreehugger Robot { 780*16467b97STreehugger Robot // Get the previous 16 bit character 781*16467b97STreehugger Robot // 782*16467b97STreehugger Robot ch = ((*nextChar - 2) << 8) + (*nextChar -1); 783*16467b97STreehugger Robot nextChar -= 2; 784*16467b97STreehugger Robot 785*16467b97STreehugger Robot // If we found a low surrogate then go back one more character if 786*16467b97STreehugger Robot // the hi surrogate is there 787*16467b97STreehugger Robot // 788*16467b97STreehugger Robot if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 789*16467b97STreehugger Robot { 790*16467b97STreehugger Robot ch2 = ((*nextChar - 2) << 8) + (*nextChar -1); 791*16467b97STreehugger Robot if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 792*16467b97STreehugger Robot { 793*16467b97STreehugger Robot // Yes, there is a high surrogate to match it so decrement one more and point to that 794*16467b97STreehugger Robot // 795*16467b97STreehugger Robot nextChar -=2; 796*16467b97STreehugger Robot } 797*16467b97STreehugger Robot } 798*16467b97STreehugger Robot } 799*16467b97STreehugger Robot } 800*16467b97STreehugger Robot 801*16467b97STreehugger Robot // Our local copy of nextChar is now pointing to either the correct character or end of file 802*16467b97STreehugger Robot // 803*16467b97STreehugger Robot // Input buffer size is always in bytes 804*16467b97STreehugger Robot // 805*16467b97STreehugger Robot if ( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 806*16467b97STreehugger Robot { 807*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 808*16467b97STreehugger Robot } 809*16467b97STreehugger Robot else 810*16467b97STreehugger Robot { 811*16467b97STreehugger Robot // Pick up the next 16 character (big endian byte order) 812*16467b97STreehugger Robot // 813*16467b97STreehugger Robot ch = ((*nextChar) << 8) + *(nextChar+1); 814*16467b97STreehugger Robot nextChar += 2; 815*16467b97STreehugger Robot 816*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 817*16467b97STreehugger Robot // a following valid LO surrogate. 818*16467b97STreehugger Robot // 819*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 820*16467b97STreehugger Robot { 821*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 822*16467b97STreehugger Robot // 823*16467b97STreehugger Robot if ((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf())) 824*16467b97STreehugger Robot { 825*16467b97STreehugger Robot // Next character is in big endian byte order 826*16467b97STreehugger Robot // 827*16467b97STreehugger Robot ch2 = ((*nextChar) << 8) + *(nextChar+1); 828*16467b97STreehugger Robot 829*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 830*16467b97STreehugger Robot // 831*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 832*16467b97STreehugger Robot { 833*16467b97STreehugger Robot // Construct the UTF32 code point 834*16467b97STreehugger Robot // 835*16467b97STreehugger Robot ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 836*16467b97STreehugger Robot + (ch2 - UNI_SUR_LOW_START) + halfBase; 837*16467b97STreehugger Robot } 838*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 839*16467b97STreehugger Robot // it. 840*16467b97STreehugger Robot // 841*16467b97STreehugger Robot } 842*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 843*16467b97STreehugger Robot // it because the buffer ended 844*16467b97STreehugger Robot // 845*16467b97STreehugger Robot } 846*16467b97STreehugger Robot } 847*16467b97STreehugger Robot return ch; 848*16467b97STreehugger Robot} 849*16467b97STreehugger Robot 850*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 851*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> ) 852*16467b97STreehugger Robot{ 853*16467b97STreehugger Robot SuperType* input; 854*16467b97STreehugger Robot UTF32 ch; 855*16467b97STreehugger Robot UTF32 ch2; 856*16467b97STreehugger Robot 857*16467b97STreehugger Robot input = this->get_super(); 858*16467b97STreehugger Robot 859*16467b97STreehugger Robot // Buffer size is always in bytes 860*16467b97STreehugger Robot // 861*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 862*16467b97STreehugger Robot { 863*16467b97STreehugger Robot // Indicate one more character in this line 864*16467b97STreehugger Robot // 865*16467b97STreehugger Robot input->inc_charPositionInLine(); 866*16467b97STreehugger Robot 867*16467b97STreehugger Robot if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) 868*16467b97STreehugger Robot { 869*16467b97STreehugger Robot // Reset for start of a new line of input 870*16467b97STreehugger Robot // 871*16467b97STreehugger Robot input->inc_line(); 872*16467b97STreehugger Robot input->set_charPositionInLine(0); 873*16467b97STreehugger Robot input->set_currentLine( input->get_nextChar() + 1 ); 874*16467b97STreehugger Robot } 875*16467b97STreehugger Robot 876*16467b97STreehugger Robot // Increment to next character position, accounting for any surrogates 877*16467b97STreehugger Robot // 878*16467b97STreehugger Robot // Next char in natural machine byte order 879*16467b97STreehugger Robot // 880*16467b97STreehugger Robot ch = *(input->get_nextChar()); 881*16467b97STreehugger Robot 882*16467b97STreehugger Robot // We consumed one 16 bit character 883*16467b97STreehugger Robot // 884*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1 ); 885*16467b97STreehugger Robot 886*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 887*16467b97STreehugger Robot // a following valid LO surrogate. 888*16467b97STreehugger Robot // 889*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { 890*16467b97STreehugger Robot 891*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 892*16467b97STreehugger Robot // 893*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 894*16467b97STreehugger Robot { 895*16467b97STreehugger Robot // Next character is in natural machine byte order 896*16467b97STreehugger Robot // 897*16467b97STreehugger Robot ch2 = *(input->get_nextChar()); 898*16467b97STreehugger Robot 899*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 900*16467b97STreehugger Robot // 901*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 902*16467b97STreehugger Robot { 903*16467b97STreehugger Robot // We consumed one 16 bit character 904*16467b97STreehugger Robot // 905*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1 ); 906*16467b97STreehugger Robot } 907*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 908*16467b97STreehugger Robot // it. 909*16467b97STreehugger Robot // 910*16467b97STreehugger Robot } 911*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 912*16467b97STreehugger Robot // it because the buffer ended 913*16467b97STreehugger Robot // 914*16467b97STreehugger Robot } 915*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 916*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 917*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 918*16467b97STreehugger Robot // 919*16467b97STreehugger Robot } 920*16467b97STreehugger Robot 921*16467b97STreehugger Robot} 922*16467b97STreehugger Robot 923*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 924*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> ) 925*16467b97STreehugger Robot{ 926*16467b97STreehugger Robot SuperType* input; 927*16467b97STreehugger Robot UTF32 ch; 928*16467b97STreehugger Robot UTF32 ch2; 929*16467b97STreehugger Robot 930*16467b97STreehugger Robot input = this->get_super(); 931*16467b97STreehugger Robot 932*16467b97STreehugger Robot // Buffer size is always in bytes 933*16467b97STreehugger Robot // 934*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 935*16467b97STreehugger Robot { 936*16467b97STreehugger Robot // Indicate one more character in this line 937*16467b97STreehugger Robot // 938*16467b97STreehugger Robot input->inc_charPositionInLine(); 939*16467b97STreehugger Robot 940*16467b97STreehugger Robot if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) 941*16467b97STreehugger Robot { 942*16467b97STreehugger Robot // Reset for start of a new line of input 943*16467b97STreehugger Robot // 944*16467b97STreehugger Robot input->inc_line(); 945*16467b97STreehugger Robot input->set_charPositionInLine(0); 946*16467b97STreehugger Robot input->set_currentLine(input->get_nextChar() + 1); 947*16467b97STreehugger Robot } 948*16467b97STreehugger Robot 949*16467b97STreehugger Robot // Increment to next character position, accounting for any surrogates 950*16467b97STreehugger Robot // 951*16467b97STreehugger Robot // Next char in litle endian form 952*16467b97STreehugger Robot // 953*16467b97STreehugger Robot ch = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); 954*16467b97STreehugger Robot 955*16467b97STreehugger Robot // We consumed one 16 bit character 956*16467b97STreehugger Robot // 957*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1); 958*16467b97STreehugger Robot 959*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 960*16467b97STreehugger Robot // a following valid LO surrogate. 961*16467b97STreehugger Robot // 962*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 963*16467b97STreehugger Robot { 964*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 965*16467b97STreehugger Robot // 966*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 967*16467b97STreehugger Robot { 968*16467b97STreehugger Robot ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8); 969*16467b97STreehugger Robot 970*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 971*16467b97STreehugger Robot // 972*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 973*16467b97STreehugger Robot { 974*16467b97STreehugger Robot // We consumed one 16 bit character 975*16467b97STreehugger Robot // 976*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1); 977*16467b97STreehugger Robot } 978*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 979*16467b97STreehugger Robot // it. 980*16467b97STreehugger Robot // 981*16467b97STreehugger Robot } 982*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 983*16467b97STreehugger Robot // it because the buffer ended 984*16467b97STreehugger Robot // 985*16467b97STreehugger Robot } 986*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 987*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 988*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 989*16467b97STreehugger Robot // 990*16467b97STreehugger Robot } 991*16467b97STreehugger Robot} 992*16467b97STreehugger Robot 993*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 994*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> ) 995*16467b97STreehugger Robot{ 996*16467b97STreehugger Robot SuperType* input; 997*16467b97STreehugger Robot UTF32 ch; 998*16467b97STreehugger Robot UTF32 ch2; 999*16467b97STreehugger Robot 1000*16467b97STreehugger Robot input = this->get_super(); 1001*16467b97STreehugger Robot 1002*16467b97STreehugger Robot // Buffer size is always in bytes 1003*16467b97STreehugger Robot // 1004*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 1005*16467b97STreehugger Robot { 1006*16467b97STreehugger Robot // Indicate one more character in this line 1007*16467b97STreehugger Robot // 1008*16467b97STreehugger Robot input->inc_charPositionInLine(); 1009*16467b97STreehugger Robot 1010*16467b97STreehugger Robot if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) 1011*16467b97STreehugger Robot { 1012*16467b97STreehugger Robot // Reset for start of a new line of input 1013*16467b97STreehugger Robot // 1014*16467b97STreehugger Robot input->inc_line(); 1015*16467b97STreehugger Robot input->set_charPositionInLine(0); 1016*16467b97STreehugger Robot input->set_currentLine(input->get_nextChar() + 1); 1017*16467b97STreehugger Robot } 1018*16467b97STreehugger Robot 1019*16467b97STreehugger Robot // Increment to next character position, accounting for any surrogates 1020*16467b97STreehugger Robot // 1021*16467b97STreehugger Robot // Next char in big endian form 1022*16467b97STreehugger Robot // 1023*16467b97STreehugger Robot ch = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); 1024*16467b97STreehugger Robot 1025*16467b97STreehugger Robot // We consumed one 16 bit character 1026*16467b97STreehugger Robot // 1027*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1); 1028*16467b97STreehugger Robot 1029*16467b97STreehugger Robot // If we have a surrogate pair then we need to consume 1030*16467b97STreehugger Robot // a following valid LO surrogate. 1031*16467b97STreehugger Robot // 1032*16467b97STreehugger Robot if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 1033*16467b97STreehugger Robot { 1034*16467b97STreehugger Robot // If the 16 bits following the high surrogate are in the source buffer... 1035*16467b97STreehugger Robot // 1036*16467b97STreehugger Robot if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) ) 1037*16467b97STreehugger Robot { 1038*16467b97STreehugger Robot // Big endian 1039*16467b97STreehugger Robot // 1040*16467b97STreehugger Robot ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8); 1041*16467b97STreehugger Robot 1042*16467b97STreehugger Robot // If it's a valid low surrogate, consume it 1043*16467b97STreehugger Robot // 1044*16467b97STreehugger Robot if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1045*16467b97STreehugger Robot { 1046*16467b97STreehugger Robot // We consumed one 16 bit character 1047*16467b97STreehugger Robot // 1048*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1); 1049*16467b97STreehugger Robot } 1050*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1051*16467b97STreehugger Robot // it. 1052*16467b97STreehugger Robot // 1053*16467b97STreehugger Robot } 1054*16467b97STreehugger Robot // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1055*16467b97STreehugger Robot // it because the buffer ended 1056*16467b97STreehugger Robot // 1057*16467b97STreehugger Robot } 1058*16467b97STreehugger Robot // Note that we did not check for an invalid low surrogate here, or that fact that the 1059*16467b97STreehugger Robot // lo surrogate was missing. We just picked out one 16 bit character unless the character 1060*16467b97STreehugger Robot // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 1061*16467b97STreehugger Robot // 1062*16467b97STreehugger Robot } 1063*16467b97STreehugger Robot} 1064*16467b97STreehugger Robot 1065*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1066*16467b97STreehugger RobotANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i) 1067*16467b97STreehugger Robot{ 1068*16467b97STreehugger Robot return this->_LA( i, ClassForwarder<typename ImplTraits::Endianness>() ); 1069*16467b97STreehugger Robot} 1070*16467b97STreehugger Robot 1071*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1072*16467b97STreehugger RobotANTLR_MARKER UTF32_IntStream<ImplTraits, SuperType>::index() 1073*16467b97STreehugger Robot{ 1074*16467b97STreehugger Robot SuperType* input = this->get_super(); 1075*16467b97STreehugger Robot return (ANTLR_MARKER)(input->get_nextChar()); 1076*16467b97STreehugger Robot} 1077*16467b97STreehugger Robot 1078*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1079*16467b97STreehugger Robotvoid UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint) 1080*16467b97STreehugger Robot{ 1081*16467b97STreehugger Robot SuperType* input; 1082*16467b97STreehugger Robot 1083*16467b97STreehugger Robot input = this->get_super(); 1084*16467b97STreehugger Robot 1085*16467b97STreehugger Robot // If the requested seek point is less than the current 1086*16467b97STreehugger Robot // input point, then we assume that we are resetting from a mark 1087*16467b97STreehugger Robot // and do not need to scan, but can just set to there as rewind will 1088*16467b97STreehugger Robot // reset line numbers and so on. 1089*16467b97STreehugger Robot // 1090*16467b97STreehugger Robot if (seekPoint <= (ANTLR_MARKER)(input->get_nextChar())) 1091*16467b97STreehugger Robot { 1092*16467b97STreehugger Robot input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) ); 1093*16467b97STreehugger Robot } 1094*16467b97STreehugger Robot else 1095*16467b97STreehugger Robot { 1096*16467b97STreehugger Robot // Call consume until we reach the asked for seek point or EOF 1097*16467b97STreehugger Robot // 1098*16467b97STreehugger Robot while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) ) 1099*16467b97STreehugger Robot { 1100*16467b97STreehugger Robot this->consume(); 1101*16467b97STreehugger Robot } 1102*16467b97STreehugger Robot } 1103*16467b97STreehugger Robot 1104*16467b97STreehugger Robot} 1105*16467b97STreehugger Robot 1106*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1107*16467b97STreehugger Robotvoid UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian) 1108*16467b97STreehugger Robot{ 1109*16467b97STreehugger Robot SuperType* super = this->get_super(); 1110*16467b97STreehugger Robot super->set_charByteSize(4); 1111*16467b97STreehugger Robot 1112*16467b97STreehugger Robot this->findout_endian_spec(machineBigEndian, inputBigEndian); 1113*16467b97STreehugger Robot} 1114*16467b97STreehugger Robot 1115*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1116*16467b97STreehugger RobotANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> ) 1117*16467b97STreehugger Robot{ 1118*16467b97STreehugger Robot SuperType* input = this->get_super(); 1119*16467b97STreehugger Robot 1120*16467b97STreehugger Robot if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) 1121*16467b97STreehugger Robot { 1122*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1123*16467b97STreehugger Robot } 1124*16467b97STreehugger Robot else 1125*16467b97STreehugger Robot { 1126*16467b97STreehugger Robot return (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); 1127*16467b97STreehugger Robot } 1128*16467b97STreehugger Robot} 1129*16467b97STreehugger Robot 1130*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1131*16467b97STreehugger RobotANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> ) 1132*16467b97STreehugger Robot{ 1133*16467b97STreehugger Robot SuperType* input = this->get_super(); 1134*16467b97STreehugger Robot 1135*16467b97STreehugger Robot if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) 1136*16467b97STreehugger Robot { 1137*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1138*16467b97STreehugger Robot } 1139*16467b97STreehugger Robot else 1140*16467b97STreehugger Robot { 1141*16467b97STreehugger Robot ANTLR_UCHAR c; 1142*16467b97STreehugger Robot 1143*16467b97STreehugger Robot c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); 1144*16467b97STreehugger Robot 1145*16467b97STreehugger Robot // Swap Endianess to Big Endian 1146*16467b97STreehugger Robot // 1147*16467b97STreehugger Robot return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); 1148*16467b97STreehugger Robot } 1149*16467b97STreehugger Robot} 1150*16467b97STreehugger Robot 1151*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1152*16467b97STreehugger RobotANTLR_UINT32 UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> ) 1153*16467b97STreehugger Robot{ 1154*16467b97STreehugger Robot SuperType* input = this->get_super(); 1155*16467b97STreehugger Robot 1156*16467b97STreehugger Robot if (( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 )) 1157*16467b97STreehugger Robot { 1158*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1159*16467b97STreehugger Robot } 1160*16467b97STreehugger Robot else 1161*16467b97STreehugger Robot { 1162*16467b97STreehugger Robot ANTLR_UCHAR c; 1163*16467b97STreehugger Robot 1164*16467b97STreehugger Robot c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1)); 1165*16467b97STreehugger Robot 1166*16467b97STreehugger Robot // Swap Endianess to Little Endian 1167*16467b97STreehugger Robot // 1168*16467b97STreehugger Robot return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); 1169*16467b97STreehugger Robot } 1170*16467b97STreehugger Robot} 1171*16467b97STreehugger Robot 1172*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1173*16467b97STreehugger Robotvoid UTF32_IntStream<ImplTraits, SuperType>::consume() 1174*16467b97STreehugger Robot{ 1175*16467b97STreehugger Robot SuperType* input = this->get_super(); 1176*16467b97STreehugger Robot 1177*16467b97STreehugger Robot // SizeBuf is always in bytes 1178*16467b97STreehugger Robot // 1179*16467b97STreehugger Robot if ( input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/4 )) 1180*16467b97STreehugger Robot { 1181*16467b97STreehugger Robot /* Indicate one more character in this line 1182*16467b97STreehugger Robot */ 1183*16467b97STreehugger Robot input->inc_charPositionInLine(); 1184*16467b97STreehugger Robot 1185*16467b97STreehugger Robot if ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar()) 1186*16467b97STreehugger Robot { 1187*16467b97STreehugger Robot /* Reset for start of a new line of input 1188*16467b97STreehugger Robot */ 1189*16467b97STreehugger Robot input->inc_line(); 1190*16467b97STreehugger Robot input->set_charPositionInLine(0); 1191*16467b97STreehugger Robot input->set_currentLine( input->get_nextChar() + 1 ); 1192*16467b97STreehugger Robot } 1193*16467b97STreehugger Robot 1194*16467b97STreehugger Robot /* Increment to next character position 1195*16467b97STreehugger Robot */ 1196*16467b97STreehugger Robot input->set_nextChar( input->get_nextChar() + 1 ); 1197*16467b97STreehugger Robot } 1198*16467b97STreehugger Robot} 1199*16467b97STreehugger Robot 1200*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1201*16467b97STreehugger Robotvoid UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool) 1202*16467b97STreehugger Robot{ 1203*16467b97STreehugger Robot SuperType* super = this->get_super(); 1204*16467b97STreehugger Robot super->set_charByteSize(0); 1205*16467b97STreehugger Robot} 1206*16467b97STreehugger Robot 1207*16467b97STreehugger Robot// ------------------------------------------------------ 1208*16467b97STreehugger Robot// Following is from Unicode.org (see antlr3convertutf.c) 1209*16467b97STreehugger Robot// 1210*16467b97STreehugger Robot 1211*16467b97STreehugger Robot/// Index into the table below with the first byte of a UTF-8 sequence to 1212*16467b97STreehugger Robot/// get the number of trailing bytes that are supposed to follow it. 1213*16467b97STreehugger Robot/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is 1214*16467b97STreehugger Robot/// left as-is for anyone who may want to do such conversion, which was 1215*16467b97STreehugger Robot/// allowed in earlier algorithms. 1216*16467b97STreehugger Robot/// 1217*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1218*16467b97STreehugger Robotconst ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8() 1219*16467b97STreehugger Robot{ 1220*16467b97STreehugger Robot static const ANTLR_UINT32 trailingBytesForUTF8[256] = { 1221*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1222*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1223*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1224*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1225*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1226*16467b97STreehugger Robot 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1227*16467b97STreehugger Robot 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1228*16467b97STreehugger Robot 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 1229*16467b97STreehugger Robot }; 1230*16467b97STreehugger Robot 1231*16467b97STreehugger Robot return trailingBytesForUTF8; 1232*16467b97STreehugger Robot} 1233*16467b97STreehugger Robot 1234*16467b97STreehugger Robot/// Magic values subtracted from a buffer value during UTF8 conversion. 1235*16467b97STreehugger Robot/// This table contains as many values as there might be trailing bytes 1236*16467b97STreehugger Robot/// in a UTF-8 sequence. 1237*16467b97STreehugger Robot/// 1238*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1239*16467b97STreehugger Robotconst UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8() 1240*16467b97STreehugger Robot{ 1241*16467b97STreehugger Robot static const UTF32 offsetsFromUTF8[6] = 1242*16467b97STreehugger Robot { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 1243*16467b97STreehugger Robot 0x03C82080UL, 0xFA082080UL, 0x82082080UL 1244*16467b97STreehugger Robot }; 1245*16467b97STreehugger Robot return offsetsFromUTF8; 1246*16467b97STreehugger Robot} 1247*16467b97STreehugger Robot 1248*16467b97STreehugger Robot// End of Unicode.org tables 1249*16467b97STreehugger Robot// ------------------------- 1250*16467b97STreehugger Robot 1251*16467b97STreehugger Robot 1252*16467b97STreehugger Robot/** \brief Consume the next character in a UTF8 input stream 1253*16467b97STreehugger Robot * 1254*16467b97STreehugger Robot * \param input Input stream context pointer 1255*16467b97STreehugger Robot */ 1256*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1257*16467b97STreehugger Robotvoid UTF8_IntStream<ImplTraits, SuperType>::consume() 1258*16467b97STreehugger Robot{ 1259*16467b97STreehugger Robot SuperType* input = this->get_super(); 1260*16467b97STreehugger Robot const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); 1261*16467b97STreehugger Robot const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); 1262*16467b97STreehugger Robot 1263*16467b97STreehugger Robot ANTLR_UINT32 extraBytesToRead; 1264*16467b97STreehugger Robot ANTLR_UCHAR ch; 1265*16467b97STreehugger Robot ANTLR_UINT8* nextChar; 1266*16467b97STreehugger Robot 1267*16467b97STreehugger Robot nextChar = input->get_nextChar(); 1268*16467b97STreehugger Robot 1269*16467b97STreehugger Robot if (nextChar < (input->get_data() + input->get_sizeBuf())) 1270*16467b97STreehugger Robot { 1271*16467b97STreehugger Robot // Indicate one more character in this line 1272*16467b97STreehugger Robot // 1273*16467b97STreehugger Robot input->inc_charPositionInLine(); 1274*16467b97STreehugger Robot 1275*16467b97STreehugger Robot // Are there more bytes needed to make up the whole thing? 1276*16467b97STreehugger Robot // 1277*16467b97STreehugger Robot extraBytesToRead = trailingBytesForUTF8[*nextChar]; 1278*16467b97STreehugger Robot 1279*16467b97STreehugger Robot if ((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf())) 1280*16467b97STreehugger Robot { 1281*16467b97STreehugger Robot input->set_nextChar( input->get_data() + input->get_sizeBuf() ); 1282*16467b97STreehugger Robot return; 1283*16467b97STreehugger Robot } 1284*16467b97STreehugger Robot 1285*16467b97STreehugger Robot // Cases deliberately fall through (see note A in antlrconvertutf.c) 1286*16467b97STreehugger Robot // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so 1287*16467b97STreehugger Robot // we allow it. 1288*16467b97STreehugger Robot // 1289*16467b97STreehugger Robot ch = 0; 1290*16467b97STreehugger Robot switch (extraBytesToRead) 1291*16467b97STreehugger Robot { 1292*16467b97STreehugger Robot case 5: ch += *nextChar++; ch <<= 6; 1293*16467b97STreehugger Robot case 4: ch += *nextChar++; ch <<= 6; 1294*16467b97STreehugger Robot case 3: ch += *nextChar++; ch <<= 6; 1295*16467b97STreehugger Robot case 2: ch += *nextChar++; ch <<= 6; 1296*16467b97STreehugger Robot case 1: ch += *nextChar++; ch <<= 6; 1297*16467b97STreehugger Robot case 0: ch += *nextChar++; 1298*16467b97STreehugger Robot } 1299*16467b97STreehugger Robot 1300*16467b97STreehugger Robot // Magically correct the input value 1301*16467b97STreehugger Robot // 1302*16467b97STreehugger Robot ch -= offsetsFromUTF8[extraBytesToRead]; 1303*16467b97STreehugger Robot if (ch == input->get_newlineChar()) 1304*16467b97STreehugger Robot { 1305*16467b97STreehugger Robot /* Reset for start of a new line of input 1306*16467b97STreehugger Robot */ 1307*16467b97STreehugger Robot input->inc_line(); 1308*16467b97STreehugger Robot input->set_charPositionInLine(0); 1309*16467b97STreehugger Robot input->set_currentLine(nextChar); 1310*16467b97STreehugger Robot } 1311*16467b97STreehugger Robot 1312*16467b97STreehugger Robot // Update input pointer 1313*16467b97STreehugger Robot // 1314*16467b97STreehugger Robot input->set_nextChar(nextChar); 1315*16467b97STreehugger Robot } 1316*16467b97STreehugger Robot} 1317*16467b97STreehugger Robot 1318*16467b97STreehugger Robot/** \brief Return the input element assuming a UTF8 input 1319*16467b97STreehugger Robot * 1320*16467b97STreehugger Robot * \param[in] input Input stream context pointer 1321*16467b97STreehugger Robot * \param[in] la 1 based offset of next input stream element 1322*16467b97STreehugger Robot * 1323*16467b97STreehugger Robot * \return Next input character in internal ANTLR3 encoding (UTF32) 1324*16467b97STreehugger Robot */ 1325*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType> 1326*16467b97STreehugger RobotANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::_LA(ANTLR_INT32 la) 1327*16467b97STreehugger Robot{ 1328*16467b97STreehugger Robot SuperType* input = this->get_super(); 1329*16467b97STreehugger Robot const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8(); 1330*16467b97STreehugger Robot const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8(); 1331*16467b97STreehugger Robot ANTLR_UINT32 extraBytesToRead; 1332*16467b97STreehugger Robot ANTLR_UCHAR ch; 1333*16467b97STreehugger Robot ANTLR_UINT8* nextChar; 1334*16467b97STreehugger Robot 1335*16467b97STreehugger Robot nextChar = input->get_nextChar(); 1336*16467b97STreehugger Robot 1337*16467b97STreehugger Robot // Do we need to traverse forwards or backwards? 1338*16467b97STreehugger Robot // - LA(0) is treated as LA(1) and we assume that the nextChar is 1339*16467b97STreehugger Robot // already positioned. 1340*16467b97STreehugger Robot // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding 1341*16467b97STreehugger Robot // - LA(-n) means we must traverse backwards n chracters 1342*16467b97STreehugger Robot // 1343*16467b97STreehugger Robot if (la > 1) { 1344*16467b97STreehugger Robot 1345*16467b97STreehugger Robot // Make sure that we have at least one character left before trying to 1346*16467b97STreehugger Robot // loop through the buffer. 1347*16467b97STreehugger Robot // 1348*16467b97STreehugger Robot if (nextChar < (input->get_data() + input->get_sizeBuf())) 1349*16467b97STreehugger Robot { 1350*16467b97STreehugger Robot // Now traverse n-1 characters forward 1351*16467b97STreehugger Robot // 1352*16467b97STreehugger Robot while (--la > 0) 1353*16467b97STreehugger Robot { 1354*16467b97STreehugger Robot // Does the next character require trailing bytes? 1355*16467b97STreehugger Robot // If so advance the pointer by that many bytes as well as advancing 1356*16467b97STreehugger Robot // one position for what will be at least a single byte character. 1357*16467b97STreehugger Robot // 1358*16467b97STreehugger Robot nextChar += trailingBytesForUTF8[*nextChar] + 1; 1359*16467b97STreehugger Robot 1360*16467b97STreehugger Robot // Does that calculation take us past the byte length of the buffer? 1361*16467b97STreehugger Robot // 1362*16467b97STreehugger Robot if (nextChar >= (input->get_data() + input->get_sizeBuf())) 1363*16467b97STreehugger Robot { 1364*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1365*16467b97STreehugger Robot } 1366*16467b97STreehugger Robot } 1367*16467b97STreehugger Robot } 1368*16467b97STreehugger Robot else 1369*16467b97STreehugger Robot { 1370*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1371*16467b97STreehugger Robot } 1372*16467b97STreehugger Robot } 1373*16467b97STreehugger Robot else 1374*16467b97STreehugger Robot { 1375*16467b97STreehugger Robot // LA is negative so we decrease the pointer by n character positions 1376*16467b97STreehugger Robot // 1377*16467b97STreehugger Robot while (nextChar > input->get_data() && la++ < 0) 1378*16467b97STreehugger Robot { 1379*16467b97STreehugger Robot // Traversing backwards in UTF8 means decermenting by one 1380*16467b97STreehugger Robot // then continuing to decrement while ever a character pattern 1381*16467b97STreehugger Robot // is flagged as being a trailing byte of an encoded code point. 1382*16467b97STreehugger Robot // Trailing UTF8 bytes always start with 10 in binary. We assumne that 1383*16467b97STreehugger Robot // the UTF8 is well formed and do not check boundary conditions 1384*16467b97STreehugger Robot // 1385*16467b97STreehugger Robot nextChar--; 1386*16467b97STreehugger Robot while ((*nextChar & 0xC0) == 0x80) 1387*16467b97STreehugger Robot { 1388*16467b97STreehugger Robot nextChar--; 1389*16467b97STreehugger Robot } 1390*16467b97STreehugger Robot } 1391*16467b97STreehugger Robot } 1392*16467b97STreehugger Robot 1393*16467b97STreehugger Robot // nextChar is now pointing at the UTF8 encoded character that we need to 1394*16467b97STreehugger Robot // decode and return. 1395*16467b97STreehugger Robot // 1396*16467b97STreehugger Robot // Are there more bytes needed to make up the whole thing? 1397*16467b97STreehugger Robot // 1398*16467b97STreehugger Robot extraBytesToRead = trailingBytesForUTF8[*nextChar]; 1399*16467b97STreehugger Robot if (nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf())) 1400*16467b97STreehugger Robot { 1401*16467b97STreehugger Robot return ANTLR_CHARSTREAM_EOF; 1402*16467b97STreehugger Robot } 1403*16467b97STreehugger Robot 1404*16467b97STreehugger Robot // Cases deliberately fall through (see note A in antlrconvertutf.c) 1405*16467b97STreehugger Robot // 1406*16467b97STreehugger Robot ch = 0; 1407*16467b97STreehugger Robot switch (extraBytesToRead) 1408*16467b97STreehugger Robot { 1409*16467b97STreehugger Robot case 5: ch += *nextChar++; ch <<= 6; 1410*16467b97STreehugger Robot case 4: ch += *nextChar++; ch <<= 6; 1411*16467b97STreehugger Robot case 3: ch += *nextChar++; ch <<= 6; 1412*16467b97STreehugger Robot case 2: ch += *nextChar++; ch <<= 6; 1413*16467b97STreehugger Robot case 1: ch += *nextChar++; ch <<= 6; 1414*16467b97STreehugger Robot case 0: ch += *nextChar++; 1415*16467b97STreehugger Robot } 1416*16467b97STreehugger Robot 1417*16467b97STreehugger Robot // Magically correct the input value 1418*16467b97STreehugger Robot // 1419*16467b97STreehugger Robot ch -= offsetsFromUTF8[extraBytesToRead]; 1420*16467b97STreehugger Robot 1421*16467b97STreehugger Robot return ch; 1422*16467b97STreehugger Robot} 1423*16467b97STreehugger Robot 1424*16467b97STreehugger Robottemplate<class ImplTraits> 1425*16467b97STreehugger RobotTokenIntStream<ImplTraits>::TokenIntStream() 1426*16467b97STreehugger Robot{ 1427*16467b97STreehugger Robot m_cachedSize = 0; 1428*16467b97STreehugger Robot} 1429*16467b97STreehugger Robot 1430*16467b97STreehugger Robottemplate<class ImplTraits> 1431*16467b97STreehugger RobotANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const 1432*16467b97STreehugger Robot{ 1433*16467b97STreehugger Robot return m_cachedSize; 1434*16467b97STreehugger Robot} 1435*16467b97STreehugger Robot 1436*16467b97STreehugger Robottemplate<class ImplTraits> 1437*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize ) 1438*16467b97STreehugger Robot{ 1439*16467b97STreehugger Robot m_cachedSize = cachedSize; 1440*16467b97STreehugger Robot} 1441*16467b97STreehugger Robot 1442*16467b97STreehugger Robot/** Move the input pointer to the next incoming token. The stream 1443*16467b97STreehugger Robot * must become active with LT(1) available. consume() simply 1444*16467b97STreehugger Robot * moves the input pointer so that LT(1) points at the next 1445*16467b97STreehugger Robot * input symbol. Consume at least one token. 1446*16467b97STreehugger Robot * 1447*16467b97STreehugger Robot * Walk past any token not on the channel the parser is listening to. 1448*16467b97STreehugger Robot */ 1449*16467b97STreehugger Robottemplate<class ImplTraits> 1450*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::consume() 1451*16467b97STreehugger Robot{ 1452*16467b97STreehugger Robot TokenStreamType* cts = static_cast<TokenStreamType*>(this); 1453*16467b97STreehugger Robot 1454*16467b97STreehugger Robot if((ANTLR_UINT32)cts->get_p() < m_cachedSize ) 1455*16467b97STreehugger Robot { 1456*16467b97STreehugger Robot cts->inc_p(); 1457*16467b97STreehugger Robot cts->set_p( cts->skipOffTokenChannels(cts->get_p()) ); 1458*16467b97STreehugger Robot } 1459*16467b97STreehugger Robot} 1460*16467b97STreehugger Robottemplate<class ImplTraits> 1461*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::consumeInitialHiddenTokens() 1462*16467b97STreehugger Robot{ 1463*16467b97STreehugger Robot ANTLR_MARKER first; 1464*16467b97STreehugger Robot ANTLR_INT32 i; 1465*16467b97STreehugger Robot TokenStreamType* ts; 1466*16467b97STreehugger Robot 1467*16467b97STreehugger Robot ts = this->get_super(); 1468*16467b97STreehugger Robot first = this->index(); 1469*16467b97STreehugger Robot 1470*16467b97STreehugger Robot for (i=0; i<first; i++) 1471*16467b97STreehugger Robot { 1472*16467b97STreehugger Robot ts->get_debugger()->consumeHiddenToken(ts->get(i)); 1473*16467b97STreehugger Robot } 1474*16467b97STreehugger Robot 1475*16467b97STreehugger Robot ts->set_initialStreamState(false); 1476*16467b97STreehugger Robot} 1477*16467b97STreehugger Robot 1478*16467b97STreehugger Robot 1479*16467b97STreehugger Robottemplate<class ImplTraits> 1480*16467b97STreehugger RobotANTLR_UINT32 TokenIntStream<ImplTraits>::_LA( ANTLR_INT32 i ) 1481*16467b97STreehugger Robot{ 1482*16467b97STreehugger Robot const CommonTokenType* tok; 1483*16467b97STreehugger Robot TokenStreamType* ts = static_cast<TokenStreamType*>(this); 1484*16467b97STreehugger Robot 1485*16467b97STreehugger Robot tok = ts->_LT(i); 1486*16467b97STreehugger Robot 1487*16467b97STreehugger Robot if (tok != NULL) 1488*16467b97STreehugger Robot { 1489*16467b97STreehugger Robot return tok->get_type(); 1490*16467b97STreehugger Robot } 1491*16467b97STreehugger Robot else 1492*16467b97STreehugger Robot { 1493*16467b97STreehugger Robot return CommonTokenType::TOKEN_INVALID; 1494*16467b97STreehugger Robot } 1495*16467b97STreehugger Robot 1496*16467b97STreehugger Robot} 1497*16467b97STreehugger Robot 1498*16467b97STreehugger Robottemplate<class ImplTraits> 1499*16467b97STreehugger RobotANTLR_MARKER TokenIntStream<ImplTraits>::mark() 1500*16467b97STreehugger Robot{ 1501*16467b97STreehugger Robot BaseType::m_lastMarker = this->index(); 1502*16467b97STreehugger Robot return BaseType::m_lastMarker; 1503*16467b97STreehugger Robot} 1504*16467b97STreehugger Robot 1505*16467b97STreehugger Robottemplate<class ImplTraits> 1506*16467b97STreehugger RobotANTLR_UINT32 TokenIntStream<ImplTraits>::size() 1507*16467b97STreehugger Robot{ 1508*16467b97STreehugger Robot if (this->get_cachedSize() > 0) 1509*16467b97STreehugger Robot { 1510*16467b97STreehugger Robot return this->get_cachedSize(); 1511*16467b97STreehugger Robot } 1512*16467b97STreehugger Robot TokenStreamType* cts = this->get_super(); 1513*16467b97STreehugger Robot 1514*16467b97STreehugger Robot this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) ); 1515*16467b97STreehugger Robot return this->get_cachedSize(); 1516*16467b97STreehugger Robot} 1517*16467b97STreehugger Robot 1518*16467b97STreehugger Robottemplate<class ImplTraits> 1519*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::release() 1520*16467b97STreehugger Robot{ 1521*16467b97STreehugger Robot return; 1522*16467b97STreehugger Robot} 1523*16467b97STreehugger Robot 1524*16467b97STreehugger Robottemplate<class ImplTraits> 1525*16467b97STreehugger RobotANTLR_MARKER TokenIntStream<ImplTraits>::tindex() 1526*16467b97STreehugger Robot{ 1527*16467b97STreehugger Robot return this->get_super()->get_p(); 1528*16467b97STreehugger Robot} 1529*16467b97STreehugger Robot 1530*16467b97STreehugger Robottemplate<class ImplTraits> 1531*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::rewindLast() 1532*16467b97STreehugger Robot{ 1533*16467b97STreehugger Robot this->rewind( this->get_lastMarker() ); 1534*16467b97STreehugger Robot} 1535*16467b97STreehugger Robot 1536*16467b97STreehugger Robottemplate<class ImplTraits> 1537*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker) 1538*16467b97STreehugger Robot{ 1539*16467b97STreehugger Robot return this->seek(marker); 1540*16467b97STreehugger Robot} 1541*16467b97STreehugger Robot 1542*16467b97STreehugger Robottemplate<class ImplTraits> 1543*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index) 1544*16467b97STreehugger Robot{ 1545*16467b97STreehugger Robot TokenStreamType* cts = static_cast<TokenStreamType*>(this); 1546*16467b97STreehugger Robot 1547*16467b97STreehugger Robot cts->set_p( static_cast<ANTLR_INT32>(index) ); 1548*16467b97STreehugger Robot} 1549*16467b97STreehugger Robot 1550*16467b97STreehugger Robot 1551*16467b97STreehugger Robot/// Return a string that represents the name assoicated with the input source 1552*16467b97STreehugger Robot/// 1553*16467b97STreehugger Robot/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. 1554*16467b97STreehugger Robot/// 1555*16467b97STreehugger Robot/// /returns 1556*16467b97STreehugger Robot/// /implements ANTLR3_INT_STREAM_struct::getSourceName() 1557*16467b97STreehugger Robot/// 1558*16467b97STreehugger Robottemplate<class ImplTraits> 1559*16467b97STreehugger Robottypename TokenIntStream<ImplTraits>::StringType 1560*16467b97STreehugger RobotTokenIntStream<ImplTraits>::getSourceName() 1561*16467b97STreehugger Robot{ 1562*16467b97STreehugger Robot // Slightly convoluted as we must trace back to the lexer's input source 1563*16467b97STreehugger Robot // via the token source. The streamName that is here is not initialized 1564*16467b97STreehugger Robot // because this is a token stream, not a file or string stream, which are the 1565*16467b97STreehugger Robot // only things that have a context for a source name. 1566*16467b97STreehugger Robot // 1567*16467b97STreehugger Robot return this->get_super()->get_tokenSource()->get_fileName(); 1568*16467b97STreehugger Robot} 1569*16467b97STreehugger Robot 1570*16467b97STreehugger Robottemplate<class ImplTraits> 1571*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::consume() 1572*16467b97STreehugger Robot{ 1573*16467b97STreehugger Robot CommonTreeNodeStreamType* ctns = this->get_super(); 1574*16467b97STreehugger Robot if( ctns->get_p() == -1 ) 1575*16467b97STreehugger Robot ctns->fillBufferRoot(); 1576*16467b97STreehugger Robot ctns->inc_p(); 1577*16467b97STreehugger Robot} 1578*16467b97STreehugger Robottemplate<class ImplTraits> 1579*16467b97STreehugger RobotANTLR_MARKER TreeNodeIntStream<ImplTraits>::tindex() 1580*16467b97STreehugger Robot{ 1581*16467b97STreehugger Robot CommonTreeNodeStreamType* ctns = this->get_super(); 1582*16467b97STreehugger Robot return (ANTLR_MARKER)(ctns->get_p()); 1583*16467b97STreehugger Robot} 1584*16467b97STreehugger Robot 1585*16467b97STreehugger Robottemplate<class ImplTraits> 1586*16467b97STreehugger RobotANTLR_UINT32 TreeNodeIntStream<ImplTraits>::_LA(ANTLR_INT32 i) 1587*16467b97STreehugger Robot{ 1588*16467b97STreehugger Robot CommonTreeNodeStreamType* tns = this->get_super(); 1589*16467b97STreehugger Robot 1590*16467b97STreehugger Robot // Ask LT for the 'token' at that position 1591*16467b97STreehugger Robot // 1592*16467b97STreehugger Robot TreeType* t = tns->_LT(i); 1593*16467b97STreehugger Robot 1594*16467b97STreehugger Robot if (t == NULL) 1595*16467b97STreehugger Robot { 1596*16467b97STreehugger Robot return CommonTokenType::TOKEN_INVALID; 1597*16467b97STreehugger Robot } 1598*16467b97STreehugger Robot 1599*16467b97STreehugger Robot // Token node was there so return the type of it 1600*16467b97STreehugger Robot // 1601*16467b97STreehugger Robot return t->get_type(); 1602*16467b97STreehugger Robot} 1603*16467b97STreehugger Robot 1604*16467b97STreehugger Robottemplate<class ImplTraits> 1605*16467b97STreehugger RobotANTLR_MARKER TreeNodeIntStream<ImplTraits>::mark() 1606*16467b97STreehugger Robot{ 1607*16467b97STreehugger Robot CommonTreeNodeStreamType* ctns = this->get_super(); 1608*16467b97STreehugger Robot 1609*16467b97STreehugger Robot if (ctns->get_p() == -1) 1610*16467b97STreehugger Robot { 1611*16467b97STreehugger Robot ctns->fillBufferRoot(); 1612*16467b97STreehugger Robot } 1613*16467b97STreehugger Robot 1614*16467b97STreehugger Robot // Return the current mark point 1615*16467b97STreehugger Robot // 1616*16467b97STreehugger Robot this->set_lastMarker( this->index() ); 1617*16467b97STreehugger Robot 1618*16467b97STreehugger Robot return this->get_lastMarker(); 1619*16467b97STreehugger Robot 1620*16467b97STreehugger Robot} 1621*16467b97STreehugger Robot 1622*16467b97STreehugger Robottemplate<class ImplTraits> 1623*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER marker) 1624*16467b97STreehugger Robot{ 1625*16467b97STreehugger Robot 1626*16467b97STreehugger Robot} 1627*16467b97STreehugger Robot 1628*16467b97STreehugger Robottemplate<class ImplTraits> 1629*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker) 1630*16467b97STreehugger Robot{ 1631*16467b97STreehugger Robot this->seek(marker); 1632*16467b97STreehugger Robot} 1633*16467b97STreehugger Robot 1634*16467b97STreehugger Robottemplate<class ImplTraits> 1635*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::rewindLast() 1636*16467b97STreehugger Robot{ 1637*16467b97STreehugger Robot this->seek( this->get_lastMarker() ); 1638*16467b97STreehugger Robot} 1639*16467b97STreehugger Robot 1640*16467b97STreehugger Robottemplate<class ImplTraits> 1641*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index) 1642*16467b97STreehugger Robot{ 1643*16467b97STreehugger Robot CommonTreeNodeStreamType* ctns = this->get_super(); 1644*16467b97STreehugger Robot ctns->set_p( ANTLR_UINT32_CAST(index) ); 1645*16467b97STreehugger Robot} 1646*16467b97STreehugger Robot 1647*16467b97STreehugger Robottemplate<class ImplTraits> 1648*16467b97STreehugger RobotANTLR_UINT32 TreeNodeIntStream<ImplTraits>::size() 1649*16467b97STreehugger Robot{ 1650*16467b97STreehugger Robot CommonTreeNodeStreamType* ctns = this->get_super(); 1651*16467b97STreehugger Robot 1652*16467b97STreehugger Robot if (ctns->get_p() == -1) 1653*16467b97STreehugger Robot { 1654*16467b97STreehugger Robot ctns->fillBufferRoot(); 1655*16467b97STreehugger Robot } 1656*16467b97STreehugger Robot 1657*16467b97STreehugger Robot return ctns->get_nodes().size(); 1658*16467b97STreehugger Robot} 1659*16467b97STreehugger Robot 1660*16467b97STreehugger Robot 1661*16467b97STreehugger RobotANTLR_END_NAMESPACE() 1662