xref: /aosp_15_r20/external/antlr/runtime/Cpp/include/antlr3intstream.inl (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger RobotANTLR_BEGIN_NAMESPACE()
2*16467b97STreehugger Robot
3*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
4*16467b97STreehugger RobotANTLR_INLINE IntStream<ImplTraits, SuperType>::IntStream()
5*16467b97STreehugger Robot{
6*16467b97STreehugger Robot	m_lastMarker = 0;
7*16467b97STreehugger Robot	m_upper_case = false;
8*16467b97STreehugger Robot}
9*16467b97STreehugger Robot
10*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
11*16467b97STreehugger RobotANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType	IntStream<ImplTraits, SuperType>::getSourceName()
12*16467b97STreehugger Robot{
13*16467b97STreehugger Robot	return m_streamName;
14*16467b97STreehugger Robot}
15*16467b97STreehugger Robot
16*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
17*16467b97STreehugger RobotANTLR_INLINE typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName()
18*16467b97STreehugger Robot{
19*16467b97STreehugger Robot	return m_streamName;
20*16467b97STreehugger Robot}
21*16467b97STreehugger Robot
22*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
23*16467b97STreehugger RobotANTLR_INLINE const typename IntStream<ImplTraits, SuperType>::StringType& 	IntStream<ImplTraits, SuperType>::get_streamName() const
24*16467b97STreehugger Robot{
25*16467b97STreehugger Robot	return m_streamName;
26*16467b97STreehugger Robot}
27*16467b97STreehugger Robot
28*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
29*16467b97STreehugger RobotANTLR_INLINE ANTLR_MARKER IntStream<ImplTraits, SuperType>::get_lastMarker() const
30*16467b97STreehugger Robot{
31*16467b97STreehugger Robot	return m_lastMarker;
32*16467b97STreehugger Robot}
33*16467b97STreehugger Robot
34*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
35*16467b97STreehugger RobotANTLR_INLINE void	IntStream<ImplTraits, SuperType>::setUcaseLA(bool flag)
36*16467b97STreehugger Robot{
37*16467b97STreehugger Robot	m_upper_case = flag;
38*16467b97STreehugger Robot}
39*16467b97STreehugger Robot
40*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
41*16467b97STreehugger RobotANTLR_INLINE SuperType* IntStream<ImplTraits, SuperType>::get_super()
42*16467b97STreehugger Robot{
43*16467b97STreehugger Robot	return static_cast<SuperType*>(this);
44*16467b97STreehugger Robot}
45*16467b97STreehugger Robot
46*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
47*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::consume()
48*16467b97STreehugger Robot{
49*16467b97STreehugger Robot	SuperType* input = this->get_super();
50*16467b97STreehugger Robot
51*16467b97STreehugger Robot	const ANTLR_UINT8* nextChar = input->get_nextChar();
52*16467b97STreehugger Robot	const ANTLR_UINT8* data = input->get_data();
53*16467b97STreehugger Robot	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
54*16467b97STreehugger Robot
55*16467b97STreehugger Robot    if	( nextChar < ( data + sizeBuf ) )
56*16467b97STreehugger Robot    {
57*16467b97STreehugger Robot		/* Indicate one more character in this line
58*16467b97STreehugger Robot		 */
59*16467b97STreehugger Robot		input->inc_charPositionInLine();
60*16467b97STreehugger Robot
61*16467b97STreehugger Robot		if  ((ANTLR_UCHAR)(*(nextChar)) == input->get_newlineChar() )
62*16467b97STreehugger Robot		{
63*16467b97STreehugger Robot			/* Reset for start of a new line of input
64*16467b97STreehugger Robot			 */
65*16467b97STreehugger Robot			input->inc_line();
66*16467b97STreehugger Robot			input->set_charPositionInLine(0);
67*16467b97STreehugger Robot			input->set_currentLine(nextChar + 1);
68*16467b97STreehugger Robot		}
69*16467b97STreehugger Robot
70*16467b97STreehugger Robot		/* Increment to next character position
71*16467b97STreehugger Robot		 */
72*16467b97STreehugger Robot		input->set_nextChar( nextChar + 1 );
73*16467b97STreehugger Robot    }
74*16467b97STreehugger Robot}
75*16467b97STreehugger Robot
76*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
77*16467b97STreehugger RobotANTLR_UINT32	IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la )
78*16467b97STreehugger Robot{
79*16467b97STreehugger Robot	SuperType* input = this->get_super();
80*16467b97STreehugger Robot	const ANTLR_UINT8* nextChar = input->get_nextChar();
81*16467b97STreehugger Robot	const ANTLR_UINT8* data = input->get_data();
82*16467b97STreehugger Robot	ANTLR_UINT32 sizeBuf = input->get_sizeBuf();
83*16467b97STreehugger Robot
84*16467b97STreehugger Robot    if	(( nextChar + la - 1) >= (data + sizeBuf))
85*16467b97STreehugger Robot    {
86*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
87*16467b97STreehugger Robot    }
88*16467b97STreehugger Robot    else
89*16467b97STreehugger Robot    {
90*16467b97STreehugger Robot		if( !m_upper_case )
91*16467b97STreehugger Robot			return	(ANTLR_UCHAR)(*(nextChar + la - 1));
92*16467b97STreehugger Robot		else
93*16467b97STreehugger Robot			return	(ANTLR_UCHAR)toupper(*(nextChar + la - 1));
94*16467b97STreehugger Robot    }
95*16467b97STreehugger Robot}
96*16467b97STreehugger Robot
97*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
98*16467b97STreehugger RobotANTLR_MARKER IntStream<ImplTraits, SuperType>::mark()
99*16467b97STreehugger Robot{
100*16467b97STreehugger Robot	LexState<ImplTraits>*	    state;
101*16467b97STreehugger Robot    SuperType* input = this->get_super();
102*16467b97STreehugger Robot
103*16467b97STreehugger Robot    /* New mark point
104*16467b97STreehugger Robot     */
105*16467b97STreehugger Robot    input->inc_markDepth();
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot    /* See if we are revisiting a mark as we can just reuse the vector
108*16467b97STreehugger Robot     * entry if we are, otherwise, we need a new one
109*16467b97STreehugger Robot     */
110*16467b97STreehugger Robot    if	(input->get_markDepth() > input->get_markers().size() )
111*16467b97STreehugger Robot    {
112*16467b97STreehugger Robot		input->get_markers().push_back( LexState<ImplTraits>() );
113*16467b97STreehugger Robot		LexState<ImplTraits>& state_r = input->get_markers().back();
114*16467b97STreehugger Robot		state = &state_r;
115*16467b97STreehugger Robot    }
116*16467b97STreehugger Robot    else
117*16467b97STreehugger Robot    {
118*16467b97STreehugger Robot		LexState<ImplTraits>& state_r = input->get_markers().at( input->get_markDepth() - 1 );
119*16467b97STreehugger Robot		state	= &state_r;
120*16467b97STreehugger Robot
121*16467b97STreehugger Robot		/* Assume no errors for speed, it will just blow up if the table failed
122*16467b97STreehugger Robot		 * for some reasons, hence lots of unit tests on the tables ;-)
123*16467b97STreehugger Robot		 */
124*16467b97STreehugger Robot    }
125*16467b97STreehugger Robot
126*16467b97STreehugger Robot    /* We have created or retrieved the state, so update it with the current
127*16467b97STreehugger Robot     * elements of the lexer state.
128*16467b97STreehugger Robot     */
129*16467b97STreehugger Robot    state->set_charPositionInLine( input->get_charPositionInLine() );
130*16467b97STreehugger Robot    state->set_currentLine( input->get_currentLine() );
131*16467b97STreehugger Robot    state->set_line( input->get_line() );
132*16467b97STreehugger Robot    state->set_nextChar( input->get_nextChar() );
133*16467b97STreehugger Robot
134*16467b97STreehugger Robot    m_lastMarker = input->get_markDepth();
135*16467b97STreehugger Robot
136*16467b97STreehugger Robot    /* And that's it
137*16467b97STreehugger Robot     */
138*16467b97STreehugger Robot    return  input->get_markDepth();
139*16467b97STreehugger Robot}
140*16467b97STreehugger Robot
141*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
142*16467b97STreehugger RobotANTLR_MARKER	IntStream<ImplTraits, SuperType>::index()
143*16467b97STreehugger Robot{
144*16467b97STreehugger Robot	SuperType* input = this->get_super();
145*16467b97STreehugger Robot	return input->index_impl();
146*16467b97STreehugger Robot}
147*16467b97STreehugger Robot
148*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
149*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::rewind(ANTLR_MARKER mark)
150*16467b97STreehugger Robot{
151*16467b97STreehugger Robot    SuperType* input = this->get_super();
152*16467b97STreehugger Robot
153*16467b97STreehugger Robot    /* Perform any clean up of the marks
154*16467b97STreehugger Robot     */
155*16467b97STreehugger Robot    this->release(mark);
156*16467b97STreehugger Robot
157*16467b97STreehugger Robot    /* Find the supplied mark state
158*16467b97STreehugger Robot     */
159*16467b97STreehugger Robot	ANTLR_UINT32 idx = static_cast<ANTLR_UINT32>( mark-1 );
160*16467b97STreehugger Robot    typename ImplTraits::LexStateType&   state = input->get_markers().at( idx );
161*16467b97STreehugger Robot
162*16467b97STreehugger Robot    /* Seek input pointer to the requested point (note we supply the void *pointer
163*16467b97STreehugger Robot     * to whatever is implementing the int stream to seek).
164*16467b97STreehugger Robot     */
165*16467b97STreehugger Robot	this->seek( (ANTLR_MARKER)state.get_nextChar() );
166*16467b97STreehugger Robot
167*16467b97STreehugger Robot    /* Reset to the reset of the information in the mark
168*16467b97STreehugger Robot     */
169*16467b97STreehugger Robot    input->set_charPositionInLine( state.get_charPositionInLine() );
170*16467b97STreehugger Robot    input->set_currentLine( state.get_currentLine() );
171*16467b97STreehugger Robot    input->set_line( state.get_line() );
172*16467b97STreehugger Robot    input->set_nextChar( state.get_nextChar() );
173*16467b97STreehugger Robot
174*16467b97STreehugger Robot    /* And we are done
175*16467b97STreehugger Robot     */
176*16467b97STreehugger Robot}
177*16467b97STreehugger Robot
178*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
179*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::rewindLast()
180*16467b97STreehugger Robot{
181*16467b97STreehugger Robot	this->rewind(m_lastMarker);
182*16467b97STreehugger Robot}
183*16467b97STreehugger Robot
184*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
185*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::release(ANTLR_MARKER mark)
186*16467b97STreehugger Robot{
187*16467b97STreehugger Robot	SuperType* input = this->get_super();
188*16467b97STreehugger Robot
189*16467b97STreehugger Robot	/* We don't do much here in fact as we never free any higher marks in
190*16467b97STreehugger Robot     * the hashtable as we just resuse any memory allocated for them.
191*16467b97STreehugger Robot     */
192*16467b97STreehugger Robot    input->set_markDepth( (ANTLR_UINT32)(mark - 1) );
193*16467b97STreehugger Robot
194*16467b97STreehugger Robot}
195*16467b97STreehugger Robot
196*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
197*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
198*16467b97STreehugger Robot{
199*16467b97STreehugger Robot}
200*16467b97STreehugger Robot
201*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
202*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
203*16467b97STreehugger Robot{
204*16467b97STreehugger Robot	ANTLR_INT32   count;
205*16467b97STreehugger Robot	SuperType* input = this->get_super();
206*16467b97STreehugger Robot
207*16467b97STreehugger Robot	ANTLR_MARKER nextChar = (ANTLR_MARKER) input->get_nextChar();
208*16467b97STreehugger Robot	/* If the requested seek point is less than the current
209*16467b97STreehugger Robot	* input point, then we assume that we are resetting from a mark
210*16467b97STreehugger Robot	* and do not need to scan, but can just set to there.
211*16467b97STreehugger Robot	*/
212*16467b97STreehugger Robot	if	(seekPoint <= nextChar)
213*16467b97STreehugger Robot	{
214*16467b97STreehugger Robot		input->set_nextChar((ANTLR_UINT8*) seekPoint);
215*16467b97STreehugger Robot	}
216*16467b97STreehugger Robot	else
217*16467b97STreehugger Robot	{
218*16467b97STreehugger Robot		count	= (ANTLR_UINT32)(seekPoint - nextChar);
219*16467b97STreehugger Robot
220*16467b97STreehugger Robot		while (count--)
221*16467b97STreehugger Robot		{
222*16467b97STreehugger Robot			this->consume();
223*16467b97STreehugger Robot		}
224*16467b97STreehugger Robot	}
225*16467b97STreehugger Robot}
226*16467b97STreehugger Robot
227*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
228*16467b97STreehugger RobotIntStream<ImplTraits, SuperType>::~IntStream()
229*16467b97STreehugger Robot{
230*16467b97STreehugger Robot}
231*16467b97STreehugger Robot
232*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
233*16467b97STreehugger RobotANTLR_UINT32	EBCDIC_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la)
234*16467b97STreehugger Robot{
235*16467b97STreehugger Robot	// EBCDIC to ASCII conversion table
236*16467b97STreehugger Robot	//
237*16467b97STreehugger Robot	// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
238*16467b97STreehugger Robot	// translation and the character tables are published all over the interweb.
239*16467b97STreehugger Robot	//
240*16467b97STreehugger Robot	const ANTLR_UCHAR e2a[256] =
241*16467b97STreehugger Robot	{
242*16467b97STreehugger Robot		0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
243*16467b97STreehugger Robot		0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
244*16467b97STreehugger Robot		0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
245*16467b97STreehugger Robot		0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
246*16467b97STreehugger Robot		0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
247*16467b97STreehugger Robot		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
248*16467b97STreehugger Robot		0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
249*16467b97STreehugger Robot		0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
250*16467b97STreehugger Robot		0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
251*16467b97STreehugger Robot		0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
252*16467b97STreehugger Robot		0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
253*16467b97STreehugger Robot		0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
254*16467b97STreehugger Robot		0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
255*16467b97STreehugger Robot		0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
256*16467b97STreehugger Robot		0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
257*16467b97STreehugger Robot		0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
258*16467b97STreehugger Robot		0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
259*16467b97STreehugger Robot		0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
260*16467b97STreehugger Robot		0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
261*16467b97STreehugger Robot		0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
262*16467b97STreehugger Robot		0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
263*16467b97STreehugger Robot		0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
264*16467b97STreehugger Robot		0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
265*16467b97STreehugger Robot		0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
266*16467b97STreehugger Robot		0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
267*16467b97STreehugger Robot		0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
268*16467b97STreehugger Robot		0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
269*16467b97STreehugger Robot		0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
270*16467b97STreehugger Robot		0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
271*16467b97STreehugger Robot		0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
272*16467b97STreehugger Robot		0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
273*16467b97STreehugger Robot		0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
274*16467b97STreehugger Robot	};
275*16467b97STreehugger Robot
276*16467b97STreehugger Robot	SuperType* input = this->get_super();
277*16467b97STreehugger Robot
278*16467b97STreehugger Robot    if	(( input->get_nextChar() + la - 1) >= ( input->get_data() + input->get_sizeBuf() ))
279*16467b97STreehugger Robot    {
280*16467b97STreehugger Robot        return	ANTLR_CHARSTREAM_EOF;
281*16467b97STreehugger Robot    }
282*16467b97STreehugger Robot    else
283*16467b97STreehugger Robot    {
284*16467b97STreehugger Robot        // Translate the required character via the constant conversion table
285*16467b97STreehugger Robot        //
286*16467b97STreehugger Robot        return	e2a[(*(input->get_nextChar() + la - 1))];
287*16467b97STreehugger Robot    }
288*16467b97STreehugger Robot}
289*16467b97STreehugger Robot
290*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
291*16467b97STreehugger Robotvoid EBCDIC_IntStream<ImplTraits, SuperType>::setupIntStream()
292*16467b97STreehugger Robot{
293*16467b97STreehugger Robot	SuperType* super = this->get_super();
294*16467b97STreehugger Robot	super->set_charByteSize(1);
295*16467b97STreehugger Robot}
296*16467b97STreehugger Robot
297*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
298*16467b97STreehugger RobotANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
299*16467b97STreehugger Robot{
300*16467b97STreehugger Robot	return this->_LA(i, ClassForwarder< typename ImplTraits::Endianness >() );
301*16467b97STreehugger Robot}
302*16467b97STreehugger Robot
303*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
304*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::consume()
305*16467b97STreehugger Robot{
306*16467b97STreehugger Robot	this->consume( ClassForwarder< typename ImplTraits::Endianness >() );
307*16467b97STreehugger Robot}
308*16467b97STreehugger Robot
309*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
310*16467b97STreehugger RobotANTLR_MARKER	UTF16_IntStream<ImplTraits, SuperType>::index()
311*16467b97STreehugger Robot{
312*16467b97STreehugger Robot	SuperType* input = this->get_super();
313*16467b97STreehugger Robot    return  (ANTLR_MARKER)(input->get_nextChar());
314*16467b97STreehugger Robot}
315*16467b97STreehugger Robot
316*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
317*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
318*16467b97STreehugger Robot{
319*16467b97STreehugger Robot	SuperType* input = this->get_super();
320*16467b97STreehugger Robot
321*16467b97STreehugger Robot	// If the requested seek point is less than the current
322*16467b97STreehugger Robot	// input point, then we assume that we are resetting from a mark
323*16467b97STreehugger Robot	// and do not need to scan, but can just set to there as rewind will
324*16467b97STreehugger Robot    // reset line numbers and so on.
325*16467b97STreehugger Robot	//
326*16467b97STreehugger Robot	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
327*16467b97STreehugger Robot	{
328*16467b97STreehugger Robot		input->set_nextChar( seekPoint );
329*16467b97STreehugger Robot	}
330*16467b97STreehugger Robot	else
331*16467b97STreehugger Robot	{
332*16467b97STreehugger Robot        // Call consume until we reach the asked for seek point or EOF
333*16467b97STreehugger Robot        //
334*16467b97STreehugger Robot        while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar() ) )
335*16467b97STreehugger Robot	    {
336*16467b97STreehugger Robot			this->consume();
337*16467b97STreehugger Robot	    }
338*16467b97STreehugger Robot	}
339*16467b97STreehugger Robot}
340*16467b97STreehugger Robot
341*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
342*16467b97STreehugger Robotvoid IntStream<ImplTraits, SuperType>::findout_endian_spec(bool machineBigEndian, bool inputBigEndian)
343*16467b97STreehugger Robot{
344*16467b97STreehugger Robot	// We must install different UTF16 routines according to whether the input
345*16467b97STreehugger Robot	// is the same endianess as the machine we are executing upon or not. If it is not
346*16467b97STreehugger Robot	// then we must install methods that can convert the endianess on the fly as they go
347*16467b97STreehugger Robot	//
348*16467b97STreehugger Robot
349*16467b97STreehugger Robot	if(machineBigEndian == true)
350*16467b97STreehugger Robot	{
351*16467b97STreehugger Robot		// Machine is Big Endian, if the input is also then install the
352*16467b97STreehugger Robot		// methods that do not access input by bytes and reverse them.
353*16467b97STreehugger Robot		// Otherwise install endian aware methods.
354*16467b97STreehugger Robot		//
355*16467b97STreehugger Robot		if  (inputBigEndian == true)
356*16467b97STreehugger Robot		{
357*16467b97STreehugger Robot			// Input is machine compatible
358*16467b97STreehugger Robot			//
359*16467b97STreehugger Robot			m_endian_spec = 1;
360*16467b97STreehugger Robot		}
361*16467b97STreehugger Robot		else
362*16467b97STreehugger Robot		{
363*16467b97STreehugger Robot			// Need to use methods that know that the input is little endian
364*16467b97STreehugger Robot			//
365*16467b97STreehugger Robot			m_endian_spec = 2;
366*16467b97STreehugger Robot		}
367*16467b97STreehugger Robot	}
368*16467b97STreehugger Robot	else
369*16467b97STreehugger Robot	{
370*16467b97STreehugger Robot		// Machine is Little Endian, if the input is also then install the
371*16467b97STreehugger Robot		// methods that do not access input by bytes and reverse them.
372*16467b97STreehugger Robot		// Otherwise install endian aware methods.
373*16467b97STreehugger Robot		//
374*16467b97STreehugger Robot		if  (inputBigEndian == false)
375*16467b97STreehugger Robot		{
376*16467b97STreehugger Robot			// Input is machine compatible
377*16467b97STreehugger Robot			//
378*16467b97STreehugger Robot			m_endian_spec =  1;
379*16467b97STreehugger Robot		}
380*16467b97STreehugger Robot		else
381*16467b97STreehugger Robot		{
382*16467b97STreehugger Robot			// Need to use methods that know that the input is Big Endian
383*16467b97STreehugger Robot			//
384*16467b97STreehugger Robot			m_endian_spec	= 3;
385*16467b97STreehugger Robot		}
386*16467b97STreehugger Robot	}
387*16467b97STreehugger Robot}
388*16467b97STreehugger Robot
389*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
390*16467b97STreehugger Robotvoid UTF16_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
391*16467b97STreehugger Robot{
392*16467b97STreehugger Robot	SuperType* super = this->get_super();
393*16467b97STreehugger Robot	super->set_charByteSize(2);
394*16467b97STreehugger Robot
395*16467b97STreehugger Robot	this->findout_endian_spec( machineBigEndian, inputBigEndian );
396*16467b97STreehugger Robot}
397*16467b97STreehugger Robot
398*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
399*16467b97STreehugger RobotANTLR_UINT32 IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i, ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
400*16467b97STreehugger Robot{
401*16467b97STreehugger Robot	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
402*16467b97STreehugger Robot	switch(m_endian_spec)
403*16467b97STreehugger Robot	{
404*16467b97STreehugger Robot	case 1:
405*16467b97STreehugger Robot		return this->_LA(i, ClassForwarder<BYTE_AGNOSTIC>() );
406*16467b97STreehugger Robot		break;
407*16467b97STreehugger Robot	case 2:
408*16467b97STreehugger Robot		return this->_LA(i, ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
409*16467b97STreehugger Robot		break;
410*16467b97STreehugger Robot	case 3:
411*16467b97STreehugger Robot		return this->_LA(i, ClassForwarder<ANTLR_BIG_ENDIAN>() );
412*16467b97STreehugger Robot		break;
413*16467b97STreehugger Robot	default:
414*16467b97STreehugger Robot		break;
415*16467b97STreehugger Robot	}
416*16467b97STreehugger Robot	return 0;
417*16467b97STreehugger Robot}
418*16467b97STreehugger Robot
419*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
420*16467b97STreehugger Robotvoid	IntStream<ImplTraits, SuperType>::consume( ClassForwarder<RESOLVE_ENDIAN_AT_RUNTIME> )
421*16467b97STreehugger Robot{
422*16467b97STreehugger Robot	assert( (m_endian_spec >= 1) && (m_endian_spec <= 3));
423*16467b97STreehugger Robot	switch(m_endian_spec)
424*16467b97STreehugger Robot	{
425*16467b97STreehugger Robot	case 1:
426*16467b97STreehugger Robot		this->consume( ClassForwarder<BYTE_AGNOSTIC>() );
427*16467b97STreehugger Robot		break;
428*16467b97STreehugger Robot	case 2:
429*16467b97STreehugger Robot		this->consume( ClassForwarder<ANTLR_LITTLE_ENDIAN>() );
430*16467b97STreehugger Robot		break;
431*16467b97STreehugger Robot	case 3:
432*16467b97STreehugger Robot		this->consume( ClassForwarder<ANTLR_BIG_ENDIAN>() );
433*16467b97STreehugger Robot		break;
434*16467b97STreehugger Robot	default:
435*16467b97STreehugger Robot		break;
436*16467b97STreehugger Robot	}
437*16467b97STreehugger Robot}
438*16467b97STreehugger Robot
439*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
440*16467b97STreehugger RobotANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
441*16467b97STreehugger Robot{
442*16467b97STreehugger Robot	SuperType* input;
443*16467b97STreehugger Robot    UTF32   ch;
444*16467b97STreehugger Robot    UTF32   ch2;
445*16467b97STreehugger Robot    UTF16*	nextChar;
446*16467b97STreehugger Robot
447*16467b97STreehugger Robot    // Find the input interface and where we are currently pointing to
448*16467b97STreehugger Robot    // in the input stream
449*16467b97STreehugger Robot    //
450*16467b97STreehugger Robot	input   = this->get_super;
451*16467b97STreehugger Robot	nextChar    = input->get_nextChar();
452*16467b97STreehugger Robot
453*16467b97STreehugger Robot    // If a positive offset then advance forward, else retreat
454*16467b97STreehugger Robot    //
455*16467b97STreehugger Robot    if  (la >= 0)
456*16467b97STreehugger Robot    {
457*16467b97STreehugger Robot        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
458*16467b97STreehugger Robot        {
459*16467b97STreehugger Robot            // Advance our copy of the input pointer
460*16467b97STreehugger Robot            //
461*16467b97STreehugger Robot            // Next char in natural machine byte order
462*16467b97STreehugger Robot            //
463*16467b97STreehugger Robot            ch  = *nextChar++;
464*16467b97STreehugger Robot
465*16467b97STreehugger Robot            // If we have a surrogate pair then we need to consume
466*16467b97STreehugger Robot            // a following valid LO surrogate.
467*16467b97STreehugger Robot            //
468*16467b97STreehugger Robot            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
469*16467b97STreehugger Robot            {
470*16467b97STreehugger Robot                // If the 16 bits following the high surrogate are in the source buffer...
471*16467b97STreehugger Robot                //
472*16467b97STreehugger Robot                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
473*16467b97STreehugger Robot                {
474*16467b97STreehugger Robot                    // Next character is in natural machine byte order
475*16467b97STreehugger Robot                    //
476*16467b97STreehugger Robot                    ch2 = *nextChar;
477*16467b97STreehugger Robot
478*16467b97STreehugger Robot                    // If it's a valid low surrogate, consume it
479*16467b97STreehugger Robot                    //
480*16467b97STreehugger Robot                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
481*16467b97STreehugger Robot                    {
482*16467b97STreehugger Robot                        // We consumed one 16 bit character
483*16467b97STreehugger Robot                        //
484*16467b97STreehugger Robot						nextChar++;
485*16467b97STreehugger Robot                    }
486*16467b97STreehugger Robot                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
487*16467b97STreehugger Robot                    // it.
488*16467b97STreehugger Robot                    //
489*16467b97STreehugger Robot                }
490*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
491*16467b97STreehugger Robot                // it because the buffer ended
492*16467b97STreehugger Robot                //
493*16467b97STreehugger Robot            }
494*16467b97STreehugger Robot            // Note that we did not check for an invalid low surrogate here, or that fact that the
495*16467b97STreehugger Robot            // lo surrogate was missing. We just picked out one 16 bit character unless the character
496*16467b97STreehugger Robot            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
497*16467b97STreehugger Robot            //
498*16467b97STreehugger Robot        }
499*16467b97STreehugger Robot    }
500*16467b97STreehugger Robot    else
501*16467b97STreehugger Robot    {
502*16467b97STreehugger Robot        // We need to go backwards from our input point
503*16467b97STreehugger Robot        //
504*16467b97STreehugger Robot        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
505*16467b97STreehugger Robot        {
506*16467b97STreehugger Robot            // Get the previous 16 bit character
507*16467b97STreehugger Robot            //
508*16467b97STreehugger Robot            ch = *--nextChar;
509*16467b97STreehugger Robot
510*16467b97STreehugger Robot            // If we found a low surrogate then go back one more character if
511*16467b97STreehugger Robot            // the hi surrogate is there
512*16467b97STreehugger Robot            //
513*16467b97STreehugger Robot            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
514*16467b97STreehugger Robot            {
515*16467b97STreehugger Robot                ch2 = *(nextChar-1);
516*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
517*16467b97STreehugger Robot                {
518*16467b97STreehugger Robot                    // Yes, there is a high surrogate to match it so decrement one more and point to that
519*16467b97STreehugger Robot                    //
520*16467b97STreehugger Robot                    nextChar--;
521*16467b97STreehugger Robot                }
522*16467b97STreehugger Robot            }
523*16467b97STreehugger Robot        }
524*16467b97STreehugger Robot    }
525*16467b97STreehugger Robot
526*16467b97STreehugger Robot    // Our local copy of nextChar is now pointing to either the correct character or end of file
527*16467b97STreehugger Robot    //
528*16467b97STreehugger Robot    // Input buffer size is always in bytes
529*16467b97STreehugger Robot    //
530*16467b97STreehugger Robot	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
531*16467b97STreehugger Robot	{
532*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
533*16467b97STreehugger Robot	}
534*16467b97STreehugger Robot	else
535*16467b97STreehugger Robot	{
536*16467b97STreehugger Robot        // Pick up the next 16 character (native machine byte order)
537*16467b97STreehugger Robot        //
538*16467b97STreehugger Robot        ch = *nextChar++;
539*16467b97STreehugger Robot
540*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
541*16467b97STreehugger Robot        // a following valid LO surrogate.
542*16467b97STreehugger Robot        //
543*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
544*16467b97STreehugger Robot        {
545*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
546*16467b97STreehugger Robot            //
547*16467b97STreehugger Robot            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
548*16467b97STreehugger Robot            {
549*16467b97STreehugger Robot                // Next character is in natural machine byte order
550*16467b97STreehugger Robot                //
551*16467b97STreehugger Robot                ch2 = *nextChar;
552*16467b97STreehugger Robot
553*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
554*16467b97STreehugger Robot                //
555*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
556*16467b97STreehugger Robot                {
557*16467b97STreehugger Robot                    // Construct the UTF32 code point
558*16467b97STreehugger Robot                    //
559*16467b97STreehugger Robot                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
560*16467b97STreehugger Robot								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
561*16467b97STreehugger Robot                }
562*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
563*16467b97STreehugger Robot                // it.
564*16467b97STreehugger Robot                //
565*16467b97STreehugger Robot            }
566*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
567*16467b97STreehugger Robot            // it because the buffer ended
568*16467b97STreehugger Robot            //
569*16467b97STreehugger Robot        }
570*16467b97STreehugger Robot    }
571*16467b97STreehugger Robot    return ch;
572*16467b97STreehugger Robot}
573*16467b97STreehugger Robot
574*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
575*16467b97STreehugger RobotANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
576*16467b97STreehugger Robot{
577*16467b97STreehugger Robot	SuperType* input;
578*16467b97STreehugger Robot    UTF32           ch;
579*16467b97STreehugger Robot    UTF32           ch2;
580*16467b97STreehugger Robot    ANTLR_UCHAR*   nextChar;
581*16467b97STreehugger Robot
582*16467b97STreehugger Robot    // Find the input interface and where we are currently pointing to
583*16467b97STreehugger Robot    // in the input stream
584*16467b97STreehugger Robot    //
585*16467b97STreehugger Robot	input       = this->get_super();
586*16467b97STreehugger Robot    nextChar    = input->get_nextChar();
587*16467b97STreehugger Robot
588*16467b97STreehugger Robot    // If a positive offset then advance forward, else retreat
589*16467b97STreehugger Robot    //
590*16467b97STreehugger Robot    if  (la >= 0)
591*16467b97STreehugger Robot    {
592*16467b97STreehugger Robot        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
593*16467b97STreehugger Robot        {
594*16467b97STreehugger Robot            // Advance our copy of the input pointer
595*16467b97STreehugger Robot            //
596*16467b97STreehugger Robot            // Next char in Little Endian byte order
597*16467b97STreehugger Robot            //
598*16467b97STreehugger Robot            ch  = (*nextChar) + (*(nextChar+1) << 8);
599*16467b97STreehugger Robot            nextChar += 2;
600*16467b97STreehugger Robot
601*16467b97STreehugger Robot            // If we have a surrogate pair then we need to consume
602*16467b97STreehugger Robot            // a following valid LO surrogate.
603*16467b97STreehugger Robot            //
604*16467b97STreehugger Robot            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
605*16467b97STreehugger Robot            {
606*16467b97STreehugger Robot                // If the 16 bits following the high surrogate are in the source buffer...
607*16467b97STreehugger Robot                //
608*16467b97STreehugger Robot                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() ))
609*16467b97STreehugger Robot                {
610*16467b97STreehugger Robot                    // Next character is in little endian byte order
611*16467b97STreehugger Robot                    //
612*16467b97STreehugger Robot                    ch2 = (*nextChar) + (*(nextChar+1) << 8);
613*16467b97STreehugger Robot
614*16467b97STreehugger Robot                    // If it's a valid low surrogate, consume it
615*16467b97STreehugger Robot                    //
616*16467b97STreehugger Robot                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
617*16467b97STreehugger Robot                    {
618*16467b97STreehugger Robot                        // We consumed one 16 bit character
619*16467b97STreehugger Robot                        //
620*16467b97STreehugger Robot						nextChar += 2;
621*16467b97STreehugger Robot                    }
622*16467b97STreehugger Robot                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
623*16467b97STreehugger Robot                    // it.
624*16467b97STreehugger Robot                    //
625*16467b97STreehugger Robot                }
626*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
627*16467b97STreehugger Robot                // it because the buffer ended
628*16467b97STreehugger Robot                //
629*16467b97STreehugger Robot            }
630*16467b97STreehugger Robot            // Note that we did not check for an invalid low surrogate here, or that fact that the
631*16467b97STreehugger Robot            // lo surrogate was missing. We just picked out one 16 bit character unless the character
632*16467b97STreehugger Robot            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
633*16467b97STreehugger Robot            //
634*16467b97STreehugger Robot        }
635*16467b97STreehugger Robot    }
636*16467b97STreehugger Robot    else
637*16467b97STreehugger Robot    {
638*16467b97STreehugger Robot        // We need to go backwards from our input point
639*16467b97STreehugger Robot        //
640*16467b97STreehugger Robot        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
641*16467b97STreehugger Robot        {
642*16467b97STreehugger Robot            // Get the previous 16 bit character
643*16467b97STreehugger Robot            //
644*16467b97STreehugger Robot            ch = (*nextChar - 2) + ((*nextChar -1) << 8);
645*16467b97STreehugger Robot            nextChar -= 2;
646*16467b97STreehugger Robot
647*16467b97STreehugger Robot            // If we found a low surrogate then go back one more character if
648*16467b97STreehugger Robot            // the hi surrogate is there
649*16467b97STreehugger Robot            //
650*16467b97STreehugger Robot            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
651*16467b97STreehugger Robot            {
652*16467b97STreehugger Robot                ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
653*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
654*16467b97STreehugger Robot                {
655*16467b97STreehugger Robot                    // Yes, there is a high surrogate to match it so decrement one more and point to that
656*16467b97STreehugger Robot                    //
657*16467b97STreehugger Robot                    nextChar -=2;
658*16467b97STreehugger Robot                }
659*16467b97STreehugger Robot            }
660*16467b97STreehugger Robot        }
661*16467b97STreehugger Robot    }
662*16467b97STreehugger Robot
663*16467b97STreehugger Robot    // Our local copy of nextChar is now pointing to either the correct character or end of file
664*16467b97STreehugger Robot    //
665*16467b97STreehugger Robot    // Input buffer size is always in bytes
666*16467b97STreehugger Robot    //
667*16467b97STreehugger Robot	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
668*16467b97STreehugger Robot	{
669*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
670*16467b97STreehugger Robot	}
671*16467b97STreehugger Robot	else
672*16467b97STreehugger Robot	{
673*16467b97STreehugger Robot        // Pick up the next 16 character (little endian byte order)
674*16467b97STreehugger Robot        //
675*16467b97STreehugger Robot        ch = (*nextChar) + (*(nextChar+1) << 8);
676*16467b97STreehugger Robot        nextChar += 2;
677*16467b97STreehugger Robot
678*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
679*16467b97STreehugger Robot        // a following valid LO surrogate.
680*16467b97STreehugger Robot        //
681*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
682*16467b97STreehugger Robot        {
683*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
684*16467b97STreehugger Robot            //
685*16467b97STreehugger Robot            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
686*16467b97STreehugger Robot            {
687*16467b97STreehugger Robot                // Next character is in little endian byte order
688*16467b97STreehugger Robot                //
689*16467b97STreehugger Robot                ch2 = (*nextChar) + (*(nextChar+1) << 8);
690*16467b97STreehugger Robot
691*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
692*16467b97STreehugger Robot                //
693*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
694*16467b97STreehugger Robot                {
695*16467b97STreehugger Robot                    // Construct the UTF32 code point
696*16467b97STreehugger Robot                    //
697*16467b97STreehugger Robot                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
698*16467b97STreehugger Robot								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
699*16467b97STreehugger Robot                }
700*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
701*16467b97STreehugger Robot                // it.
702*16467b97STreehugger Robot                //
703*16467b97STreehugger Robot            }
704*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
705*16467b97STreehugger Robot            // it because the buffer ended
706*16467b97STreehugger Robot            //
707*16467b97STreehugger Robot        }
708*16467b97STreehugger Robot    }
709*16467b97STreehugger Robot    return ch;
710*16467b97STreehugger Robot}
711*16467b97STreehugger Robot
712*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
713*16467b97STreehugger RobotANTLR_UINT32	UTF16_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
714*16467b97STreehugger Robot{
715*16467b97STreehugger Robot	SuperType* input;
716*16467b97STreehugger Robot    UTF32           ch;
717*16467b97STreehugger Robot    UTF32           ch2;
718*16467b97STreehugger Robot    ANTLR_UCHAR*   nextChar;
719*16467b97STreehugger Robot
720*16467b97STreehugger Robot    // Find the input interface and where we are currently pointing to
721*16467b97STreehugger Robot    // in the input stream
722*16467b97STreehugger Robot    //
723*16467b97STreehugger Robot	input       = this->get_super();
724*16467b97STreehugger Robot    nextChar    = input->get_nextChar();
725*16467b97STreehugger Robot
726*16467b97STreehugger Robot    // If a positive offset then advance forward, else retreat
727*16467b97STreehugger Robot    //
728*16467b97STreehugger Robot    if  (la >= 0)
729*16467b97STreehugger Robot    {
730*16467b97STreehugger Robot        while   (--la > 0 && (ANTLR_UINT8*)nextChar < ((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf() )
731*16467b97STreehugger Robot        {
732*16467b97STreehugger Robot            // Advance our copy of the input pointer
733*16467b97STreehugger Robot            //
734*16467b97STreehugger Robot            // Next char in Big Endian byte order
735*16467b97STreehugger Robot            //
736*16467b97STreehugger Robot            ch  = ((*nextChar) << 8) + *(nextChar+1);
737*16467b97STreehugger Robot            nextChar += 2;
738*16467b97STreehugger Robot
739*16467b97STreehugger Robot            // If we have a surrogate pair then we need to consume
740*16467b97STreehugger Robot            // a following valid LO surrogate.
741*16467b97STreehugger Robot            //
742*16467b97STreehugger Robot            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
743*16467b97STreehugger Robot            {
744*16467b97STreehugger Robot                // If the 16 bits following the high surrogate are in the source buffer...
745*16467b97STreehugger Robot                //
746*16467b97STreehugger Robot                if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
747*16467b97STreehugger Robot                {
748*16467b97STreehugger Robot                    // Next character is in big endian byte order
749*16467b97STreehugger Robot                    //
750*16467b97STreehugger Robot                    ch2 = ((*nextChar) << 8) + *(nextChar+1);
751*16467b97STreehugger Robot
752*16467b97STreehugger Robot                    // If it's a valid low surrogate, consume it
753*16467b97STreehugger Robot                    //
754*16467b97STreehugger Robot                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
755*16467b97STreehugger Robot                    {
756*16467b97STreehugger Robot                        // We consumed one 16 bit character
757*16467b97STreehugger Robot                        //
758*16467b97STreehugger Robot						nextChar += 2;
759*16467b97STreehugger Robot                    }
760*16467b97STreehugger Robot                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
761*16467b97STreehugger Robot                    // it.
762*16467b97STreehugger Robot                    //
763*16467b97STreehugger Robot                }
764*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
765*16467b97STreehugger Robot                // it because the buffer ended
766*16467b97STreehugger Robot                //
767*16467b97STreehugger Robot            }
768*16467b97STreehugger Robot            // Note that we did not check for an invalid low surrogate here, or that fact that the
769*16467b97STreehugger Robot            // lo surrogate was missing. We just picked out one 16 bit character unless the character
770*16467b97STreehugger Robot            // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
771*16467b97STreehugger Robot            //
772*16467b97STreehugger Robot        }
773*16467b97STreehugger Robot    }
774*16467b97STreehugger Robot    else
775*16467b97STreehugger Robot    {
776*16467b97STreehugger Robot        // We need to go backwards from our input point
777*16467b97STreehugger Robot        //
778*16467b97STreehugger Robot        while   (la++ < 0 && (ANTLR_UINT8*)nextChar > (ANTLR_UINT8*)input->get_data() )
779*16467b97STreehugger Robot        {
780*16467b97STreehugger Robot            // Get the previous 16 bit character
781*16467b97STreehugger Robot            //
782*16467b97STreehugger Robot            ch = ((*nextChar - 2) << 8) + (*nextChar -1);
783*16467b97STreehugger Robot            nextChar -= 2;
784*16467b97STreehugger Robot
785*16467b97STreehugger Robot            // If we found a low surrogate then go back one more character if
786*16467b97STreehugger Robot            // the hi surrogate is there
787*16467b97STreehugger Robot            //
788*16467b97STreehugger Robot            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
789*16467b97STreehugger Robot            {
790*16467b97STreehugger Robot                ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
791*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
792*16467b97STreehugger Robot                {
793*16467b97STreehugger Robot                    // Yes, there is a high surrogate to match it so decrement one more and point to that
794*16467b97STreehugger Robot                    //
795*16467b97STreehugger Robot                    nextChar -=2;
796*16467b97STreehugger Robot                }
797*16467b97STreehugger Robot            }
798*16467b97STreehugger Robot        }
799*16467b97STreehugger Robot    }
800*16467b97STreehugger Robot
801*16467b97STreehugger Robot    // Our local copy of nextChar is now pointing to either the correct character or end of file
802*16467b97STreehugger Robot    //
803*16467b97STreehugger Robot    // Input buffer size is always in bytes
804*16467b97STreehugger Robot    //
805*16467b97STreehugger Robot	if	( (ANTLR_UINT8*)nextChar >= (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
806*16467b97STreehugger Robot	{
807*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
808*16467b97STreehugger Robot	}
809*16467b97STreehugger Robot	else
810*16467b97STreehugger Robot	{
811*16467b97STreehugger Robot        // Pick up the next 16 character (big endian byte order)
812*16467b97STreehugger Robot        //
813*16467b97STreehugger Robot        ch = ((*nextChar) << 8) + *(nextChar+1);
814*16467b97STreehugger Robot        nextChar += 2;
815*16467b97STreehugger Robot
816*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
817*16467b97STreehugger Robot        // a following valid LO surrogate.
818*16467b97STreehugger Robot        //
819*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
820*16467b97STreehugger Robot        {
821*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
822*16467b97STreehugger Robot            //
823*16467b97STreehugger Robot            if	((ANTLR_UINT8*)(nextChar) < (((ANTLR_UINT8*)input->get_data()) + input->get_sizeBuf()))
824*16467b97STreehugger Robot            {
825*16467b97STreehugger Robot                // Next character is in big endian byte order
826*16467b97STreehugger Robot                //
827*16467b97STreehugger Robot                ch2 = ((*nextChar) << 8) + *(nextChar+1);
828*16467b97STreehugger Robot
829*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
830*16467b97STreehugger Robot                //
831*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
832*16467b97STreehugger Robot                {
833*16467b97STreehugger Robot                    // Construct the UTF32 code point
834*16467b97STreehugger Robot                    //
835*16467b97STreehugger Robot                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
836*16467b97STreehugger Robot								+ (ch2 - UNI_SUR_LOW_START) + halfBase;
837*16467b97STreehugger Robot                }
838*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
839*16467b97STreehugger Robot                // it.
840*16467b97STreehugger Robot                //
841*16467b97STreehugger Robot            }
842*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
843*16467b97STreehugger Robot            // it because the buffer ended
844*16467b97STreehugger Robot            //
845*16467b97STreehugger Robot        }
846*16467b97STreehugger Robot    }
847*16467b97STreehugger Robot    return ch;
848*16467b97STreehugger Robot}
849*16467b97STreehugger Robot
850*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
851*16467b97STreehugger Robotvoid	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<BYTE_AGNOSTIC> )
852*16467b97STreehugger Robot{
853*16467b97STreehugger Robot	SuperType* input;
854*16467b97STreehugger Robot    UTF32   ch;
855*16467b97STreehugger Robot    UTF32   ch2;
856*16467b97STreehugger Robot
857*16467b97STreehugger Robot	input   = this->get_super();
858*16467b97STreehugger Robot
859*16467b97STreehugger Robot    // Buffer size is always in bytes
860*16467b97STreehugger Robot    //
861*16467b97STreehugger Robot	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
862*16467b97STreehugger Robot	{
863*16467b97STreehugger Robot		// Indicate one more character in this line
864*16467b97STreehugger Robot		//
865*16467b97STreehugger Robot		input->inc_charPositionInLine();
866*16467b97STreehugger Robot
867*16467b97STreehugger Robot		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
868*16467b97STreehugger Robot		{
869*16467b97STreehugger Robot			// Reset for start of a new line of input
870*16467b97STreehugger Robot			//
871*16467b97STreehugger Robot			input->inc_line();
872*16467b97STreehugger Robot			input->set_charPositionInLine(0);
873*16467b97STreehugger Robot			input->set_currentLine( input->get_nextChar() + 1 );
874*16467b97STreehugger Robot		}
875*16467b97STreehugger Robot
876*16467b97STreehugger Robot		// Increment to next character position, accounting for any surrogates
877*16467b97STreehugger Robot		//
878*16467b97STreehugger Robot        // Next char in natural machine byte order
879*16467b97STreehugger Robot        //
880*16467b97STreehugger Robot        ch  = *(input->get_nextChar());
881*16467b97STreehugger Robot
882*16467b97STreehugger Robot        // We consumed one 16 bit character
883*16467b97STreehugger Robot        //
884*16467b97STreehugger Robot		input->set_nextChar( input->get_nextChar() + 1 );
885*16467b97STreehugger Robot
886*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
887*16467b97STreehugger Robot        // a following valid LO surrogate.
888*16467b97STreehugger Robot        //
889*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
890*16467b97STreehugger Robot
891*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
892*16467b97STreehugger Robot            //
893*16467b97STreehugger Robot            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
894*16467b97STreehugger Robot            {
895*16467b97STreehugger Robot                // Next character is in natural machine byte order
896*16467b97STreehugger Robot                //
897*16467b97STreehugger Robot                ch2 = *(input->get_nextChar());
898*16467b97STreehugger Robot
899*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
900*16467b97STreehugger Robot                //
901*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
902*16467b97STreehugger Robot                {
903*16467b97STreehugger Robot                    // We consumed one 16 bit character
904*16467b97STreehugger Robot                    //
905*16467b97STreehugger Robot					input->set_nextChar( input->get_nextChar() + 1 );
906*16467b97STreehugger Robot                }
907*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
908*16467b97STreehugger Robot                // it.
909*16467b97STreehugger Robot                //
910*16467b97STreehugger Robot            }
911*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
912*16467b97STreehugger Robot            // it because the buffer ended
913*16467b97STreehugger Robot            //
914*16467b97STreehugger Robot        }
915*16467b97STreehugger Robot        // Note that we did not check for an invalid low surrogate here, or that fact that the
916*16467b97STreehugger Robot        // lo surrogate was missing. We just picked out one 16 bit character unless the character
917*16467b97STreehugger Robot        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
918*16467b97STreehugger Robot        //
919*16467b97STreehugger Robot	}
920*16467b97STreehugger Robot
921*16467b97STreehugger Robot}
922*16467b97STreehugger Robot
923*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
924*16467b97STreehugger Robotvoid	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_LITTLE_ENDIAN> )
925*16467b97STreehugger Robot{
926*16467b97STreehugger Robot	SuperType* input;
927*16467b97STreehugger Robot    UTF32   ch;
928*16467b97STreehugger Robot    UTF32   ch2;
929*16467b97STreehugger Robot
930*16467b97STreehugger Robot	input   = this->get_super();
931*16467b97STreehugger Robot
932*16467b97STreehugger Robot    // Buffer size is always in bytes
933*16467b97STreehugger Robot    //
934*16467b97STreehugger Robot	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
935*16467b97STreehugger Robot	{
936*16467b97STreehugger Robot		// Indicate one more character in this line
937*16467b97STreehugger Robot		//
938*16467b97STreehugger Robot		input->inc_charPositionInLine();
939*16467b97STreehugger Robot
940*16467b97STreehugger Robot		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
941*16467b97STreehugger Robot		{
942*16467b97STreehugger Robot			// Reset for start of a new line of input
943*16467b97STreehugger Robot			//
944*16467b97STreehugger Robot			input->inc_line();
945*16467b97STreehugger Robot			input->set_charPositionInLine(0);
946*16467b97STreehugger Robot			input->set_currentLine(input->get_nextChar() + 1);
947*16467b97STreehugger Robot		}
948*16467b97STreehugger Robot
949*16467b97STreehugger Robot		// Increment to next character position, accounting for any surrogates
950*16467b97STreehugger Robot		//
951*16467b97STreehugger Robot        // Next char in litle endian form
952*16467b97STreehugger Robot        //
953*16467b97STreehugger Robot        ch  = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
954*16467b97STreehugger Robot
955*16467b97STreehugger Robot        // We consumed one 16 bit character
956*16467b97STreehugger Robot        //
957*16467b97STreehugger Robot		input->set_nextChar( input->get_nextChar() + 1);
958*16467b97STreehugger Robot
959*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
960*16467b97STreehugger Robot        // a following valid LO surrogate.
961*16467b97STreehugger Robot        //
962*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
963*16467b97STreehugger Robot		{
964*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
965*16467b97STreehugger Robot            //
966*16467b97STreehugger Robot            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
967*16467b97STreehugger Robot            {
968*16467b97STreehugger Robot                ch2 = *((ANTLR_UINT8*)input->get_nextChar()) + (*((ANTLR_UINT8*)input->get_nextChar() + 1) <<8);
969*16467b97STreehugger Robot
970*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
971*16467b97STreehugger Robot                //
972*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
973*16467b97STreehugger Robot                {
974*16467b97STreehugger Robot                    // We consumed one 16 bit character
975*16467b97STreehugger Robot                    //
976*16467b97STreehugger Robot					input->set_nextChar( input->get_nextChar() + 1);
977*16467b97STreehugger Robot                }
978*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
979*16467b97STreehugger Robot                // it.
980*16467b97STreehugger Robot                //
981*16467b97STreehugger Robot            }
982*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
983*16467b97STreehugger Robot            // it because the buffer ended
984*16467b97STreehugger Robot            //
985*16467b97STreehugger Robot        }
986*16467b97STreehugger Robot        // Note that we did not check for an invalid low surrogate here, or that fact that the
987*16467b97STreehugger Robot        // lo surrogate was missing. We just picked out one 16 bit character unless the character
988*16467b97STreehugger Robot        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
989*16467b97STreehugger Robot        //
990*16467b97STreehugger Robot	}
991*16467b97STreehugger Robot}
992*16467b97STreehugger Robot
993*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
994*16467b97STreehugger Robotvoid	UTF16_IntStream<ImplTraits, SuperType>::consume( ClassForwarder<ANTLR_BIG_ENDIAN> )
995*16467b97STreehugger Robot{
996*16467b97STreehugger Robot	SuperType* input;
997*16467b97STreehugger Robot    UTF32   ch;
998*16467b97STreehugger Robot    UTF32   ch2;
999*16467b97STreehugger Robot
1000*16467b97STreehugger Robot	input   = this->get_super();
1001*16467b97STreehugger Robot
1002*16467b97STreehugger Robot    // Buffer size is always in bytes
1003*16467b97STreehugger Robot    //
1004*16467b97STreehugger Robot	if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
1005*16467b97STreehugger Robot	{
1006*16467b97STreehugger Robot		// Indicate one more character in this line
1007*16467b97STreehugger Robot		//
1008*16467b97STreehugger Robot		input->inc_charPositionInLine();
1009*16467b97STreehugger Robot
1010*16467b97STreehugger Robot		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
1011*16467b97STreehugger Robot		{
1012*16467b97STreehugger Robot			// Reset for start of a new line of input
1013*16467b97STreehugger Robot			//
1014*16467b97STreehugger Robot			input->inc_line();
1015*16467b97STreehugger Robot			input->set_charPositionInLine(0);
1016*16467b97STreehugger Robot			input->set_currentLine(input->get_nextChar() + 1);
1017*16467b97STreehugger Robot		}
1018*16467b97STreehugger Robot
1019*16467b97STreehugger Robot		// Increment to next character position, accounting for any surrogates
1020*16467b97STreehugger Robot		//
1021*16467b97STreehugger Robot        // Next char in big endian form
1022*16467b97STreehugger Robot        //
1023*16467b97STreehugger Robot        ch  = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
1024*16467b97STreehugger Robot
1025*16467b97STreehugger Robot        // We consumed one 16 bit character
1026*16467b97STreehugger Robot        //
1027*16467b97STreehugger Robot		input->set_nextChar( input->get_nextChar() + 1);
1028*16467b97STreehugger Robot
1029*16467b97STreehugger Robot        // If we have a surrogate pair then we need to consume
1030*16467b97STreehugger Robot        // a following valid LO surrogate.
1031*16467b97STreehugger Robot        //
1032*16467b97STreehugger Robot        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1033*16467b97STreehugger Robot		{
1034*16467b97STreehugger Robot            // If the 16 bits following the high surrogate are in the source buffer...
1035*16467b97STreehugger Robot            //
1036*16467b97STreehugger Robot            if(input->get_nextChar() < (input->get_data() + input->get_sizeBuf()/2) )
1037*16467b97STreehugger Robot            {
1038*16467b97STreehugger Robot                // Big endian
1039*16467b97STreehugger Robot                //
1040*16467b97STreehugger Robot                ch2 = *((ANTLR_UINT8*)input->get_nextChar() + 1) + (*((ANTLR_UINT8*)input->get_nextChar() ) <<8);
1041*16467b97STreehugger Robot
1042*16467b97STreehugger Robot                // If it's a valid low surrogate, consume it
1043*16467b97STreehugger Robot                //
1044*16467b97STreehugger Robot                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1045*16467b97STreehugger Robot                {
1046*16467b97STreehugger Robot                    // We consumed one 16 bit character
1047*16467b97STreehugger Robot                    //
1048*16467b97STreehugger Robot					input->set_nextChar( input->get_nextChar() + 1);
1049*16467b97STreehugger Robot                }
1050*16467b97STreehugger Robot                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1051*16467b97STreehugger Robot                // it.
1052*16467b97STreehugger Robot                //
1053*16467b97STreehugger Robot            }
1054*16467b97STreehugger Robot            // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1055*16467b97STreehugger Robot            // it because the buffer ended
1056*16467b97STreehugger Robot            //
1057*16467b97STreehugger Robot        }
1058*16467b97STreehugger Robot        // Note that we did not check for an invalid low surrogate here, or that fact that the
1059*16467b97STreehugger Robot        // lo surrogate was missing. We just picked out one 16 bit character unless the character
1060*16467b97STreehugger Robot        // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1061*16467b97STreehugger Robot        //
1062*16467b97STreehugger Robot	}
1063*16467b97STreehugger Robot}
1064*16467b97STreehugger Robot
1065*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1066*16467b97STreehugger RobotANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 i)
1067*16467b97STreehugger Robot{
1068*16467b97STreehugger Robot	return this->_LA( i, ClassForwarder<typename ImplTraits::Endianness>() );
1069*16467b97STreehugger Robot}
1070*16467b97STreehugger Robot
1071*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1072*16467b97STreehugger RobotANTLR_MARKER	UTF32_IntStream<ImplTraits, SuperType>::index()
1073*16467b97STreehugger Robot{
1074*16467b97STreehugger Robot	SuperType* input = this->get_super();
1075*16467b97STreehugger Robot    return  (ANTLR_MARKER)(input->get_nextChar());
1076*16467b97STreehugger Robot}
1077*16467b97STreehugger Robot
1078*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1079*16467b97STreehugger Robotvoid UTF32_IntStream<ImplTraits, SuperType>::seek(ANTLR_MARKER seekPoint)
1080*16467b97STreehugger Robot{
1081*16467b97STreehugger Robot	SuperType* input;
1082*16467b97STreehugger Robot
1083*16467b97STreehugger Robot	input   = this->get_super();
1084*16467b97STreehugger Robot
1085*16467b97STreehugger Robot	// If the requested seek point is less than the current
1086*16467b97STreehugger Robot	// input point, then we assume that we are resetting from a mark
1087*16467b97STreehugger Robot	// and do not need to scan, but can just set to there as rewind will
1088*16467b97STreehugger Robot        // reset line numbers and so on.
1089*16467b97STreehugger Robot	//
1090*16467b97STreehugger Robot	if	(seekPoint <= (ANTLR_MARKER)(input->get_nextChar()))
1091*16467b97STreehugger Robot	{
1092*16467b97STreehugger Robot		input->set_nextChar( static_cast<typename ImplTraits::DataType*>(seekPoint) );
1093*16467b97STreehugger Robot	}
1094*16467b97STreehugger Robot	else
1095*16467b97STreehugger Robot	{
1096*16467b97STreehugger Robot        // Call consume until we reach the asked for seek point or EOF
1097*16467b97STreehugger Robot        //
1098*16467b97STreehugger Robot        while( (this->_LA(1) != ANTLR_CHARSTREAM_EOF) && (seekPoint < (ANTLR_MARKER)input->get_nextChar()) )
1099*16467b97STreehugger Robot	    {
1100*16467b97STreehugger Robot			this->consume();
1101*16467b97STreehugger Robot	    }
1102*16467b97STreehugger Robot	}
1103*16467b97STreehugger Robot
1104*16467b97STreehugger Robot}
1105*16467b97STreehugger Robot
1106*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1107*16467b97STreehugger Robotvoid UTF32_IntStream<ImplTraits, SuperType>::setupIntStream(bool machineBigEndian, bool inputBigEndian)
1108*16467b97STreehugger Robot{
1109*16467b97STreehugger Robot	SuperType* super = this->get_super();
1110*16467b97STreehugger Robot	super->set_charByteSize(4);
1111*16467b97STreehugger Robot
1112*16467b97STreehugger Robot	this->findout_endian_spec(machineBigEndian, inputBigEndian);
1113*16467b97STreehugger Robot}
1114*16467b97STreehugger Robot
1115*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1116*16467b97STreehugger RobotANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<BYTE_AGNOSTIC> )
1117*16467b97STreehugger Robot{
1118*16467b97STreehugger Robot    SuperType* input = this->get_super();
1119*16467b97STreehugger Robot
1120*16467b97STreehugger Robot    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1121*16467b97STreehugger Robot    {
1122*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
1123*16467b97STreehugger Robot    }
1124*16467b97STreehugger Robot    else
1125*16467b97STreehugger Robot    {
1126*16467b97STreehugger Robot		return	(ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1127*16467b97STreehugger Robot    }
1128*16467b97STreehugger Robot}
1129*16467b97STreehugger Robot
1130*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1131*16467b97STreehugger RobotANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_LITTLE_ENDIAN> )
1132*16467b97STreehugger Robot{
1133*16467b97STreehugger Robot	SuperType* input = this->get_super();
1134*16467b97STreehugger Robot
1135*16467b97STreehugger Robot    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1136*16467b97STreehugger Robot    {
1137*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
1138*16467b97STreehugger Robot    }
1139*16467b97STreehugger Robot    else
1140*16467b97STreehugger Robot    {
1141*16467b97STreehugger Robot        ANTLR_UCHAR   c;
1142*16467b97STreehugger Robot
1143*16467b97STreehugger Robot        c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1144*16467b97STreehugger Robot
1145*16467b97STreehugger Robot        // Swap Endianess to Big Endian
1146*16467b97STreehugger Robot        //
1147*16467b97STreehugger Robot        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1148*16467b97STreehugger Robot    }
1149*16467b97STreehugger Robot}
1150*16467b97STreehugger Robot
1151*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1152*16467b97STreehugger RobotANTLR_UINT32	UTF32_IntStream<ImplTraits, SuperType>::_LA( ANTLR_INT32 la, ClassForwarder<ANTLR_BIG_ENDIAN> )
1153*16467b97STreehugger Robot{
1154*16467b97STreehugger Robot	SuperType* input = this->get_super();
1155*16467b97STreehugger Robot
1156*16467b97STreehugger Robot    if	(( input->get_nextChar() + la - 1) >= (input->get_data() + input->get_sizeBuf()/4 ))
1157*16467b97STreehugger Robot    {
1158*16467b97STreehugger Robot		return	ANTLR_CHARSTREAM_EOF;
1159*16467b97STreehugger Robot    }
1160*16467b97STreehugger Robot    else
1161*16467b97STreehugger Robot    {
1162*16467b97STreehugger Robot        ANTLR_UCHAR   c;
1163*16467b97STreehugger Robot
1164*16467b97STreehugger Robot        c = (ANTLR_UCHAR)(*(input->get_nextChar() + la - 1));
1165*16467b97STreehugger Robot
1166*16467b97STreehugger Robot        // Swap Endianess to Little Endian
1167*16467b97STreehugger Robot        //
1168*16467b97STreehugger Robot        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1169*16467b97STreehugger Robot    }
1170*16467b97STreehugger Robot}
1171*16467b97STreehugger Robot
1172*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1173*16467b97STreehugger Robotvoid	UTF32_IntStream<ImplTraits, SuperType>::consume()
1174*16467b97STreehugger Robot{
1175*16467b97STreehugger Robot	SuperType* input = this->get_super();
1176*16467b97STreehugger Robot
1177*16467b97STreehugger Robot    // SizeBuf is always in bytes
1178*16467b97STreehugger Robot    //
1179*16467b97STreehugger Robot	if	( input->get_nextChar()  < (input->get_data() + input->get_sizeBuf()/4 ))
1180*16467b97STreehugger Robot    {
1181*16467b97STreehugger Robot		/* Indicate one more character in this line
1182*16467b97STreehugger Robot		 */
1183*16467b97STreehugger Robot		input->inc_charPositionInLine();
1184*16467b97STreehugger Robot
1185*16467b97STreehugger Robot		if  ((ANTLR_UCHAR)(*(input->get_nextChar())) == input->get_newlineChar())
1186*16467b97STreehugger Robot		{
1187*16467b97STreehugger Robot			/* Reset for start of a new line of input
1188*16467b97STreehugger Robot			 */
1189*16467b97STreehugger Robot			input->inc_line();
1190*16467b97STreehugger Robot			input->set_charPositionInLine(0);
1191*16467b97STreehugger Robot			input->set_currentLine(	input->get_nextChar() + 1 );
1192*16467b97STreehugger Robot		}
1193*16467b97STreehugger Robot
1194*16467b97STreehugger Robot		/* Increment to next character position
1195*16467b97STreehugger Robot		 */
1196*16467b97STreehugger Robot		input->set_nextChar( input->get_nextChar() + 1 );
1197*16467b97STreehugger Robot    }
1198*16467b97STreehugger Robot}
1199*16467b97STreehugger Robot
1200*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1201*16467b97STreehugger Robotvoid UTF8_IntStream<ImplTraits, SuperType>::setupIntStream(bool, bool)
1202*16467b97STreehugger Robot{
1203*16467b97STreehugger Robot	SuperType* super = this->get_super();
1204*16467b97STreehugger Robot	super->set_charByteSize(0);
1205*16467b97STreehugger Robot}
1206*16467b97STreehugger Robot
1207*16467b97STreehugger Robot// ------------------------------------------------------
1208*16467b97STreehugger Robot// Following is from Unicode.org (see antlr3convertutf.c)
1209*16467b97STreehugger Robot//
1210*16467b97STreehugger Robot
1211*16467b97STreehugger Robot/// Index into the table below with the first byte of a UTF-8 sequence to
1212*16467b97STreehugger Robot/// get the number of trailing bytes that are supposed to follow it.
1213*16467b97STreehugger Robot/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
1214*16467b97STreehugger Robot/// left as-is for anyone who may want to do such conversion, which was
1215*16467b97STreehugger Robot/// allowed in earlier algorithms.
1216*16467b97STreehugger Robot///
1217*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1218*16467b97STreehugger Robotconst ANTLR_UINT32* UTF8_IntStream<ImplTraits, SuperType>::TrailingBytesForUTF8()
1219*16467b97STreehugger Robot{
1220*16467b97STreehugger Robot	static const ANTLR_UINT32 trailingBytesForUTF8[256] = {
1221*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1222*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1223*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1224*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1225*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1226*16467b97STreehugger Robot		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1227*16467b97STreehugger Robot		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1228*16467b97STreehugger Robot		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
1229*16467b97STreehugger Robot	};
1230*16467b97STreehugger Robot
1231*16467b97STreehugger Robot	return trailingBytesForUTF8;
1232*16467b97STreehugger Robot}
1233*16467b97STreehugger Robot
1234*16467b97STreehugger Robot/// Magic values subtracted from a buffer value during UTF8 conversion.
1235*16467b97STreehugger Robot/// This table contains as many values as there might be trailing bytes
1236*16467b97STreehugger Robot/// in a UTF-8 sequence.
1237*16467b97STreehugger Robot///
1238*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1239*16467b97STreehugger Robotconst UTF32* UTF8_IntStream<ImplTraits, SuperType>::OffsetsFromUTF8()
1240*16467b97STreehugger Robot{
1241*16467b97STreehugger Robot	static const UTF32 offsetsFromUTF8[6] =
1242*16467b97STreehugger Robot		{   0x00000000UL, 0x00003080UL, 0x000E2080UL,
1243*16467b97STreehugger Robot			0x03C82080UL, 0xFA082080UL, 0x82082080UL
1244*16467b97STreehugger Robot		};
1245*16467b97STreehugger Robot	return 	offsetsFromUTF8;
1246*16467b97STreehugger Robot}
1247*16467b97STreehugger Robot
1248*16467b97STreehugger Robot// End of Unicode.org tables
1249*16467b97STreehugger Robot// -------------------------
1250*16467b97STreehugger Robot
1251*16467b97STreehugger Robot
1252*16467b97STreehugger Robot/** \brief Consume the next character in a UTF8 input stream
1253*16467b97STreehugger Robot *
1254*16467b97STreehugger Robot * \param input Input stream context pointer
1255*16467b97STreehugger Robot */
1256*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1257*16467b97STreehugger Robotvoid UTF8_IntStream<ImplTraits, SuperType>::consume()
1258*16467b97STreehugger Robot{
1259*16467b97STreehugger Robot    SuperType* input = this->get_super();
1260*16467b97STreehugger Robot	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
1261*16467b97STreehugger Robot	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
1262*16467b97STreehugger Robot
1263*16467b97STreehugger Robot    ANTLR_UINT32           extraBytesToRead;
1264*16467b97STreehugger Robot    ANTLR_UCHAR            ch;
1265*16467b97STreehugger Robot    ANTLR_UINT8*           nextChar;
1266*16467b97STreehugger Robot
1267*16467b97STreehugger Robot    nextChar = input->get_nextChar();
1268*16467b97STreehugger Robot
1269*16467b97STreehugger Robot    if	(nextChar < (input->get_data() + input->get_sizeBuf()))
1270*16467b97STreehugger Robot    {
1271*16467b97STreehugger Robot		// Indicate one more character in this line
1272*16467b97STreehugger Robot		//
1273*16467b97STreehugger Robot		input->inc_charPositionInLine();
1274*16467b97STreehugger Robot
1275*16467b97STreehugger Robot        // Are there more bytes needed to make up the whole thing?
1276*16467b97STreehugger Robot        //
1277*16467b97STreehugger Robot        extraBytesToRead = trailingBytesForUTF8[*nextChar];
1278*16467b97STreehugger Robot
1279*16467b97STreehugger Robot        if	((nextChar + extraBytesToRead) >= (input->get_data() + input->get_sizeBuf()))
1280*16467b97STreehugger Robot        {
1281*16467b97STreehugger Robot            input->set_nextChar( input->get_data() + input->get_sizeBuf() );
1282*16467b97STreehugger Robot            return;
1283*16467b97STreehugger Robot        }
1284*16467b97STreehugger Robot
1285*16467b97STreehugger Robot        // Cases deliberately fall through (see note A in antlrconvertutf.c)
1286*16467b97STreehugger Robot        // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so
1287*16467b97STreehugger Robot        // we allow it.
1288*16467b97STreehugger Robot        //
1289*16467b97STreehugger Robot        ch  = 0;
1290*16467b97STreehugger Robot       	switch (extraBytesToRead)
1291*16467b97STreehugger Robot		{
1292*16467b97STreehugger Robot			case 5: ch += *nextChar++; ch <<= 6;
1293*16467b97STreehugger Robot			case 4: ch += *nextChar++; ch <<= 6;
1294*16467b97STreehugger Robot			case 3: ch += *nextChar++; ch <<= 6;
1295*16467b97STreehugger Robot			case 2: ch += *nextChar++; ch <<= 6;
1296*16467b97STreehugger Robot			case 1: ch += *nextChar++; ch <<= 6;
1297*16467b97STreehugger Robot			case 0: ch += *nextChar++;
1298*16467b97STreehugger Robot		}
1299*16467b97STreehugger Robot
1300*16467b97STreehugger Robot        // Magically correct the input value
1301*16467b97STreehugger Robot        //
1302*16467b97STreehugger Robot		ch -= offsetsFromUTF8[extraBytesToRead];
1303*16467b97STreehugger Robot		if  (ch == input->get_newlineChar())
1304*16467b97STreehugger Robot		{
1305*16467b97STreehugger Robot			/* Reset for start of a new line of input
1306*16467b97STreehugger Robot			 */
1307*16467b97STreehugger Robot			input->inc_line();
1308*16467b97STreehugger Robot			input->set_charPositionInLine(0);
1309*16467b97STreehugger Robot			input->set_currentLine(nextChar);
1310*16467b97STreehugger Robot		}
1311*16467b97STreehugger Robot
1312*16467b97STreehugger Robot        // Update input pointer
1313*16467b97STreehugger Robot        //
1314*16467b97STreehugger Robot        input->set_nextChar(nextChar);
1315*16467b97STreehugger Robot    }
1316*16467b97STreehugger Robot}
1317*16467b97STreehugger Robot
1318*16467b97STreehugger Robot/** \brief Return the input element assuming a UTF8 input
1319*16467b97STreehugger Robot *
1320*16467b97STreehugger Robot * \param[in] input Input stream context pointer
1321*16467b97STreehugger Robot * \param[in] la 1 based offset of next input stream element
1322*16467b97STreehugger Robot *
1323*16467b97STreehugger Robot * \return Next input character in internal ANTLR3 encoding (UTF32)
1324*16467b97STreehugger Robot */
1325*16467b97STreehugger Robottemplate<class ImplTraits, class SuperType>
1326*16467b97STreehugger RobotANTLR_UCHAR UTF8_IntStream<ImplTraits, SuperType>::_LA(ANTLR_INT32 la)
1327*16467b97STreehugger Robot{
1328*16467b97STreehugger Robot    SuperType* input = this->get_super();
1329*16467b97STreehugger Robot	const ANTLR_UINT32* trailingBytesForUTF8 = UTF8_IntStream::TrailingBytesForUTF8();
1330*16467b97STreehugger Robot	const UTF32* offsetsFromUTF8 = UTF8_IntStream::OffsetsFromUTF8();
1331*16467b97STreehugger Robot    ANTLR_UINT32           extraBytesToRead;
1332*16467b97STreehugger Robot    ANTLR_UCHAR            ch;
1333*16467b97STreehugger Robot    ANTLR_UINT8*           nextChar;
1334*16467b97STreehugger Robot
1335*16467b97STreehugger Robot    nextChar = input->get_nextChar();
1336*16467b97STreehugger Robot
1337*16467b97STreehugger Robot    // Do we need to traverse forwards or backwards?
1338*16467b97STreehugger Robot    // - LA(0) is treated as LA(1) and we assume that the nextChar is
1339*16467b97STreehugger Robot    //   already positioned.
1340*16467b97STreehugger Robot    // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
1341*16467b97STreehugger Robot    // - LA(-n) means we must traverse backwards n chracters
1342*16467b97STreehugger Robot    //
1343*16467b97STreehugger Robot    if (la > 1) {
1344*16467b97STreehugger Robot
1345*16467b97STreehugger Robot        // Make sure that we have at least one character left before trying to
1346*16467b97STreehugger Robot        // loop through the buffer.
1347*16467b97STreehugger Robot        //
1348*16467b97STreehugger Robot        if	(nextChar < (input->get_data() + input->get_sizeBuf()))
1349*16467b97STreehugger Robot        {
1350*16467b97STreehugger Robot            // Now traverse n-1 characters forward
1351*16467b97STreehugger Robot            //
1352*16467b97STreehugger Robot            while (--la > 0)
1353*16467b97STreehugger Robot            {
1354*16467b97STreehugger Robot                // Does the next character require trailing bytes?
1355*16467b97STreehugger Robot                // If so advance the pointer by that many bytes as well as advancing
1356*16467b97STreehugger Robot                // one position for what will be at least a single byte character.
1357*16467b97STreehugger Robot                //
1358*16467b97STreehugger Robot                nextChar += trailingBytesForUTF8[*nextChar] + 1;
1359*16467b97STreehugger Robot
1360*16467b97STreehugger Robot                // Does that calculation take us past the byte length of the buffer?
1361*16467b97STreehugger Robot                //
1362*16467b97STreehugger Robot                if	(nextChar >= (input->get_data() + input->get_sizeBuf()))
1363*16467b97STreehugger Robot                {
1364*16467b97STreehugger Robot                    return ANTLR_CHARSTREAM_EOF;
1365*16467b97STreehugger Robot                }
1366*16467b97STreehugger Robot            }
1367*16467b97STreehugger Robot        }
1368*16467b97STreehugger Robot        else
1369*16467b97STreehugger Robot        {
1370*16467b97STreehugger Robot            return ANTLR_CHARSTREAM_EOF;
1371*16467b97STreehugger Robot        }
1372*16467b97STreehugger Robot    }
1373*16467b97STreehugger Robot    else
1374*16467b97STreehugger Robot    {
1375*16467b97STreehugger Robot        // LA is negative so we decrease the pointer by n character positions
1376*16467b97STreehugger Robot        //
1377*16467b97STreehugger Robot        while   (nextChar > input->get_data() && la++ < 0)
1378*16467b97STreehugger Robot        {
1379*16467b97STreehugger Robot            // Traversing backwards in UTF8 means decermenting by one
1380*16467b97STreehugger Robot            // then continuing to decrement while ever a character pattern
1381*16467b97STreehugger Robot            // is flagged as being a trailing byte of an encoded code point.
1382*16467b97STreehugger Robot            // Trailing UTF8 bytes always start with 10 in binary. We assumne that
1383*16467b97STreehugger Robot            // the UTF8 is well formed and do not check boundary conditions
1384*16467b97STreehugger Robot            //
1385*16467b97STreehugger Robot            nextChar--;
1386*16467b97STreehugger Robot            while ((*nextChar & 0xC0) == 0x80)
1387*16467b97STreehugger Robot            {
1388*16467b97STreehugger Robot                nextChar--;
1389*16467b97STreehugger Robot            }
1390*16467b97STreehugger Robot        }
1391*16467b97STreehugger Robot    }
1392*16467b97STreehugger Robot
1393*16467b97STreehugger Robot    // nextChar is now pointing at the UTF8 encoded character that we need to
1394*16467b97STreehugger Robot    // decode and return.
1395*16467b97STreehugger Robot    //
1396*16467b97STreehugger Robot    // Are there more bytes needed to make up the whole thing?
1397*16467b97STreehugger Robot    //
1398*16467b97STreehugger Robot    extraBytesToRead = trailingBytesForUTF8[*nextChar];
1399*16467b97STreehugger Robot    if	(nextChar + extraBytesToRead >= (input->get_data() + input->get_sizeBuf()))
1400*16467b97STreehugger Robot    {
1401*16467b97STreehugger Robot        return ANTLR_CHARSTREAM_EOF;
1402*16467b97STreehugger Robot    }
1403*16467b97STreehugger Robot
1404*16467b97STreehugger Robot    // Cases deliberately fall through (see note A in antlrconvertutf.c)
1405*16467b97STreehugger Robot    //
1406*16467b97STreehugger Robot    ch  = 0;
1407*16467b97STreehugger Robot    switch (extraBytesToRead)
1408*16467b97STreehugger Robot	{
1409*16467b97STreehugger Robot        case 5: ch += *nextChar++; ch <<= 6;
1410*16467b97STreehugger Robot        case 4: ch += *nextChar++; ch <<= 6;
1411*16467b97STreehugger Robot        case 3: ch += *nextChar++; ch <<= 6;
1412*16467b97STreehugger Robot        case 2: ch += *nextChar++; ch <<= 6;
1413*16467b97STreehugger Robot        case 1: ch += *nextChar++; ch <<= 6;
1414*16467b97STreehugger Robot        case 0: ch += *nextChar++;
1415*16467b97STreehugger Robot    }
1416*16467b97STreehugger Robot
1417*16467b97STreehugger Robot    // Magically correct the input value
1418*16467b97STreehugger Robot    //
1419*16467b97STreehugger Robot    ch -= offsetsFromUTF8[extraBytesToRead];
1420*16467b97STreehugger Robot
1421*16467b97STreehugger Robot    return ch;
1422*16467b97STreehugger Robot}
1423*16467b97STreehugger Robot
1424*16467b97STreehugger Robottemplate<class ImplTraits>
1425*16467b97STreehugger RobotTokenIntStream<ImplTraits>::TokenIntStream()
1426*16467b97STreehugger Robot{
1427*16467b97STreehugger Robot	m_cachedSize = 0;
1428*16467b97STreehugger Robot}
1429*16467b97STreehugger Robot
1430*16467b97STreehugger Robottemplate<class ImplTraits>
1431*16467b97STreehugger RobotANTLR_UINT32 TokenIntStream<ImplTraits>::get_cachedSize() const
1432*16467b97STreehugger Robot{
1433*16467b97STreehugger Robot	return m_cachedSize;
1434*16467b97STreehugger Robot}
1435*16467b97STreehugger Robot
1436*16467b97STreehugger Robottemplate<class ImplTraits>
1437*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::set_cachedSize( ANTLR_UINT32 cachedSize )
1438*16467b97STreehugger Robot{
1439*16467b97STreehugger Robot	m_cachedSize = cachedSize;
1440*16467b97STreehugger Robot}
1441*16467b97STreehugger Robot
1442*16467b97STreehugger Robot/** Move the input pointer to the next incoming token.  The stream
1443*16467b97STreehugger Robot *  must become active with LT(1) available.  consume() simply
1444*16467b97STreehugger Robot *  moves the input pointer so that LT(1) points at the next
1445*16467b97STreehugger Robot *  input symbol. Consume at least one token.
1446*16467b97STreehugger Robot *
1447*16467b97STreehugger Robot *  Walk past any token not on the channel the parser is listening to.
1448*16467b97STreehugger Robot */
1449*16467b97STreehugger Robottemplate<class ImplTraits>
1450*16467b97STreehugger Robotvoid TokenIntStream<ImplTraits>::consume()
1451*16467b97STreehugger Robot{
1452*16467b97STreehugger Robot	TokenStreamType* cts = static_cast<TokenStreamType*>(this);
1453*16467b97STreehugger Robot
1454*16467b97STreehugger Robot    if((ANTLR_UINT32)cts->get_p() < m_cachedSize )
1455*16467b97STreehugger Robot	{
1456*16467b97STreehugger Robot		cts->inc_p();
1457*16467b97STreehugger Robot		cts->set_p( cts->skipOffTokenChannels(cts->get_p()) );
1458*16467b97STreehugger Robot	}
1459*16467b97STreehugger Robot}
1460*16467b97STreehugger Robottemplate<class ImplTraits>
1461*16467b97STreehugger Robotvoid  TokenIntStream<ImplTraits>::consumeInitialHiddenTokens()
1462*16467b97STreehugger Robot{
1463*16467b97STreehugger Robot	ANTLR_MARKER	first;
1464*16467b97STreehugger Robot	ANTLR_INT32	i;
1465*16467b97STreehugger Robot	TokenStreamType*	ts;
1466*16467b97STreehugger Robot
1467*16467b97STreehugger Robot	ts	    = this->get_super();
1468*16467b97STreehugger Robot	first	= this->index();
1469*16467b97STreehugger Robot
1470*16467b97STreehugger Robot	for	(i=0; i<first; i++)
1471*16467b97STreehugger Robot	{
1472*16467b97STreehugger Robot		ts->get_debugger()->consumeHiddenToken(ts->get(i));
1473*16467b97STreehugger Robot	}
1474*16467b97STreehugger Robot
1475*16467b97STreehugger Robot	ts->set_initialStreamState(false);
1476*16467b97STreehugger Robot}
1477*16467b97STreehugger Robot
1478*16467b97STreehugger Robot
1479*16467b97STreehugger Robottemplate<class ImplTraits>
1480*16467b97STreehugger RobotANTLR_UINT32	TokenIntStream<ImplTraits>::_LA( ANTLR_INT32 i )
1481*16467b97STreehugger Robot{
1482*16467b97STreehugger Robot	const CommonTokenType*    tok;
1483*16467b97STreehugger Robot	TokenStreamType*    ts	    = static_cast<TokenStreamType*>(this);
1484*16467b97STreehugger Robot
1485*16467b97STreehugger Robot	tok	    =  ts->_LT(i);
1486*16467b97STreehugger Robot
1487*16467b97STreehugger Robot	if	(tok != NULL)
1488*16467b97STreehugger Robot	{
1489*16467b97STreehugger Robot		return	tok->get_type();
1490*16467b97STreehugger Robot	}
1491*16467b97STreehugger Robot	else
1492*16467b97STreehugger Robot	{
1493*16467b97STreehugger Robot		return	CommonTokenType::TOKEN_INVALID;
1494*16467b97STreehugger Robot	}
1495*16467b97STreehugger Robot
1496*16467b97STreehugger Robot}
1497*16467b97STreehugger Robot
1498*16467b97STreehugger Robottemplate<class ImplTraits>
1499*16467b97STreehugger RobotANTLR_MARKER	TokenIntStream<ImplTraits>::mark()
1500*16467b97STreehugger Robot{
1501*16467b97STreehugger Robot    BaseType::m_lastMarker = this->index();
1502*16467b97STreehugger Robot    return  BaseType::m_lastMarker;
1503*16467b97STreehugger Robot}
1504*16467b97STreehugger Robot
1505*16467b97STreehugger Robottemplate<class ImplTraits>
1506*16467b97STreehugger RobotANTLR_UINT32 TokenIntStream<ImplTraits>::size()
1507*16467b97STreehugger Robot{
1508*16467b97STreehugger Robot    if (this->get_cachedSize() > 0)
1509*16467b97STreehugger Robot    {
1510*16467b97STreehugger Robot		return  this->get_cachedSize();
1511*16467b97STreehugger Robot    }
1512*16467b97STreehugger Robot    TokenStreamType* cts   = this->get_super();
1513*16467b97STreehugger Robot
1514*16467b97STreehugger Robot    this->set_cachedSize( static_cast<ANTLR_UINT32>(cts->get_tokens().size()) );
1515*16467b97STreehugger Robot    return  this->get_cachedSize();
1516*16467b97STreehugger Robot}
1517*16467b97STreehugger Robot
1518*16467b97STreehugger Robottemplate<class ImplTraits>
1519*16467b97STreehugger Robotvoid	TokenIntStream<ImplTraits>::release()
1520*16467b97STreehugger Robot{
1521*16467b97STreehugger Robot    return;
1522*16467b97STreehugger Robot}
1523*16467b97STreehugger Robot
1524*16467b97STreehugger Robottemplate<class ImplTraits>
1525*16467b97STreehugger RobotANTLR_MARKER   TokenIntStream<ImplTraits>::tindex()
1526*16467b97STreehugger Robot{
1527*16467b97STreehugger Robot	return this->get_super()->get_p();
1528*16467b97STreehugger Robot}
1529*16467b97STreehugger Robot
1530*16467b97STreehugger Robottemplate<class ImplTraits>
1531*16467b97STreehugger Robotvoid	TokenIntStream<ImplTraits>::rewindLast()
1532*16467b97STreehugger Robot{
1533*16467b97STreehugger Robot    this->rewind( this->get_lastMarker() );
1534*16467b97STreehugger Robot}
1535*16467b97STreehugger Robot
1536*16467b97STreehugger Robottemplate<class ImplTraits>
1537*16467b97STreehugger Robotvoid	TokenIntStream<ImplTraits>::rewind(ANTLR_MARKER marker)
1538*16467b97STreehugger Robot{
1539*16467b97STreehugger Robot	return this->seek(marker);
1540*16467b97STreehugger Robot}
1541*16467b97STreehugger Robot
1542*16467b97STreehugger Robottemplate<class ImplTraits>
1543*16467b97STreehugger Robotvoid	TokenIntStream<ImplTraits>::seek(ANTLR_MARKER index)
1544*16467b97STreehugger Robot{
1545*16467b97STreehugger Robot    TokenStreamType* cts = static_cast<TokenStreamType*>(this);
1546*16467b97STreehugger Robot
1547*16467b97STreehugger Robot    cts->set_p( static_cast<ANTLR_INT32>(index) );
1548*16467b97STreehugger Robot}
1549*16467b97STreehugger Robot
1550*16467b97STreehugger Robot
1551*16467b97STreehugger Robot/// Return a string that represents the name assoicated with the input source
1552*16467b97STreehugger Robot///
1553*16467b97STreehugger Robot/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream.
1554*16467b97STreehugger Robot///
1555*16467b97STreehugger Robot/// /returns
1556*16467b97STreehugger Robot/// /implements ANTLR3_INT_STREAM_struct::getSourceName()
1557*16467b97STreehugger Robot///
1558*16467b97STreehugger Robottemplate<class ImplTraits>
1559*16467b97STreehugger Robottypename TokenIntStream<ImplTraits>::StringType
1560*16467b97STreehugger RobotTokenIntStream<ImplTraits>::getSourceName()
1561*16467b97STreehugger Robot{
1562*16467b97STreehugger Robot	// Slightly convoluted as we must trace back to the lexer's input source
1563*16467b97STreehugger Robot	// via the token source. The streamName that is here is not initialized
1564*16467b97STreehugger Robot	// because this is a token stream, not a file or string stream, which are the
1565*16467b97STreehugger Robot	// only things that have a context for a source name.
1566*16467b97STreehugger Robot	//
1567*16467b97STreehugger Robot	return this->get_super()->get_tokenSource()->get_fileName();
1568*16467b97STreehugger Robot}
1569*16467b97STreehugger Robot
1570*16467b97STreehugger Robottemplate<class ImplTraits>
1571*16467b97STreehugger Robotvoid  TreeNodeIntStream<ImplTraits>::consume()
1572*16467b97STreehugger Robot{
1573*16467b97STreehugger Robot	CommonTreeNodeStreamType* ctns = this->get_super();
1574*16467b97STreehugger Robot	if( ctns->get_p() == -1 )
1575*16467b97STreehugger Robot		ctns->fillBufferRoot();
1576*16467b97STreehugger Robot	ctns->inc_p();
1577*16467b97STreehugger Robot}
1578*16467b97STreehugger Robottemplate<class ImplTraits>
1579*16467b97STreehugger RobotANTLR_MARKER		TreeNodeIntStream<ImplTraits>::tindex()
1580*16467b97STreehugger Robot{
1581*16467b97STreehugger Robot	CommonTreeNodeStreamType* ctns = this->get_super();
1582*16467b97STreehugger Robot	return (ANTLR_MARKER)(ctns->get_p());
1583*16467b97STreehugger Robot}
1584*16467b97STreehugger Robot
1585*16467b97STreehugger Robottemplate<class ImplTraits>
1586*16467b97STreehugger RobotANTLR_UINT32		TreeNodeIntStream<ImplTraits>::_LA(ANTLR_INT32 i)
1587*16467b97STreehugger Robot{
1588*16467b97STreehugger Robot	CommonTreeNodeStreamType* tns	    = this->get_super();
1589*16467b97STreehugger Robot
1590*16467b97STreehugger Robot	// Ask LT for the 'token' at that position
1591*16467b97STreehugger Robot	//
1592*16467b97STreehugger Robot	TreeType* t = tns->_LT(i);
1593*16467b97STreehugger Robot
1594*16467b97STreehugger Robot	if	(t == NULL)
1595*16467b97STreehugger Robot	{
1596*16467b97STreehugger Robot		return	CommonTokenType::TOKEN_INVALID;
1597*16467b97STreehugger Robot	}
1598*16467b97STreehugger Robot
1599*16467b97STreehugger Robot	// Token node was there so return the type of it
1600*16467b97STreehugger Robot	//
1601*16467b97STreehugger Robot	return  t->get_type();
1602*16467b97STreehugger Robot}
1603*16467b97STreehugger Robot
1604*16467b97STreehugger Robottemplate<class ImplTraits>
1605*16467b97STreehugger RobotANTLR_MARKER	TreeNodeIntStream<ImplTraits>::mark()
1606*16467b97STreehugger Robot{
1607*16467b97STreehugger Robot	CommonTreeNodeStreamType* ctns	    = this->get_super();
1608*16467b97STreehugger Robot
1609*16467b97STreehugger Robot	if	(ctns->get_p() == -1)
1610*16467b97STreehugger Robot	{
1611*16467b97STreehugger Robot		ctns->fillBufferRoot();
1612*16467b97STreehugger Robot	}
1613*16467b97STreehugger Robot
1614*16467b97STreehugger Robot	// Return the current mark point
1615*16467b97STreehugger Robot	//
1616*16467b97STreehugger Robot	this->set_lastMarker( this->index() );
1617*16467b97STreehugger Robot
1618*16467b97STreehugger Robot	return this->get_lastMarker();
1619*16467b97STreehugger Robot
1620*16467b97STreehugger Robot}
1621*16467b97STreehugger Robot
1622*16467b97STreehugger Robottemplate<class ImplTraits>
1623*16467b97STreehugger Robotvoid  TreeNodeIntStream<ImplTraits>::release(ANTLR_MARKER marker)
1624*16467b97STreehugger Robot{
1625*16467b97STreehugger Robot
1626*16467b97STreehugger Robot}
1627*16467b97STreehugger Robot
1628*16467b97STreehugger Robottemplate<class ImplTraits>
1629*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::rewindMark(ANTLR_MARKER marker)
1630*16467b97STreehugger Robot{
1631*16467b97STreehugger Robot	this->seek(marker);
1632*16467b97STreehugger Robot}
1633*16467b97STreehugger Robot
1634*16467b97STreehugger Robottemplate<class ImplTraits>
1635*16467b97STreehugger Robotvoid TreeNodeIntStream<ImplTraits>::rewindLast()
1636*16467b97STreehugger Robot{
1637*16467b97STreehugger Robot	this->seek( this->get_lastMarker() );
1638*16467b97STreehugger Robot}
1639*16467b97STreehugger Robot
1640*16467b97STreehugger Robottemplate<class ImplTraits>
1641*16467b97STreehugger Robotvoid	TreeNodeIntStream<ImplTraits>::seek(ANTLR_MARKER index)
1642*16467b97STreehugger Robot{
1643*16467b97STreehugger Robot	CommonTreeNodeStreamType* ctns	    = this->get_super();
1644*16467b97STreehugger Robot	ctns->set_p( ANTLR_UINT32_CAST(index) );
1645*16467b97STreehugger Robot}
1646*16467b97STreehugger Robot
1647*16467b97STreehugger Robottemplate<class ImplTraits>
1648*16467b97STreehugger RobotANTLR_UINT32	TreeNodeIntStream<ImplTraits>::size()
1649*16467b97STreehugger Robot{
1650*16467b97STreehugger Robot	CommonTreeNodeStreamType* ctns	    = this->get_super();
1651*16467b97STreehugger Robot
1652*16467b97STreehugger Robot	if	(ctns->get_p() == -1)
1653*16467b97STreehugger Robot	{
1654*16467b97STreehugger Robot		ctns->fillBufferRoot();
1655*16467b97STreehugger Robot	}
1656*16467b97STreehugger Robot
1657*16467b97STreehugger Robot	return ctns->get_nodes().size();
1658*16467b97STreehugger Robot}
1659*16467b97STreehugger Robot
1660*16467b97STreehugger Robot
1661*16467b97STreehugger RobotANTLR_END_NAMESPACE()
1662