xref: /aosp_15_r20/external/antlr/runtime/Cpp/include/antlr3input.inl (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger RobotANTLR_BEGIN_NAMESPACE()
2*16467b97STreehugger Robot
3*16467b97STreehugger Robottemplate<class ImplTraits>
4*16467b97STreehugger RobotInputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding)
5*16467b97STreehugger Robot{
6*16467b97STreehugger Robot    // First order of business is to read the file into some buffer space
7*16467b97STreehugger Robot    // as just straight 8 bit bytes. Then we will work out the encoding and
8*16467b97STreehugger Robot    // byte order and adjust the API functions that are installed for the
9*16467b97STreehugger Robot    // default 8Bit stream accordingly.
10*16467b97STreehugger Robot    //
11*16467b97STreehugger Robot    this->createFileStream(fileName);
12*16467b97STreehugger Robot
13*16467b97STreehugger Robot    // We have the data in memory now so we can deal with it according to
14*16467b97STreehugger Robot    // the encoding scheme we were given by the user.
15*16467b97STreehugger Robot    //
16*16467b97STreehugger Robot    m_encoding = encoding;
17*16467b97STreehugger Robot
18*16467b97STreehugger Robot    // Now we need to work out the endian type and install any
19*16467b97STreehugger Robot    // API functions that differ from 8Bit
20*16467b97STreehugger Robot    //
21*16467b97STreehugger Robot    this->setupInputStream();
22*16467b97STreehugger Robot
23*16467b97STreehugger Robot    // Now we can set up the file name
24*16467b97STreehugger Robot    //
25*16467b97STreehugger Robot    BaseType::m_streamName	= (const char* )fileName;
26*16467b97STreehugger Robot    m_fileName		= BaseType::m_streamName;
27*16467b97STreehugger Robot}
28*16467b97STreehugger Robot
29*16467b97STreehugger Robottemplate<class ImplTraits>
30*16467b97STreehugger RobotInputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name)
31*16467b97STreehugger Robot{
32*16467b97STreehugger Robot	// First order of business is to set up the stream and install the data pointer.
33*16467b97STreehugger Robot    // Then we will work out the encoding and byte order and adjust the API functions that are installed for the
34*16467b97STreehugger Robot    // default 8Bit stream accordingly.
35*16467b97STreehugger Robot    //
36*16467b97STreehugger Robot    this->createStringStream(data);
37*16467b97STreehugger Robot
38*16467b97STreehugger Robot    // Size (in bytes) of the given 'string'
39*16467b97STreehugger Robot    //
40*16467b97STreehugger Robot    m_sizeBuf		= size;
41*16467b97STreehugger Robot
42*16467b97STreehugger Robot    // We have the data in memory now so we can deal with it according to
43*16467b97STreehugger Robot    // the encoding scheme we were given by the user.
44*16467b97STreehugger Robot    //
45*16467b97STreehugger Robot    m_encoding = encoding;
46*16467b97STreehugger Robot
47*16467b97STreehugger Robot    // Now we need to work out the endian type and install any
48*16467b97STreehugger Robot    // API functions that differ from 8Bit
49*16467b97STreehugger Robot    //
50*16467b97STreehugger Robot    this->setupInputStream();
51*16467b97STreehugger Robot
52*16467b97STreehugger Robot    // Now we can set up the file name
53*16467b97STreehugger Robot    //
54*16467b97STreehugger Robot    BaseType::m_streamName	= (name == NULL ) ? "" : (const char*)name;
55*16467b97STreehugger Robot    m_fileName		= BaseType::m_streamName;
56*16467b97STreehugger Robot
57*16467b97STreehugger Robot}
58*16467b97STreehugger Robot
59*16467b97STreehugger Robottemplate<class ImplTraits>
60*16467b97STreehugger Robotvoid InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data)
61*16467b97STreehugger Robot{
62*16467b97STreehugger Robot	if	(data == NULL)
63*16467b97STreehugger Robot	{
64*16467b97STreehugger Robot		ParseNullStringException ex;
65*16467b97STreehugger Robot		throw ex;
66*16467b97STreehugger Robot	}
67*16467b97STreehugger Robot
68*16467b97STreehugger Robot	// Structure was allocated correctly, now we can install the pointer
69*16467b97STreehugger Robot	//
70*16467b97STreehugger Robot    m_data             = data;
71*16467b97STreehugger Robot    m_isAllocated	   = false;
72*16467b97STreehugger Robot
73*16467b97STreehugger Robot	// Call the common 8 bit input stream handler
74*16467b97STreehugger Robot	// initialization.
75*16467b97STreehugger Robot	//
76*16467b97STreehugger Robot	this->genericSetupStream();
77*16467b97STreehugger Robot}
78*16467b97STreehugger Robot
79*16467b97STreehugger Robottemplate<class ImplTraits>
80*16467b97STreehugger Robotvoid InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName)
81*16467b97STreehugger Robot{
82*16467b97STreehugger Robot	if	(fileName == NULL)
83*16467b97STreehugger Robot	{
84*16467b97STreehugger Robot		ParseFileAbsentException ex;
85*16467b97STreehugger Robot		throw ex;
86*16467b97STreehugger Robot	}
87*16467b97STreehugger Robot
88*16467b97STreehugger Robot	// Structure was allocated correctly, now we can read the file.
89*16467b97STreehugger Robot	//
90*16467b97STreehugger Robot	FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName);
91*16467b97STreehugger Robot
92*16467b97STreehugger Robot	// Call the common 8 bit input stream handler
93*16467b97STreehugger Robot	// initialization.
94*16467b97STreehugger Robot	//
95*16467b97STreehugger Robot	this->genericSetupStream();
96*16467b97STreehugger Robot}
97*16467b97STreehugger Robot
98*16467b97STreehugger Robottemplate<class ImplTraits>
99*16467b97STreehugger Robotvoid InputStream<ImplTraits>::genericSetupStream()
100*16467b97STreehugger Robot{
101*16467b97STreehugger Robot	this->set_charByteSize(1);
102*16467b97STreehugger Robot
103*16467b97STreehugger Robot    /* Set up the input stream brand new
104*16467b97STreehugger Robot     */
105*16467b97STreehugger Robot    this->reset();
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot    /* Install default line separator character (it can be replaced
108*16467b97STreehugger Robot     * by the grammar programmer later)
109*16467b97STreehugger Robot     */
110*16467b97STreehugger Robot    this->set_newLineChar((ANTLR_UCHAR)'\n');
111*16467b97STreehugger Robot}
112*16467b97STreehugger Robot
113*16467b97STreehugger Robottemplate<class ImplTraits>
114*16467b97STreehugger RobotInputStream<ImplTraits>::~InputStream()
115*16467b97STreehugger Robot{
116*16467b97STreehugger Robot	// Free the input stream buffer if we allocated it
117*16467b97STreehugger Robot    //
118*16467b97STreehugger Robot    if	(m_isAllocated && (m_data != NULL))
119*16467b97STreehugger Robot		AllocPolicyType::free((void*)m_data); //const_cast is required
120*16467b97STreehugger Robot}
121*16467b97STreehugger Robot
122*16467b97STreehugger Robottemplate<class ImplTraits>
123*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const
124*16467b97STreehugger Robot{
125*16467b97STreehugger Robot	return m_data;
126*16467b97STreehugger Robot}
127*16467b97STreehugger Robottemplate<class ImplTraits>
128*16467b97STreehugger RobotANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const
129*16467b97STreehugger Robot{
130*16467b97STreehugger Robot	return m_isAllocated;
131*16467b97STreehugger Robot}
132*16467b97STreehugger Robottemplate<class ImplTraits>
133*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const
134*16467b97STreehugger Robot{
135*16467b97STreehugger Robot	return m_nextChar;
136*16467b97STreehugger Robot}
137*16467b97STreehugger Robottemplate<class ImplTraits>
138*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const
139*16467b97STreehugger Robot{
140*16467b97STreehugger Robot	return m_sizeBuf;
141*16467b97STreehugger Robot}
142*16467b97STreehugger Robottemplate<class ImplTraits>
143*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const
144*16467b97STreehugger Robot{
145*16467b97STreehugger Robot	return m_line;
146*16467b97STreehugger Robot}
147*16467b97STreehugger Robottemplate<class ImplTraits>
148*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const
149*16467b97STreehugger Robot{
150*16467b97STreehugger Robot	return m_currentLine;
151*16467b97STreehugger Robot}
152*16467b97STreehugger Robottemplate<class ImplTraits>
153*16467b97STreehugger RobotANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const
154*16467b97STreehugger Robot{
155*16467b97STreehugger Robot	return m_charPositionInLine;
156*16467b97STreehugger Robot}
157*16467b97STreehugger Robottemplate<class ImplTraits>
158*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const
159*16467b97STreehugger Robot{
160*16467b97STreehugger Robot	return m_markDepth;
161*16467b97STreehugger Robot}
162*16467b97STreehugger Robottemplate<class ImplTraits>
163*16467b97STreehugger RobotANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers()
164*16467b97STreehugger Robot{
165*16467b97STreehugger Robot	return m_markers;
166*16467b97STreehugger Robot}
167*16467b97STreehugger Robottemplate<class ImplTraits>
168*16467b97STreehugger RobotANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const
169*16467b97STreehugger Robot{
170*16467b97STreehugger Robot	return m_fileName;
171*16467b97STreehugger Robot}
172*16467b97STreehugger Robottemplate<class ImplTraits>
173*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const
174*16467b97STreehugger Robot{
175*16467b97STreehugger Robot	return m_fileNo;
176*16467b97STreehugger Robot}
177*16467b97STreehugger Robottemplate<class ImplTraits>
178*16467b97STreehugger RobotANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const
179*16467b97STreehugger Robot{
180*16467b97STreehugger Robot	return m_newlineChar;
181*16467b97STreehugger Robot}
182*16467b97STreehugger Robottemplate<class ImplTraits>
183*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const
184*16467b97STreehugger Robot{
185*16467b97STreehugger Robot	return m_charByteSize;
186*16467b97STreehugger Robot}
187*16467b97STreehugger Robottemplate<class ImplTraits>
188*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const
189*16467b97STreehugger Robot{
190*16467b97STreehugger Robot	return m_encoding;
191*16467b97STreehugger Robot}
192*16467b97STreehugger Robottemplate<class ImplTraits>
193*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data )
194*16467b97STreehugger Robot{
195*16467b97STreehugger Robot	m_data = data;
196*16467b97STreehugger Robot}
197*16467b97STreehugger Robottemplate<class ImplTraits>
198*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated )
199*16467b97STreehugger Robot{
200*16467b97STreehugger Robot	m_isAllocated = isAllocated;
201*16467b97STreehugger Robot}
202*16467b97STreehugger Robottemplate<class ImplTraits>
203*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar )
204*16467b97STreehugger Robot{
205*16467b97STreehugger Robot	m_nextChar = nextChar;
206*16467b97STreehugger Robot}
207*16467b97STreehugger Robottemplate<class ImplTraits>
208*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf )
209*16467b97STreehugger Robot{
210*16467b97STreehugger Robot	m_sizeBuf = sizeBuf;
211*16467b97STreehugger Robot}
212*16467b97STreehugger Robottemplate<class ImplTraits>
213*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line )
214*16467b97STreehugger Robot{
215*16467b97STreehugger Robot	m_line = line;
216*16467b97STreehugger Robot}
217*16467b97STreehugger Robottemplate<class ImplTraits>
218*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine )
219*16467b97STreehugger Robot{
220*16467b97STreehugger Robot	m_currentLine = currentLine;
221*16467b97STreehugger Robot}
222*16467b97STreehugger Robottemplate<class ImplTraits>
223*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
224*16467b97STreehugger Robot{
225*16467b97STreehugger Robot	m_charPositionInLine = charPositionInLine;
226*16467b97STreehugger Robot}
227*16467b97STreehugger Robottemplate<class ImplTraits>
228*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth )
229*16467b97STreehugger Robot{
230*16467b97STreehugger Robot	m_markDepth = markDepth;
231*16467b97STreehugger Robot}
232*16467b97STreehugger Robottemplate<class ImplTraits>
233*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers )
234*16467b97STreehugger Robot{
235*16467b97STreehugger Robot	m_markers = markers;
236*16467b97STreehugger Robot}
237*16467b97STreehugger Robottemplate<class ImplTraits>
238*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName )
239*16467b97STreehugger Robot{
240*16467b97STreehugger Robot	m_fileName = fileName;
241*16467b97STreehugger Robot}
242*16467b97STreehugger Robottemplate<class ImplTraits>
243*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo )
244*16467b97STreehugger Robot{
245*16467b97STreehugger Robot	m_fileNo = fileNo;
246*16467b97STreehugger Robot}
247*16467b97STreehugger Robottemplate<class ImplTraits>
248*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar )
249*16467b97STreehugger Robot{
250*16467b97STreehugger Robot	m_newlineChar = newlineChar;
251*16467b97STreehugger Robot}
252*16467b97STreehugger Robottemplate<class ImplTraits>
253*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize )
254*16467b97STreehugger Robot{
255*16467b97STreehugger Robot	m_charByteSize = charByteSize;
256*16467b97STreehugger Robot}
257*16467b97STreehugger Robottemplate<class ImplTraits>
258*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding )
259*16467b97STreehugger Robot{
260*16467b97STreehugger Robot	m_encoding = encoding;
261*16467b97STreehugger Robot}
262*16467b97STreehugger Robot
263*16467b97STreehugger Robottemplate<class ImplTraits>
264*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine()
265*16467b97STreehugger Robot{
266*16467b97STreehugger Robot	++m_charPositionInLine;
267*16467b97STreehugger Robot}
268*16467b97STreehugger Robot
269*16467b97STreehugger Robottemplate<class ImplTraits>
270*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_line()
271*16467b97STreehugger Robot{
272*16467b97STreehugger Robot	++m_line;
273*16467b97STreehugger Robot}
274*16467b97STreehugger Robot
275*16467b97STreehugger Robottemplate<class ImplTraits>
276*16467b97STreehugger RobotANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth()
277*16467b97STreehugger Robot{
278*16467b97STreehugger Robot	++m_markDepth;
279*16467b97STreehugger Robot}
280*16467b97STreehugger Robot
281*16467b97STreehugger Robottemplate<class ImplTraits>
282*16467b97STreehugger RobotANTLR_INLINE void	InputStream<ImplTraits>::reset()
283*16467b97STreehugger Robot{
284*16467b97STreehugger Robot	m_nextChar		= m_data;	/* Input at first character */
285*16467b97STreehugger Robot    m_line			= 1;		/* starts at line 1	    */
286*16467b97STreehugger Robot    m_charPositionInLine	= 0;
287*16467b97STreehugger Robot    m_currentLine		= m_data;
288*16467b97STreehugger Robot    m_markDepth		= 0;		/* Reset markers	    */
289*16467b97STreehugger Robot
290*16467b97STreehugger Robot    /* Clear out up the markers table if it is there
291*16467b97STreehugger Robot     */
292*16467b97STreehugger Robot	m_markers.clear();
293*16467b97STreehugger Robot}
294*16467b97STreehugger Robot
295*16467b97STreehugger Robottemplate<class ImplTraits>
296*16467b97STreehugger Robotvoid    InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name)
297*16467b97STreehugger Robot{
298*16467b97STreehugger Robot	m_isAllocated	= false;
299*16467b97STreehugger Robot    m_data		= inString;
300*16467b97STreehugger Robot    m_sizeBuf	= size;
301*16467b97STreehugger Robot
302*16467b97STreehugger Robot    // Now we can set up the file name. As we are reusing the stream, there may already
303*16467b97STreehugger Robot    // be a string that we can reuse for holding the filename.
304*16467b97STreehugger Robot    //
305*16467b97STreehugger Robot	if	( BaseType::m_streamName.empty() )
306*16467b97STreehugger Robot	{
307*16467b97STreehugger Robot		BaseType::m_streamName	= ((name == NULL) ? "-memory-" : (const char *)name);
308*16467b97STreehugger Robot		m_fileName		= BaseType::m_streamName;
309*16467b97STreehugger Robot	}
310*16467b97STreehugger Robot	else
311*16467b97STreehugger Robot	{
312*16467b97STreehugger Robot		BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name);
313*16467b97STreehugger Robot	}
314*16467b97STreehugger Robot
315*16467b97STreehugger Robot    this->reset();
316*16467b97STreehugger Robot}
317*16467b97STreehugger Robot
318*16467b97STreehugger Robot/*
319*16467b97STreehugger Robottemplate<class ImplTraits>
320*16467b97STreehugger Robottypename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::_LT(ANTLR_INT32 lt)
321*16467b97STreehugger Robot{
322*16467b97STreehugger Robot	return this->_LA(lt);
323*16467b97STreehugger Robot}
324*16467b97STreehugger Robot*/
325*16467b97STreehugger Robot
326*16467b97STreehugger Robottemplate<class ImplTraits>
327*16467b97STreehugger RobotANTLR_UINT32	InputStream<ImplTraits>::size()
328*16467b97STreehugger Robot{
329*16467b97STreehugger Robot	return m_sizeBuf;
330*16467b97STreehugger Robot}
331*16467b97STreehugger Robot
332*16467b97STreehugger Robottemplate<class ImplTraits>
333*16467b97STreehugger RobotANTLR_MARKER	InputStream<ImplTraits>::index_impl()
334*16467b97STreehugger Robot{
335*16467b97STreehugger Robot	return (ANTLR_MARKER)m_nextChar;
336*16467b97STreehugger Robot}
337*16467b97STreehugger Robot
338*16467b97STreehugger Robot
339*16467b97STreehugger Robottemplate<class ImplTraits>
340*16467b97STreehugger Robottypename InputStream<ImplTraits>::StringType	InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop)
341*16467b97STreehugger Robot{
342*16467b97STreehugger Robot	std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 );
343*16467b97STreehugger Robot	StringType str( (const char*)start, len );
344*16467b97STreehugger Robot	return str;
345*16467b97STreehugger Robot}
346*16467b97STreehugger Robot
347*16467b97STreehugger Robottemplate<class ImplTraits>
348*16467b97STreehugger RobotANTLR_UINT32	InputStream<ImplTraits>::get_line()
349*16467b97STreehugger Robot{
350*16467b97STreehugger Robot	return m_line;
351*16467b97STreehugger Robot}
352*16467b97STreehugger Robot
353*16467b97STreehugger Robottemplate<class ImplTraits>
354*16467b97STreehugger Robotconst typename InputStream<ImplTraits>::DataType*	InputStream<ImplTraits>::getLineBuf()
355*16467b97STreehugger Robot{
356*16467b97STreehugger Robot	return m_currentLine;
357*16467b97STreehugger Robot}
358*16467b97STreehugger Robot
359*16467b97STreehugger Robottemplate<class ImplTraits>
360*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32	InputStream<ImplTraits>::get_charPositionInLine()
361*16467b97STreehugger Robot{
362*16467b97STreehugger Robot	return m_charPositionInLine;
363*16467b97STreehugger Robot}
364*16467b97STreehugger Robot
365*16467b97STreehugger Robottemplate<class ImplTraits>
366*16467b97STreehugger RobotANTLR_INLINE void	InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position)
367*16467b97STreehugger Robot{
368*16467b97STreehugger Robot	m_charPositionInLine = position;
369*16467b97STreehugger Robot}
370*16467b97STreehugger Robot
371*16467b97STreehugger Robottemplate<class ImplTraits>
372*16467b97STreehugger Robotvoid	InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar)
373*16467b97STreehugger Robot{
374*16467b97STreehugger Robot	m_newlineChar = newlineChar;
375*16467b97STreehugger Robot}
376*16467b97STreehugger Robot
377*16467b97STreehugger Robottemplate<class ImplTraits>
378*16467b97STreehugger RobotANTLR_INLINE LexState<ImplTraits>::LexState()
379*16467b97STreehugger Robot{
380*16467b97STreehugger Robot	m_nextChar = NULL;
381*16467b97STreehugger Robot	m_line = 0;
382*16467b97STreehugger Robot	m_currentLine = NULL;
383*16467b97STreehugger Robot	m_charPositionInLine = 0;
384*16467b97STreehugger Robot}
385*16467b97STreehugger Robot
386*16467b97STreehugger Robottemplate<class ImplTraits>
387*16467b97STreehugger RobotANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const
388*16467b97STreehugger Robot{
389*16467b97STreehugger Robot	return m_nextChar;
390*16467b97STreehugger Robot}
391*16467b97STreehugger Robot
392*16467b97STreehugger Robottemplate<class ImplTraits>
393*16467b97STreehugger RobotANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const
394*16467b97STreehugger Robot{
395*16467b97STreehugger Robot	return m_line;
396*16467b97STreehugger Robot}
397*16467b97STreehugger Robot
398*16467b97STreehugger Robottemplate<class ImplTraits>
399*16467b97STreehugger RobotANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const
400*16467b97STreehugger Robot{
401*16467b97STreehugger Robot	return m_currentLine;
402*16467b97STreehugger Robot}
403*16467b97STreehugger Robot
404*16467b97STreehugger Robottemplate<class ImplTraits>
405*16467b97STreehugger RobotANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const
406*16467b97STreehugger Robot{
407*16467b97STreehugger Robot	return m_charPositionInLine;
408*16467b97STreehugger Robot}
409*16467b97STreehugger Robot
410*16467b97STreehugger Robottemplate<class ImplTraits>
411*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar )
412*16467b97STreehugger Robot{
413*16467b97STreehugger Robot	m_nextChar = nextChar;
414*16467b97STreehugger Robot}
415*16467b97STreehugger Robot
416*16467b97STreehugger Robottemplate<class ImplTraits>
417*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line )
418*16467b97STreehugger Robot{
419*16467b97STreehugger Robot	m_line = line;
420*16467b97STreehugger Robot}
421*16467b97STreehugger Robot
422*16467b97STreehugger Robottemplate<class ImplTraits>
423*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine )
424*16467b97STreehugger Robot{
425*16467b97STreehugger Robot	m_currentLine = currentLine;
426*16467b97STreehugger Robot}
427*16467b97STreehugger Robot
428*16467b97STreehugger Robottemplate<class ImplTraits>
429*16467b97STreehugger RobotANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine )
430*16467b97STreehugger Robot{
431*16467b97STreehugger Robot	m_charPositionInLine = charPositionInLine;
432*16467b97STreehugger Robot}
433*16467b97STreehugger Robot
434*16467b97STreehugger Robottemplate<class ImplTraits>
435*16467b97STreehugger RobotANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType*	InputStream<ImplTraits>::get_istream()
436*16467b97STreehugger Robot{
437*16467b97STreehugger Robot	return this;
438*16467b97STreehugger Robot}
439*16467b97STreehugger Robot
440*16467b97STreehugger Robottemplate<class ImplTraits>
441*16467b97STreehugger Robotvoid InputStream<ImplTraits>::setupInputStream()
442*16467b97STreehugger Robot{
443*16467b97STreehugger Robot	bool  isBigEndian;
444*16467b97STreehugger Robot
445*16467b97STreehugger Robot    // Used to determine the endianness of the machine we are currently
446*16467b97STreehugger Robot    // running on.
447*16467b97STreehugger Robot    //
448*16467b97STreehugger Robot    ANTLR_UINT16 bomTest = 0xFEFF;
449*16467b97STreehugger Robot
450*16467b97STreehugger Robot    // What endianess is the machine we are running on? If the incoming
451*16467b97STreehugger Robot    // encoding endianess is the same as this machine's natural byte order
452*16467b97STreehugger Robot    // then we can use more efficient API calls.
453*16467b97STreehugger Robot    //
454*16467b97STreehugger Robot    if  (*((ANTLR_UINT8*)(&bomTest)) == 0xFE)
455*16467b97STreehugger Robot    {
456*16467b97STreehugger Robot        isBigEndian = true;
457*16467b97STreehugger Robot    }
458*16467b97STreehugger Robot    else
459*16467b97STreehugger Robot    {
460*16467b97STreehugger Robot        isBigEndian = false;
461*16467b97STreehugger Robot    }
462*16467b97STreehugger Robot
463*16467b97STreehugger Robot    // What encoding did the user tell us {s}he thought it was? I am going
464*16467b97STreehugger Robot    // to get sick of the questions on antlr-interest, I know I am.
465*16467b97STreehugger Robot    //
466*16467b97STreehugger Robot    switch  (m_encoding)
467*16467b97STreehugger Robot    {
468*16467b97STreehugger Robot        case    ANTLR_ENC_UTF8:
469*16467b97STreehugger Robot
470*16467b97STreehugger Robot            // See if there is a BOM at the start of this UTF-8 sequence
471*16467b97STreehugger Robot            // and just eat it if there is. Windows .TXT files have this for instance
472*16467b97STreehugger Robot            // as it identifies UTF-8 even though it is of no consequence for byte order
473*16467b97STreehugger Robot            // as UTF-8 does not have a byte order.
474*16467b97STreehugger Robot            //
475*16467b97STreehugger Robot            if  (       (*(m_nextChar))      == 0xEF
476*16467b97STreehugger Robot                    &&  (*(m_nextChar+1))    == 0xBB
477*16467b97STreehugger Robot                    &&  (*(m_nextChar+2))    == 0xBF
478*16467b97STreehugger Robot                )
479*16467b97STreehugger Robot            {
480*16467b97STreehugger Robot                // The UTF8 BOM is present so skip it
481*16467b97STreehugger Robot                //
482*16467b97STreehugger Robot                m_nextChar += 3;
483*16467b97STreehugger Robot            }
484*16467b97STreehugger Robot
485*16467b97STreehugger Robot            // Install the UTF8 input routines
486*16467b97STreehugger Robot            //
487*16467b97STreehugger Robot			this->setupIntStream( isBigEndian, isBigEndian );
488*16467b97STreehugger Robot			this->set_charByteSize(0);
489*16467b97STreehugger Robot            break;
490*16467b97STreehugger Robot
491*16467b97STreehugger Robot        case    ANTLR_ENC_UTF16:
492*16467b97STreehugger Robot
493*16467b97STreehugger Robot            // See if there is a BOM at the start of the input. If not then
494*16467b97STreehugger Robot            // we assume that the byte order is the natural order of this
495*16467b97STreehugger Robot            // machine (or it is really UCS2). If there is a BOM we determine if the encoding
496*16467b97STreehugger Robot            // is the same as the natural order of this machine.
497*16467b97STreehugger Robot            //
498*16467b97STreehugger Robot            if  (       (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFE
499*16467b97STreehugger Robot                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFF
500*16467b97STreehugger Robot                )
501*16467b97STreehugger Robot            {
502*16467b97STreehugger Robot                // BOM Present, indicates Big Endian
503*16467b97STreehugger Robot                //
504*16467b97STreehugger Robot                m_nextChar += 1;
505*16467b97STreehugger Robot
506*16467b97STreehugger Robot				this->setupIntStream( isBigEndian, true );
507*16467b97STreehugger Robot            }
508*16467b97STreehugger Robot            else if  (      (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFF
509*16467b97STreehugger Robot                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFE
510*16467b97STreehugger Robot                )
511*16467b97STreehugger Robot            {
512*16467b97STreehugger Robot                // BOM present, indicates Little Endian
513*16467b97STreehugger Robot                //
514*16467b97STreehugger Robot                m_nextChar += 1;
515*16467b97STreehugger Robot
516*16467b97STreehugger Robot                this->setupIntStream( isBigEndian, false );
517*16467b97STreehugger Robot            }
518*16467b97STreehugger Robot            else
519*16467b97STreehugger Robot            {
520*16467b97STreehugger Robot                // No BOM present, assume local computer byte order
521*16467b97STreehugger Robot                //
522*16467b97STreehugger Robot                this->setupIntStream(isBigEndian, isBigEndian);
523*16467b97STreehugger Robot            }
524*16467b97STreehugger Robot			this->set_charByteSize(2);
525*16467b97STreehugger Robot            break;
526*16467b97STreehugger Robot
527*16467b97STreehugger Robot        case    ANTLR_ENC_UTF32:
528*16467b97STreehugger Robot
529*16467b97STreehugger Robot            // See if there is a BOM at the start of the input. If not then
530*16467b97STreehugger Robot            // we assume that the byte order is the natural order of this
531*16467b97STreehugger Robot            // machine. If there is we determine if the encoding
532*16467b97STreehugger Robot            // is the same as the natural order of this machine.
533*16467b97STreehugger Robot            //
534*16467b97STreehugger Robot            if  (       (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0x00
535*16467b97STreehugger Robot                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
536*16467b97STreehugger Robot                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2))    == 0xFE
537*16467b97STreehugger Robot                    &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3))    == 0xFF
538*16467b97STreehugger Robot                )
539*16467b97STreehugger Robot            {
540*16467b97STreehugger Robot                // BOM Present, indicates Big Endian
541*16467b97STreehugger Robot                //
542*16467b97STreehugger Robot                m_nextChar += 1;
543*16467b97STreehugger Robot
544*16467b97STreehugger Robot                this->setupIntStream(isBigEndian, true);
545*16467b97STreehugger Robot            }
546*16467b97STreehugger Robot            else if  (      (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar))      == 0xFF
547*16467b97STreehugger Robot                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0xFE
548*16467b97STreehugger Robot                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
549*16467b97STreehugger Robot                        &&  (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1))    == 0x00
550*16467b97STreehugger Robot                )
551*16467b97STreehugger Robot            {
552*16467b97STreehugger Robot                // BOM present, indicates Little Endian
553*16467b97STreehugger Robot                //
554*16467b97STreehugger Robot                m_nextChar += 1;
555*16467b97STreehugger Robot
556*16467b97STreehugger Robot				this->setupIntStream( isBigEndian, false );
557*16467b97STreehugger Robot            }
558*16467b97STreehugger Robot            else
559*16467b97STreehugger Robot            {
560*16467b97STreehugger Robot                // No BOM present, assume local computer byte order
561*16467b97STreehugger Robot                //
562*16467b97STreehugger Robot				this->setupIntStream( isBigEndian, isBigEndian );
563*16467b97STreehugger Robot            }
564*16467b97STreehugger Robot			this->set_charByteSize(4);
565*16467b97STreehugger Robot            break;
566*16467b97STreehugger Robot
567*16467b97STreehugger Robot        case    ANTLR_ENC_UTF16BE:
568*16467b97STreehugger Robot
569*16467b97STreehugger Robot            // Encoding is definately Big Endian with no BOM
570*16467b97STreehugger Robot            //
571*16467b97STreehugger Robot			this->setupIntStream( isBigEndian, true );
572*16467b97STreehugger Robot			this->set_charByteSize(2);
573*16467b97STreehugger Robot            break;
574*16467b97STreehugger Robot
575*16467b97STreehugger Robot        case    ANTLR_ENC_UTF16LE:
576*16467b97STreehugger Robot
577*16467b97STreehugger Robot            // Encoding is definately Little Endian with no BOM
578*16467b97STreehugger Robot            //
579*16467b97STreehugger Robot            this->setupIntStream( isBigEndian, false );
580*16467b97STreehugger Robot			this->set_charByteSize(2);
581*16467b97STreehugger Robot            break;
582*16467b97STreehugger Robot
583*16467b97STreehugger Robot        case    ANTLR_ENC_UTF32BE:
584*16467b97STreehugger Robot
585*16467b97STreehugger Robot            // Encoding is definately Big Endian with no BOM
586*16467b97STreehugger Robot            //
587*16467b97STreehugger Robot			this->setupIntStream( isBigEndian, true );
588*16467b97STreehugger Robot			this->set_charByteSize(4);
589*16467b97STreehugger Robot            break;
590*16467b97STreehugger Robot
591*16467b97STreehugger Robot        case    ANTLR_ENC_UTF32LE:
592*16467b97STreehugger Robot
593*16467b97STreehugger Robot            // Encoding is definately Little Endian with no BOM
594*16467b97STreehugger Robot            //
595*16467b97STreehugger Robot			this->setupIntStream( isBigEndian, false );
596*16467b97STreehugger Robot			this->set_charByteSize(4);
597*16467b97STreehugger Robot            break;
598*16467b97STreehugger Robot
599*16467b97STreehugger Robot        case    ANTLR_ENC_EBCDIC:
600*16467b97STreehugger Robot
601*16467b97STreehugger Robot            // EBCDIC is basically the same as ASCII but with an on the
602*16467b97STreehugger Robot            // fly translation to ASCII
603*16467b97STreehugger Robot            //
604*16467b97STreehugger Robot            this->setupIntStream( isBigEndian, isBigEndian );
605*16467b97STreehugger Robot			this->set_charByteSize(1);
606*16467b97STreehugger Robot            break;
607*16467b97STreehugger Robot
608*16467b97STreehugger Robot        case    ANTLR_ENC_8BIT:
609*16467b97STreehugger Robot        default:
610*16467b97STreehugger Robot
611*16467b97STreehugger Robot            // Standard 8bit/ASCII
612*16467b97STreehugger Robot            //
613*16467b97STreehugger Robot            this->setupIntStream( isBigEndian, isBigEndian );
614*16467b97STreehugger Robot			this->set_charByteSize(1);
615*16467b97STreehugger Robot            break;
616*16467b97STreehugger Robot    }
617*16467b97STreehugger Robot}
618*16467b97STreehugger Robot
619*16467b97STreehugger RobotANTLR_END_NAMESPACE()
620