xref: /aosp_15_r20/external/antlr/runtime/C/src/antlr3filestream.c (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
/** \file
 * \brief The ANTLR3 C filestream is used when the source character stream
 * is a filesystem based input set and all the characters in the filestream
 * can be loaded at once into memory and away the lexer goes.
 *
 * A number of initializers are provided in order that various character
 * sets can be supported from input files. The ANTLR3 C runtime expects
 * to deal with UTF32 characters only (the reasons for this are to
 * do with the simplification of C code when using this form of Unicode
 * encoding, though this is not a panacea). More information can be
 * found on this by consulting:
 *   - http://www.unicode.org/versions/Unicode4.0.0/ch02.pdf#G11178
 * where a well grounded discussion of the encoding formats available
 * may be found.
 *
 */
17*16467b97STreehugger Robot 
18*16467b97STreehugger Robot // [The "BSD licence"]
19*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
20*16467b97STreehugger Robot // http://www.temporal-wave.com
21*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
22*16467b97STreehugger Robot //
23*16467b97STreehugger Robot // All rights reserved.
24*16467b97STreehugger Robot //
25*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
26*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
27*16467b97STreehugger Robot // are met:
28*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
29*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
30*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
31*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
32*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
33*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
34*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
35*16467b97STreehugger Robot //
36*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
37*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
40*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
45*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46*16467b97STreehugger Robot 
47*16467b97STreehugger Robot #include    <antlr3.h>
48*16467b97STreehugger Robot 
49*16467b97STreehugger Robot static  void                    setupInputStream            (pANTLR3_INPUT_STREAM input);
50*16467b97STreehugger Robot static  pANTLR3_INPUT_STREAM    antlr3CreateFileStream      (pANTLR3_UINT8 fileName);
51*16467b97STreehugger Robot static  pANTLR3_INPUT_STREAM    antlr3CreateStringStream    (pANTLR3_UINT8 data);
52*16467b97STreehugger Robot 
53*16467b97STreehugger Robot ANTLR3_API pANTLR3_INPUT_STREAM
antlr3FileStreamNew(pANTLR3_UINT8 fileName,ANTLR3_UINT32 encoding)54*16467b97STreehugger Robot antlr3FileStreamNew(pANTLR3_UINT8 fileName, ANTLR3_UINT32 encoding)
55*16467b97STreehugger Robot {
56*16467b97STreehugger Robot     pANTLR3_INPUT_STREAM input;
57*16467b97STreehugger Robot 
58*16467b97STreehugger Robot     // First order of business is to read the file into some buffer space
59*16467b97STreehugger Robot     // as just straight 8 bit bytes. Then we will work out the encoding and
60*16467b97STreehugger Robot     // byte order and adjust the API functions that are installed for the
61*16467b97STreehugger Robot     // default 8Bit stream accordingly.
62*16467b97STreehugger Robot     //
63*16467b97STreehugger Robot     input   = antlr3CreateFileStream(fileName);
64*16467b97STreehugger Robot     if  (input == NULL)
65*16467b97STreehugger Robot     {
66*16467b97STreehugger Robot         return NULL;
67*16467b97STreehugger Robot     }
68*16467b97STreehugger Robot 
69*16467b97STreehugger Robot     // We have the data in memory now so we can deal with it according to
70*16467b97STreehugger Robot     // the encoding scheme we were given by the user.
71*16467b97STreehugger Robot     //
72*16467b97STreehugger Robot     input->encoding = encoding;
73*16467b97STreehugger Robot 
74*16467b97STreehugger Robot     // Now we need to work out the endian type and install any
75*16467b97STreehugger Robot     // API functions that differ from 8Bit
76*16467b97STreehugger Robot     //
77*16467b97STreehugger Robot     setupInputStream(input);
78*16467b97STreehugger Robot 
79*16467b97STreehugger Robot     // Now we can set up the file name
80*16467b97STreehugger Robot     //
81*16467b97STreehugger Robot     input->istream->streamName	= input->strFactory->newStr8(input->strFactory, fileName);
82*16467b97STreehugger Robot     input->fileName		= input->istream->streamName;
83*16467b97STreehugger Robot 
84*16467b97STreehugger Robot     return input;
85*16467b97STreehugger Robot }
86*16467b97STreehugger Robot 
87*16467b97STreehugger Robot 
88*16467b97STreehugger Robot ANTLR3_API pANTLR3_INPUT_STREAM
antlr3StringStreamNew(pANTLR3_UINT8 data,ANTLR3_UINT32 encoding,ANTLR3_UINT32 size,pANTLR3_UINT8 name)89*16467b97STreehugger Robot antlr3StringStreamNew(pANTLR3_UINT8 data, ANTLR3_UINT32 encoding, ANTLR3_UINT32 size, pANTLR3_UINT8 name)
90*16467b97STreehugger Robot {
91*16467b97STreehugger Robot     pANTLR3_INPUT_STREAM    input;
92*16467b97STreehugger Robot 
93*16467b97STreehugger Robot     // First order of business is to set up the stream and install the data pointer.
94*16467b97STreehugger Robot     // Then we will work out the encoding and byte order and adjust the API functions that are installed for the
95*16467b97STreehugger Robot     // default 8Bit stream accordingly.
96*16467b97STreehugger Robot     //
97*16467b97STreehugger Robot     input   = antlr3CreateStringStream(data);
98*16467b97STreehugger Robot     if  (input == NULL)
99*16467b97STreehugger Robot     {
100*16467b97STreehugger Robot         return NULL;
101*16467b97STreehugger Robot     }
102*16467b97STreehugger Robot 
103*16467b97STreehugger Robot     // Size (in bytes) of the given 'string'
104*16467b97STreehugger Robot     //
105*16467b97STreehugger Robot     input->sizeBuf		= size;
106*16467b97STreehugger Robot 
107*16467b97STreehugger Robot     // We have the data in memory now so we can deal with it according to
108*16467b97STreehugger Robot     // the encoding scheme we were given by the user.
109*16467b97STreehugger Robot     //
110*16467b97STreehugger Robot     input->encoding = encoding;
111*16467b97STreehugger Robot 
112*16467b97STreehugger Robot     // Now we need to work out the endian type and install any
113*16467b97STreehugger Robot     // API functions that differ from 8Bit
114*16467b97STreehugger Robot     //
115*16467b97STreehugger Robot     setupInputStream(input);
116*16467b97STreehugger Robot 
117*16467b97STreehugger Robot     // Now we can set up the file name
118*16467b97STreehugger Robot     //
119*16467b97STreehugger Robot     input->istream->streamName	= input->strFactory->newStr8(input->strFactory, name);
120*16467b97STreehugger Robot     input->fileName		= input->istream->streamName;
121*16467b97STreehugger Robot 
122*16467b97STreehugger Robot     return input;
123*16467b97STreehugger Robot }
124*16467b97STreehugger Robot 
125*16467b97STreehugger Robot 
126*16467b97STreehugger Robot /// Determine endianess of the input stream and install the
127*16467b97STreehugger Robot /// API required for the encoding in that format.
128*16467b97STreehugger Robot ///
129*16467b97STreehugger Robot static void
setupInputStream(pANTLR3_INPUT_STREAM input)130*16467b97STreehugger Robot setupInputStream(pANTLR3_INPUT_STREAM input)
131*16467b97STreehugger Robot {
132*16467b97STreehugger Robot     ANTLR3_BOOLEAN  isBigEndian;
133*16467b97STreehugger Robot 
134*16467b97STreehugger Robot     // Used to determine the endianness of the machine we are currently
135*16467b97STreehugger Robot     // running on.
136*16467b97STreehugger Robot     //
137*16467b97STreehugger Robot     ANTLR3_UINT16 bomTest = 0xFEFF;
138*16467b97STreehugger Robot 
139*16467b97STreehugger Robot     // What endianess is the machine we are running on? If the incoming
140*16467b97STreehugger Robot     // encoding endianess is the same as this machine's natural byte order
141*16467b97STreehugger Robot     // then we can use more efficient API calls.
142*16467b97STreehugger Robot     //
143*16467b97STreehugger Robot     if  (*((pANTLR3_UINT8)(&bomTest)) == 0xFE)
144*16467b97STreehugger Robot     {
145*16467b97STreehugger Robot         isBigEndian = ANTLR3_TRUE;
146*16467b97STreehugger Robot     }
147*16467b97STreehugger Robot     else
148*16467b97STreehugger Robot     {
149*16467b97STreehugger Robot         isBigEndian = ANTLR3_FALSE;
150*16467b97STreehugger Robot     }
151*16467b97STreehugger Robot 
152*16467b97STreehugger Robot     // What encoding did the user tell us {s}he thought it was? I am going
153*16467b97STreehugger Robot     // to get sick of the questions on antlr-interest, I know I am.
154*16467b97STreehugger Robot     //
155*16467b97STreehugger Robot     switch  (input->encoding)
156*16467b97STreehugger Robot     {
157*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF8:
158*16467b97STreehugger Robot 
159*16467b97STreehugger Robot             // See if there is a BOM at the start of this UTF-8 sequence
160*16467b97STreehugger Robot             // and just eat it if there is. Windows .TXT files have this for instance
161*16467b97STreehugger Robot             // as it identifies UTF-8 even though it is of no consequence for byte order
162*16467b97STreehugger Robot             // as UTF-8 does not have a byte order.
163*16467b97STreehugger Robot             //
164*16467b97STreehugger Robot             if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xEF
165*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xBB
166*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xBF
167*16467b97STreehugger Robot                 )
168*16467b97STreehugger Robot             {
169*16467b97STreehugger Robot                 // The UTF8 BOM is present so skip it
170*16467b97STreehugger Robot                 //
171*16467b97STreehugger Robot                 input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3);
172*16467b97STreehugger Robot             }
173*16467b97STreehugger Robot 
174*16467b97STreehugger Robot             // Install the UTF8 input routines
175*16467b97STreehugger Robot             //
176*16467b97STreehugger Robot             antlr3UTF8SetupStream(input);
177*16467b97STreehugger Robot             break;
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF16:
180*16467b97STreehugger Robot 
181*16467b97STreehugger Robot             // See if there is a BOM at the start of the input. If not then
182*16467b97STreehugger Robot             // we assume that the byte order is the natural order of this
183*16467b97STreehugger Robot             // machine (or it is really UCS2). If there is a BOM we determine if the encoding
184*16467b97STreehugger Robot             // is the same as the natural order of this machine.
185*16467b97STreehugger Robot             //
186*16467b97STreehugger Robot             if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFE
187*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFF
188*16467b97STreehugger Robot                 )
189*16467b97STreehugger Robot             {
190*16467b97STreehugger Robot                 // BOM Present, indicates Big Endian
191*16467b97STreehugger Robot                 //
192*16467b97STreehugger Robot                 input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
193*16467b97STreehugger Robot 
194*16467b97STreehugger Robot                 antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
195*16467b97STreehugger Robot             }
196*16467b97STreehugger Robot             else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
197*16467b97STreehugger Robot                         &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
198*16467b97STreehugger Robot                 )
199*16467b97STreehugger Robot             {
200*16467b97STreehugger Robot                 // BOM present, indicates Little Endian
201*16467b97STreehugger Robot                 //
202*16467b97STreehugger Robot                 input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
203*16467b97STreehugger Robot 
204*16467b97STreehugger Robot                 antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
205*16467b97STreehugger Robot             }
206*16467b97STreehugger Robot             else
207*16467b97STreehugger Robot             {
208*16467b97STreehugger Robot                 // No BOM present, assume local computer byte order
209*16467b97STreehugger Robot                 //
210*16467b97STreehugger Robot                 antlr3UTF16SetupStream(input, isBigEndian, isBigEndian);
211*16467b97STreehugger Robot             }
212*16467b97STreehugger Robot             break;
213*16467b97STreehugger Robot 
214*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF32:
215*16467b97STreehugger Robot 
216*16467b97STreehugger Robot             // See if there is a BOM at the start of the input. If not then
217*16467b97STreehugger Robot             // we assume that the byte order is the natural order of this
218*16467b97STreehugger Robot             // machine. If there is we determine if the encoding
219*16467b97STreehugger Robot             // is the same as the natural order of this machine.
220*16467b97STreehugger Robot             //
221*16467b97STreehugger Robot             if  (       (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0x00
222*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
223*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2))    == 0xFE
224*16467b97STreehugger Robot                     &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3))    == 0xFF
225*16467b97STreehugger Robot                 )
226*16467b97STreehugger Robot             {
227*16467b97STreehugger Robot                 // BOM Present, indicates Big Endian
228*16467b97STreehugger Robot                 //
229*16467b97STreehugger Robot                 input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
230*16467b97STreehugger Robot 
231*16467b97STreehugger Robot                 antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
232*16467b97STreehugger Robot             }
233*16467b97STreehugger Robot             else if  (      (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar))      == 0xFF
234*16467b97STreehugger Robot                         &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0xFE
235*16467b97STreehugger Robot                         &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
236*16467b97STreehugger Robot                         &&  (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1))    == 0x00
237*16467b97STreehugger Robot                 )
238*16467b97STreehugger Robot             {
239*16467b97STreehugger Robot                 // BOM present, indicates Little Endian
240*16467b97STreehugger Robot                 //
241*16467b97STreehugger Robot                 input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
242*16467b97STreehugger Robot 
243*16467b97STreehugger Robot                 antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
244*16467b97STreehugger Robot             }
245*16467b97STreehugger Robot             else
246*16467b97STreehugger Robot             {
247*16467b97STreehugger Robot                 // No BOM present, assume local computer byte order
248*16467b97STreehugger Robot                 //
249*16467b97STreehugger Robot                 antlr3UTF32SetupStream(input, isBigEndian, isBigEndian);
250*16467b97STreehugger Robot             }
251*16467b97STreehugger Robot             break;
252*16467b97STreehugger Robot 
253*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF16BE:
254*16467b97STreehugger Robot 
255*16467b97STreehugger Robot             // Encoding is definately Big Endian with no BOM
256*16467b97STreehugger Robot             //
257*16467b97STreehugger Robot             antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
258*16467b97STreehugger Robot             break;
259*16467b97STreehugger Robot 
260*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF16LE:
261*16467b97STreehugger Robot 
262*16467b97STreehugger Robot             // Encoding is definately Little Endian with no BOM
263*16467b97STreehugger Robot             //
264*16467b97STreehugger Robot             antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
265*16467b97STreehugger Robot             break;
266*16467b97STreehugger Robot 
267*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF32BE:
268*16467b97STreehugger Robot 
269*16467b97STreehugger Robot             // Encoding is definately Big Endian with no BOM
270*16467b97STreehugger Robot             //
271*16467b97STreehugger Robot             antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
272*16467b97STreehugger Robot             break;
273*16467b97STreehugger Robot 
274*16467b97STreehugger Robot         case    ANTLR3_ENC_UTF32LE:
275*16467b97STreehugger Robot 
276*16467b97STreehugger Robot             // Encoding is definately Little Endian with no BOM
277*16467b97STreehugger Robot             //
278*16467b97STreehugger Robot             antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
279*16467b97STreehugger Robot             break;
280*16467b97STreehugger Robot 
281*16467b97STreehugger Robot         case    ANTLR3_ENC_EBCDIC:
282*16467b97STreehugger Robot 
283*16467b97STreehugger Robot             // EBCDIC is basically the same as ASCII but with an on the
284*16467b97STreehugger Robot             // fly translation to ASCII
285*16467b97STreehugger Robot             //
286*16467b97STreehugger Robot             antlr3EBCDICSetupStream(input);
287*16467b97STreehugger Robot             break;
288*16467b97STreehugger Robot 
289*16467b97STreehugger Robot         case    ANTLR3_ENC_8BIT:
290*16467b97STreehugger Robot         default:
291*16467b97STreehugger Robot 
292*16467b97STreehugger Robot             // Standard 8bit/ASCII
293*16467b97STreehugger Robot             //
294*16467b97STreehugger Robot             antlr38BitSetupStream(input);
295*16467b97STreehugger Robot             break;
296*16467b97STreehugger Robot     }
297*16467b97STreehugger Robot }
298*16467b97STreehugger Robot 
299*16467b97STreehugger Robot /** \brief Use the contents of an operating system file as the input
300*16467b97STreehugger Robot  *         for an input stream.
301*16467b97STreehugger Robot  *
302*16467b97STreehugger Robot  * \param fileName Name of operating system file to read.
303*16467b97STreehugger Robot  * \return
304*16467b97STreehugger Robot  *	- Pointer to new input stream context upon success
305*16467b97STreehugger Robot  *	- One of the ANTLR3_ERR_ defines on error.
306*16467b97STreehugger Robot  */
307*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM
antlr3CreateFileStream(pANTLR3_UINT8 fileName)308*16467b97STreehugger Robot antlr3CreateFileStream(pANTLR3_UINT8 fileName)
309*16467b97STreehugger Robot {
310*16467b97STreehugger Robot 	// Pointer to the input stream we are going to create
311*16467b97STreehugger Robot 	//
312*16467b97STreehugger Robot 	pANTLR3_INPUT_STREAM    input;
313*16467b97STreehugger Robot 	ANTLR3_UINT32	    status;
314*16467b97STreehugger Robot 
315*16467b97STreehugger Robot 	if	(fileName == NULL)
316*16467b97STreehugger Robot 	{
317*16467b97STreehugger Robot 		return NULL;
318*16467b97STreehugger Robot 	}
319*16467b97STreehugger Robot 
320*16467b97STreehugger Robot 	// Allocate memory for the input stream structure
321*16467b97STreehugger Robot 	//
322*16467b97STreehugger Robot 	input   = (pANTLR3_INPUT_STREAM)
323*16467b97STreehugger Robot 		ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM));
324*16467b97STreehugger Robot 
325*16467b97STreehugger Robot 	if	(input == NULL)
326*16467b97STreehugger Robot 	{
327*16467b97STreehugger Robot 		return	NULL;
328*16467b97STreehugger Robot 	}
329*16467b97STreehugger Robot 
330*16467b97STreehugger Robot 	// Structure was allocated correctly, now we can read the file.
331*16467b97STreehugger Robot 	//
332*16467b97STreehugger Robot 	status  = antlr3read8Bit(input, fileName);
333*16467b97STreehugger Robot 
334*16467b97STreehugger Robot 	// Call the common 8 bit input stream handler
335*16467b97STreehugger Robot 	// initialization.
336*16467b97STreehugger Robot 	//
337*16467b97STreehugger Robot 	antlr3GenericSetupStream(input);
338*16467b97STreehugger Robot 
339*16467b97STreehugger Robot         // However if the file was not there or something then we
340*16467b97STreehugger Robot         // need to close. Have to wait until here as we cannot call
341*16467b97STreehugger Robot         // close until the API is installed of course.
342*16467b97STreehugger Robot         //
343*16467b97STreehugger Robot 	if	(status != ANTLR3_SUCCESS)
344*16467b97STreehugger Robot 	{
345*16467b97STreehugger Robot 		input->close(input);
346*16467b97STreehugger Robot 		return	NULL;
347*16467b97STreehugger Robot 	}
348*16467b97STreehugger Robot 
349*16467b97STreehugger Robot 	return  input;
350*16467b97STreehugger Robot }
351*16467b97STreehugger Robot 
352*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3read8Bit(pANTLR3_INPUT_STREAM input,pANTLR3_UINT8 fileName)353*16467b97STreehugger Robot antlr3read8Bit(pANTLR3_INPUT_STREAM    input, pANTLR3_UINT8 fileName)
354*16467b97STreehugger Robot {
355*16467b97STreehugger Robot 	ANTLR3_FDSC	    infile;
356*16467b97STreehugger Robot 	ANTLR3_UINT32	    fSize;
357*16467b97STreehugger Robot 
358*16467b97STreehugger Robot 	/* Open the OS file in read binary mode
359*16467b97STreehugger Robot 	*/
360*16467b97STreehugger Robot 	infile  = antlr3Fopen(fileName, "rb");
361*16467b97STreehugger Robot 
362*16467b97STreehugger Robot 	/* Check that it was there
363*16467b97STreehugger Robot 	*/
364*16467b97STreehugger Robot 	if	(infile == NULL)
365*16467b97STreehugger Robot 	{
366*16467b97STreehugger Robot 		return	(ANTLR3_UINT32)ANTLR3_ERR_NOFILE;
367*16467b97STreehugger Robot 	}
368*16467b97STreehugger Robot 
369*16467b97STreehugger Robot 	/* It was there, so we can read the bytes now
370*16467b97STreehugger Robot 	*/
371*16467b97STreehugger Robot 	fSize   = antlr3Fsize(fileName);	/* Size of input file	*/
372*16467b97STreehugger Robot 
373*16467b97STreehugger Robot 	/* Allocate buffer for this input set
374*16467b97STreehugger Robot 	*/
375*16467b97STreehugger Robot 	input->data	    = ANTLR3_MALLOC((size_t)fSize);
376*16467b97STreehugger Robot 	input->sizeBuf  = fSize;
377*16467b97STreehugger Robot 
378*16467b97STreehugger Robot 	if	(input->data == NULL)
379*16467b97STreehugger Robot 	{
380*16467b97STreehugger Robot 		return	(ANTLR3_UINT32)ANTLR3_ERR_NOMEM;
381*16467b97STreehugger Robot 	}
382*16467b97STreehugger Robot 
383*16467b97STreehugger Robot 	input->isAllocated	= ANTLR3_TRUE;
384*16467b97STreehugger Robot 
385*16467b97STreehugger Robot 	/* Now we read the file. Characters are not converted to
386*16467b97STreehugger Robot 	* the internal ANTLR encoding until they are read from the buffer
387*16467b97STreehugger Robot 	*/
388*16467b97STreehugger Robot 	antlr3Fread(infile, fSize, input->data);
389*16467b97STreehugger Robot 
390*16467b97STreehugger Robot 	/* And close the file handle
391*16467b97STreehugger Robot 	*/
392*16467b97STreehugger Robot 	antlr3Fclose(infile);
393*16467b97STreehugger Robot 
394*16467b97STreehugger Robot 	return  ANTLR3_SUCCESS;
395*16467b97STreehugger Robot }
396*16467b97STreehugger Robot 
397*16467b97STreehugger Robot /** \brief Open an operating system file and return the descriptor
398*16467b97STreehugger Robot  * We just use the common open() and related functions here.
399*16467b97STreehugger Robot  * Later we might find better ways on systems
400*16467b97STreehugger Robot  * such as Windows and OpenVMS for instance. But the idea is to read the
401*16467b97STreehugger Robot  * while file at once anyway, so it may be irrelevant.
402*16467b97STreehugger Robot  */
403*16467b97STreehugger Robot ANTLR3_API ANTLR3_FDSC
antlr3Fopen(pANTLR3_UINT8 filename,const char * mode)404*16467b97STreehugger Robot antlr3Fopen(pANTLR3_UINT8 filename, const char * mode)
405*16467b97STreehugger Robot {
406*16467b97STreehugger Robot     return  (ANTLR3_FDSC)fopen((const char *)filename, mode);
407*16467b97STreehugger Robot }
408*16467b97STreehugger Robot 
409*16467b97STreehugger Robot /** \brief Close an operating system file and free any handles
410*16467b97STreehugger Robot  *  etc.
411*16467b97STreehugger Robot  */
412*16467b97STreehugger Robot ANTLR3_API void
antlr3Fclose(ANTLR3_FDSC fd)413*16467b97STreehugger Robot antlr3Fclose(ANTLR3_FDSC fd)
414*16467b97STreehugger Robot {
415*16467b97STreehugger Robot     fclose(fd);
416*16467b97STreehugger Robot }
417*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3Fsize(pANTLR3_UINT8 fileName)418*16467b97STreehugger Robot antlr3Fsize(pANTLR3_UINT8 fileName)
419*16467b97STreehugger Robot {
420*16467b97STreehugger Robot     struct _stat	statbuf;
421*16467b97STreehugger Robot 
422*16467b97STreehugger Robot     _stat((const char *)fileName, &statbuf);
423*16467b97STreehugger Robot 
424*16467b97STreehugger Robot     return (ANTLR3_UINT32)statbuf.st_size;
425*16467b97STreehugger Robot }
426*16467b97STreehugger Robot 
427*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3Fread(ANTLR3_FDSC fdsc,ANTLR3_UINT32 count,void * data)428*16467b97STreehugger Robot antlr3Fread(ANTLR3_FDSC fdsc, ANTLR3_UINT32 count,  void * data)
429*16467b97STreehugger Robot {
430*16467b97STreehugger Robot     return  (ANTLR3_UINT32)fread(data, (size_t)count, 1, fdsc);
431*16467b97STreehugger Robot }
432*16467b97STreehugger Robot 
433*16467b97STreehugger Robot 
434*16467b97STreehugger Robot /** \brief Use the supplied 'string' as input to the stream
435*16467b97STreehugger Robot  *
436*16467b97STreehugger Robot  * \param data Pointer to the input data
437*16467b97STreehugger Robot  * \return
438*16467b97STreehugger Robot  *	- Pointer to new input stream context upon success
439*16467b97STreehugger Robot  *	- NULL defines on error.
440*16467b97STreehugger Robot  */
441*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM
antlr3CreateStringStream(pANTLR3_UINT8 data)442*16467b97STreehugger Robot antlr3CreateStringStream(pANTLR3_UINT8 data)
443*16467b97STreehugger Robot {
444*16467b97STreehugger Robot 	// Pointer to the input stream we are going to create
445*16467b97STreehugger Robot 	//
446*16467b97STreehugger Robot 	pANTLR3_INPUT_STREAM    input;
447*16467b97STreehugger Robot 
448*16467b97STreehugger Robot 	if	(data == NULL)
449*16467b97STreehugger Robot 	{
450*16467b97STreehugger Robot 		return NULL;
451*16467b97STreehugger Robot 	}
452*16467b97STreehugger Robot 
453*16467b97STreehugger Robot 	// Allocate memory for the input stream structure
454*16467b97STreehugger Robot 	//
455*16467b97STreehugger Robot 	input   = (pANTLR3_INPUT_STREAM)
456*16467b97STreehugger Robot 		ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM));
457*16467b97STreehugger Robot 
458*16467b97STreehugger Robot 	if	(input == NULL)
459*16467b97STreehugger Robot 	{
460*16467b97STreehugger Robot 		return	NULL;
461*16467b97STreehugger Robot 	}
462*16467b97STreehugger Robot 
463*16467b97STreehugger Robot 	// Structure was allocated correctly, now we can install the pointer
464*16467b97STreehugger Robot 	//
465*16467b97STreehugger Robot         input->data             = data;
466*16467b97STreehugger Robot         input->isAllocated	= ANTLR3_FALSE;
467*16467b97STreehugger Robot 
468*16467b97STreehugger Robot 	// Call the common 8 bit input stream handler
469*16467b97STreehugger Robot 	// initialization.
470*16467b97STreehugger Robot 	//
471*16467b97STreehugger Robot 	antlr3GenericSetupStream(input);
472*16467b97STreehugger Robot 
473*16467b97STreehugger Robot         return  input;
474*16467b97STreehugger Robot }