1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot * \brief The ANTLR3 C filestream is used when the source character stream
3*16467b97STreehugger Robot * is a filesystem based input set and all the characters in the filestream
4*16467b97STreehugger Robot * can be loaded at once into memory and away the lexer goes.
5*16467b97STreehugger Robot *
6*16467b97STreehugger Robot * A number of initializers are provided in order that various character
7*16467b97STreehugger Robot * sets can be supported from input files. The ANTLR3 C runtime expects
8*16467b97STreehugger Robot * to deal with UTF32 characters only (the reasons for this are to
9*16467b97STreehugger Robot * do with the simplification of C code when using this form of Unicode
10*16467b97STreehugger Robot * encoding, though this is not a panacea. More information can be
11*16467b97STreehugger Robot * found on this by consulting:
12*16467b97STreehugger Robot * - http://www.unicode.org/versions/Unicode4.0.0/ch02.pdf#G11178
13*16467b97STreehugger Robot * Where a well grounded discussion of the encoding formats available
14*16467b97STreehugger Robot * may be found.
15*16467b97STreehugger Robot *
16*16467b97STreehugger Robot */
17*16467b97STreehugger Robot
18*16467b97STreehugger Robot // [The "BSD licence"]
19*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
20*16467b97STreehugger Robot // http://www.temporal-wave.com
21*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
22*16467b97STreehugger Robot //
23*16467b97STreehugger Robot // All rights reserved.
24*16467b97STreehugger Robot //
25*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
26*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
27*16467b97STreehugger Robot // are met:
28*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
29*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer.
30*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
31*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the
32*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution.
33*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
34*16467b97STreehugger Robot // derived from this software without specific prior written permission.
35*16467b97STreehugger Robot //
36*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
37*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
39*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
40*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
45*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46*16467b97STreehugger Robot
47*16467b97STreehugger Robot #include <antlr3.h>
48*16467b97STreehugger Robot
49*16467b97STreehugger Robot static void setupInputStream (pANTLR3_INPUT_STREAM input);
50*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM antlr3CreateFileStream (pANTLR3_UINT8 fileName);
51*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM antlr3CreateStringStream (pANTLR3_UINT8 data);
52*16467b97STreehugger Robot
53*16467b97STreehugger Robot ANTLR3_API pANTLR3_INPUT_STREAM
antlr3FileStreamNew(pANTLR3_UINT8 fileName,ANTLR3_UINT32 encoding)54*16467b97STreehugger Robot antlr3FileStreamNew(pANTLR3_UINT8 fileName, ANTLR3_UINT32 encoding)
55*16467b97STreehugger Robot {
56*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
57*16467b97STreehugger Robot
58*16467b97STreehugger Robot // First order of business is to read the file into some buffer space
59*16467b97STreehugger Robot // as just straight 8 bit bytes. Then we will work out the encoding and
60*16467b97STreehugger Robot // byte order and adjust the API functions that are installed for the
61*16467b97STreehugger Robot // default 8Bit stream accordingly.
62*16467b97STreehugger Robot //
63*16467b97STreehugger Robot input = antlr3CreateFileStream(fileName);
64*16467b97STreehugger Robot if (input == NULL)
65*16467b97STreehugger Robot {
66*16467b97STreehugger Robot return NULL;
67*16467b97STreehugger Robot }
68*16467b97STreehugger Robot
69*16467b97STreehugger Robot // We have the data in memory now so we can deal with it according to
70*16467b97STreehugger Robot // the encoding scheme we were given by the user.
71*16467b97STreehugger Robot //
72*16467b97STreehugger Robot input->encoding = encoding;
73*16467b97STreehugger Robot
74*16467b97STreehugger Robot // Now we need to work out the endian type and install any
75*16467b97STreehugger Robot // API functions that differ from 8Bit
76*16467b97STreehugger Robot //
77*16467b97STreehugger Robot setupInputStream(input);
78*16467b97STreehugger Robot
79*16467b97STreehugger Robot // Now we can set up the file name
80*16467b97STreehugger Robot //
81*16467b97STreehugger Robot input->istream->streamName = input->strFactory->newStr8(input->strFactory, fileName);
82*16467b97STreehugger Robot input->fileName = input->istream->streamName;
83*16467b97STreehugger Robot
84*16467b97STreehugger Robot return input;
85*16467b97STreehugger Robot }
86*16467b97STreehugger Robot
87*16467b97STreehugger Robot
88*16467b97STreehugger Robot ANTLR3_API pANTLR3_INPUT_STREAM
antlr3StringStreamNew(pANTLR3_UINT8 data,ANTLR3_UINT32 encoding,ANTLR3_UINT32 size,pANTLR3_UINT8 name)89*16467b97STreehugger Robot antlr3StringStreamNew(pANTLR3_UINT8 data, ANTLR3_UINT32 encoding, ANTLR3_UINT32 size, pANTLR3_UINT8 name)
90*16467b97STreehugger Robot {
91*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
92*16467b97STreehugger Robot
93*16467b97STreehugger Robot // First order of business is to set up the stream and install the data pointer.
94*16467b97STreehugger Robot // Then we will work out the encoding and byte order and adjust the API functions that are installed for the
95*16467b97STreehugger Robot // default 8Bit stream accordingly.
96*16467b97STreehugger Robot //
97*16467b97STreehugger Robot input = antlr3CreateStringStream(data);
98*16467b97STreehugger Robot if (input == NULL)
99*16467b97STreehugger Robot {
100*16467b97STreehugger Robot return NULL;
101*16467b97STreehugger Robot }
102*16467b97STreehugger Robot
103*16467b97STreehugger Robot // Size (in bytes) of the given 'string'
104*16467b97STreehugger Robot //
105*16467b97STreehugger Robot input->sizeBuf = size;
106*16467b97STreehugger Robot
107*16467b97STreehugger Robot // We have the data in memory now so we can deal with it according to
108*16467b97STreehugger Robot // the encoding scheme we were given by the user.
109*16467b97STreehugger Robot //
110*16467b97STreehugger Robot input->encoding = encoding;
111*16467b97STreehugger Robot
112*16467b97STreehugger Robot // Now we need to work out the endian type and install any
113*16467b97STreehugger Robot // API functions that differ from 8Bit
114*16467b97STreehugger Robot //
115*16467b97STreehugger Robot setupInputStream(input);
116*16467b97STreehugger Robot
117*16467b97STreehugger Robot // Now we can set up the file name
118*16467b97STreehugger Robot //
119*16467b97STreehugger Robot input->istream->streamName = input->strFactory->newStr8(input->strFactory, name);
120*16467b97STreehugger Robot input->fileName = input->istream->streamName;
121*16467b97STreehugger Robot
122*16467b97STreehugger Robot return input;
123*16467b97STreehugger Robot }
124*16467b97STreehugger Robot
125*16467b97STreehugger Robot
126*16467b97STreehugger Robot /// Determine endianess of the input stream and install the
127*16467b97STreehugger Robot /// API required for the encoding in that format.
128*16467b97STreehugger Robot ///
129*16467b97STreehugger Robot static void
setupInputStream(pANTLR3_INPUT_STREAM input)130*16467b97STreehugger Robot setupInputStream(pANTLR3_INPUT_STREAM input)
131*16467b97STreehugger Robot {
132*16467b97STreehugger Robot ANTLR3_BOOLEAN isBigEndian;
133*16467b97STreehugger Robot
134*16467b97STreehugger Robot // Used to determine the endianness of the machine we are currently
135*16467b97STreehugger Robot // running on.
136*16467b97STreehugger Robot //
137*16467b97STreehugger Robot ANTLR3_UINT16 bomTest = 0xFEFF;
138*16467b97STreehugger Robot
139*16467b97STreehugger Robot // What endianess is the machine we are running on? If the incoming
140*16467b97STreehugger Robot // encoding endianess is the same as this machine's natural byte order
141*16467b97STreehugger Robot // then we can use more efficient API calls.
142*16467b97STreehugger Robot //
143*16467b97STreehugger Robot if (*((pANTLR3_UINT8)(&bomTest)) == 0xFE)
144*16467b97STreehugger Robot {
145*16467b97STreehugger Robot isBigEndian = ANTLR3_TRUE;
146*16467b97STreehugger Robot }
147*16467b97STreehugger Robot else
148*16467b97STreehugger Robot {
149*16467b97STreehugger Robot isBigEndian = ANTLR3_FALSE;
150*16467b97STreehugger Robot }
151*16467b97STreehugger Robot
152*16467b97STreehugger Robot // What encoding did the user tell us {s}he thought it was? I am going
153*16467b97STreehugger Robot // to get sick of the questions on antlr-interest, I know I am.
154*16467b97STreehugger Robot //
155*16467b97STreehugger Robot switch (input->encoding)
156*16467b97STreehugger Robot {
157*16467b97STreehugger Robot case ANTLR3_ENC_UTF8:
158*16467b97STreehugger Robot
159*16467b97STreehugger Robot // See if there is a BOM at the start of this UTF-8 sequence
160*16467b97STreehugger Robot // and just eat it if there is. Windows .TXT files have this for instance
161*16467b97STreehugger Robot // as it identifies UTF-8 even though it is of no consequence for byte order
162*16467b97STreehugger Robot // as UTF-8 does not have a byte order.
163*16467b97STreehugger Robot //
164*16467b97STreehugger Robot if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xEF
165*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xBB
166*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xBF
167*16467b97STreehugger Robot )
168*16467b97STreehugger Robot {
169*16467b97STreehugger Robot // The UTF8 BOM is present so skip it
170*16467b97STreehugger Robot //
171*16467b97STreehugger Robot input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3);
172*16467b97STreehugger Robot }
173*16467b97STreehugger Robot
174*16467b97STreehugger Robot // Install the UTF8 input routines
175*16467b97STreehugger Robot //
176*16467b97STreehugger Robot antlr3UTF8SetupStream(input);
177*16467b97STreehugger Robot break;
178*16467b97STreehugger Robot
179*16467b97STreehugger Robot case ANTLR3_ENC_UTF16:
180*16467b97STreehugger Robot
181*16467b97STreehugger Robot // See if there is a BOM at the start of the input. If not then
182*16467b97STreehugger Robot // we assume that the byte order is the natural order of this
183*16467b97STreehugger Robot // machine (or it is really UCS2). If there is a BOM we determine if the encoding
184*16467b97STreehugger Robot // is the same as the natural order of this machine.
185*16467b97STreehugger Robot //
186*16467b97STreehugger Robot if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFE
187*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFF
188*16467b97STreehugger Robot )
189*16467b97STreehugger Robot {
190*16467b97STreehugger Robot // BOM Present, indicates Big Endian
191*16467b97STreehugger Robot //
192*16467b97STreehugger Robot input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
193*16467b97STreehugger Robot
194*16467b97STreehugger Robot antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
195*16467b97STreehugger Robot }
196*16467b97STreehugger Robot else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF
197*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE
198*16467b97STreehugger Robot )
199*16467b97STreehugger Robot {
200*16467b97STreehugger Robot // BOM present, indicates Little Endian
201*16467b97STreehugger Robot //
202*16467b97STreehugger Robot input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2);
203*16467b97STreehugger Robot
204*16467b97STreehugger Robot antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
205*16467b97STreehugger Robot }
206*16467b97STreehugger Robot else
207*16467b97STreehugger Robot {
208*16467b97STreehugger Robot // No BOM present, assume local computer byte order
209*16467b97STreehugger Robot //
210*16467b97STreehugger Robot antlr3UTF16SetupStream(input, isBigEndian, isBigEndian);
211*16467b97STreehugger Robot }
212*16467b97STreehugger Robot break;
213*16467b97STreehugger Robot
214*16467b97STreehugger Robot case ANTLR3_ENC_UTF32:
215*16467b97STreehugger Robot
216*16467b97STreehugger Robot // See if there is a BOM at the start of the input. If not then
217*16467b97STreehugger Robot // we assume that the byte order is the natural order of this
218*16467b97STreehugger Robot // machine. If there is we determine if the encoding
219*16467b97STreehugger Robot // is the same as the natural order of this machine.
220*16467b97STreehugger Robot //
221*16467b97STreehugger Robot if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0x00
222*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00
223*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xFE
224*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3)) == 0xFF
225*16467b97STreehugger Robot )
226*16467b97STreehugger Robot {
227*16467b97STreehugger Robot // BOM Present, indicates Big Endian
228*16467b97STreehugger Robot //
229*16467b97STreehugger Robot input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
230*16467b97STreehugger Robot
231*16467b97STreehugger Robot antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
232*16467b97STreehugger Robot }
233*16467b97STreehugger Robot else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF
234*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE
235*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00
236*16467b97STreehugger Robot && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00
237*16467b97STreehugger Robot )
238*16467b97STreehugger Robot {
239*16467b97STreehugger Robot // BOM present, indicates Little Endian
240*16467b97STreehugger Robot //
241*16467b97STreehugger Robot input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4);
242*16467b97STreehugger Robot
243*16467b97STreehugger Robot antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
244*16467b97STreehugger Robot }
245*16467b97STreehugger Robot else
246*16467b97STreehugger Robot {
247*16467b97STreehugger Robot // No BOM present, assume local computer byte order
248*16467b97STreehugger Robot //
249*16467b97STreehugger Robot antlr3UTF32SetupStream(input, isBigEndian, isBigEndian);
250*16467b97STreehugger Robot }
251*16467b97STreehugger Robot break;
252*16467b97STreehugger Robot
253*16467b97STreehugger Robot case ANTLR3_ENC_UTF16BE:
254*16467b97STreehugger Robot
255*16467b97STreehugger Robot // Encoding is definately Big Endian with no BOM
256*16467b97STreehugger Robot //
257*16467b97STreehugger Robot antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE);
258*16467b97STreehugger Robot break;
259*16467b97STreehugger Robot
260*16467b97STreehugger Robot case ANTLR3_ENC_UTF16LE:
261*16467b97STreehugger Robot
262*16467b97STreehugger Robot // Encoding is definately Little Endian with no BOM
263*16467b97STreehugger Robot //
264*16467b97STreehugger Robot antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE);
265*16467b97STreehugger Robot break;
266*16467b97STreehugger Robot
267*16467b97STreehugger Robot case ANTLR3_ENC_UTF32BE:
268*16467b97STreehugger Robot
269*16467b97STreehugger Robot // Encoding is definately Big Endian with no BOM
270*16467b97STreehugger Robot //
271*16467b97STreehugger Robot antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE);
272*16467b97STreehugger Robot break;
273*16467b97STreehugger Robot
274*16467b97STreehugger Robot case ANTLR3_ENC_UTF32LE:
275*16467b97STreehugger Robot
276*16467b97STreehugger Robot // Encoding is definately Little Endian with no BOM
277*16467b97STreehugger Robot //
278*16467b97STreehugger Robot antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE);
279*16467b97STreehugger Robot break;
280*16467b97STreehugger Robot
281*16467b97STreehugger Robot case ANTLR3_ENC_EBCDIC:
282*16467b97STreehugger Robot
283*16467b97STreehugger Robot // EBCDIC is basically the same as ASCII but with an on the
284*16467b97STreehugger Robot // fly translation to ASCII
285*16467b97STreehugger Robot //
286*16467b97STreehugger Robot antlr3EBCDICSetupStream(input);
287*16467b97STreehugger Robot break;
288*16467b97STreehugger Robot
289*16467b97STreehugger Robot case ANTLR3_ENC_8BIT:
290*16467b97STreehugger Robot default:
291*16467b97STreehugger Robot
292*16467b97STreehugger Robot // Standard 8bit/ASCII
293*16467b97STreehugger Robot //
294*16467b97STreehugger Robot antlr38BitSetupStream(input);
295*16467b97STreehugger Robot break;
296*16467b97STreehugger Robot }
297*16467b97STreehugger Robot }
298*16467b97STreehugger Robot
299*16467b97STreehugger Robot /** \brief Use the contents of an operating system file as the input
300*16467b97STreehugger Robot * for an input stream.
301*16467b97STreehugger Robot *
302*16467b97STreehugger Robot * \param fileName Name of operating system file to read.
303*16467b97STreehugger Robot * \return
304*16467b97STreehugger Robot * - Pointer to new input stream context upon success
305*16467b97STreehugger Robot * - One of the ANTLR3_ERR_ defines on error.
306*16467b97STreehugger Robot */
307*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM
antlr3CreateFileStream(pANTLR3_UINT8 fileName)308*16467b97STreehugger Robot antlr3CreateFileStream(pANTLR3_UINT8 fileName)
309*16467b97STreehugger Robot {
310*16467b97STreehugger Robot // Pointer to the input stream we are going to create
311*16467b97STreehugger Robot //
312*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
313*16467b97STreehugger Robot ANTLR3_UINT32 status;
314*16467b97STreehugger Robot
315*16467b97STreehugger Robot if (fileName == NULL)
316*16467b97STreehugger Robot {
317*16467b97STreehugger Robot return NULL;
318*16467b97STreehugger Robot }
319*16467b97STreehugger Robot
320*16467b97STreehugger Robot // Allocate memory for the input stream structure
321*16467b97STreehugger Robot //
322*16467b97STreehugger Robot input = (pANTLR3_INPUT_STREAM)
323*16467b97STreehugger Robot ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM));
324*16467b97STreehugger Robot
325*16467b97STreehugger Robot if (input == NULL)
326*16467b97STreehugger Robot {
327*16467b97STreehugger Robot return NULL;
328*16467b97STreehugger Robot }
329*16467b97STreehugger Robot
330*16467b97STreehugger Robot // Structure was allocated correctly, now we can read the file.
331*16467b97STreehugger Robot //
332*16467b97STreehugger Robot status = antlr3read8Bit(input, fileName);
333*16467b97STreehugger Robot
334*16467b97STreehugger Robot // Call the common 8 bit input stream handler
335*16467b97STreehugger Robot // initialization.
336*16467b97STreehugger Robot //
337*16467b97STreehugger Robot antlr3GenericSetupStream(input);
338*16467b97STreehugger Robot
339*16467b97STreehugger Robot // However if the file was not there or something then we
340*16467b97STreehugger Robot // need to close. Have to wait until here as we cannot call
341*16467b97STreehugger Robot // close until the API is installed of course.
342*16467b97STreehugger Robot //
343*16467b97STreehugger Robot if (status != ANTLR3_SUCCESS)
344*16467b97STreehugger Robot {
345*16467b97STreehugger Robot input->close(input);
346*16467b97STreehugger Robot return NULL;
347*16467b97STreehugger Robot }
348*16467b97STreehugger Robot
349*16467b97STreehugger Robot return input;
350*16467b97STreehugger Robot }
351*16467b97STreehugger Robot
352*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3read8Bit(pANTLR3_INPUT_STREAM input,pANTLR3_UINT8 fileName)353*16467b97STreehugger Robot antlr3read8Bit(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 fileName)
354*16467b97STreehugger Robot {
355*16467b97STreehugger Robot ANTLR3_FDSC infile;
356*16467b97STreehugger Robot ANTLR3_UINT32 fSize;
357*16467b97STreehugger Robot
358*16467b97STreehugger Robot /* Open the OS file in read binary mode
359*16467b97STreehugger Robot */
360*16467b97STreehugger Robot infile = antlr3Fopen(fileName, "rb");
361*16467b97STreehugger Robot
362*16467b97STreehugger Robot /* Check that it was there
363*16467b97STreehugger Robot */
364*16467b97STreehugger Robot if (infile == NULL)
365*16467b97STreehugger Robot {
366*16467b97STreehugger Robot return (ANTLR3_UINT32)ANTLR3_ERR_NOFILE;
367*16467b97STreehugger Robot }
368*16467b97STreehugger Robot
369*16467b97STreehugger Robot /* It was there, so we can read the bytes now
370*16467b97STreehugger Robot */
371*16467b97STreehugger Robot fSize = antlr3Fsize(fileName); /* Size of input file */
372*16467b97STreehugger Robot
373*16467b97STreehugger Robot /* Allocate buffer for this input set
374*16467b97STreehugger Robot */
375*16467b97STreehugger Robot input->data = ANTLR3_MALLOC((size_t)fSize);
376*16467b97STreehugger Robot input->sizeBuf = fSize;
377*16467b97STreehugger Robot
378*16467b97STreehugger Robot if (input->data == NULL)
379*16467b97STreehugger Robot {
380*16467b97STreehugger Robot return (ANTLR3_UINT32)ANTLR3_ERR_NOMEM;
381*16467b97STreehugger Robot }
382*16467b97STreehugger Robot
383*16467b97STreehugger Robot input->isAllocated = ANTLR3_TRUE;
384*16467b97STreehugger Robot
385*16467b97STreehugger Robot /* Now we read the file. Characters are not converted to
386*16467b97STreehugger Robot * the internal ANTLR encoding until they are read from the buffer
387*16467b97STreehugger Robot */
388*16467b97STreehugger Robot antlr3Fread(infile, fSize, input->data);
389*16467b97STreehugger Robot
390*16467b97STreehugger Robot /* And close the file handle
391*16467b97STreehugger Robot */
392*16467b97STreehugger Robot antlr3Fclose(infile);
393*16467b97STreehugger Robot
394*16467b97STreehugger Robot return ANTLR3_SUCCESS;
395*16467b97STreehugger Robot }
396*16467b97STreehugger Robot
397*16467b97STreehugger Robot /** \brief Open an operating system file and return the descriptor
398*16467b97STreehugger Robot * We just use the common open() and related functions here.
399*16467b97STreehugger Robot * Later we might find better ways on systems
400*16467b97STreehugger Robot * such as Windows and OpenVMS for instance. But the idea is to read the
401*16467b97STreehugger Robot * while file at once anyway, so it may be irrelevant.
402*16467b97STreehugger Robot */
403*16467b97STreehugger Robot ANTLR3_API ANTLR3_FDSC
antlr3Fopen(pANTLR3_UINT8 filename,const char * mode)404*16467b97STreehugger Robot antlr3Fopen(pANTLR3_UINT8 filename, const char * mode)
405*16467b97STreehugger Robot {
406*16467b97STreehugger Robot return (ANTLR3_FDSC)fopen((const char *)filename, mode);
407*16467b97STreehugger Robot }
408*16467b97STreehugger Robot
409*16467b97STreehugger Robot /** \brief Close an operating system file and free any handles
410*16467b97STreehugger Robot * etc.
411*16467b97STreehugger Robot */
412*16467b97STreehugger Robot ANTLR3_API void
antlr3Fclose(ANTLR3_FDSC fd)413*16467b97STreehugger Robot antlr3Fclose(ANTLR3_FDSC fd)
414*16467b97STreehugger Robot {
415*16467b97STreehugger Robot fclose(fd);
416*16467b97STreehugger Robot }
417*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3Fsize(pANTLR3_UINT8 fileName)418*16467b97STreehugger Robot antlr3Fsize(pANTLR3_UINT8 fileName)
419*16467b97STreehugger Robot {
420*16467b97STreehugger Robot struct _stat statbuf;
421*16467b97STreehugger Robot
422*16467b97STreehugger Robot _stat((const char *)fileName, &statbuf);
423*16467b97STreehugger Robot
424*16467b97STreehugger Robot return (ANTLR3_UINT32)statbuf.st_size;
425*16467b97STreehugger Robot }
426*16467b97STreehugger Robot
427*16467b97STreehugger Robot ANTLR3_API ANTLR3_UINT32
antlr3Fread(ANTLR3_FDSC fdsc,ANTLR3_UINT32 count,void * data)428*16467b97STreehugger Robot antlr3Fread(ANTLR3_FDSC fdsc, ANTLR3_UINT32 count, void * data)
429*16467b97STreehugger Robot {
430*16467b97STreehugger Robot return (ANTLR3_UINT32)fread(data, (size_t)count, 1, fdsc);
431*16467b97STreehugger Robot }
432*16467b97STreehugger Robot
433*16467b97STreehugger Robot
434*16467b97STreehugger Robot /** \brief Use the supplied 'string' as input to the stream
435*16467b97STreehugger Robot *
436*16467b97STreehugger Robot * \param data Pointer to the input data
437*16467b97STreehugger Robot * \return
438*16467b97STreehugger Robot * - Pointer to new input stream context upon success
439*16467b97STreehugger Robot * - NULL defines on error.
440*16467b97STreehugger Robot */
441*16467b97STreehugger Robot static pANTLR3_INPUT_STREAM
antlr3CreateStringStream(pANTLR3_UINT8 data)442*16467b97STreehugger Robot antlr3CreateStringStream(pANTLR3_UINT8 data)
443*16467b97STreehugger Robot {
444*16467b97STreehugger Robot // Pointer to the input stream we are going to create
445*16467b97STreehugger Robot //
446*16467b97STreehugger Robot pANTLR3_INPUT_STREAM input;
447*16467b97STreehugger Robot
448*16467b97STreehugger Robot if (data == NULL)
449*16467b97STreehugger Robot {
450*16467b97STreehugger Robot return NULL;
451*16467b97STreehugger Robot }
452*16467b97STreehugger Robot
453*16467b97STreehugger Robot // Allocate memory for the input stream structure
454*16467b97STreehugger Robot //
455*16467b97STreehugger Robot input = (pANTLR3_INPUT_STREAM)
456*16467b97STreehugger Robot ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM));
457*16467b97STreehugger Robot
458*16467b97STreehugger Robot if (input == NULL)
459*16467b97STreehugger Robot {
460*16467b97STreehugger Robot return NULL;
461*16467b97STreehugger Robot }
462*16467b97STreehugger Robot
463*16467b97STreehugger Robot // Structure was allocated correctly, now we can install the pointer
464*16467b97STreehugger Robot //
465*16467b97STreehugger Robot input->data = data;
466*16467b97STreehugger Robot input->isAllocated = ANTLR3_FALSE;
467*16467b97STreehugger Robot
468*16467b97STreehugger Robot // Call the common 8 bit input stream handler
469*16467b97STreehugger Robot // initialization.
470*16467b97STreehugger Robot //
471*16467b97STreehugger Robot antlr3GenericSetupStream(input);
472*16467b97STreehugger Robot
473*16467b97STreehugger Robot return input;
474*16467b97STreehugger Robot }