xref: /aosp_15_r20/external/antlr/runtime/C/src/antlr3string.c (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  * Implementation of the ANTLR3 string and string factory classes
3*16467b97STreehugger Robot  */
4*16467b97STreehugger Robot 
5*16467b97STreehugger Robot // [The "BSD licence"]
6*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7*16467b97STreehugger Robot // http://www.temporal-wave.com
8*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
9*16467b97STreehugger Robot //
10*16467b97STreehugger Robot // All rights reserved.
11*16467b97STreehugger Robot //
12*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
13*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
14*16467b97STreehugger Robot // are met:
15*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
16*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
17*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
18*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
19*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
20*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
21*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
22*16467b97STreehugger Robot //
23*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*16467b97STreehugger Robot 
34*16467b97STreehugger Robot #include    <antlr3string.h>
35*16467b97STreehugger Robot 
36*16467b97STreehugger Robot /* Factory API
37*16467b97STreehugger Robot  */
38*16467b97STreehugger Robot static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
39*16467b97STreehugger Robot static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
40*16467b97STreehugger Robot static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41*16467b97STreehugger Robot static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42*16467b97STreehugger Robot static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43*16467b97STreehugger Robot static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44*16467b97STreehugger Robot static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45*16467b97STreehugger Robot static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46*16467b97STreehugger Robot static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47*16467b97STreehugger Robot static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48*16467b97STreehugger Robot static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49*16467b97STreehugger Robot static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50*16467b97STreehugger Robot static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51*16467b97STreehugger Robot static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);
52*16467b97STreehugger Robot 
53*16467b97STreehugger Robot /* String API
54*16467b97STreehugger Robot  */
55*16467b97STreehugger Robot static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
56*16467b97STreehugger Robot static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
57*16467b97STreehugger Robot static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
58*16467b97STreehugger Robot static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
59*16467b97STreehugger Robot static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
60*16467b97STreehugger Robot static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
61*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64*16467b97STreehugger Robot 
65*16467b97STreehugger Robot static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
66*16467b97STreehugger Robot static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
67*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68*16467b97STreehugger Robot 
69*16467b97STreehugger Robot static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
70*16467b97STreehugger Robot static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
71*16467b97STreehugger Robot static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
72*16467b97STreehugger Robot static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
73*16467b97STreehugger Robot static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75*16467b97STreehugger Robot 
76*16467b97STreehugger Robot static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
77*16467b97STreehugger Robot static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
78*16467b97STreehugger Robot static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79*16467b97STreehugger Robot static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
80*16467b97STreehugger Robot static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
81*16467b97STreehugger Robot static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
82*16467b97STreehugger Robot static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83*16467b97STreehugger Robot static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84*16467b97STreehugger Robot static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
85*16467b97STreehugger Robot static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
86*16467b97STreehugger Robot static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
87*16467b97STreehugger Robot static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
88*16467b97STreehugger Robot static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
89*16467b97STreehugger Robot static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);
90*16467b97STreehugger Robot 
91*16467b97STreehugger Robot /* Local helpers
92*16467b97STreehugger Robot  */
93*16467b97STreehugger Robot static	void			stringInit8	(pANTLR3_STRING string);
94*16467b97STreehugger Robot static	void			stringInitUTF16	(pANTLR3_STRING string);
95*16467b97STreehugger Robot static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);
96*16467b97STreehugger Robot 
97*16467b97STreehugger Robot ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)98*16467b97STreehugger Robot antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99*16467b97STreehugger Robot {
100*16467b97STreehugger Robot 	pANTLR3_STRING_FACTORY  factory;
101*16467b97STreehugger Robot 
102*16467b97STreehugger Robot 	/* Allocate memory
103*16467b97STreehugger Robot 	*/
104*16467b97STreehugger Robot 	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105*16467b97STreehugger Robot 
106*16467b97STreehugger Robot 	if	(factory == NULL)
107*16467b97STreehugger Robot 	{
108*16467b97STreehugger Robot 		return	NULL;
109*16467b97STreehugger Robot 	}
110*16467b97STreehugger Robot 
111*16467b97STreehugger Robot 	/* Now we make a new list to track the strings.
112*16467b97STreehugger Robot 	*/
113*16467b97STreehugger Robot 	factory->strings	= antlr3VectorNew(0);
114*16467b97STreehugger Robot 	factory->index	= 0;
115*16467b97STreehugger Robot 
116*16467b97STreehugger Robot 	if	(factory->strings == NULL)
117*16467b97STreehugger Robot 	{
118*16467b97STreehugger Robot 		ANTLR3_FREE(factory);
119*16467b97STreehugger Robot 		return	NULL;
120*16467b97STreehugger Robot 	}
121*16467b97STreehugger Robot 
122*16467b97STreehugger Robot     // Install the API
123*16467b97STreehugger Robot     //
124*16467b97STreehugger Robot     // TODO: These encodings need equivalent functions to
125*16467b97STreehugger Robot     // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126*16467b97STreehugger Robot 	// The STRING stuff was intended as a quick and dirty hack for people that did not
127*16467b97STreehugger Robot 	// want to worry about memory and performance very much, but nobody ever reads the
128*16467b97STreehugger Robot 	// notes or comments or uses the email list search. I want to discourage using these
129*16467b97STreehugger Robot 	// interfaces as it is much more efficient to use the pointers within the tokens
130*16467b97STreehugger Robot 	// directly, so I am not implementing the string stuff for the newer encodings.
131*16467b97STreehugger Robot     // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132*16467b97STreehugger Robot 	// will not be useful beyond returning the text.
133*16467b97STreehugger Robot 	//
134*16467b97STreehugger Robot     switch(encoding)
135*16467b97STreehugger Robot     {
136*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF32:
137*16467b97STreehugger Robot 			break;
138*16467b97STreehugger Robot 
139*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF32BE:
140*16467b97STreehugger Robot 			break;
141*16467b97STreehugger Robot 
142*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF32LE:
143*16467b97STreehugger Robot 			break;
144*16467b97STreehugger Robot 
145*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF16BE:
146*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF16LE:
147*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF16:
148*16467b97STreehugger Robot 
149*16467b97STreehugger Robot 			factory->newRaw	    =  newRawUTF16;
150*16467b97STreehugger Robot 			factory->newSize	=  newSizeUTF16;
151*16467b97STreehugger Robot 			factory->newPtr	    =  newPtrUTF16_UTF16;
152*16467b97STreehugger Robot 			factory->newPtr8	=  newPtrUTF16_8;
153*16467b97STreehugger Robot 			factory->newStr	    =  newStrUTF16_UTF16;
154*16467b97STreehugger Robot 			factory->newStr8	=  newStrUTF16_8;
155*16467b97STreehugger Robot 			factory->printable	=  printableUTF16;
156*16467b97STreehugger Robot 			factory->destroy	=  destroy;
157*16467b97STreehugger Robot 			factory->close	    =  closeFactory;
158*16467b97STreehugger Robot 			break;
159*16467b97STreehugger Robot 
160*16467b97STreehugger Robot 		case    ANTLR3_ENC_UTF8:
161*16467b97STreehugger Robot 		case    ANTLR3_ENC_EBCDIC:
162*16467b97STreehugger Robot 		case    ANTLR3_ENC_8BIT:
163*16467b97STreehugger Robot 		default:
164*16467b97STreehugger Robot 
165*16467b97STreehugger Robot 			factory->newRaw	    =  newRaw8;
166*16467b97STreehugger Robot 			factory->newSize	=  newSize8;
167*16467b97STreehugger Robot 			factory->newPtr	    =  newPtr8;
168*16467b97STreehugger Robot 			factory->newPtr8	=  newPtr8;
169*16467b97STreehugger Robot 			factory->newStr	    =  newStr8;
170*16467b97STreehugger Robot 			factory->newStr8	=  newStr8;
171*16467b97STreehugger Robot 			factory->printable	=  printable8;
172*16467b97STreehugger Robot 			factory->destroy	=  destroy;
173*16467b97STreehugger Robot 			factory->close	    =  closeFactory;
174*16467b97STreehugger Robot 			break;
175*16467b97STreehugger Robot     }
176*16467b97STreehugger Robot 	return  factory;
177*16467b97STreehugger Robot }
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot 
180*16467b97STreehugger Robot /**
181*16467b97STreehugger Robot  *
182*16467b97STreehugger Robot  * \param factory
183*16467b97STreehugger Robot  * \return
184*16467b97STreehugger Robot  */
185*16467b97STreehugger Robot static    pANTLR3_STRING
newRaw8(pANTLR3_STRING_FACTORY factory)186*16467b97STreehugger Robot newRaw8	(pANTLR3_STRING_FACTORY factory)
187*16467b97STreehugger Robot {
188*16467b97STreehugger Robot     pANTLR3_STRING  string;
189*16467b97STreehugger Robot 
190*16467b97STreehugger Robot     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191*16467b97STreehugger Robot 
192*16467b97STreehugger Robot     if	(string == NULL)
193*16467b97STreehugger Robot     {
194*16467b97STreehugger Robot 		return	NULL;
195*16467b97STreehugger Robot     }
196*16467b97STreehugger Robot 
197*16467b97STreehugger Robot     /* Structure is allocated, now fill in the API etc.
198*16467b97STreehugger Robot      */
199*16467b97STreehugger Robot     stringInit8(string);
200*16467b97STreehugger Robot     string->factory = factory;
201*16467b97STreehugger Robot 
202*16467b97STreehugger Robot     /* Add the string into the allocated list
203*16467b97STreehugger Robot      */
204*16467b97STreehugger Robot     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205*16467b97STreehugger Robot     string->index   = factory->index++;
206*16467b97STreehugger Robot 
207*16467b97STreehugger Robot     return string;
208*16467b97STreehugger Robot }
209*16467b97STreehugger Robot /**
210*16467b97STreehugger Robot  *
211*16467b97STreehugger Robot  * \param factory
212*16467b97STreehugger Robot  * \return
213*16467b97STreehugger Robot  */
214*16467b97STreehugger Robot static    pANTLR3_STRING
newRawUTF16(pANTLR3_STRING_FACTORY factory)215*16467b97STreehugger Robot newRawUTF16	(pANTLR3_STRING_FACTORY factory)
216*16467b97STreehugger Robot {
217*16467b97STreehugger Robot     pANTLR3_STRING  string;
218*16467b97STreehugger Robot 
219*16467b97STreehugger Robot     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220*16467b97STreehugger Robot 
221*16467b97STreehugger Robot     if	(string == NULL)
222*16467b97STreehugger Robot     {
223*16467b97STreehugger Robot 		return	NULL;
224*16467b97STreehugger Robot     }
225*16467b97STreehugger Robot 
226*16467b97STreehugger Robot     /* Structure is allocated, now fill in the API etc.
227*16467b97STreehugger Robot      */
228*16467b97STreehugger Robot     stringInitUTF16(string);
229*16467b97STreehugger Robot     string->factory = factory;
230*16467b97STreehugger Robot 
231*16467b97STreehugger Robot     /* Add the string into the allocated list
232*16467b97STreehugger Robot      */
233*16467b97STreehugger Robot     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234*16467b97STreehugger Robot     string->index   = factory->index++;
235*16467b97STreehugger Robot 
236*16467b97STreehugger Robot     return string;
237*16467b97STreehugger Robot }
238*16467b97STreehugger Robot static
stringFree(pANTLR3_STRING string)239*16467b97STreehugger Robot void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
240*16467b97STreehugger Robot {
241*16467b97STreehugger Robot     /* First free the string itself if there was anything in it
242*16467b97STreehugger Robot      */
243*16467b97STreehugger Robot     if	(string->chars)
244*16467b97STreehugger Robot     {
245*16467b97STreehugger Robot 	ANTLR3_FREE(string->chars);
246*16467b97STreehugger Robot     }
247*16467b97STreehugger Robot 
248*16467b97STreehugger Robot     /* Now free the space for this string
249*16467b97STreehugger Robot      */
250*16467b97STreehugger Robot     ANTLR3_FREE(string);
251*16467b97STreehugger Robot 
252*16467b97STreehugger Robot     return;
253*16467b97STreehugger Robot }
254*16467b97STreehugger Robot /**
255*16467b97STreehugger Robot  *
256*16467b97STreehugger Robot  * \param string
257*16467b97STreehugger Robot  * \return
258*16467b97STreehugger Robot  */
259*16467b97STreehugger Robot static	void
stringInit8(pANTLR3_STRING string)260*16467b97STreehugger Robot stringInit8  (pANTLR3_STRING string)
261*16467b97STreehugger Robot {
262*16467b97STreehugger Robot     string->len			= 0;
263*16467b97STreehugger Robot     string->size		= 0;
264*16467b97STreehugger Robot     string->chars		= NULL;
265*16467b97STreehugger Robot     string->encoding	= ANTLR3_ENC_8BIT ;
266*16467b97STreehugger Robot 
267*16467b97STreehugger Robot     /* API for 8 bit strings*/
268*16467b97STreehugger Robot 
269*16467b97STreehugger Robot     string->set		= set8;
270*16467b97STreehugger Robot     string->set8	= set8;
271*16467b97STreehugger Robot     string->append	= append8;
272*16467b97STreehugger Robot     string->append8	= append8;
273*16467b97STreehugger Robot     string->insert	= insert8;
274*16467b97STreehugger Robot     string->insert8	= insert8;
275*16467b97STreehugger Robot     string->addi	= addi8;
276*16467b97STreehugger Robot     string->inserti	= inserti8;
277*16467b97STreehugger Robot     string->addc	= addc8;
278*16467b97STreehugger Robot     string->charAt	= charAt8;
279*16467b97STreehugger Robot     string->compare	= compare8;
280*16467b97STreehugger Robot     string->compare8	= compare8;
281*16467b97STreehugger Robot     string->subString	= subString8;
282*16467b97STreehugger Robot     string->toInt32	= toInt32_8;
283*16467b97STreehugger Robot     string->to8		= to8_8;
284*16467b97STreehugger Robot     string->toUTF8	= toUTF8_8;
285*16467b97STreehugger Robot     string->compareS	= compareS;
286*16467b97STreehugger Robot     string->setS	= setS;
287*16467b97STreehugger Robot     string->appendS	= appendS;
288*16467b97STreehugger Robot     string->insertS	= insertS;
289*16467b97STreehugger Robot 
290*16467b97STreehugger Robot }
291*16467b97STreehugger Robot /**
292*16467b97STreehugger Robot  *
293*16467b97STreehugger Robot  * \param string
294*16467b97STreehugger Robot  * \return
295*16467b97STreehugger Robot  */
296*16467b97STreehugger Robot static	void
stringInitUTF16(pANTLR3_STRING string)297*16467b97STreehugger Robot stringInitUTF16  (pANTLR3_STRING string)
298*16467b97STreehugger Robot {
299*16467b97STreehugger Robot     string->len		= 0;
300*16467b97STreehugger Robot     string->size	= 0;
301*16467b97STreehugger Robot     string->chars	= NULL;
302*16467b97STreehugger Robot     string->encoding	= ANTLR3_ENC_8BIT;
303*16467b97STreehugger Robot 
304*16467b97STreehugger Robot     /* API for UTF16 strings */
305*16467b97STreehugger Robot 
306*16467b97STreehugger Robot     string->set		= setUTF16_UTF16;
307*16467b97STreehugger Robot     string->set8	= setUTF16_8;
308*16467b97STreehugger Robot     string->append	= appendUTF16_UTF16;
309*16467b97STreehugger Robot     string->append8	= appendUTF16_8;
310*16467b97STreehugger Robot     string->insert	= insertUTF16_UTF16;
311*16467b97STreehugger Robot     string->insert8	= insertUTF16_8;
312*16467b97STreehugger Robot     string->addi	= addiUTF16;
313*16467b97STreehugger Robot     string->inserti	= insertiUTF16;
314*16467b97STreehugger Robot     string->addc	= addcUTF16;
315*16467b97STreehugger Robot     string->charAt	= charAtUTF16;
316*16467b97STreehugger Robot     string->compare	= compareUTF16_UTF16;
317*16467b97STreehugger Robot     string->compare8	= compareUTF16_8;
318*16467b97STreehugger Robot     string->subString	= subStringUTF16;
319*16467b97STreehugger Robot     string->toInt32	= toInt32_UTF16;
320*16467b97STreehugger Robot     string->to8		= to8_UTF16;
321*16467b97STreehugger Robot     string->toUTF8	= toUTF8_UTF16;
322*16467b97STreehugger Robot 
323*16467b97STreehugger Robot     string->compareS	= compareS;
324*16467b97STreehugger Robot     string->setS	= setS;
325*16467b97STreehugger Robot     string->appendS	= appendS;
326*16467b97STreehugger Robot     string->insertS	= insertS;
327*16467b97STreehugger Robot }
328*16467b97STreehugger Robot /**
329*16467b97STreehugger Robot  *
330*16467b97STreehugger Robot  * \param string
331*16467b97STreehugger Robot  * \return
332*16467b97STreehugger Robot  * TODO: Implement UTF-8
333*16467b97STreehugger Robot  */
334*16467b97STreehugger Robot static	void
stringInitUTF8(pANTLR3_STRING string)335*16467b97STreehugger Robot stringInitUTF8  (pANTLR3_STRING string)
336*16467b97STreehugger Robot {
337*16467b97STreehugger Robot     string->len	    = 0;
338*16467b97STreehugger Robot     string->size    = 0;
339*16467b97STreehugger Robot     string->chars   = NULL;
340*16467b97STreehugger Robot 
341*16467b97STreehugger Robot     /* API */
342*16467b97STreehugger Robot 
343*16467b97STreehugger Robot }
344*16467b97STreehugger Robot 
345*16467b97STreehugger Robot // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346*16467b97STreehugger Robot // a memcpy as we make no assumptions about the 8 bit encoding.
347*16467b97STreehugger Robot //
348*16467b97STreehugger Robot static	pANTLR3_STRING
toUTF8_8(pANTLR3_STRING string)349*16467b97STreehugger Robot toUTF8_8	(pANTLR3_STRING string)
350*16467b97STreehugger Robot {
351*16467b97STreehugger Robot 	return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352*16467b97STreehugger Robot }
353*16467b97STreehugger Robot 
354*16467b97STreehugger Robot // Convert a UTF16 string into a UTF8 representation using the Unicode.org
355*16467b97STreehugger Robot // supplied C algorithms, which are now contained within the ANTLR3 C runtime
356*16467b97STreehugger Robot // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357*16467b97STreehugger Robot // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358*16467b97STreehugger Robot //
359*16467b97STreehugger Robot static	pANTLR3_STRING
toUTF8_UTF16(pANTLR3_STRING string)360*16467b97STreehugger Robot toUTF8_UTF16	(pANTLR3_STRING string)
361*16467b97STreehugger Robot {
362*16467b97STreehugger Robot 
363*16467b97STreehugger Robot     UTF8	      * outputEnd;
364*16467b97STreehugger Robot     UTF16	      * inputEnd;
365*16467b97STreehugger Robot     pANTLR3_STRING	utf8String;
366*16467b97STreehugger Robot 
367*16467b97STreehugger Robot     ConversionResult	cResult;
368*16467b97STreehugger Robot 
369*16467b97STreehugger Robot     // Allocate the output buffer, which needs to accommodate potentially
370*16467b97STreehugger Robot     // 3X (in bytes) the input size (in chars).
371*16467b97STreehugger Robot     //
372*16467b97STreehugger Robot     utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373*16467b97STreehugger Robot 
374*16467b97STreehugger Robot     if	(utf8String != NULL)
375*16467b97STreehugger Robot     {
376*16467b97STreehugger Robot         // Free existing allocation
377*16467b97STreehugger Robot         //
378*16467b97STreehugger Robot         ANTLR3_FREE(utf8String->chars);
379*16467b97STreehugger Robot 
380*16467b97STreehugger Robot         // Reallocate according to maximum expected size
381*16467b97STreehugger Robot         //
382*16467b97STreehugger Robot         utf8String->size	= string->len *3;
383*16467b97STreehugger Robot         utf8String->chars	= (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384*16467b97STreehugger Robot 
385*16467b97STreehugger Robot         if	(utf8String->chars != NULL)
386*16467b97STreehugger Robot         {
387*16467b97STreehugger Robot             inputEnd  = (UTF16 *)	(string->chars);
388*16467b97STreehugger Robot             outputEnd = (UTF8 *)	(utf8String->chars);
389*16467b97STreehugger Robot 
390*16467b97STreehugger Robot             // Call the Unicode converter
391*16467b97STreehugger Robot             //
392*16467b97STreehugger Robot             cResult =  ConvertUTF16toUTF8
393*16467b97STreehugger Robot                 (
394*16467b97STreehugger Robot                 (const UTF16**)&inputEnd,
395*16467b97STreehugger Robot                 ((const UTF16 *)(string->chars)) + string->len,
396*16467b97STreehugger Robot                 &outputEnd,
397*16467b97STreehugger Robot                 outputEnd + utf8String->size - 1,
398*16467b97STreehugger Robot                 lenientConversion
399*16467b97STreehugger Robot                 );
400*16467b97STreehugger Robot 
401*16467b97STreehugger Robot             // We don't really care if things failed or not here, we just converted
402*16467b97STreehugger Robot             // everything that was vaguely possible and stopped when it wasn't. It is
403*16467b97STreehugger Robot             // up to the grammar programmer to verify that the input is sensible.
404*16467b97STreehugger Robot             //
405*16467b97STreehugger Robot             utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406*16467b97STreehugger Robot 
407*16467b97STreehugger Robot             *(outputEnd+1) = '\0';		// Always null terminate
408*16467b97STreehugger Robot         }
409*16467b97STreehugger Robot     }
410*16467b97STreehugger Robot     return utf8String;
411*16467b97STreehugger Robot }
412*16467b97STreehugger Robot 
413*16467b97STreehugger Robot /**
414*16467b97STreehugger Robot  * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415*16467b97STreehugger Robot  *
416*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns strings
417*16467b97STreehugger Robot  * \param[in] size - In characters
418*16467b97STreehugger Robot  * \return pointer to the new string.
419*16467b97STreehugger Robot  */
420*16467b97STreehugger Robot static    pANTLR3_STRING
newSize8(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)421*16467b97STreehugger Robot newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422*16467b97STreehugger Robot {
423*16467b97STreehugger Robot     pANTLR3_STRING  string;
424*16467b97STreehugger Robot 
425*16467b97STreehugger Robot     string  = factory->newRaw(factory);
426*16467b97STreehugger Robot 
427*16467b97STreehugger Robot     if	(string == NULL)
428*16467b97STreehugger Robot     {
429*16467b97STreehugger Robot         return	string;
430*16467b97STreehugger Robot     }
431*16467b97STreehugger Robot 
432*16467b97STreehugger Robot     /* Always add one more byte for a terminator ;-)
433*16467b97STreehugger Robot     */
434*16467b97STreehugger Robot     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435*16467b97STreehugger Robot 	if (string->chars != NULL)
436*16467b97STreehugger Robot     {
437*16467b97STreehugger Robot 		*(string->chars)	= '\0';
438*16467b97STreehugger Robot 		string->size	= size + 1;
439*16467b97STreehugger Robot 	}
440*16467b97STreehugger Robot 
441*16467b97STreehugger Robot     return string;
442*16467b97STreehugger Robot }
443*16467b97STreehugger Robot /**
444*16467b97STreehugger Robot  * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
445*16467b97STreehugger Robot  *
446*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns strings
447*16467b97STreehugger Robot  * \param[in] size - In characters (count double for surrogate pairs!!!)
448*16467b97STreehugger Robot  * \return pointer to the new string.
449*16467b97STreehugger Robot  */
450*16467b97STreehugger Robot static    pANTLR3_STRING
newSizeUTF16(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)451*16467b97STreehugger Robot newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
452*16467b97STreehugger Robot {
453*16467b97STreehugger Robot     pANTLR3_STRING  string;
454*16467b97STreehugger Robot 
455*16467b97STreehugger Robot     string  = factory->newRaw(factory);
456*16467b97STreehugger Robot 
457*16467b97STreehugger Robot     if	(string == NULL)
458*16467b97STreehugger Robot     {
459*16467b97STreehugger Robot         return	string;
460*16467b97STreehugger Robot     }
461*16467b97STreehugger Robot 
462*16467b97STreehugger Robot     /* Always add one more byte for a terminator ;-)
463*16467b97STreehugger Robot     */
464*16467b97STreehugger Robot     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
465*16467b97STreehugger Robot     if (string->chars != NULL)
466*16467b97STreehugger Robot 	{
467*16467b97STreehugger Robot 		*(string->chars)	= '\0';
468*16467b97STreehugger Robot 		string->size	= size+1;	/* Size is always in characters, as is len */
469*16467b97STreehugger Robot 	}
470*16467b97STreehugger Robot 
471*16467b97STreehugger Robot     return string;
472*16467b97STreehugger Robot }
473*16467b97STreehugger Robot 
474*16467b97STreehugger Robot /** Creates a new 8 bit string initialized with the 8 bit characters at the
475*16467b97STreehugger Robot  *  supplied ptr, of pre-determined size.
476*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns the strings
477*16467b97STreehugger Robot  * \param[in] ptr - Pointer to 8 bit encoded characters
478*16467b97STreehugger Robot  * \return pointer to the new string
479*16467b97STreehugger Robot  */
480*16467b97STreehugger Robot static    pANTLR3_STRING
newPtr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)481*16467b97STreehugger Robot newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
482*16467b97STreehugger Robot {
483*16467b97STreehugger Robot 	pANTLR3_STRING  string;
484*16467b97STreehugger Robot 
485*16467b97STreehugger Robot 	string  = factory->newSize(factory, size);
486*16467b97STreehugger Robot 
487*16467b97STreehugger Robot 	if	(string == NULL)
488*16467b97STreehugger Robot 	{
489*16467b97STreehugger Robot 		return	NULL;
490*16467b97STreehugger Robot 	}
491*16467b97STreehugger Robot 
492*16467b97STreehugger Robot 	if	(size <= 0)
493*16467b97STreehugger Robot 	{
494*16467b97STreehugger Robot 		return	string;
495*16467b97STreehugger Robot 	}
496*16467b97STreehugger Robot 
497*16467b97STreehugger Robot 	if	(ptr != NULL)
498*16467b97STreehugger Robot 	{
499*16467b97STreehugger Robot 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
500*16467b97STreehugger Robot 		*(string->chars + size) = '\0';	    /* Terminate, these strings are usually used for Token streams and printing etc.	*/
501*16467b97STreehugger Robot 		string->len = size;
502*16467b97STreehugger Robot 	}
503*16467b97STreehugger Robot 
504*16467b97STreehugger Robot 	return  string;
505*16467b97STreehugger Robot }
506*16467b97STreehugger Robot 
507*16467b97STreehugger Robot /** Creates a new UTF16 string initialized with the 8 bit characters at the
508*16467b97STreehugger Robot  *  supplied 8 bit character ptr, of pre-determined size.
509*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns the strings
510*16467b97STreehugger Robot  * \param[in] ptr - Pointer to 8 bit encoded characters
511*16467b97STreehugger Robot  * \return pointer to the new string
512*16467b97STreehugger Robot  */
513*16467b97STreehugger Robot static    pANTLR3_STRING
newPtrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)514*16467b97STreehugger Robot newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
515*16467b97STreehugger Robot {
516*16467b97STreehugger Robot 	pANTLR3_STRING  string;
517*16467b97STreehugger Robot 
518*16467b97STreehugger Robot 	/* newSize accepts size in characters, not bytes
519*16467b97STreehugger Robot 	*/
520*16467b97STreehugger Robot 	string  = factory->newSize(factory, size);
521*16467b97STreehugger Robot 
522*16467b97STreehugger Robot 	if	(string == NULL)
523*16467b97STreehugger Robot 	{
524*16467b97STreehugger Robot 		return	NULL;
525*16467b97STreehugger Robot 	}
526*16467b97STreehugger Robot 
527*16467b97STreehugger Robot 	if	(size <= 0)
528*16467b97STreehugger Robot 	{
529*16467b97STreehugger Robot 		return	string;
530*16467b97STreehugger Robot 	}
531*16467b97STreehugger Robot 
532*16467b97STreehugger Robot 	if	(ptr != NULL)
533*16467b97STreehugger Robot 	{
534*16467b97STreehugger Robot 		pANTLR3_UINT16	out;
535*16467b97STreehugger Robot 		ANTLR3_INT32    inSize;
536*16467b97STreehugger Robot 
537*16467b97STreehugger Robot 		out = (pANTLR3_UINT16)(string->chars);
538*16467b97STreehugger Robot 		inSize	= size;
539*16467b97STreehugger Robot 
540*16467b97STreehugger Robot 		while	(inSize-- > 0)
541*16467b97STreehugger Robot 		{
542*16467b97STreehugger Robot 			*out++ = (ANTLR3_UINT16)(*ptr++);
543*16467b97STreehugger Robot 		}
544*16467b97STreehugger Robot 
545*16467b97STreehugger Robot 		/* Terminate, these strings are usually used for Token streams and printing etc.
546*16467b97STreehugger Robot 		*/
547*16467b97STreehugger Robot 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
548*16467b97STreehugger Robot 
549*16467b97STreehugger Robot 		string->len = size;
550*16467b97STreehugger Robot 	}
551*16467b97STreehugger Robot 
552*16467b97STreehugger Robot 	return  string;
553*16467b97STreehugger Robot }
554*16467b97STreehugger Robot 
555*16467b97STreehugger Robot /** Creates a new UTF16 string initialized with the UTF16 characters at the
556*16467b97STreehugger Robot  *  supplied ptr, of pre-determined size.
557*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns the strings
558*16467b97STreehugger Robot  * \param[in] ptr - Pointer to UTF16 encoded characters
559*16467b97STreehugger Robot  * \return pointer to the new string
560*16467b97STreehugger Robot  */
561*16467b97STreehugger Robot static    pANTLR3_STRING
newPtrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)562*16467b97STreehugger Robot newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
563*16467b97STreehugger Robot {
564*16467b97STreehugger Robot 	pANTLR3_STRING  string;
565*16467b97STreehugger Robot 
566*16467b97STreehugger Robot 	string  = factory->newSize(factory, size);
567*16467b97STreehugger Robot 
568*16467b97STreehugger Robot 	if	(string == NULL)
569*16467b97STreehugger Robot 	{
570*16467b97STreehugger Robot 		return	NULL;
571*16467b97STreehugger Robot 	}
572*16467b97STreehugger Robot 
573*16467b97STreehugger Robot 	if	(size <= 0)
574*16467b97STreehugger Robot 	{
575*16467b97STreehugger Robot 		return	string;
576*16467b97STreehugger Robot 	}
577*16467b97STreehugger Robot 
578*16467b97STreehugger Robot 	if	(ptr != NULL)
579*16467b97STreehugger Robot 	{
580*16467b97STreehugger Robot 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
581*16467b97STreehugger Robot 
582*16467b97STreehugger Robot 		/* Terminate, these strings are usually used for Token streams and printing etc.
583*16467b97STreehugger Robot 		*/
584*16467b97STreehugger Robot 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
585*16467b97STreehugger Robot 		string->len = size;
586*16467b97STreehugger Robot 	}
587*16467b97STreehugger Robot 
588*16467b97STreehugger Robot 	return  string;
589*16467b97STreehugger Robot }
590*16467b97STreehugger Robot 
591*16467b97STreehugger Robot /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
592*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns strings.
593*16467b97STreehugger Robot  * \param[in] ptr - Pointer to the 8 bit encoded string
594*16467b97STreehugger Robot  * \return Pointer to the newly initialized string
595*16467b97STreehugger Robot  */
596*16467b97STreehugger Robot static    pANTLR3_STRING
newStr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)597*16467b97STreehugger Robot newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
598*16467b97STreehugger Robot {
599*16467b97STreehugger Robot     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
600*16467b97STreehugger Robot }
601*16467b97STreehugger Robot 
602*16467b97STreehugger Robot /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
603*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns strings.
604*16467b97STreehugger Robot  * \param[in] ptr - Pointer to the 8 bit encoded string
605*16467b97STreehugger Robot  * \return POinter to the newly initialized string
606*16467b97STreehugger Robot  */
607*16467b97STreehugger Robot static    pANTLR3_STRING
newStrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)608*16467b97STreehugger Robot newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
609*16467b97STreehugger Robot {
610*16467b97STreehugger Robot     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
611*16467b97STreehugger Robot }
612*16467b97STreehugger Robot 
613*16467b97STreehugger Robot /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
614*16467b97STreehugger Robot  * \param[in] factory - Pointer to the string factory that owns strings.
615*16467b97STreehugger Robot  * \param[in] ptr - Pointer to the UTF16 encoded string
616*16467b97STreehugger Robot  * \return Pointer to the newly initialized string
617*16467b97STreehugger Robot  */
618*16467b97STreehugger Robot static    pANTLR3_STRING
newStrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)619*16467b97STreehugger Robot newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
620*16467b97STreehugger Robot {
621*16467b97STreehugger Robot     pANTLR3_UINT16  in;
622*16467b97STreehugger Robot     ANTLR3_UINT32   count;
623*16467b97STreehugger Robot 
624*16467b97STreehugger Robot     /** First, determine the length of the input string
625*16467b97STreehugger Robot      */
626*16467b97STreehugger Robot     in	    = (pANTLR3_UINT16)ptr;
627*16467b97STreehugger Robot     count   = 0;
628*16467b97STreehugger Robot 
629*16467b97STreehugger Robot     while   (*in++ != '\0')
630*16467b97STreehugger Robot     {
631*16467b97STreehugger Robot 		count++;
632*16467b97STreehugger Robot     }
633*16467b97STreehugger Robot     return factory->newPtr(factory, ptr, count);
634*16467b97STreehugger Robot }
635*16467b97STreehugger Robot 
636*16467b97STreehugger Robot static    void
destroy(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING string)637*16467b97STreehugger Robot destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
638*16467b97STreehugger Robot {
639*16467b97STreehugger Robot     // Record which string we are deleting
640*16467b97STreehugger Robot     //
641*16467b97STreehugger Robot     ANTLR3_UINT32 strIndex = string->index;
642*16467b97STreehugger Robot 
643*16467b97STreehugger Robot     // Ensure that the string was not factory made, or we would try
644*16467b97STreehugger Robot     // to delete memory that wasn't allocated outside the factory
645*16467b97STreehugger Robot     // block.
646*16467b97STreehugger Robot     // Remove the specific indexed string from the vector
647*16467b97STreehugger Robot     //
648*16467b97STreehugger Robot     factory->strings->del(factory->strings, strIndex);
649*16467b97STreehugger Robot 
650*16467b97STreehugger Robot     // One less string in the vector, so decrement the factory index
651*16467b97STreehugger Robot     // so that the next string allocated is indexed correctly with
652*16467b97STreehugger Robot     // respect to the vector.
653*16467b97STreehugger Robot     //
654*16467b97STreehugger Robot     factory->index--;
655*16467b97STreehugger Robot 
656*16467b97STreehugger Robot     // Now we have to reindex the strings in the vector that followed
657*16467b97STreehugger Robot     // the one we just deleted. We only do this if the one we just deleted
658*16467b97STreehugger Robot     // was not the last one.
659*16467b97STreehugger Robot     //
660*16467b97STreehugger Robot     if  (strIndex< factory->index)
661*16467b97STreehugger Robot     {
662*16467b97STreehugger Robot         // We must reindex the strings after the one we just deleted.
663*16467b97STreehugger Robot         // The one that follows the one we just deleted is also out
664*16467b97STreehugger Robot         // of whack, so we start there.
665*16467b97STreehugger Robot         //
666*16467b97STreehugger Robot         ANTLR3_UINT32 i;
667*16467b97STreehugger Robot 
668*16467b97STreehugger Robot         for (i = strIndex; i < factory->index; i++)
669*16467b97STreehugger Robot         {
670*16467b97STreehugger Robot             // Renumber the entry
671*16467b97STreehugger Robot             //
672*16467b97STreehugger Robot             ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
673*16467b97STreehugger Robot         }
674*16467b97STreehugger Robot     }
675*16467b97STreehugger Robot 
676*16467b97STreehugger Robot     // The string has been destroyed and the elements of the factory are reindexed.
677*16467b97STreehugger Robot     //
678*16467b97STreehugger Robot 
679*16467b97STreehugger Robot }
680*16467b97STreehugger Robot 
681*16467b97STreehugger Robot static    pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)682*16467b97STreehugger Robot printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
683*16467b97STreehugger Robot {
684*16467b97STreehugger Robot     pANTLR3_STRING  string;
685*16467b97STreehugger Robot 
686*16467b97STreehugger Robot     /* We don't need to be too efficient here, this is mostly for error messages and so on.
687*16467b97STreehugger Robot      */
688*16467b97STreehugger Robot     pANTLR3_UINT8   scannedText;
689*16467b97STreehugger Robot     ANTLR3_UINT32   i;
690*16467b97STreehugger Robot 
691*16467b97STreehugger Robot     /* Assume we need as much as twice as much space to parse out the control characters
692*16467b97STreehugger Robot      */
693*16467b97STreehugger Robot     string  = factory->newSize(factory, instr->len *2 + 1);
694*16467b97STreehugger Robot 
695*16467b97STreehugger Robot     /* Scan through and replace unprintable (in terms of this routine)
696*16467b97STreehugger Robot      * characters
697*16467b97STreehugger Robot      */
698*16467b97STreehugger Robot     scannedText = string->chars;
699*16467b97STreehugger Robot 
700*16467b97STreehugger Robot     for	(i = 0; i < instr->len; i++)
701*16467b97STreehugger Robot     {
702*16467b97STreehugger Robot 		if (*(instr->chars + i) == '\n')
703*16467b97STreehugger Robot 		{
704*16467b97STreehugger Robot 			*scannedText++ = '\\';
705*16467b97STreehugger Robot 			*scannedText++ = 'n';
706*16467b97STreehugger Robot 		}
707*16467b97STreehugger Robot 		else if (*(instr->chars + i) == '\r')
708*16467b97STreehugger Robot 		{
709*16467b97STreehugger Robot 			*scannedText++ = '\\';
710*16467b97STreehugger Robot 			*scannedText++ = 'r';
711*16467b97STreehugger Robot 		}
712*16467b97STreehugger Robot 		else if	(!isprint(*(instr->chars +i)))
713*16467b97STreehugger Robot 		{
714*16467b97STreehugger Robot 			*scannedText++ = '?';
715*16467b97STreehugger Robot 		}
716*16467b97STreehugger Robot 		else
717*16467b97STreehugger Robot 		{
718*16467b97STreehugger Robot 			*scannedText++ = *(instr->chars + i);
719*16467b97STreehugger Robot 		}
720*16467b97STreehugger Robot     }
721*16467b97STreehugger Robot     *scannedText  = '\0';
722*16467b97STreehugger Robot 
723*16467b97STreehugger Robot     string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
724*16467b97STreehugger Robot 
725*16467b97STreehugger Robot     return  string;
726*16467b97STreehugger Robot }
727*16467b97STreehugger Robot 
728*16467b97STreehugger Robot static    pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)729*16467b97STreehugger Robot printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
730*16467b97STreehugger Robot {
731*16467b97STreehugger Robot     pANTLR3_STRING  string;
732*16467b97STreehugger Robot 
733*16467b97STreehugger Robot     /* We don't need to be too efficient here, this is mostly for error messages and so on.
734*16467b97STreehugger Robot      */
735*16467b97STreehugger Robot     pANTLR3_UINT16  scannedText;
736*16467b97STreehugger Robot     pANTLR3_UINT16  inText;
737*16467b97STreehugger Robot     ANTLR3_UINT32   i;
738*16467b97STreehugger Robot     ANTLR3_UINT32   outLen;
739*16467b97STreehugger Robot 
740*16467b97STreehugger Robot     /* Assume we need as much as twice as much space to parse out the control characters
741*16467b97STreehugger Robot      */
742*16467b97STreehugger Robot     string  = factory->newSize(factory, instr->len *2 + 1);
743*16467b97STreehugger Robot 
744*16467b97STreehugger Robot     /* Scan through and replace unprintable (in terms of this routine)
745*16467b97STreehugger Robot      * characters
746*16467b97STreehugger Robot      */
747*16467b97STreehugger Robot     scannedText = (pANTLR3_UINT16)(string->chars);
748*16467b97STreehugger Robot     inText	= (pANTLR3_UINT16)(instr->chars);
749*16467b97STreehugger Robot     outLen	= 0;
750*16467b97STreehugger Robot 
751*16467b97STreehugger Robot     for	(i = 0; i < instr->len; i++)
752*16467b97STreehugger Robot     {
753*16467b97STreehugger Robot 		if (*(inText + i) == '\n')
754*16467b97STreehugger Robot 		{
755*16467b97STreehugger Robot 			*scannedText++   = '\\';
756*16467b97STreehugger Robot 			*scannedText++   = 'n';
757*16467b97STreehugger Robot 			outLen	    += 2;
758*16467b97STreehugger Robot 		}
759*16467b97STreehugger Robot 		else if (*(inText + i) == '\r')
760*16467b97STreehugger Robot 		{
761*16467b97STreehugger Robot 			*scannedText++   = '\\';
762*16467b97STreehugger Robot 			*scannedText++   = 'r';
763*16467b97STreehugger Robot 			outLen	    += 2;
764*16467b97STreehugger Robot 		}
765*16467b97STreehugger Robot 		else if	(!isprint(*(inText +i)))
766*16467b97STreehugger Robot 		{
767*16467b97STreehugger Robot 			*scannedText++ = '?';
768*16467b97STreehugger Robot 			outLen++;
769*16467b97STreehugger Robot 		}
770*16467b97STreehugger Robot 		else
771*16467b97STreehugger Robot 		{
772*16467b97STreehugger Robot 			*scannedText++ = *(inText + i);
773*16467b97STreehugger Robot 			outLen++;
774*16467b97STreehugger Robot 		}
775*16467b97STreehugger Robot     }
776*16467b97STreehugger Robot     *scannedText  = '\0';
777*16467b97STreehugger Robot 
778*16467b97STreehugger Robot     string->len	= outLen;
779*16467b97STreehugger Robot 
780*16467b97STreehugger Robot     return  string;
781*16467b97STreehugger Robot }
782*16467b97STreehugger Robot 
783*16467b97STreehugger Robot /** Fascist Capitalist Pig function created
784*16467b97STreehugger Robot  *  to oppress the workers comrade.
785*16467b97STreehugger Robot  */
786*16467b97STreehugger Robot static    void
closeFactory(pANTLR3_STRING_FACTORY factory)787*16467b97STreehugger Robot closeFactory	(pANTLR3_STRING_FACTORY factory)
788*16467b97STreehugger Robot {
789*16467b97STreehugger Robot     /* Delete the vector we were tracking the strings with, this will
790*16467b97STreehugger Robot      * causes all the allocated strings to be deallocated too
791*16467b97STreehugger Robot      */
792*16467b97STreehugger Robot     factory->strings->free(factory->strings);
793*16467b97STreehugger Robot 
794*16467b97STreehugger Robot     /* Delete the space for the factory itself
795*16467b97STreehugger Robot      */
796*16467b97STreehugger Robot     ANTLR3_FREE((void *)factory);
797*16467b97STreehugger Robot }
798*16467b97STreehugger Robot 
799*16467b97STreehugger Robot static    pANTLR3_UINT8
append8(pANTLR3_STRING string,const char * newbit)800*16467b97STreehugger Robot append8	(pANTLR3_STRING string, const char * newbit)
801*16467b97STreehugger Robot {
802*16467b97STreehugger Robot     ANTLR3_UINT32 len;
803*16467b97STreehugger Robot 
804*16467b97STreehugger Robot     len	= (ANTLR3_UINT32)strlen(newbit);
805*16467b97STreehugger Robot 
806*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
807*16467b97STreehugger Robot     {
808*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
809*16467b97STreehugger Robot 		if (newAlloc == NULL)
810*16467b97STreehugger Robot 		{
811*16467b97STreehugger Robot 			return NULL;
812*16467b97STreehugger Robot 		}
813*16467b97STreehugger Robot 		string->chars	= newAlloc;
814*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
815*16467b97STreehugger Robot     }
816*16467b97STreehugger Robot 
817*16467b97STreehugger Robot     /* Note we copy one more byte than the strlen in order to get the trailing
818*16467b97STreehugger Robot      */
819*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
820*16467b97STreehugger Robot     string->len	+= len;
821*16467b97STreehugger Robot 
822*16467b97STreehugger Robot     return string->chars;
823*16467b97STreehugger Robot }
824*16467b97STreehugger Robot 
825*16467b97STreehugger Robot static    pANTLR3_UINT8
appendUTF16_8(pANTLR3_STRING string,const char * newbit)826*16467b97STreehugger Robot appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
827*16467b97STreehugger Robot {
828*16467b97STreehugger Robot     ANTLR3_UINT32   len;
829*16467b97STreehugger Robot     pANTLR3_UINT16  apPoint;
830*16467b97STreehugger Robot     ANTLR3_UINT32   count;
831*16467b97STreehugger Robot 
832*16467b97STreehugger Robot     len	= (ANTLR3_UINT32)strlen(newbit);
833*16467b97STreehugger Robot 
834*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
835*16467b97STreehugger Robot     {
836*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
837*16467b97STreehugger Robot 		if (newAlloc == NULL)
838*16467b97STreehugger Robot 		{
839*16467b97STreehugger Robot 			return NULL;
840*16467b97STreehugger Robot 		}
841*16467b97STreehugger Robot 		string->chars	= newAlloc;
842*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
843*16467b97STreehugger Robot     }
844*16467b97STreehugger Robot 
845*16467b97STreehugger Robot     apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
846*16467b97STreehugger Robot     string->len	+= len;
847*16467b97STreehugger Robot 
848*16467b97STreehugger Robot     for	(count = 0; count < len; count++)
849*16467b97STreehugger Robot     {
850*16467b97STreehugger Robot 		*apPoint++   = *(newbit + count);
851*16467b97STreehugger Robot     }
852*16467b97STreehugger Robot     *apPoint = '\0';
853*16467b97STreehugger Robot 
854*16467b97STreehugger Robot     return string->chars;
855*16467b97STreehugger Robot }
856*16467b97STreehugger Robot 
857*16467b97STreehugger Robot static    pANTLR3_UINT8
appendUTF16_UTF16(pANTLR3_STRING string,const char * newbit)858*16467b97STreehugger Robot appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
859*16467b97STreehugger Robot {
860*16467b97STreehugger Robot     ANTLR3_UINT32 len;
861*16467b97STreehugger Robot     pANTLR3_UINT16  in;
862*16467b97STreehugger Robot 
863*16467b97STreehugger Robot     /** First, determine the length of the input string
864*16467b97STreehugger Robot      */
865*16467b97STreehugger Robot     in	    = (pANTLR3_UINT16)newbit;
866*16467b97STreehugger Robot     len   = 0;
867*16467b97STreehugger Robot 
868*16467b97STreehugger Robot     while   (*in++ != '\0')
869*16467b97STreehugger Robot     {
870*16467b97STreehugger Robot 		len++;
871*16467b97STreehugger Robot     }
872*16467b97STreehugger Robot 
873*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
874*16467b97STreehugger Robot     {
875*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
876*16467b97STreehugger Robot 		if (newAlloc == NULL)
877*16467b97STreehugger Robot 		{
878*16467b97STreehugger Robot 			return NULL;
879*16467b97STreehugger Robot 		}
880*16467b97STreehugger Robot 		string->chars	= newAlloc;
881*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
882*16467b97STreehugger Robot     }
883*16467b97STreehugger Robot 
884*16467b97STreehugger Robot     /* Note we copy one more byte than the strlen in order to get the trailing delimiter
885*16467b97STreehugger Robot      */
886*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
887*16467b97STreehugger Robot     string->len	+= len;
888*16467b97STreehugger Robot 
889*16467b97STreehugger Robot     return string->chars;
890*16467b97STreehugger Robot }
891*16467b97STreehugger Robot 
892*16467b97STreehugger Robot static    pANTLR3_UINT8
set8(pANTLR3_STRING string,const char * chars)893*16467b97STreehugger Robot set8	(pANTLR3_STRING string, const char * chars)
894*16467b97STreehugger Robot {
895*16467b97STreehugger Robot     ANTLR3_UINT32	len;
896*16467b97STreehugger Robot 
897*16467b97STreehugger Robot     len = (ANTLR3_UINT32)strlen(chars);
898*16467b97STreehugger Robot     if	(string->size < len + 1)
899*16467b97STreehugger Robot     {
900*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
901*16467b97STreehugger Robot 		if (newAlloc == NULL)
902*16467b97STreehugger Robot 		{
903*16467b97STreehugger Robot 			return NULL;
904*16467b97STreehugger Robot 		}
905*16467b97STreehugger Robot 		string->chars	= newAlloc;
906*16467b97STreehugger Robot 		string->size	= len + 1;
907*16467b97STreehugger Robot     }
908*16467b97STreehugger Robot 
909*16467b97STreehugger Robot     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
910*16467b97STreehugger Robot      */
911*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
912*16467b97STreehugger Robot     string->len	    = len;
913*16467b97STreehugger Robot 
914*16467b97STreehugger Robot     return  string->chars;
915*16467b97STreehugger Robot 
916*16467b97STreehugger Robot }
917*16467b97STreehugger Robot 
918*16467b97STreehugger Robot static    pANTLR3_UINT8
setUTF16_8(pANTLR3_STRING string,const char * chars)919*16467b97STreehugger Robot setUTF16_8	(pANTLR3_STRING string, const char * chars)
920*16467b97STreehugger Robot {
921*16467b97STreehugger Robot     ANTLR3_UINT32	len;
922*16467b97STreehugger Robot     ANTLR3_UINT32	count;
923*16467b97STreehugger Robot     pANTLR3_UINT16	apPoint;
924*16467b97STreehugger Robot 
925*16467b97STreehugger Robot     len = (ANTLR3_UINT32)strlen(chars);
926*16467b97STreehugger Robot     if	(string->size < len + 1)
927*16467b97STreehugger Robot 	{
928*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
929*16467b97STreehugger Robot 		if (newAlloc == NULL)
930*16467b97STreehugger Robot 		{
931*16467b97STreehugger Robot 			return NULL;
932*16467b97STreehugger Robot 		}
933*16467b97STreehugger Robot 		string->chars	= newAlloc;
934*16467b97STreehugger Robot 		string->size	= len + 1;
935*16467b97STreehugger Robot     }
936*16467b97STreehugger Robot     apPoint = ((pANTLR3_UINT16)string->chars);
937*16467b97STreehugger Robot     string->len	= len;
938*16467b97STreehugger Robot 
939*16467b97STreehugger Robot     for	(count = 0; count < string->len; count++)
940*16467b97STreehugger Robot     {
941*16467b97STreehugger Robot 		*apPoint++   = *(chars + count);
942*16467b97STreehugger Robot     }
943*16467b97STreehugger Robot     *apPoint = '\0';
944*16467b97STreehugger Robot 
945*16467b97STreehugger Robot     return  string->chars;
946*16467b97STreehugger Robot }
947*16467b97STreehugger Robot 
948*16467b97STreehugger Robot static    pANTLR3_UINT8
setUTF16_UTF16(pANTLR3_STRING string,const char * chars)949*16467b97STreehugger Robot setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
950*16467b97STreehugger Robot {
951*16467b97STreehugger Robot     ANTLR3_UINT32   len;
952*16467b97STreehugger Robot     pANTLR3_UINT16  in;
953*16467b97STreehugger Robot 
954*16467b97STreehugger Robot     /** First, determine the length of the input string
955*16467b97STreehugger Robot      */
956*16467b97STreehugger Robot     in	    = (pANTLR3_UINT16)chars;
957*16467b97STreehugger Robot     len   = 0;
958*16467b97STreehugger Robot 
959*16467b97STreehugger Robot     while   (*in++ != '\0')
960*16467b97STreehugger Robot     {
961*16467b97STreehugger Robot 		len++;
962*16467b97STreehugger Robot     }
963*16467b97STreehugger Robot 
964*16467b97STreehugger Robot     if	(string->size < len + 1)
965*16467b97STreehugger Robot     {
966*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
967*16467b97STreehugger Robot 		if (newAlloc == NULL)
968*16467b97STreehugger Robot 		{
969*16467b97STreehugger Robot 			return NULL;
970*16467b97STreehugger Robot 		}
971*16467b97STreehugger Robot 		string->chars	= newAlloc;
972*16467b97STreehugger Robot 		string->size	= len + 1;
973*16467b97STreehugger Robot     }
974*16467b97STreehugger Robot 
975*16467b97STreehugger Robot     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
976*16467b97STreehugger Robot      */
977*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
978*16467b97STreehugger Robot     string->len	    = len;
979*16467b97STreehugger Robot 
980*16467b97STreehugger Robot     return  string->chars;
981*16467b97STreehugger Robot 
982*16467b97STreehugger Robot }
983*16467b97STreehugger Robot 
984*16467b97STreehugger Robot static    pANTLR3_UINT8
addc8(pANTLR3_STRING string,ANTLR3_UINT32 c)985*16467b97STreehugger Robot addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
986*16467b97STreehugger Robot {
987*16467b97STreehugger Robot     if	(string->size < string->len + 2)
988*16467b97STreehugger Robot     {
989*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
990*16467b97STreehugger Robot 		if (newAlloc == NULL)
991*16467b97STreehugger Robot 		{
992*16467b97STreehugger Robot 			return NULL;
993*16467b97STreehugger Robot 		}
994*16467b97STreehugger Robot 		string->chars	= newAlloc;
995*16467b97STreehugger Robot 		string->size	= string->len + 2;
996*16467b97STreehugger Robot     }
997*16467b97STreehugger Robot     *(string->chars + string->len)	= (ANTLR3_UINT8)c;
998*16467b97STreehugger Robot     *(string->chars + string->len + 1)	= '\0';
999*16467b97STreehugger Robot     string->len++;
1000*16467b97STreehugger Robot 
1001*16467b97STreehugger Robot     return  string->chars;
1002*16467b97STreehugger Robot }
1003*16467b97STreehugger Robot 
1004*16467b97STreehugger Robot static    pANTLR3_UINT8
addcUTF16(pANTLR3_STRING string,ANTLR3_UINT32 c)1005*16467b97STreehugger Robot addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
1006*16467b97STreehugger Robot {
1007*16467b97STreehugger Robot     pANTLR3_UINT16  ptr;
1008*16467b97STreehugger Robot 
1009*16467b97STreehugger Robot     if	(string->size < string->len + 2)
1010*16467b97STreehugger Robot     {
1011*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
1012*16467b97STreehugger Robot 		if (newAlloc == NULL)
1013*16467b97STreehugger Robot 		{
1014*16467b97STreehugger Robot 			return NULL;
1015*16467b97STreehugger Robot 		}
1016*16467b97STreehugger Robot 		string->chars	= newAlloc;
1017*16467b97STreehugger Robot 		string->size	= string->len + 2;
1018*16467b97STreehugger Robot     }
1019*16467b97STreehugger Robot     ptr	= (pANTLR3_UINT16)(string->chars);
1020*16467b97STreehugger Robot 
1021*16467b97STreehugger Robot     *(ptr + string->len)	= (ANTLR3_UINT16)c;
1022*16467b97STreehugger Robot     *(ptr + string->len + 1)	= '\0';
1023*16467b97STreehugger Robot     string->len++;
1024*16467b97STreehugger Robot 
1025*16467b97STreehugger Robot     return  string->chars;
1026*16467b97STreehugger Robot }
1027*16467b97STreehugger Robot 
1028*16467b97STreehugger Robot static    pANTLR3_UINT8
addi8(pANTLR3_STRING string,ANTLR3_INT32 i)1029*16467b97STreehugger Robot addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
1030*16467b97STreehugger Robot {
1031*16467b97STreehugger Robot     ANTLR3_UINT8	    newbit[32];
1032*16467b97STreehugger Robot 
1033*16467b97STreehugger Robot     sprintf((char *)newbit, "%d", i);
1034*16467b97STreehugger Robot 
1035*16467b97STreehugger Robot     return  string->append8(string, (const char *)newbit);
1036*16467b97STreehugger Robot }
1037*16467b97STreehugger Robot static    pANTLR3_UINT8
addiUTF16(pANTLR3_STRING string,ANTLR3_INT32 i)1038*16467b97STreehugger Robot addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
1039*16467b97STreehugger Robot {
1040*16467b97STreehugger Robot     ANTLR3_UINT8	    newbit[32];
1041*16467b97STreehugger Robot 
1042*16467b97STreehugger Robot     sprintf((char *)newbit, "%d", i);
1043*16467b97STreehugger Robot 
1044*16467b97STreehugger Robot     return  string->append8(string, (const char *)newbit);
1045*16467b97STreehugger Robot }
1046*16467b97STreehugger Robot 
1047*16467b97STreehugger Robot static	  pANTLR3_UINT8
inserti8(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1048*16467b97STreehugger Robot inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1049*16467b97STreehugger Robot {
1050*16467b97STreehugger Robot     ANTLR3_UINT8	    newbit[32];
1051*16467b97STreehugger Robot 
1052*16467b97STreehugger Robot     sprintf((char *)newbit, "%d", i);
1053*16467b97STreehugger Robot     return  string->insert8(string, point, (const char *)newbit);
1054*16467b97STreehugger Robot }
1055*16467b97STreehugger Robot static	  pANTLR3_UINT8
insertiUTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1056*16467b97STreehugger Robot insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1057*16467b97STreehugger Robot {
1058*16467b97STreehugger Robot     ANTLR3_UINT8	    newbit[32];
1059*16467b97STreehugger Robot 
1060*16467b97STreehugger Robot     sprintf((char *)newbit, "%d", i);
1061*16467b97STreehugger Robot     return  string->insert8(string, point, (const char *)newbit);
1062*16467b97STreehugger Robot }
1063*16467b97STreehugger Robot 
1064*16467b97STreehugger Robot static	pANTLR3_UINT8
insert8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1065*16467b97STreehugger Robot insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1066*16467b97STreehugger Robot {
1067*16467b97STreehugger Robot     ANTLR3_UINT32	len;
1068*16467b97STreehugger Robot 
1069*16467b97STreehugger Robot     if	(point >= string->len)
1070*16467b97STreehugger Robot     {
1071*16467b97STreehugger Robot 		return	string->append(string, newbit);
1072*16467b97STreehugger Robot     }
1073*16467b97STreehugger Robot 
1074*16467b97STreehugger Robot     len	= (ANTLR3_UINT32)strlen(newbit);
1075*16467b97STreehugger Robot 
1076*16467b97STreehugger Robot     if	(len == 0)
1077*16467b97STreehugger Robot     {
1078*16467b97STreehugger Robot 		return	string->chars;
1079*16467b97STreehugger Robot     }
1080*16467b97STreehugger Robot 
1081*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
1082*16467b97STreehugger Robot     {
1083*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1084*16467b97STreehugger Robot 		if (newAlloc == NULL)
1085*16467b97STreehugger Robot 		{
1086*16467b97STreehugger Robot 			return NULL;
1087*16467b97STreehugger Robot 		}
1088*16467b97STreehugger Robot 		string->chars	= newAlloc;
1089*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
1090*16467b97STreehugger Robot     }
1091*16467b97STreehugger Robot 
1092*16467b97STreehugger Robot     /* Move the characters we are inserting before, including the delimiter
1093*16467b97STreehugger Robot      */
1094*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1095*16467b97STreehugger Robot 
1096*16467b97STreehugger Robot     /* Note we copy the exact number of bytes
1097*16467b97STreehugger Robot      */
1098*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1099*16467b97STreehugger Robot 
1100*16467b97STreehugger Robot     string->len += len;
1101*16467b97STreehugger Robot 
1102*16467b97STreehugger Robot     return  string->chars;
1103*16467b97STreehugger Robot }
1104*16467b97STreehugger Robot 
1105*16467b97STreehugger Robot static	pANTLR3_UINT8
insertUTF16_8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1106*16467b97STreehugger Robot insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1107*16467b97STreehugger Robot {
1108*16467b97STreehugger Robot     ANTLR3_UINT32	len;
1109*16467b97STreehugger Robot     ANTLR3_UINT32	count;
1110*16467b97STreehugger Robot     pANTLR3_UINT16	inPoint;
1111*16467b97STreehugger Robot 
1112*16467b97STreehugger Robot     if	(point >= string->len)
1113*16467b97STreehugger Robot     {
1114*16467b97STreehugger Robot 		return	string->append8(string, newbit);
1115*16467b97STreehugger Robot     }
1116*16467b97STreehugger Robot 
1117*16467b97STreehugger Robot     len	= (ANTLR3_UINT32)strlen(newbit);
1118*16467b97STreehugger Robot 
1119*16467b97STreehugger Robot     if	(len == 0)
1120*16467b97STreehugger Robot     {
1121*16467b97STreehugger Robot 		return	string->chars;
1122*16467b97STreehugger Robot     }
1123*16467b97STreehugger Robot 
1124*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
1125*16467b97STreehugger Robot     {
1126*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1127*16467b97STreehugger Robot 		if (newAlloc == NULL)
1128*16467b97STreehugger Robot 		{
1129*16467b97STreehugger Robot 			return NULL;
1130*16467b97STreehugger Robot 		}
1131*16467b97STreehugger Robot 		string->chars	= newAlloc;
1132*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
1133*16467b97STreehugger Robot     }
1134*16467b97STreehugger Robot 
1135*16467b97STreehugger Robot     /* Move the characters we are inserting before, including the delimiter
1136*16467b97STreehugger Robot      */
1137*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1138*16467b97STreehugger Robot 
1139*16467b97STreehugger Robot     string->len += len;
1140*16467b97STreehugger Robot 
1141*16467b97STreehugger Robot     inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1142*16467b97STreehugger Robot     for	(count = 0; count<len; count++)
1143*16467b97STreehugger Robot     {
1144*16467b97STreehugger Robot 		*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1145*16467b97STreehugger Robot     }
1146*16467b97STreehugger Robot 
1147*16467b97STreehugger Robot     return  string->chars;
1148*16467b97STreehugger Robot }
1149*16467b97STreehugger Robot 
1150*16467b97STreehugger Robot static	pANTLR3_UINT8
insertUTF16_UTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1151*16467b97STreehugger Robot insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1152*16467b97STreehugger Robot {
1153*16467b97STreehugger Robot     ANTLR3_UINT32	len;
1154*16467b97STreehugger Robot     pANTLR3_UINT16	in;
1155*16467b97STreehugger Robot 
1156*16467b97STreehugger Robot     if	(point >= string->len)
1157*16467b97STreehugger Robot     {
1158*16467b97STreehugger Robot 		return	string->append(string, newbit);
1159*16467b97STreehugger Robot     }
1160*16467b97STreehugger Robot 
1161*16467b97STreehugger Robot     /** First, determine the length of the input string
1162*16467b97STreehugger Robot      */
1163*16467b97STreehugger Robot     in	    = (pANTLR3_UINT16)newbit;
1164*16467b97STreehugger Robot     len	    = 0;
1165*16467b97STreehugger Robot 
1166*16467b97STreehugger Robot     while   (*in++ != '\0')
1167*16467b97STreehugger Robot     {
1168*16467b97STreehugger Robot 		len++;
1169*16467b97STreehugger Robot     }
1170*16467b97STreehugger Robot 
1171*16467b97STreehugger Robot     if	(len == 0)
1172*16467b97STreehugger Robot     {
1173*16467b97STreehugger Robot 		return	string->chars;
1174*16467b97STreehugger Robot     }
1175*16467b97STreehugger Robot 
1176*16467b97STreehugger Robot     if	(string->size < (string->len + len + 1))
1177*16467b97STreehugger Robot     {
1178*16467b97STreehugger Robot 		pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1179*16467b97STreehugger Robot 		if (newAlloc == NULL)
1180*16467b97STreehugger Robot 		{
1181*16467b97STreehugger Robot 			return NULL;
1182*16467b97STreehugger Robot 		}
1183*16467b97STreehugger Robot 		string->chars	= newAlloc;
1184*16467b97STreehugger Robot 		string->size	= string->len + len + 1;
1185*16467b97STreehugger Robot     }
1186*16467b97STreehugger Robot 
1187*16467b97STreehugger Robot     /* Move the characters we are inserting before, including the delimiter
1188*16467b97STreehugger Robot      */
1189*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1190*16467b97STreehugger Robot 
1191*16467b97STreehugger Robot 
1192*16467b97STreehugger Robot     /* Note we copy the exact number of characters
1193*16467b97STreehugger Robot      */
1194*16467b97STreehugger Robot     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1195*16467b97STreehugger Robot 
1196*16467b97STreehugger Robot     string->len += len;
1197*16467b97STreehugger Robot 
1198*16467b97STreehugger Robot     return  string->chars;
1199*16467b97STreehugger Robot }
1200*16467b97STreehugger Robot 
setS(pANTLR3_STRING string,pANTLR3_STRING chars)1201*16467b97STreehugger Robot static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
1202*16467b97STreehugger Robot {
1203*16467b97STreehugger Robot     return  string->set(string, (const char *)(chars->chars));
1204*16467b97STreehugger Robot }
1205*16467b97STreehugger Robot 
appendS(pANTLR3_STRING string,pANTLR3_STRING newbit)1206*16467b97STreehugger Robot static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
1207*16467b97STreehugger Robot {
1208*16467b97STreehugger Robot     /* We may be passed an empty string, in which case we just return the current pointer
1209*16467b97STreehugger Robot      */
1210*16467b97STreehugger Robot     if	(newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1211*16467b97STreehugger Robot     {
1212*16467b97STreehugger Robot 		return	string->chars;
1213*16467b97STreehugger Robot     }
1214*16467b97STreehugger Robot     else
1215*16467b97STreehugger Robot     {
1216*16467b97STreehugger Robot 		return  string->append(string, (const char *)(newbit->chars));
1217*16467b97STreehugger Robot     }
1218*16467b97STreehugger Robot }
1219*16467b97STreehugger Robot 
insertS(pANTLR3_STRING string,ANTLR3_UINT32 point,pANTLR3_STRING newbit)1220*16467b97STreehugger Robot static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1221*16467b97STreehugger Robot {
1222*16467b97STreehugger Robot     return  string->insert(string, point, (const char *)(newbit->chars));
1223*16467b97STreehugger Robot }
1224*16467b97STreehugger Robot 
1225*16467b97STreehugger Robot /* Function that compares the text of a string to the supplied
1226*16467b97STreehugger Robot  * 8 bit character string and returns a result a la strcmp()
1227*16467b97STreehugger Robot  */
1228*16467b97STreehugger Robot static ANTLR3_UINT32
compare8(pANTLR3_STRING string,const char * compStr)1229*16467b97STreehugger Robot compare8	(pANTLR3_STRING string, const char * compStr)
1230*16467b97STreehugger Robot {
1231*16467b97STreehugger Robot     return  strcmp((const char *)(string->chars), compStr);
1232*16467b97STreehugger Robot }
1233*16467b97STreehugger Robot 
1234*16467b97STreehugger Robot /* Function that compares the text of a string with the supplied character string
1235*16467b97STreehugger Robot  * (which is assumed to be in the same encoding as the string itself) and returns a result
1236*16467b97STreehugger Robot  * a la strcmp()
1237*16467b97STreehugger Robot  */
1238*16467b97STreehugger Robot static ANTLR3_UINT32
compareUTF16_8(pANTLR3_STRING string,const char * compStr)1239*16467b97STreehugger Robot compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
1240*16467b97STreehugger Robot {
1241*16467b97STreehugger Robot     pANTLR3_UINT16  ourString;
1242*16467b97STreehugger Robot     ANTLR3_UINT32   charDiff;
1243*16467b97STreehugger Robot 
1244*16467b97STreehugger Robot     ourString	= (pANTLR3_UINT16)(string->chars);
1245*16467b97STreehugger Robot 
1246*16467b97STreehugger Robot     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1247*16467b97STreehugger Robot     {
1248*16467b97STreehugger Robot 		charDiff = *ourString - *compStr;
1249*16467b97STreehugger Robot 		if  (charDiff != 0)
1250*16467b97STreehugger Robot 		{
1251*16467b97STreehugger Robot 			return charDiff;
1252*16467b97STreehugger Robot 		}
1253*16467b97STreehugger Robot 		ourString++;
1254*16467b97STreehugger Robot 		compStr++;
1255*16467b97STreehugger Robot     }
1256*16467b97STreehugger Robot 
1257*16467b97STreehugger Robot     /* At this point, one of the strings was terminated
1258*16467b97STreehugger Robot      */
1259*16467b97STreehugger Robot     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1260*16467b97STreehugger Robot 
1261*16467b97STreehugger Robot }
1262*16467b97STreehugger Robot 
1263*16467b97STreehugger Robot /* Function that compares the text of a string with the supplied character string
1264*16467b97STreehugger Robot  * (which is assumed to be in the same encoding as the string itself) and returns a result
1265*16467b97STreehugger Robot  * a la strcmp()
1266*16467b97STreehugger Robot  */
1267*16467b97STreehugger Robot static ANTLR3_UINT32
compareUTF16_UTF16(pANTLR3_STRING string,const char * compStr8)1268*16467b97STreehugger Robot compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
1269*16467b97STreehugger Robot {
1270*16467b97STreehugger Robot     pANTLR3_UINT16  ourString;
1271*16467b97STreehugger Robot     pANTLR3_UINT16  compStr;
1272*16467b97STreehugger Robot     ANTLR3_UINT32   charDiff;
1273*16467b97STreehugger Robot 
1274*16467b97STreehugger Robot     ourString	= (pANTLR3_UINT16)(string->chars);
1275*16467b97STreehugger Robot     compStr	= (pANTLR3_UINT16)(compStr8);
1276*16467b97STreehugger Robot 
1277*16467b97STreehugger Robot     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1278*16467b97STreehugger Robot     {
1279*16467b97STreehugger Robot 		charDiff = *ourString - *compStr;
1280*16467b97STreehugger Robot 		if  (charDiff != 0)
1281*16467b97STreehugger Robot 		{
1282*16467b97STreehugger Robot 			return charDiff;
1283*16467b97STreehugger Robot 		}
1284*16467b97STreehugger Robot 		ourString++;
1285*16467b97STreehugger Robot 		compStr++;
1286*16467b97STreehugger Robot     }
1287*16467b97STreehugger Robot 
1288*16467b97STreehugger Robot     /* At this point, one of the strings was terminated
1289*16467b97STreehugger Robot      */
1290*16467b97STreehugger Robot     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1291*16467b97STreehugger Robot }
1292*16467b97STreehugger Robot 
1293*16467b97STreehugger Robot /* Function that compares the text of a string with the supplied string
1294*16467b97STreehugger Robot  * (which is assumed to be in the same encoding as the string itself) and returns a result
1295*16467b97STreehugger Robot  * a la strcmp()
1296*16467b97STreehugger Robot  */
1297*16467b97STreehugger Robot static ANTLR3_UINT32
compareS(pANTLR3_STRING string,pANTLR3_STRING compStr)1298*16467b97STreehugger Robot compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
1299*16467b97STreehugger Robot {
1300*16467b97STreehugger Robot     return  string->compare(string, (const char *)compStr->chars);
1301*16467b97STreehugger Robot }
1302*16467b97STreehugger Robot 
1303*16467b97STreehugger Robot 
1304*16467b97STreehugger Robot /* Function that returns the character indexed at the supplied
1305*16467b97STreehugger Robot  * offset as a 32 bit character.
1306*16467b97STreehugger Robot  */
1307*16467b97STreehugger Robot static ANTLR3_UCHAR
charAt8(pANTLR3_STRING string,ANTLR3_UINT32 offset)1308*16467b97STreehugger Robot charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1309*16467b97STreehugger Robot {
1310*16467b97STreehugger Robot     if	(offset > string->len)
1311*16467b97STreehugger Robot     {
1312*16467b97STreehugger Robot 		return (ANTLR3_UCHAR)'\0';
1313*16467b97STreehugger Robot     }
1314*16467b97STreehugger Robot     else
1315*16467b97STreehugger Robot     {
1316*16467b97STreehugger Robot 		return  (ANTLR3_UCHAR)(*(string->chars + offset));
1317*16467b97STreehugger Robot     }
1318*16467b97STreehugger Robot }
1319*16467b97STreehugger Robot 
1320*16467b97STreehugger Robot /* Function that returns the character indexed at the supplied
1321*16467b97STreehugger Robot  * offset as a 32 bit character.
1322*16467b97STreehugger Robot  */
1323*16467b97STreehugger Robot static ANTLR3_UCHAR
charAtUTF16(pANTLR3_STRING string,ANTLR3_UINT32 offset)1324*16467b97STreehugger Robot charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1325*16467b97STreehugger Robot {
1326*16467b97STreehugger Robot     if	(offset > string->len)
1327*16467b97STreehugger Robot     {
1328*16467b97STreehugger Robot 		return (ANTLR3_UCHAR)'\0';
1329*16467b97STreehugger Robot     }
1330*16467b97STreehugger Robot     else
1331*16467b97STreehugger Robot     {
1332*16467b97STreehugger Robot 		return  (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1333*16467b97STreehugger Robot     }
1334*16467b97STreehugger Robot }
1335*16467b97STreehugger Robot 
1336*16467b97STreehugger Robot /* Function that returns a substring of the supplied string a la .subString(s,e)
1337*16467b97STreehugger Robot  * in java runtimes.
1338*16467b97STreehugger Robot  */
1339*16467b97STreehugger Robot static pANTLR3_STRING
subString8(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1340*16467b97STreehugger Robot subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1341*16467b97STreehugger Robot {
1342*16467b97STreehugger Robot     pANTLR3_STRING newStr;
1343*16467b97STreehugger Robot 
1344*16467b97STreehugger Robot     if	(endIndex > string->len)
1345*16467b97STreehugger Robot     {
1346*16467b97STreehugger Robot 		endIndex = string->len + 1;
1347*16467b97STreehugger Robot     }
1348*16467b97STreehugger Robot     newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1349*16467b97STreehugger Robot 
1350*16467b97STreehugger Robot     return newStr;
1351*16467b97STreehugger Robot }
1352*16467b97STreehugger Robot 
1353*16467b97STreehugger Robot /* Returns a substring of the supplied string a la .subString(s,e)
1354*16467b97STreehugger Robot  * in java runtimes.
1355*16467b97STreehugger Robot  */
1356*16467b97STreehugger Robot static pANTLR3_STRING
subStringUTF16(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1357*16467b97STreehugger Robot subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1358*16467b97STreehugger Robot {
1359*16467b97STreehugger Robot     pANTLR3_STRING newStr;
1360*16467b97STreehugger Robot 
1361*16467b97STreehugger Robot     if	(endIndex > string->len)
1362*16467b97STreehugger Robot     {
1363*16467b97STreehugger Robot 		endIndex = string->len + 1;
1364*16467b97STreehugger Robot     }
1365*16467b97STreehugger Robot     newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1366*16467b97STreehugger Robot 
1367*16467b97STreehugger Robot     return newStr;
1368*16467b97STreehugger Robot }
1369*16467b97STreehugger Robot 
1370*16467b97STreehugger Robot /* Function that can convert the characters in the string to an integer
1371*16467b97STreehugger Robot  */
1372*16467b97STreehugger Robot static ANTLR3_INT32
toInt32_8(struct ANTLR3_STRING_struct * string)1373*16467b97STreehugger Robot toInt32_8	    (struct ANTLR3_STRING_struct * string)
1374*16467b97STreehugger Robot {
1375*16467b97STreehugger Robot     return  atoi((const char *)(string->chars));
1376*16467b97STreehugger Robot }
1377*16467b97STreehugger Robot 
1378*16467b97STreehugger Robot /* Function that can convert the characters in the string to an integer
1379*16467b97STreehugger Robot  */
1380*16467b97STreehugger Robot static ANTLR3_INT32
toInt32_UTF16(struct ANTLR3_STRING_struct * string)1381*16467b97STreehugger Robot toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
1382*16467b97STreehugger Robot {
1383*16467b97STreehugger Robot     pANTLR3_UINT16  input;
1384*16467b97STreehugger Robot     ANTLR3_INT32   value;
1385*16467b97STreehugger Robot     ANTLR3_BOOLEAN  negate;
1386*16467b97STreehugger Robot 
1387*16467b97STreehugger Robot     value   = 0;
1388*16467b97STreehugger Robot     input   = (pANTLR3_UINT16)(string->chars);
1389*16467b97STreehugger Robot     negate  = ANTLR3_FALSE;
1390*16467b97STreehugger Robot 
1391*16467b97STreehugger Robot     if	(*input == (ANTLR3_UCHAR)'-')
1392*16467b97STreehugger Robot     {
1393*16467b97STreehugger Robot 		negate = ANTLR3_TRUE;
1394*16467b97STreehugger Robot 		input++;
1395*16467b97STreehugger Robot     }
1396*16467b97STreehugger Robot     else if (*input == (ANTLR3_UCHAR)'+')
1397*16467b97STreehugger Robot     {
1398*16467b97STreehugger Robot 		input++;
1399*16467b97STreehugger Robot     }
1400*16467b97STreehugger Robot 
1401*16467b97STreehugger Robot     while   (*input != '\0' && isdigit(*input))
1402*16467b97STreehugger Robot     {
1403*16467b97STreehugger Robot 		value	 = value * 10;
1404*16467b97STreehugger Robot 		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1405*16467b97STreehugger Robot 		input++;
1406*16467b97STreehugger Robot     }
1407*16467b97STreehugger Robot 
1408*16467b97STreehugger Robot     return negate ? -value : value;
1409*16467b97STreehugger Robot }
1410*16467b97STreehugger Robot 
1411*16467b97STreehugger Robot /* Function that returns a pointer to an 8 bit version of the string,
1412*16467b97STreehugger Robot  * which in this case is just the string as this is
1413*16467b97STreehugger Robot  * 8 bit encodiing anyway.
1414*16467b97STreehugger Robot  */
to8_8(pANTLR3_STRING string)1415*16467b97STreehugger Robot static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
1416*16467b97STreehugger Robot {
1417*16467b97STreehugger Robot     return  string;
1418*16467b97STreehugger Robot }
1419*16467b97STreehugger Robot 
1420*16467b97STreehugger Robot /* Function that returns an 8 bit version of the string,
1421*16467b97STreehugger Robot  * which in this case is returning all the UTF16 characters
1422*16467b97STreehugger Robot  * narrowed back into 8 bits, with characters that are too large
1423*16467b97STreehugger Robot  * replaced with '_'
1424*16467b97STreehugger Robot  */
to8_UTF16(pANTLR3_STRING string)1425*16467b97STreehugger Robot static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
1426*16467b97STreehugger Robot {
1427*16467b97STreehugger Robot 	pANTLR3_STRING  newStr;
1428*16467b97STreehugger Robot 	ANTLR3_UINT32   i;
1429*16467b97STreehugger Robot 
1430*16467b97STreehugger Robot 	/* Create a new 8 bit string
1431*16467b97STreehugger Robot 	*/
1432*16467b97STreehugger Robot 	newStr  = newRaw8(string->factory);
1433*16467b97STreehugger Robot 
1434*16467b97STreehugger Robot 	if	(newStr == NULL)
1435*16467b97STreehugger Robot 	{
1436*16467b97STreehugger Robot 		return	NULL;
1437*16467b97STreehugger Robot 	}
1438*16467b97STreehugger Robot 
1439*16467b97STreehugger Robot 	/* Always add one more byte for a terminator
1440*16467b97STreehugger Robot 	*/
1441*16467b97STreehugger Robot 	newStr->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1442*16467b97STreehugger Robot 	if (newStr->chars != NULL)
1443*16467b97STreehugger Robot 	{
1444*16467b97STreehugger Robot 		newStr->size    = string->len + 1;
1445*16467b97STreehugger Robot 		newStr->len	    = string->len;
1446*16467b97STreehugger Robot 
1447*16467b97STreehugger Robot 		/* Now copy each UTF16 charActer , making it an 8 bit character of
1448*16467b97STreehugger Robot 		* some sort.
1449*16467b97STreehugger Robot 		*/
1450*16467b97STreehugger Robot 		for	(i=0; i<string->len; i++)
1451*16467b97STreehugger Robot 		{
1452*16467b97STreehugger Robot 			ANTLR3_UCHAR	c;
1453*16467b97STreehugger Robot 
1454*16467b97STreehugger Robot 			c = *(((pANTLR3_UINT16)(string->chars)) + i);
1455*16467b97STreehugger Robot 
1456*16467b97STreehugger Robot 			*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1457*16467b97STreehugger Robot 		}
1458*16467b97STreehugger Robot 
1459*16467b97STreehugger Robot 		/* Terminate
1460*16467b97STreehugger Robot 		*/
1461*16467b97STreehugger Robot 		*(newStr->chars + newStr->len) = '\0';
1462*16467b97STreehugger Robot 	}
1463*16467b97STreehugger Robot 
1464*16467b97STreehugger Robot 	return newStr;
1465*16467b97STreehugger Robot }
1466