xref: /aosp_15_r20/external/expat/expat/lib/xmltok.c (revision 6be67779651aebaf20f11e5663acd1ae59e93f66)
1*6be67779SAndroid Build Coastguard Worker /*
2*6be67779SAndroid Build Coastguard Worker                             __  __            _
3*6be67779SAndroid Build Coastguard Worker                          ___\ \/ /_ __   __ _| |_
4*6be67779SAndroid Build Coastguard Worker                         / _ \\  /| '_ \ / _` | __|
5*6be67779SAndroid Build Coastguard Worker                        |  __//  \| |_) | (_| | |_
6*6be67779SAndroid Build Coastguard Worker                         \___/_/\_\ .__/ \__,_|\__|
7*6be67779SAndroid Build Coastguard Worker                                  |_| XML parser
8*6be67779SAndroid Build Coastguard Worker 
9*6be67779SAndroid Build Coastguard Worker    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2000      Clark Cooper <[email protected]>
11*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2001-2003 Fred L. Drake, Jr. <[email protected]>
12*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2002      Greg Stein <[email protected]>
13*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2002-2016 Karl Waclawek <[email protected]>
14*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2005-2009 Steven Solie <[email protected]>
15*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
16*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2016      Pascal Cuoq <[email protected]>
17*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2016      Don Lewis <[email protected]>
18*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2017      Rhodri James <[email protected]>
19*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2017      Alexander Bluhm <[email protected]>
20*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2017      Benbuck Nason <[email protected]>
21*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2017      José Gutiérrez de la Concha <[email protected]>
22*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2019      David Loffredo <[email protected]>
23*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2021      Donghee Na <[email protected]>
24*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2022      Martin Ettl <[email protected]>
25*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2022      Sean McBride <[email protected]>
26*6be67779SAndroid Build Coastguard Worker    Copyright (c) 2023      Hanno Böck <[email protected]>
27*6be67779SAndroid Build Coastguard Worker    Licensed under the MIT license:
28*6be67779SAndroid Build Coastguard Worker 
29*6be67779SAndroid Build Coastguard Worker    Permission is  hereby granted,  free of charge,  to any  person obtaining
30*6be67779SAndroid Build Coastguard Worker    a  copy  of  this  software   and  associated  documentation  files  (the
31*6be67779SAndroid Build Coastguard Worker    "Software"),  to  deal in  the  Software  without restriction,  including
32*6be67779SAndroid Build Coastguard Worker    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
33*6be67779SAndroid Build Coastguard Worker    distribute, sublicense, and/or sell copies of the Software, and to permit
34*6be67779SAndroid Build Coastguard Worker    persons  to whom  the Software  is  furnished to  do so,  subject to  the
35*6be67779SAndroid Build Coastguard Worker    following conditions:
36*6be67779SAndroid Build Coastguard Worker 
37*6be67779SAndroid Build Coastguard Worker    The above copyright  notice and this permission notice  shall be included
38*6be67779SAndroid Build Coastguard Worker    in all copies or substantial portions of the Software.
39*6be67779SAndroid Build Coastguard Worker 
40*6be67779SAndroid Build Coastguard Worker    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
41*6be67779SAndroid Build Coastguard Worker    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
42*6be67779SAndroid Build Coastguard Worker    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
43*6be67779SAndroid Build Coastguard Worker    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
44*6be67779SAndroid Build Coastguard Worker    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
45*6be67779SAndroid Build Coastguard Worker    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
46*6be67779SAndroid Build Coastguard Worker    USE OR OTHER DEALINGS IN THE SOFTWARE.
47*6be67779SAndroid Build Coastguard Worker */
48*6be67779SAndroid Build Coastguard Worker 
49*6be67779SAndroid Build Coastguard Worker #include "expat_config.h"
50*6be67779SAndroid Build Coastguard Worker 
51*6be67779SAndroid Build Coastguard Worker #include <stddef.h>
52*6be67779SAndroid Build Coastguard Worker #include <string.h> /* memcpy */
53*6be67779SAndroid Build Coastguard Worker #include <stdbool.h>
54*6be67779SAndroid Build Coastguard Worker 
55*6be67779SAndroid Build Coastguard Worker #ifdef _WIN32
56*6be67779SAndroid Build Coastguard Worker #  include "winconfig.h"
57*6be67779SAndroid Build Coastguard Worker #endif
58*6be67779SAndroid Build Coastguard Worker 
59*6be67779SAndroid Build Coastguard Worker #include "expat_external.h"
60*6be67779SAndroid Build Coastguard Worker #include "internal.h"
61*6be67779SAndroid Build Coastguard Worker #include "xmltok.h"
62*6be67779SAndroid Build Coastguard Worker #include "nametab.h"
63*6be67779SAndroid Build Coastguard Worker 
64*6be67779SAndroid Build Coastguard Worker #ifdef XML_DTD
65*6be67779SAndroid Build Coastguard Worker #  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
66*6be67779SAndroid Build Coastguard Worker #else
67*6be67779SAndroid Build Coastguard Worker #  define IGNORE_SECTION_TOK_VTABLE /* as nothing */
68*6be67779SAndroid Build Coastguard Worker #endif
69*6be67779SAndroid Build Coastguard Worker 
70*6be67779SAndroid Build Coastguard Worker #define VTABLE1                                                                \
71*6be67779SAndroid Build Coastguard Worker   {PREFIX(prologTok), PREFIX(contentTok),                                      \
72*6be67779SAndroid Build Coastguard Worker    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE},                         \
73*6be67779SAndroid Build Coastguard Worker       {PREFIX(attributeValueTok), PREFIX(entityValueTok)},                     \
74*6be67779SAndroid Build Coastguard Worker       PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS),             \
75*6be67779SAndroid Build Coastguard Worker       PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName),    \
76*6be67779SAndroid Build Coastguard Worker       PREFIX(updatePosition), PREFIX(isPublicId)
77*6be67779SAndroid Build Coastguard Worker 
78*6be67779SAndroid Build Coastguard Worker #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
79*6be67779SAndroid Build Coastguard Worker 
/* Naming-bitmap lookup for a UCS-2 character given as high byte `hi` and
   low byte `lo`.  (pages)[(hi)] selects a group of 8 consecutive words in
   namingBitmap, the top 3 bits of `lo` pick the word inside that group and
   the low 5 bits of `lo` pick the bit.  Evaluates to non-zero when that
   bit is set.  Every argument is parenthesized so that expressions such as
   `table + 1` can be passed safely; `lo` is evaluated twice, so it must be
   free of side effects. */
#define UCS2_GET_NAMING(pages, hi, lo)                                         \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
82*6be67779SAndroid Build Coastguard Worker 
/* A 2 byte UTF-8 representation splits the character's 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
*/
87*6be67779SAndroid Build Coastguard Worker #define UTF8_GET_NAMING2(pages, byte)                                          \
88*6be67779SAndroid Build Coastguard Worker   (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3)                         \
89*6be67779SAndroid Build Coastguard Worker                 + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)]         \
90*6be67779SAndroid Build Coastguard Worker    & (1u << (((byte)[1]) & 0x1F)))
91*6be67779SAndroid Build Coastguard Worker 
/* A 3 byte UTF-8 representation splits the character's 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.
*/
97*6be67779SAndroid Build Coastguard Worker #define UTF8_GET_NAMING3(pages, byte)                                          \
98*6be67779SAndroid Build Coastguard Worker   (namingBitmap                                                                \
99*6be67779SAndroid Build Coastguard Worker        [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)]      \
100*6be67779SAndroid Build Coastguard Worker          << 3)                                                                 \
101*6be67779SAndroid Build Coastguard Worker         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
102*6be67779SAndroid Build Coastguard Worker    & (1u << (((byte)[2]) & 0x1F)))
103*6be67779SAndroid Build Coastguard Worker 
104*6be67779SAndroid Build Coastguard Worker /* Detection of invalid UTF-8 sequences is based on Table 3.1B
105*6be67779SAndroid Build Coastguard Worker    of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
106*6be67779SAndroid Build Coastguard Worker    with the additional restriction of not allowing the Unicode
107*6be67779SAndroid Build Coastguard Worker    code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
108*6be67779SAndroid Build Coastguard Worker    Implementation details:
109*6be67779SAndroid Build Coastguard Worker      (A & 0x80) == 0     means A < 0x80
110*6be67779SAndroid Build Coastguard Worker    and
111*6be67779SAndroid Build Coastguard Worker      (A & 0xC0) == 0xC0  means A > 0xBF
112*6be67779SAndroid Build Coastguard Worker */
113*6be67779SAndroid Build Coastguard Worker 
/* Non-zero when the two bytes at p are not a well-formed 2-byte UTF-8
   sequence: the lead byte is below 0xC2 (which rules out the overlong
   leads 0xC0 and 0xC1) or the second byte is outside 0x80..0xBF.
   p must point to unsigned char.  The lead byte is read as (p)[0] so that
   an argument such as `buf + 1` is dereferenced as a whole. */
#define UTF8_INVALID2(p)                                                       \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)
116*6be67779SAndroid Build Coastguard Worker 
/* Non-zero when the three bytes at p are not an acceptable 3-byte UTF-8
   sequence.  Rejected are:
     - a third byte outside 0x80..0xBF,
     - EF,BF,BE and EF,BF,BF (third byte above 0xBD after EF,BF),
     - after lead 0xE0, a second byte outside 0xA0..0xBF,
     - after lead 0xED, a second byte outside 0x80..0x9F,
     - after any other lead, a second byte outside 0x80..0xBF.
   p must point to unsigned char.  The lead byte is read as (p)[0] so that
   an argument such as `buf + 1` is dereferenced as a whole. */
#define UTF8_INVALID3(p)                                                       \
  (((p)[2] & 0x80) == 0                                                        \
   || ((p)[0] == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD                        \
                                        : ((p)[2] & 0xC0) == 0xC0)             \
   || ((p)[0] == 0xE0                                                          \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xED ? (p)[1] > 0x9F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
125*6be67779SAndroid Build Coastguard Worker 
/* Non-zero when the four bytes at p are not a well-formed 4-byte UTF-8
   sequence.  Rejected are:
     - a third or fourth byte outside 0x80..0xBF,
     - after lead 0xF0, a second byte outside 0x90..0xBF,
     - after lead 0xF4, a second byte outside 0x80..0x8F,
     - after any other lead, a second byte outside 0x80..0xBF.
   p must point to unsigned char.  The lead byte is read as (p)[0] so that
   an argument such as `buf + 1` is dereferenced as a whole. */
#define UTF8_INVALID4(p)                                                       \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0     \
   || ((p)[2] & 0xC0) == 0xC0                                                  \
   || ((p)[0] == 0xF0                                                          \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0                          \
           : ((p)[1] & 0x80) == 0                                              \
                 || ((p)[0] == 0xF4 ? (p)[1] > 0x8F                            \
                                    : ((p)[1] & 0xC0) == 0xC0)))
133*6be67779SAndroid Build Coastguard Worker 
134*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
isNever(const ENCODING * enc,const char * p)135*6be67779SAndroid Build Coastguard Worker isNever(const ENCODING *enc, const char *p) {
136*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
137*6be67779SAndroid Build Coastguard Worker   UNUSED_P(p);
138*6be67779SAndroid Build Coastguard Worker   return 0;
139*6be67779SAndroid Build Coastguard Worker }
140*6be67779SAndroid Build Coastguard Worker 
141*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isName2(const ENCODING * enc,const char * p)142*6be67779SAndroid Build Coastguard Worker utf8_isName2(const ENCODING *enc, const char *p) {
143*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
144*6be67779SAndroid Build Coastguard Worker   return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
145*6be67779SAndroid Build Coastguard Worker }
146*6be67779SAndroid Build Coastguard Worker 
147*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isName3(const ENCODING * enc,const char * p)148*6be67779SAndroid Build Coastguard Worker utf8_isName3(const ENCODING *enc, const char *p) {
149*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
150*6be67779SAndroid Build Coastguard Worker   return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
151*6be67779SAndroid Build Coastguard Worker }
152*6be67779SAndroid Build Coastguard Worker 
153*6be67779SAndroid Build Coastguard Worker #define utf8_isName4 isNever
154*6be67779SAndroid Build Coastguard Worker 
155*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isNmstrt2(const ENCODING * enc,const char * p)156*6be67779SAndroid Build Coastguard Worker utf8_isNmstrt2(const ENCODING *enc, const char *p) {
157*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
158*6be67779SAndroid Build Coastguard Worker   return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
159*6be67779SAndroid Build Coastguard Worker }
160*6be67779SAndroid Build Coastguard Worker 
161*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isNmstrt3(const ENCODING * enc,const char * p)162*6be67779SAndroid Build Coastguard Worker utf8_isNmstrt3(const ENCODING *enc, const char *p) {
163*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
164*6be67779SAndroid Build Coastguard Worker   return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
165*6be67779SAndroid Build Coastguard Worker }
166*6be67779SAndroid Build Coastguard Worker 
167*6be67779SAndroid Build Coastguard Worker #define utf8_isNmstrt4 isNever
168*6be67779SAndroid Build Coastguard Worker 
169*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isInvalid2(const ENCODING * enc,const char * p)170*6be67779SAndroid Build Coastguard Worker utf8_isInvalid2(const ENCODING *enc, const char *p) {
171*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
172*6be67779SAndroid Build Coastguard Worker   return UTF8_INVALID2((const unsigned char *)p);
173*6be67779SAndroid Build Coastguard Worker }
174*6be67779SAndroid Build Coastguard Worker 
175*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isInvalid3(const ENCODING * enc,const char * p)176*6be67779SAndroid Build Coastguard Worker utf8_isInvalid3(const ENCODING *enc, const char *p) {
177*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
178*6be67779SAndroid Build Coastguard Worker   return UTF8_INVALID3((const unsigned char *)p);
179*6be67779SAndroid Build Coastguard Worker }
180*6be67779SAndroid Build Coastguard Worker 
181*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
utf8_isInvalid4(const ENCODING * enc,const char * p)182*6be67779SAndroid Build Coastguard Worker utf8_isInvalid4(const ENCODING *enc, const char *p) {
183*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
184*6be67779SAndroid Build Coastguard Worker   return UTF8_INVALID4((const unsigned char *)p);
185*6be67779SAndroid Build Coastguard Worker }
186*6be67779SAndroid Build Coastguard Worker 
187*6be67779SAndroid Build Coastguard Worker struct normal_encoding {
188*6be67779SAndroid Build Coastguard Worker   ENCODING enc;
189*6be67779SAndroid Build Coastguard Worker   unsigned char type[256];
190*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
191*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
192*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
193*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
194*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
195*6be67779SAndroid Build Coastguard Worker   int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
196*6be67779SAndroid Build Coastguard Worker #endif /* XML_MIN_SIZE */
197*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
198*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
199*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
200*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
201*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
202*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
203*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
204*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
205*6be67779SAndroid Build Coastguard Worker   int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
206*6be67779SAndroid Build Coastguard Worker };
207*6be67779SAndroid Build Coastguard Worker 
208*6be67779SAndroid Build Coastguard Worker #define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
209*6be67779SAndroid Build Coastguard Worker 
210*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
211*6be67779SAndroid Build Coastguard Worker 
212*6be67779SAndroid Build Coastguard Worker #  define STANDARD_VTABLE(E)                                                   \
213*6be67779SAndroid Build Coastguard Worker     E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
214*6be67779SAndroid Build Coastguard Worker 
215*6be67779SAndroid Build Coastguard Worker #else
216*6be67779SAndroid Build Coastguard Worker 
217*6be67779SAndroid Build Coastguard Worker #  define STANDARD_VTABLE(E) /* as nothing */
218*6be67779SAndroid Build Coastguard Worker 
219*6be67779SAndroid Build Coastguard Worker #endif
220*6be67779SAndroid Build Coastguard Worker 
221*6be67779SAndroid Build Coastguard Worker #define NORMAL_VTABLE(E)                                                       \
222*6be67779SAndroid Build Coastguard Worker   E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3,              \
223*6be67779SAndroid Build Coastguard Worker       E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
224*6be67779SAndroid Build Coastguard Worker 
225*6be67779SAndroid Build Coastguard Worker #define NULL_VTABLE                                                            \
226*6be67779SAndroid Build Coastguard Worker   /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL,                  \
227*6be67779SAndroid Build Coastguard Worker       /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL,        \
228*6be67779SAndroid Build Coastguard Worker       /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
229*6be67779SAndroid Build Coastguard Worker 
230*6be67779SAndroid Build Coastguard Worker static int FASTCALL checkCharRefNumber(int result);
231*6be67779SAndroid Build Coastguard Worker 
232*6be67779SAndroid Build Coastguard Worker #include "xmltok_impl.h"
233*6be67779SAndroid Build Coastguard Worker #include "ascii.h"
234*6be67779SAndroid Build Coastguard Worker 
235*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
236*6be67779SAndroid Build Coastguard Worker #  define sb_isNameMin isNever
237*6be67779SAndroid Build Coastguard Worker #  define sb_isNmstrtMin isNever
238*6be67779SAndroid Build Coastguard Worker #endif
239*6be67779SAndroid Build Coastguard Worker 
240*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
241*6be67779SAndroid Build Coastguard Worker #  define MINBPC(enc) ((enc)->minBytesPerChar)
242*6be67779SAndroid Build Coastguard Worker #else
243*6be67779SAndroid Build Coastguard Worker /* minimum bytes per character */
244*6be67779SAndroid Build Coastguard Worker #  define MINBPC(enc) 1
245*6be67779SAndroid Build Coastguard Worker #endif
246*6be67779SAndroid Build Coastguard Worker 
247*6be67779SAndroid Build Coastguard Worker #define SB_BYTE_TYPE(enc, p)                                                   \
248*6be67779SAndroid Build Coastguard Worker   (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
249*6be67779SAndroid Build Coastguard Worker 
250*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
251*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
sb_byteType(const ENCODING * enc,const char * p)252*6be67779SAndroid Build Coastguard Worker sb_byteType(const ENCODING *enc, const char *p) {
253*6be67779SAndroid Build Coastguard Worker   return SB_BYTE_TYPE(enc, p);
254*6be67779SAndroid Build Coastguard Worker }
255*6be67779SAndroid Build Coastguard Worker #  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
256*6be67779SAndroid Build Coastguard Worker #else
257*6be67779SAndroid Build Coastguard Worker #  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
258*6be67779SAndroid Build Coastguard Worker #endif
259*6be67779SAndroid Build Coastguard Worker 
260*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
261*6be67779SAndroid Build Coastguard Worker #  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
262*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
sb_byteToAscii(const ENCODING * enc,const char * p)263*6be67779SAndroid Build Coastguard Worker sb_byteToAscii(const ENCODING *enc, const char *p) {
264*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
265*6be67779SAndroid Build Coastguard Worker   return *p;
266*6be67779SAndroid Build Coastguard Worker }
267*6be67779SAndroid Build Coastguard Worker #else
268*6be67779SAndroid Build Coastguard Worker #  define BYTE_TO_ASCII(enc, p) (*(p))
269*6be67779SAndroid Build Coastguard Worker #endif
270*6be67779SAndroid Build Coastguard Worker 
271*6be67779SAndroid Build Coastguard Worker #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
272*6be67779SAndroid Build Coastguard Worker #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))
273*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
274*6be67779SAndroid Build Coastguard Worker #  define IS_INVALID_CHAR(enc, p, n)                                           \
275*6be67779SAndroid Build Coastguard Worker     (AS_NORMAL_ENCODING(enc)->isInvalid##n                                     \
276*6be67779SAndroid Build Coastguard Worker      && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
277*6be67779SAndroid Build Coastguard Worker #else
278*6be67779SAndroid Build Coastguard Worker #  define IS_INVALID_CHAR(enc, p, n)                                           \
279*6be67779SAndroid Build Coastguard Worker     (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
280*6be67779SAndroid Build Coastguard Worker #endif
281*6be67779SAndroid Build Coastguard Worker 
282*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
283*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR_MINBPC(enc, p)                                          \
284*6be67779SAndroid Build Coastguard Worker     (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
285*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR_MINBPC(enc, p)                                        \
286*6be67779SAndroid Build Coastguard Worker     (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
287*6be67779SAndroid Build Coastguard Worker #else
288*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR_MINBPC(enc, p) (0)
289*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
290*6be67779SAndroid Build Coastguard Worker #endif
291*6be67779SAndroid Build Coastguard Worker 
292*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
293*6be67779SAndroid Build Coastguard Worker #  define CHAR_MATCHES(enc, p, c)                                              \
294*6be67779SAndroid Build Coastguard Worker     (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
295*6be67779SAndroid Build Coastguard Worker static int PTRCALL
sb_charMatches(const ENCODING * enc,const char * p,int c)296*6be67779SAndroid Build Coastguard Worker sb_charMatches(const ENCODING *enc, const char *p, int c) {
297*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
298*6be67779SAndroid Build Coastguard Worker   return *p == c;
299*6be67779SAndroid Build Coastguard Worker }
300*6be67779SAndroid Build Coastguard Worker #else
301*6be67779SAndroid Build Coastguard Worker /* c is an ASCII character */
302*6be67779SAndroid Build Coastguard Worker #  define CHAR_MATCHES(enc, p, c) (*(p) == (c))
303*6be67779SAndroid Build Coastguard Worker #endif
304*6be67779SAndroid Build Coastguard Worker 
305*6be67779SAndroid Build Coastguard Worker #define PREFIX(ident) normal_##ident
306*6be67779SAndroid Build Coastguard Worker #define XML_TOK_IMPL_C
307*6be67779SAndroid Build Coastguard Worker #include "xmltok_impl.c"
308*6be67779SAndroid Build Coastguard Worker #undef XML_TOK_IMPL_C
309*6be67779SAndroid Build Coastguard Worker 
310*6be67779SAndroid Build Coastguard Worker #undef MINBPC
311*6be67779SAndroid Build Coastguard Worker #undef BYTE_TYPE
312*6be67779SAndroid Build Coastguard Worker #undef BYTE_TO_ASCII
313*6be67779SAndroid Build Coastguard Worker #undef CHAR_MATCHES
314*6be67779SAndroid Build Coastguard Worker #undef IS_NAME_CHAR
315*6be67779SAndroid Build Coastguard Worker #undef IS_NAME_CHAR_MINBPC
316*6be67779SAndroid Build Coastguard Worker #undef IS_NMSTRT_CHAR
317*6be67779SAndroid Build Coastguard Worker #undef IS_NMSTRT_CHAR_MINBPC
318*6be67779SAndroid Build Coastguard Worker #undef IS_INVALID_CHAR
319*6be67779SAndroid Build Coastguard Worker 
/* UTF8_cvalN is the value of the masked first byte of an N byte
   sequence. */
enum {
  UTF8_cval1 = 0x00, /* 0xxxxxxx */
  UTF8_cval2 = 0xC0, /* 110xxxxx */
  UTF8_cval3 = 0xE0, /* 1110xxxx */
  UTF8_cval4 = 0xF0  /* 11110xxx */
};
326*6be67779SAndroid Build Coastguard Worker 
/* Move *fromLimRef backwards so that the range [from, *fromLimRef) does
   not end in the middle of a multi-byte UTF-8 character.

   The range is scanned from its end towards `from`:
     - an ASCII byte (0xxxxxxx) ends the scan with the limit just after it;
     - a lead byte (110xxxxx, 1110xxxx, 11110xxx) whose sequence length is
       covered by the bytes walked so far ends the scan with the limit
       placed right after that complete sequence;
     - a lead byte whose sequence is not yet complete is dropped and the
       walked-byte count restarts;
     - any other byte (continuation bytes included) is simply walked over.
   If `from` is reached, the limit ends up equal to `from`.

   from        start of the buffer; never read before this address.
   fromLimRef  in: one past the last candidate byte; out: trimmed limit. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *end = *fromLimRef;
  size_t trailing = 0; /* bytes walked over since the scan (re)started */

  while (end > from) {
    const unsigned char last = (unsigned char)end[-1];
    size_t seqLen; /* sequence length announced by a lead byte, else 0 */

    if ((last & 0x80u) == 0x00u) {
      break; /* 1-byte character, matching 0b0xxxxxxx */
    }

    if ((last & 0xf8u) == 0xf0u) {
      seqLen = 4; /* lead byte 0b11110xxx */
    } else if ((last & 0xf0u) == 0xe0u) {
      seqLen = 3; /* lead byte 0b1110xxxx */
    } else if ((last & 0xe0u) == 0xc0u) {
      seqLen = 2; /* lead byte 0b110xxxxx */
    } else {
      seqLen = 0; /* not a recognised lead byte */
    }

    if (seqLen != 0) {
      if (trailing + 1 >= seqLen) {
        /* The character is complete: keep exactly its seqLen bytes. */
        end += seqLen - 1;
        break;
      }
      /* Incomplete character: drop it and start counting afresh. */
      trailing = 0;
    }

    end--;
    trailing++;
  }

  *fromLimRef = end;
}
365*6be67779SAndroid Build Coastguard Worker 
366*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
utf8_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)367*6be67779SAndroid Build Coastguard Worker utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
368*6be67779SAndroid Build Coastguard Worker             char **toP, const char *toLim) {
369*6be67779SAndroid Build Coastguard Worker   bool input_incomplete = false;
370*6be67779SAndroid Build Coastguard Worker   bool output_exhausted = false;
371*6be67779SAndroid Build Coastguard Worker 
372*6be67779SAndroid Build Coastguard Worker   /* Avoid copying partial characters (due to limited space). */
373*6be67779SAndroid Build Coastguard Worker   const ptrdiff_t bytesAvailable = fromLim - *fromP;
374*6be67779SAndroid Build Coastguard Worker   const ptrdiff_t bytesStorable = toLim - *toP;
375*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
376*6be67779SAndroid Build Coastguard Worker   if (bytesAvailable > bytesStorable) {
377*6be67779SAndroid Build Coastguard Worker     fromLim = *fromP + bytesStorable;
378*6be67779SAndroid Build Coastguard Worker     output_exhausted = true;
379*6be67779SAndroid Build Coastguard Worker   }
380*6be67779SAndroid Build Coastguard Worker 
381*6be67779SAndroid Build Coastguard Worker   /* Avoid copying partial characters (from incomplete input). */
382*6be67779SAndroid Build Coastguard Worker   {
383*6be67779SAndroid Build Coastguard Worker     const char *const fromLimBefore = fromLim;
384*6be67779SAndroid Build Coastguard Worker     _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
385*6be67779SAndroid Build Coastguard Worker     if (fromLim < fromLimBefore) {
386*6be67779SAndroid Build Coastguard Worker       input_incomplete = true;
387*6be67779SAndroid Build Coastguard Worker     }
388*6be67779SAndroid Build Coastguard Worker   }
389*6be67779SAndroid Build Coastguard Worker 
390*6be67779SAndroid Build Coastguard Worker   {
391*6be67779SAndroid Build Coastguard Worker     const ptrdiff_t bytesToCopy = fromLim - *fromP;
392*6be67779SAndroid Build Coastguard Worker     memcpy(*toP, *fromP, bytesToCopy);
393*6be67779SAndroid Build Coastguard Worker     *fromP += bytesToCopy;
394*6be67779SAndroid Build Coastguard Worker     *toP += bytesToCopy;
395*6be67779SAndroid Build Coastguard Worker   }
396*6be67779SAndroid Build Coastguard Worker 
397*6be67779SAndroid Build Coastguard Worker   if (output_exhausted) /* needs to go first */
398*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_OUTPUT_EXHAUSTED;
399*6be67779SAndroid Build Coastguard Worker   else if (input_incomplete)
400*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_INPUT_INCOMPLETE;
401*6be67779SAndroid Build Coastguard Worker   else
402*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_COMPLETED;
403*6be67779SAndroid Build Coastguard Worker }
404*6be67779SAndroid Build Coastguard Worker 
405*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
utf8_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)406*6be67779SAndroid Build Coastguard Worker utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
407*6be67779SAndroid Build Coastguard Worker              unsigned short **toP, const unsigned short *toLim) {
408*6be67779SAndroid Build Coastguard Worker   enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
409*6be67779SAndroid Build Coastguard Worker   unsigned short *to = *toP;
410*6be67779SAndroid Build Coastguard Worker   const char *from = *fromP;
411*6be67779SAndroid Build Coastguard Worker   while (from < fromLim && to < toLim) {
412*6be67779SAndroid Build Coastguard Worker     switch (SB_BYTE_TYPE(enc, from)) {
413*6be67779SAndroid Build Coastguard Worker     case BT_LEAD2:
414*6be67779SAndroid Build Coastguard Worker       if (fromLim - from < 2) {
415*6be67779SAndroid Build Coastguard Worker         res = XML_CONVERT_INPUT_INCOMPLETE;
416*6be67779SAndroid Build Coastguard Worker         goto after;
417*6be67779SAndroid Build Coastguard Worker       }
418*6be67779SAndroid Build Coastguard Worker       *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
419*6be67779SAndroid Build Coastguard Worker       from += 2;
420*6be67779SAndroid Build Coastguard Worker       break;
421*6be67779SAndroid Build Coastguard Worker     case BT_LEAD3:
422*6be67779SAndroid Build Coastguard Worker       if (fromLim - from < 3) {
423*6be67779SAndroid Build Coastguard Worker         res = XML_CONVERT_INPUT_INCOMPLETE;
424*6be67779SAndroid Build Coastguard Worker         goto after;
425*6be67779SAndroid Build Coastguard Worker       }
426*6be67779SAndroid Build Coastguard Worker       *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
427*6be67779SAndroid Build Coastguard Worker                                | (from[2] & 0x3f));
428*6be67779SAndroid Build Coastguard Worker       from += 3;
429*6be67779SAndroid Build Coastguard Worker       break;
430*6be67779SAndroid Build Coastguard Worker     case BT_LEAD4: {
431*6be67779SAndroid Build Coastguard Worker       unsigned long n;
432*6be67779SAndroid Build Coastguard Worker       if (toLim - to < 2) {
433*6be67779SAndroid Build Coastguard Worker         res = XML_CONVERT_OUTPUT_EXHAUSTED;
434*6be67779SAndroid Build Coastguard Worker         goto after;
435*6be67779SAndroid Build Coastguard Worker       }
436*6be67779SAndroid Build Coastguard Worker       if (fromLim - from < 4) {
437*6be67779SAndroid Build Coastguard Worker         res = XML_CONVERT_INPUT_INCOMPLETE;
438*6be67779SAndroid Build Coastguard Worker         goto after;
439*6be67779SAndroid Build Coastguard Worker       }
440*6be67779SAndroid Build Coastguard Worker       n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
441*6be67779SAndroid Build Coastguard Worker           | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
442*6be67779SAndroid Build Coastguard Worker       n -= 0x10000;
443*6be67779SAndroid Build Coastguard Worker       to[0] = (unsigned short)((n >> 10) | 0xD800);
444*6be67779SAndroid Build Coastguard Worker       to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
445*6be67779SAndroid Build Coastguard Worker       to += 2;
446*6be67779SAndroid Build Coastguard Worker       from += 4;
447*6be67779SAndroid Build Coastguard Worker     } break;
448*6be67779SAndroid Build Coastguard Worker     default:
449*6be67779SAndroid Build Coastguard Worker       *to++ = *from++;
450*6be67779SAndroid Build Coastguard Worker       break;
451*6be67779SAndroid Build Coastguard Worker     }
452*6be67779SAndroid Build Coastguard Worker   }
453*6be67779SAndroid Build Coastguard Worker   if (from < fromLim)
454*6be67779SAndroid Build Coastguard Worker     res = XML_CONVERT_OUTPUT_EXHAUSTED;
455*6be67779SAndroid Build Coastguard Worker after:
456*6be67779SAndroid Build Coastguard Worker   *fromP = from;
457*6be67779SAndroid Build Coastguard Worker   *toP = to;
458*6be67779SAndroid Build Coastguard Worker   return res;
459*6be67779SAndroid Build Coastguard Worker }
460*6be67779SAndroid Build Coastguard Worker 
461*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
462*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding utf8_encoding_ns
463*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
464*6be67779SAndroid Build Coastguard Worker        {
465*6be67779SAndroid Build Coastguard Worker #  include "asciitab.h"
466*6be67779SAndroid Build Coastguard Worker #  include "utf8tab.h"
467*6be67779SAndroid Build Coastguard Worker        },
468*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
469*6be67779SAndroid Build Coastguard Worker #endif
470*6be67779SAndroid Build Coastguard Worker 
471*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding utf8_encoding
472*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
473*6be67779SAndroid Build Coastguard Worker        {
474*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
475*6be67779SAndroid Build Coastguard Worker #include "asciitab.h"
476*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
477*6be67779SAndroid Build Coastguard Worker #include "utf8tab.h"
478*6be67779SAndroid Build Coastguard Worker        },
479*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
480*6be67779SAndroid Build Coastguard Worker 
481*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
482*6be67779SAndroid Build Coastguard Worker 
483*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_utf8_encoding_ns
484*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
485*6be67779SAndroid Build Coastguard Worker        {
486*6be67779SAndroid Build Coastguard Worker #  include "iasciitab.h"
487*6be67779SAndroid Build Coastguard Worker #  include "utf8tab.h"
488*6be67779SAndroid Build Coastguard Worker        },
489*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
490*6be67779SAndroid Build Coastguard Worker 
491*6be67779SAndroid Build Coastguard Worker #endif
492*6be67779SAndroid Build Coastguard Worker 
493*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_utf8_encoding
494*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
495*6be67779SAndroid Build Coastguard Worker        {
496*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
497*6be67779SAndroid Build Coastguard Worker #include "iasciitab.h"
498*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
499*6be67779SAndroid Build Coastguard Worker #include "utf8tab.h"
500*6be67779SAndroid Build Coastguard Worker        },
501*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
502*6be67779SAndroid Build Coastguard Worker 
503*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
latin1_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)504*6be67779SAndroid Build Coastguard Worker latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
505*6be67779SAndroid Build Coastguard Worker               char **toP, const char *toLim) {
506*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
507*6be67779SAndroid Build Coastguard Worker   for (;;) {
508*6be67779SAndroid Build Coastguard Worker     unsigned char c;
509*6be67779SAndroid Build Coastguard Worker     if (*fromP == fromLim)
510*6be67779SAndroid Build Coastguard Worker       return XML_CONVERT_COMPLETED;
511*6be67779SAndroid Build Coastguard Worker     c = (unsigned char)**fromP;
512*6be67779SAndroid Build Coastguard Worker     if (c & 0x80) {
513*6be67779SAndroid Build Coastguard Worker       if (toLim - *toP < 2)
514*6be67779SAndroid Build Coastguard Worker         return XML_CONVERT_OUTPUT_EXHAUSTED;
515*6be67779SAndroid Build Coastguard Worker       *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
516*6be67779SAndroid Build Coastguard Worker       *(*toP)++ = (char)((c & 0x3f) | 0x80);
517*6be67779SAndroid Build Coastguard Worker       (*fromP)++;
518*6be67779SAndroid Build Coastguard Worker     } else {
519*6be67779SAndroid Build Coastguard Worker       if (*toP == toLim)
520*6be67779SAndroid Build Coastguard Worker         return XML_CONVERT_OUTPUT_EXHAUSTED;
521*6be67779SAndroid Build Coastguard Worker       *(*toP)++ = *(*fromP)++;
522*6be67779SAndroid Build Coastguard Worker     }
523*6be67779SAndroid Build Coastguard Worker   }
524*6be67779SAndroid Build Coastguard Worker }
525*6be67779SAndroid Build Coastguard Worker 
526*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
latin1_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)527*6be67779SAndroid Build Coastguard Worker latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
528*6be67779SAndroid Build Coastguard Worker                unsigned short **toP, const unsigned short *toLim) {
529*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
530*6be67779SAndroid Build Coastguard Worker   while (*fromP < fromLim && *toP < toLim)
531*6be67779SAndroid Build Coastguard Worker     *(*toP)++ = (unsigned char)*(*fromP)++;
532*6be67779SAndroid Build Coastguard Worker 
533*6be67779SAndroid Build Coastguard Worker   if ((*toP == toLim) && (*fromP < fromLim))
534*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_OUTPUT_EXHAUSTED;
535*6be67779SAndroid Build Coastguard Worker   else
536*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_COMPLETED;
537*6be67779SAndroid Build Coastguard Worker }
538*6be67779SAndroid Build Coastguard Worker 
539*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
540*6be67779SAndroid Build Coastguard Worker 
541*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding latin1_encoding_ns
542*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
543*6be67779SAndroid Build Coastguard Worker        {
544*6be67779SAndroid Build Coastguard Worker #  include "asciitab.h"
545*6be67779SAndroid Build Coastguard Worker #  include "latin1tab.h"
546*6be67779SAndroid Build Coastguard Worker        },
547*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NULL_VTABLE};
548*6be67779SAndroid Build Coastguard Worker 
549*6be67779SAndroid Build Coastguard Worker #endif
550*6be67779SAndroid Build Coastguard Worker 
551*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding latin1_encoding
552*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
553*6be67779SAndroid Build Coastguard Worker        {
554*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
555*6be67779SAndroid Build Coastguard Worker #include "asciitab.h"
556*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
557*6be67779SAndroid Build Coastguard Worker #include "latin1tab.h"
558*6be67779SAndroid Build Coastguard Worker        },
559*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NULL_VTABLE};
560*6be67779SAndroid Build Coastguard Worker 
561*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
ascii_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)562*6be67779SAndroid Build Coastguard Worker ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
563*6be67779SAndroid Build Coastguard Worker              char **toP, const char *toLim) {
564*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
565*6be67779SAndroid Build Coastguard Worker   while (*fromP < fromLim && *toP < toLim)
566*6be67779SAndroid Build Coastguard Worker     *(*toP)++ = *(*fromP)++;
567*6be67779SAndroid Build Coastguard Worker 
568*6be67779SAndroid Build Coastguard Worker   if ((*toP == toLim) && (*fromP < fromLim))
569*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_OUTPUT_EXHAUSTED;
570*6be67779SAndroid Build Coastguard Worker   else
571*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_COMPLETED;
572*6be67779SAndroid Build Coastguard Worker }
573*6be67779SAndroid Build Coastguard Worker 
574*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
575*6be67779SAndroid Build Coastguard Worker 
576*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding ascii_encoding_ns
577*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
578*6be67779SAndroid Build Coastguard Worker        {
579*6be67779SAndroid Build Coastguard Worker #  include "asciitab.h"
580*6be67779SAndroid Build Coastguard Worker            /* BT_NONXML == 0 */
581*6be67779SAndroid Build Coastguard Worker        },
582*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NULL_VTABLE};
583*6be67779SAndroid Build Coastguard Worker 
584*6be67779SAndroid Build Coastguard Worker #endif
585*6be67779SAndroid Build Coastguard Worker 
586*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding ascii_encoding
587*6be67779SAndroid Build Coastguard Worker     = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
588*6be67779SAndroid Build Coastguard Worker        {
589*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
590*6be67779SAndroid Build Coastguard Worker #include "asciitab.h"
591*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
592*6be67779SAndroid Build Coastguard Worker            /* BT_NONXML == 0 */
593*6be67779SAndroid Build Coastguard Worker        },
594*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(sb_) NULL_VTABLE};
595*6be67779SAndroid Build Coastguard Worker 
596*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
unicode_byte_type(char hi,char lo)597*6be67779SAndroid Build Coastguard Worker unicode_byte_type(char hi, char lo) {
598*6be67779SAndroid Build Coastguard Worker   switch ((unsigned char)hi) {
599*6be67779SAndroid Build Coastguard Worker   /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
600*6be67779SAndroid Build Coastguard Worker   case 0xD8:
601*6be67779SAndroid Build Coastguard Worker   case 0xD9:
602*6be67779SAndroid Build Coastguard Worker   case 0xDA:
603*6be67779SAndroid Build Coastguard Worker   case 0xDB:
604*6be67779SAndroid Build Coastguard Worker     return BT_LEAD4;
605*6be67779SAndroid Build Coastguard Worker   /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
606*6be67779SAndroid Build Coastguard Worker   case 0xDC:
607*6be67779SAndroid Build Coastguard Worker   case 0xDD:
608*6be67779SAndroid Build Coastguard Worker   case 0xDE:
609*6be67779SAndroid Build Coastguard Worker   case 0xDF:
610*6be67779SAndroid Build Coastguard Worker     return BT_TRAIL;
611*6be67779SAndroid Build Coastguard Worker   case 0xFF:
612*6be67779SAndroid Build Coastguard Worker     switch ((unsigned char)lo) {
613*6be67779SAndroid Build Coastguard Worker     case 0xFF: /* noncharacter-FFFF */
614*6be67779SAndroid Build Coastguard Worker     case 0xFE: /* noncharacter-FFFE */
615*6be67779SAndroid Build Coastguard Worker       return BT_NONXML;
616*6be67779SAndroid Build Coastguard Worker     }
617*6be67779SAndroid Build Coastguard Worker     break;
618*6be67779SAndroid Build Coastguard Worker   }
619*6be67779SAndroid Build Coastguard Worker   return BT_NONASCII;
620*6be67779SAndroid Build Coastguard Worker }
621*6be67779SAndroid Build Coastguard Worker 
/* Generates E##toUtf8(): converts 16-bit units at *fromP into UTF-8 at *toP.
   The byte order is supplied by the GET_LO / GET_HI macros in effect where
   the generator is instantiated.

   A trailing odd input byte is ignored.  Each unit is encoded according to
   its high byte:
     - hi == 0 and lo < 0x80: one output byte;
     - hi in 0x00..0x07 otherwise: two output bytes;
     - hi in 0xD8..0xDB: combined with the following unit into four output
       bytes (both units are consumed);
     - any other hi: three output bytes.
   A character is only written when the output has room for all of its bytes.
   On every exit *fromP points at the first unit not consumed and *toP just
   past the last byte written.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the next character does not fit,
   XML_CONVERT_INPUT_INCOMPLETE when a unit with hi in 0xD8..0xDB is not
   followed by another unit before fromLim, XML_CONVERT_COMPLETED otherwise.
   enc is unused. */
#define DEFINE_UTF16_TO_UTF8(E) \
  static enum XML_Convert_Result PTRCALL E##toUtf8( \
      const ENCODING *enc, const char **fromP, const char *fromLim, \
      char **toP, const char *toLim) { \
    enum XML_Convert_Result status = XML_CONVERT_COMPLETED; \
    const char *src = *fromP; \
    char *dst = *toP; \
    UNUSED_P(enc); \
    /* Only whole 16-bit units are converted: round the length down. */ \
    fromLim = src + (((fromLim - src) >> 1) << 1); \
    while (src < fromLim) { \
      const unsigned char lo = GET_LO(src); \
      const unsigned char hi = GET_HI(src); \
      if (hi == 0 && lo < 0x80) { \
        /* One output byte. */ \
        if (dst == toLim) { \
          status = XML_CONVERT_OUTPUT_EXHAUSTED; \
          break; \
        } \
        *dst++ = lo; \
      } else if (hi < 0x8) { \
        /* Two output bytes. */ \
        if (toLim - dst < 2) { \
          status = XML_CONVERT_OUTPUT_EXHAUSTED; \
          break; \
        } \
        *dst++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
        *dst++ = ((lo & 0x3f) | 0x80); \
      } else if (hi >= 0xD8 && hi <= 0xDB) { \
        /* Four output bytes built from this unit and the next one. */ \
        int plane; \
        unsigned char lo2; \
        if (toLim - dst < 4) { \
          status = XML_CONVERT_OUTPUT_EXHAUSTED; \
          break; \
        } \
        if (fromLim - src < 4) { \
          status = XML_CONVERT_INPUT_INCOMPLETE; \
          break; \
        } \
        plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
        *dst++ = (char)((plane >> 2) | UTF8_cval4); \
        *dst++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
        src += 2; /* step onto the second unit of the pair */ \
        lo2 = GET_LO(src); \
        *dst++ = (((lo & 0x3) << 4) | ((GET_HI(src) & 0x3) << 2) \
                  | (lo2 >> 6) | 0x80); \
        *dst++ = ((lo2 & 0x3f) | 0x80); \
      } else { \
        /* Three output bytes: 16 bits divided 4, 6, 6. */ \
        if (toLim - dst < 3) { \
          status = XML_CONVERT_OUTPUT_EXHAUSTED; \
          break; \
        } \
        *dst++ = ((hi >> 4) | UTF8_cval3); \
        *dst++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
        *dst++ = ((lo & 0x3f) | 0x80); \
      } \
      src += 2; \
    } \
    *fromP = src; \
    *toP = dst; \
    return status; \
  }
698*6be67779SAndroid Build Coastguard Worker 
/* Generates E##toUtf16(): copies 16-bit units at *fromP into native
   unsigned short code units at *toP, assembling each unit as
   (GET_HI << 8) | GET_LO with the GET_LO / GET_HI macros in effect where the
   generator is instantiated.

   A trailing odd input byte is ignored.  When the input holds more units
   than the output has room for and the high byte of the last input unit is
   in 0xD8..0xDF, that last unit is left out and the provisional result
   becomes XML_CONVERT_INPUT_INCOMPLETE (so that only the first half of a
   surrogate pair is not copied).  *fromP and *toP are left just past the
   last unit copied.

   Returns XML_CONVERT_OUTPUT_EXHAUSTED when the output is full while input
   remains; otherwise the provisional result (XML_CONVERT_COMPLETED or
   XML_CONVERT_INPUT_INCOMPLETE).  enc is unused. */
#define DEFINE_UTF16_TO_UTF16(E) \
  static enum XML_Convert_Result PTRCALL E##toUtf16( \
      const ENCODING *enc, const char **fromP, const char *fromLim, \
      unsigned short **toP, const unsigned short *toLim) { \
    enum XML_Convert_Result status = XML_CONVERT_COMPLETED; \
    const char *src = *fromP; \
    unsigned short *dst = *toP; \
    UNUSED_P(enc); \
    /* Only whole 16-bit units are converted: round the length down. */ \
    fromLim = src + (((fromLim - src) >> 1) << 1); \
    /* Avoid copying first half only of surrogate */ \
    if (fromLim - src > ((toLim - dst) << 1) \
        && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \
      fromLim -= 2; \
      status = XML_CONVERT_INPUT_INCOMPLETE; \
    } \
    while (src < fromLim && dst < toLim) { \
      *dst++ = (GET_HI(src) << 8) | GET_LO(src); \
      src += 2; \
    } \
    *fromP = src; \
    *toP = dst; \
    if (dst == toLim && src < fromLim) \
      status = XML_CONVERT_OUTPUT_EXHAUSTED; \
    return status; \
  }
719*6be67779SAndroid Build Coastguard Worker 
720*6be67779SAndroid Build Coastguard Worker #define GET_LO(ptr) ((unsigned char)(ptr)[0])
721*6be67779SAndroid Build Coastguard Worker #define GET_HI(ptr) ((unsigned char)(ptr)[1])
722*6be67779SAndroid Build Coastguard Worker 
723*6be67779SAndroid Build Coastguard Worker DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)724*6be67779SAndroid Build Coastguard Worker DEFINE_UTF16_TO_UTF16(little2_)
725*6be67779SAndroid Build Coastguard Worker 
726*6be67779SAndroid Build Coastguard Worker #undef GET_LO
727*6be67779SAndroid Build Coastguard Worker #undef GET_HI
728*6be67779SAndroid Build Coastguard Worker 
729*6be67779SAndroid Build Coastguard Worker #define GET_LO(ptr) ((unsigned char)(ptr)[1])
730*6be67779SAndroid Build Coastguard Worker #define GET_HI(ptr) ((unsigned char)(ptr)[0])
731*6be67779SAndroid Build Coastguard Worker 
732*6be67779SAndroid Build Coastguard Worker DEFINE_UTF16_TO_UTF8(big2_)
733*6be67779SAndroid Build Coastguard Worker DEFINE_UTF16_TO_UTF16(big2_)
734*6be67779SAndroid Build Coastguard Worker 
735*6be67779SAndroid Build Coastguard Worker #undef GET_LO
736*6be67779SAndroid Build Coastguard Worker #undef GET_HI
737*6be67779SAndroid Build Coastguard Worker 
/* Helpers for two-byte units stored low byte first: (p)[0] is the low byte
   and (p)[1] the high byte.  Every use of the argument is parenthesized so
   that an expression such as "ptr + 2" can be passed safely. */

/* Byte type of the unit at p: a unit whose high byte is zero is looked up via
   SB_BYTE_TYPE, anything else is classified by unicode_byte_type(). */
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Look the unit up in namePages via UCS2_GET_NAMING (high byte, low byte). */
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
/* Look the unit up in nmstrtPages via UCS2_GET_NAMING (high byte, low byte). */
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
746*6be67779SAndroid Build Coastguard Worker 
747*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
748*6be67779SAndroid Build Coastguard Worker 
749*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
750*6be67779SAndroid Build Coastguard Worker little2_byteType(const ENCODING *enc, const char *p) {
751*6be67779SAndroid Build Coastguard Worker   return LITTLE2_BYTE_TYPE(enc, p);
752*6be67779SAndroid Build Coastguard Worker }
753*6be67779SAndroid Build Coastguard Worker 
754*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
little2_byteToAscii(const ENCODING * enc,const char * p)755*6be67779SAndroid Build Coastguard Worker little2_byteToAscii(const ENCODING *enc, const char *p) {
756*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
757*6be67779SAndroid Build Coastguard Worker   return LITTLE2_BYTE_TO_ASCII(p);
758*6be67779SAndroid Build Coastguard Worker }
759*6be67779SAndroid Build Coastguard Worker 
760*6be67779SAndroid Build Coastguard Worker static int PTRCALL
little2_charMatches(const ENCODING * enc,const char * p,int c)761*6be67779SAndroid Build Coastguard Worker little2_charMatches(const ENCODING *enc, const char *p, int c) {
762*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
763*6be67779SAndroid Build Coastguard Worker   return LITTLE2_CHAR_MATCHES(p, c);
764*6be67779SAndroid Build Coastguard Worker }
765*6be67779SAndroid Build Coastguard Worker 
766*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
little2_isNameMin(const ENCODING * enc,const char * p)767*6be67779SAndroid Build Coastguard Worker little2_isNameMin(const ENCODING *enc, const char *p) {
768*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
769*6be67779SAndroid Build Coastguard Worker   return LITTLE2_IS_NAME_CHAR_MINBPC(p);
770*6be67779SAndroid Build Coastguard Worker }
771*6be67779SAndroid Build Coastguard Worker 
772*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING * enc,const char * p)773*6be67779SAndroid Build Coastguard Worker little2_isNmstrtMin(const ENCODING *enc, const char *p) {
774*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
775*6be67779SAndroid Build Coastguard Worker   return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
776*6be67779SAndroid Build Coastguard Worker }
777*6be67779SAndroid Build Coastguard Worker 
778*6be67779SAndroid Build Coastguard Worker #  undef VTABLE
779*6be67779SAndroid Build Coastguard Worker #  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
780*6be67779SAndroid Build Coastguard Worker 
781*6be67779SAndroid Build Coastguard Worker #else /* not XML_MIN_SIZE */
782*6be67779SAndroid Build Coastguard Worker 
783*6be67779SAndroid Build Coastguard Worker #  undef PREFIX
784*6be67779SAndroid Build Coastguard Worker #  define PREFIX(ident) little2_##ident
785*6be67779SAndroid Build Coastguard Worker #  define MINBPC(enc) 2
786*6be67779SAndroid Build Coastguard Worker /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
787*6be67779SAndroid Build Coastguard Worker #  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
788*6be67779SAndroid Build Coastguard Worker #  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
789*6be67779SAndroid Build Coastguard Worker #  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
790*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR(enc, p, n) 0
791*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
792*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR(enc, p, n) (0)
793*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)
794*6be67779SAndroid Build Coastguard Worker 
795*6be67779SAndroid Build Coastguard Worker #  define XML_TOK_IMPL_C
796*6be67779SAndroid Build Coastguard Worker #  include "xmltok_impl.c"
797*6be67779SAndroid Build Coastguard Worker #  undef XML_TOK_IMPL_C
798*6be67779SAndroid Build Coastguard Worker 
799*6be67779SAndroid Build Coastguard Worker #  undef MINBPC
800*6be67779SAndroid Build Coastguard Worker #  undef BYTE_TYPE
801*6be67779SAndroid Build Coastguard Worker #  undef BYTE_TO_ASCII
802*6be67779SAndroid Build Coastguard Worker #  undef CHAR_MATCHES
803*6be67779SAndroid Build Coastguard Worker #  undef IS_NAME_CHAR
804*6be67779SAndroid Build Coastguard Worker #  undef IS_NAME_CHAR_MINBPC
805*6be67779SAndroid Build Coastguard Worker #  undef IS_NMSTRT_CHAR
806*6be67779SAndroid Build Coastguard Worker #  undef IS_NMSTRT_CHAR_MINBPC
807*6be67779SAndroid Build Coastguard Worker #  undef IS_INVALID_CHAR
808*6be67779SAndroid Build Coastguard Worker 
809*6be67779SAndroid Build Coastguard Worker #endif /* not XML_MIN_SIZE */
810*6be67779SAndroid Build Coastguard Worker 
811*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
812*6be67779SAndroid Build Coastguard Worker 
813*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding little2_encoding_ns
814*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0,
815*6be67779SAndroid Build Coastguard Worker #  if BYTEORDER == 1234
816*6be67779SAndroid Build Coastguard Worker         1
817*6be67779SAndroid Build Coastguard Worker #  else
818*6be67779SAndroid Build Coastguard Worker         0
819*6be67779SAndroid Build Coastguard Worker #  endif
820*6be67779SAndroid Build Coastguard Worker        },
821*6be67779SAndroid Build Coastguard Worker        {
822*6be67779SAndroid Build Coastguard Worker #  include "asciitab.h"
823*6be67779SAndroid Build Coastguard Worker #  include "latin1tab.h"
824*6be67779SAndroid Build Coastguard Worker        },
825*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(little2_) NULL_VTABLE};
826*6be67779SAndroid Build Coastguard Worker 
827*6be67779SAndroid Build Coastguard Worker #endif
828*6be67779SAndroid Build Coastguard Worker 
829*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding little2_encoding
830*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0,
831*6be67779SAndroid Build Coastguard Worker #if BYTEORDER == 1234
832*6be67779SAndroid Build Coastguard Worker         1
833*6be67779SAndroid Build Coastguard Worker #else
834*6be67779SAndroid Build Coastguard Worker         0
835*6be67779SAndroid Build Coastguard Worker #endif
836*6be67779SAndroid Build Coastguard Worker        },
837*6be67779SAndroid Build Coastguard Worker        {
838*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
839*6be67779SAndroid Build Coastguard Worker #include "asciitab.h"
840*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
841*6be67779SAndroid Build Coastguard Worker #include "latin1tab.h"
842*6be67779SAndroid Build Coastguard Worker        },
843*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(little2_) NULL_VTABLE};
844*6be67779SAndroid Build Coastguard Worker 
845*6be67779SAndroid Build Coastguard Worker #if BYTEORDER != 4321
846*6be67779SAndroid Build Coastguard Worker 
847*6be67779SAndroid Build Coastguard Worker #  ifdef XML_NS
848*6be67779SAndroid Build Coastguard Worker 
849*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_little2_encoding_ns
850*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0, 1},
851*6be67779SAndroid Build Coastguard Worker        {
852*6be67779SAndroid Build Coastguard Worker #    include "iasciitab.h"
853*6be67779SAndroid Build Coastguard Worker #    include "latin1tab.h"
854*6be67779SAndroid Build Coastguard Worker        },
855*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(little2_) NULL_VTABLE};
856*6be67779SAndroid Build Coastguard Worker 
857*6be67779SAndroid Build Coastguard Worker #  endif
858*6be67779SAndroid Build Coastguard Worker 
859*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_little2_encoding
860*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0, 1},
861*6be67779SAndroid Build Coastguard Worker        {
862*6be67779SAndroid Build Coastguard Worker #  define BT_COLON BT_NMSTRT
863*6be67779SAndroid Build Coastguard Worker #  include "iasciitab.h"
864*6be67779SAndroid Build Coastguard Worker #  undef BT_COLON
865*6be67779SAndroid Build Coastguard Worker #  include "latin1tab.h"
866*6be67779SAndroid Build Coastguard Worker        },
867*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(little2_) NULL_VTABLE};
868*6be67779SAndroid Build Coastguard Worker 
869*6be67779SAndroid Build Coastguard Worker #endif
870*6be67779SAndroid Build Coastguard Worker 
/* Helpers for big-endian two-byte code units: p[0] is the high byte and
   p[1] is the low byte of the unit at p.

   Every use of the parameter p is parenthesized so that the macros expand
   correctly when handed an expression such as `ptr + 2`.  The arguments are
   still evaluated more than once, so they must be free of side effects. */

/* Byte type of the unit at p: SB_BYTE_TYPE on the low byte when the high
   byte is zero, otherwise unicode_byte_type(high, low). */
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0 ? SB_BYTE_TYPE(enc, (p) + 1)                                    \
               : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
/* Look the unit up in namePages via UCS2_GET_NAMING(pages, high, low). */
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
/* Look the unit up in nmstrtPages via UCS2_GET_NAMING(pages, high, low). */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
879*6be67779SAndroid Build Coastguard Worker 
880*6be67779SAndroid Build Coastguard Worker #ifdef XML_MIN_SIZE
881*6be67779SAndroid Build Coastguard Worker 
882*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
big2_byteType(const ENCODING * enc,const char * p)883*6be67779SAndroid Build Coastguard Worker big2_byteType(const ENCODING *enc, const char *p) {
884*6be67779SAndroid Build Coastguard Worker   return BIG2_BYTE_TYPE(enc, p);
885*6be67779SAndroid Build Coastguard Worker }
886*6be67779SAndroid Build Coastguard Worker 
887*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
big2_byteToAscii(const ENCODING * enc,const char * p)888*6be67779SAndroid Build Coastguard Worker big2_byteToAscii(const ENCODING *enc, const char *p) {
889*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
890*6be67779SAndroid Build Coastguard Worker   return BIG2_BYTE_TO_ASCII(p);
891*6be67779SAndroid Build Coastguard Worker }
892*6be67779SAndroid Build Coastguard Worker 
893*6be67779SAndroid Build Coastguard Worker static int PTRCALL
big2_charMatches(const ENCODING * enc,const char * p,int c)894*6be67779SAndroid Build Coastguard Worker big2_charMatches(const ENCODING *enc, const char *p, int c) {
895*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
896*6be67779SAndroid Build Coastguard Worker   return BIG2_CHAR_MATCHES(p, c);
897*6be67779SAndroid Build Coastguard Worker }
898*6be67779SAndroid Build Coastguard Worker 
899*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
big2_isNameMin(const ENCODING * enc,const char * p)900*6be67779SAndroid Build Coastguard Worker big2_isNameMin(const ENCODING *enc, const char *p) {
901*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
902*6be67779SAndroid Build Coastguard Worker   return BIG2_IS_NAME_CHAR_MINBPC(p);
903*6be67779SAndroid Build Coastguard Worker }
904*6be67779SAndroid Build Coastguard Worker 
905*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING * enc,const char * p)906*6be67779SAndroid Build Coastguard Worker big2_isNmstrtMin(const ENCODING *enc, const char *p) {
907*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
908*6be67779SAndroid Build Coastguard Worker   return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
909*6be67779SAndroid Build Coastguard Worker }
910*6be67779SAndroid Build Coastguard Worker 
911*6be67779SAndroid Build Coastguard Worker #  undef VTABLE
912*6be67779SAndroid Build Coastguard Worker #  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
913*6be67779SAndroid Build Coastguard Worker 
914*6be67779SAndroid Build Coastguard Worker #else /* not XML_MIN_SIZE */
915*6be67779SAndroid Build Coastguard Worker 
916*6be67779SAndroid Build Coastguard Worker #  undef PREFIX
917*6be67779SAndroid Build Coastguard Worker #  define PREFIX(ident) big2_##ident
918*6be67779SAndroid Build Coastguard Worker #  define MINBPC(enc) 2
919*6be67779SAndroid Build Coastguard Worker /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
920*6be67779SAndroid Build Coastguard Worker #  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
921*6be67779SAndroid Build Coastguard Worker #  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
922*6be67779SAndroid Build Coastguard Worker #  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
923*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR(enc, p, n) 0
924*6be67779SAndroid Build Coastguard Worker #  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
925*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR(enc, p, n) (0)
926*6be67779SAndroid Build Coastguard Worker #  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)
927*6be67779SAndroid Build Coastguard Worker 
928*6be67779SAndroid Build Coastguard Worker #  define XML_TOK_IMPL_C
929*6be67779SAndroid Build Coastguard Worker #  include "xmltok_impl.c"
930*6be67779SAndroid Build Coastguard Worker #  undef XML_TOK_IMPL_C
931*6be67779SAndroid Build Coastguard Worker 
932*6be67779SAndroid Build Coastguard Worker #  undef MINBPC
933*6be67779SAndroid Build Coastguard Worker #  undef BYTE_TYPE
934*6be67779SAndroid Build Coastguard Worker #  undef BYTE_TO_ASCII
935*6be67779SAndroid Build Coastguard Worker #  undef CHAR_MATCHES
936*6be67779SAndroid Build Coastguard Worker #  undef IS_NAME_CHAR
937*6be67779SAndroid Build Coastguard Worker #  undef IS_NAME_CHAR_MINBPC
938*6be67779SAndroid Build Coastguard Worker #  undef IS_NMSTRT_CHAR
939*6be67779SAndroid Build Coastguard Worker #  undef IS_NMSTRT_CHAR_MINBPC
940*6be67779SAndroid Build Coastguard Worker #  undef IS_INVALID_CHAR
941*6be67779SAndroid Build Coastguard Worker 
942*6be67779SAndroid Build Coastguard Worker #endif /* not XML_MIN_SIZE */
943*6be67779SAndroid Build Coastguard Worker 
944*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
945*6be67779SAndroid Build Coastguard Worker 
946*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding big2_encoding_ns
947*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0,
948*6be67779SAndroid Build Coastguard Worker #  if BYTEORDER == 4321
949*6be67779SAndroid Build Coastguard Worker         1
950*6be67779SAndroid Build Coastguard Worker #  else
951*6be67779SAndroid Build Coastguard Worker         0
952*6be67779SAndroid Build Coastguard Worker #  endif
953*6be67779SAndroid Build Coastguard Worker        },
954*6be67779SAndroid Build Coastguard Worker        {
955*6be67779SAndroid Build Coastguard Worker #  include "asciitab.h"
956*6be67779SAndroid Build Coastguard Worker #  include "latin1tab.h"
957*6be67779SAndroid Build Coastguard Worker        },
958*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(big2_) NULL_VTABLE};
959*6be67779SAndroid Build Coastguard Worker 
960*6be67779SAndroid Build Coastguard Worker #endif
961*6be67779SAndroid Build Coastguard Worker 
962*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding big2_encoding
963*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0,
964*6be67779SAndroid Build Coastguard Worker #if BYTEORDER == 4321
965*6be67779SAndroid Build Coastguard Worker         1
966*6be67779SAndroid Build Coastguard Worker #else
967*6be67779SAndroid Build Coastguard Worker         0
968*6be67779SAndroid Build Coastguard Worker #endif
969*6be67779SAndroid Build Coastguard Worker        },
970*6be67779SAndroid Build Coastguard Worker        {
971*6be67779SAndroid Build Coastguard Worker #define BT_COLON BT_NMSTRT
972*6be67779SAndroid Build Coastguard Worker #include "asciitab.h"
973*6be67779SAndroid Build Coastguard Worker #undef BT_COLON
974*6be67779SAndroid Build Coastguard Worker #include "latin1tab.h"
975*6be67779SAndroid Build Coastguard Worker        },
976*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(big2_) NULL_VTABLE};
977*6be67779SAndroid Build Coastguard Worker 
978*6be67779SAndroid Build Coastguard Worker #if BYTEORDER != 1234
979*6be67779SAndroid Build Coastguard Worker 
980*6be67779SAndroid Build Coastguard Worker #  ifdef XML_NS
981*6be67779SAndroid Build Coastguard Worker 
982*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_big2_encoding_ns
983*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0, 1},
984*6be67779SAndroid Build Coastguard Worker        {
985*6be67779SAndroid Build Coastguard Worker #    include "iasciitab.h"
986*6be67779SAndroid Build Coastguard Worker #    include "latin1tab.h"
987*6be67779SAndroid Build Coastguard Worker        },
988*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(big2_) NULL_VTABLE};
989*6be67779SAndroid Build Coastguard Worker 
990*6be67779SAndroid Build Coastguard Worker #  endif
991*6be67779SAndroid Build Coastguard Worker 
992*6be67779SAndroid Build Coastguard Worker static const struct normal_encoding internal_big2_encoding
993*6be67779SAndroid Build Coastguard Worker     = {{VTABLE, 2, 0, 1},
994*6be67779SAndroid Build Coastguard Worker        {
995*6be67779SAndroid Build Coastguard Worker #  define BT_COLON BT_NMSTRT
996*6be67779SAndroid Build Coastguard Worker #  include "iasciitab.h"
997*6be67779SAndroid Build Coastguard Worker #  undef BT_COLON
998*6be67779SAndroid Build Coastguard Worker #  include "latin1tab.h"
999*6be67779SAndroid Build Coastguard Worker        },
1000*6be67779SAndroid Build Coastguard Worker        STANDARD_VTABLE(big2_) NULL_VTABLE};
1001*6be67779SAndroid Build Coastguard Worker 
1002*6be67779SAndroid Build Coastguard Worker #endif
1003*6be67779SAndroid Build Coastguard Worker 
1004*6be67779SAndroid Build Coastguard Worker #undef PREFIX
1005*6be67779SAndroid Build Coastguard Worker 
1006*6be67779SAndroid Build Coastguard Worker static int FASTCALL
streqci(const char * s1,const char * s2)1007*6be67779SAndroid Build Coastguard Worker streqci(const char *s1, const char *s2) {
1008*6be67779SAndroid Build Coastguard Worker   for (;;) {
1009*6be67779SAndroid Build Coastguard Worker     char c1 = *s1++;
1010*6be67779SAndroid Build Coastguard Worker     char c2 = *s2++;
1011*6be67779SAndroid Build Coastguard Worker     if (ASCII_a <= c1 && c1 <= ASCII_z)
1012*6be67779SAndroid Build Coastguard Worker       c1 += ASCII_A - ASCII_a;
1013*6be67779SAndroid Build Coastguard Worker     if (ASCII_a <= c2 && c2 <= ASCII_z)
1014*6be67779SAndroid Build Coastguard Worker       /* The following line will never get executed.  streqci() is
1015*6be67779SAndroid Build Coastguard Worker        * only called from two places, both of which guarantee to put
1016*6be67779SAndroid Build Coastguard Worker        * upper-case strings into s2.
1017*6be67779SAndroid Build Coastguard Worker        */
1018*6be67779SAndroid Build Coastguard Worker       c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
1019*6be67779SAndroid Build Coastguard Worker     if (c1 != c2)
1020*6be67779SAndroid Build Coastguard Worker       return 0;
1021*6be67779SAndroid Build Coastguard Worker     if (! c1)
1022*6be67779SAndroid Build Coastguard Worker       break;
1023*6be67779SAndroid Build Coastguard Worker   }
1024*6be67779SAndroid Build Coastguard Worker   return 1;
1025*6be67779SAndroid Build Coastguard Worker }
1026*6be67779SAndroid Build Coastguard Worker 
1027*6be67779SAndroid Build Coastguard Worker static void PTRCALL
initUpdatePosition(const ENCODING * enc,const char * ptr,const char * end,POSITION * pos)1028*6be67779SAndroid Build Coastguard Worker initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
1029*6be67779SAndroid Build Coastguard Worker                    POSITION *pos) {
1030*6be67779SAndroid Build Coastguard Worker   UNUSED_P(enc);
1031*6be67779SAndroid Build Coastguard Worker   normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
1032*6be67779SAndroid Build Coastguard Worker }
1033*6be67779SAndroid Build Coastguard Worker 
1034*6be67779SAndroid Build Coastguard Worker static int
toAscii(const ENCODING * enc,const char * ptr,const char * end)1035*6be67779SAndroid Build Coastguard Worker toAscii(const ENCODING *enc, const char *ptr, const char *end) {
1036*6be67779SAndroid Build Coastguard Worker   char buf[1];
1037*6be67779SAndroid Build Coastguard Worker   char *p = buf;
1038*6be67779SAndroid Build Coastguard Worker   XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
1039*6be67779SAndroid Build Coastguard Worker   if (p == buf)
1040*6be67779SAndroid Build Coastguard Worker     return -1;
1041*6be67779SAndroid Build Coastguard Worker   else
1042*6be67779SAndroid Build Coastguard Worker     return buf[0];
1043*6be67779SAndroid Build Coastguard Worker }
1044*6be67779SAndroid Build Coastguard Worker 
1045*6be67779SAndroid Build Coastguard Worker static int FASTCALL
isSpace(int c)1046*6be67779SAndroid Build Coastguard Worker isSpace(int c) {
1047*6be67779SAndroid Build Coastguard Worker   switch (c) {
1048*6be67779SAndroid Build Coastguard Worker   case 0x20:
1049*6be67779SAndroid Build Coastguard Worker   case 0xD:
1050*6be67779SAndroid Build Coastguard Worker   case 0xA:
1051*6be67779SAndroid Build Coastguard Worker   case 0x9:
1052*6be67779SAndroid Build Coastguard Worker     return 1;
1053*6be67779SAndroid Build Coastguard Worker   }
1054*6be67779SAndroid Build Coastguard Worker   return 0;
1055*6be67779SAndroid Build Coastguard Worker }
1056*6be67779SAndroid Build Coastguard Worker 
1057*6be67779SAndroid Build Coastguard Worker /* Return 1 if there's just optional white space or there's an S
1058*6be67779SAndroid Build Coastguard Worker    followed by name=val.
1059*6be67779SAndroid Build Coastguard Worker */
1060*6be67779SAndroid Build Coastguard Worker static int
parsePseudoAttribute(const ENCODING * enc,const char * ptr,const char * end,const char ** namePtr,const char ** nameEndPtr,const char ** valPtr,const char ** nextTokPtr)1061*6be67779SAndroid Build Coastguard Worker parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
1062*6be67779SAndroid Build Coastguard Worker                      const char **namePtr, const char **nameEndPtr,
1063*6be67779SAndroid Build Coastguard Worker                      const char **valPtr, const char **nextTokPtr) {
1064*6be67779SAndroid Build Coastguard Worker   int c;
1065*6be67779SAndroid Build Coastguard Worker   char open;
1066*6be67779SAndroid Build Coastguard Worker   if (ptr == end) {
1067*6be67779SAndroid Build Coastguard Worker     *namePtr = NULL;
1068*6be67779SAndroid Build Coastguard Worker     return 1;
1069*6be67779SAndroid Build Coastguard Worker   }
1070*6be67779SAndroid Build Coastguard Worker   if (! isSpace(toAscii(enc, ptr, end))) {
1071*6be67779SAndroid Build Coastguard Worker     *nextTokPtr = ptr;
1072*6be67779SAndroid Build Coastguard Worker     return 0;
1073*6be67779SAndroid Build Coastguard Worker   }
1074*6be67779SAndroid Build Coastguard Worker   do {
1075*6be67779SAndroid Build Coastguard Worker     ptr += enc->minBytesPerChar;
1076*6be67779SAndroid Build Coastguard Worker   } while (isSpace(toAscii(enc, ptr, end)));
1077*6be67779SAndroid Build Coastguard Worker   if (ptr == end) {
1078*6be67779SAndroid Build Coastguard Worker     *namePtr = NULL;
1079*6be67779SAndroid Build Coastguard Worker     return 1;
1080*6be67779SAndroid Build Coastguard Worker   }
1081*6be67779SAndroid Build Coastguard Worker   *namePtr = ptr;
1082*6be67779SAndroid Build Coastguard Worker   for (;;) {
1083*6be67779SAndroid Build Coastguard Worker     c = toAscii(enc, ptr, end);
1084*6be67779SAndroid Build Coastguard Worker     if (c == -1) {
1085*6be67779SAndroid Build Coastguard Worker       *nextTokPtr = ptr;
1086*6be67779SAndroid Build Coastguard Worker       return 0;
1087*6be67779SAndroid Build Coastguard Worker     }
1088*6be67779SAndroid Build Coastguard Worker     if (c == ASCII_EQUALS) {
1089*6be67779SAndroid Build Coastguard Worker       *nameEndPtr = ptr;
1090*6be67779SAndroid Build Coastguard Worker       break;
1091*6be67779SAndroid Build Coastguard Worker     }
1092*6be67779SAndroid Build Coastguard Worker     if (isSpace(c)) {
1093*6be67779SAndroid Build Coastguard Worker       *nameEndPtr = ptr;
1094*6be67779SAndroid Build Coastguard Worker       do {
1095*6be67779SAndroid Build Coastguard Worker         ptr += enc->minBytesPerChar;
1096*6be67779SAndroid Build Coastguard Worker       } while (isSpace(c = toAscii(enc, ptr, end)));
1097*6be67779SAndroid Build Coastguard Worker       if (c != ASCII_EQUALS) {
1098*6be67779SAndroid Build Coastguard Worker         *nextTokPtr = ptr;
1099*6be67779SAndroid Build Coastguard Worker         return 0;
1100*6be67779SAndroid Build Coastguard Worker       }
1101*6be67779SAndroid Build Coastguard Worker       break;
1102*6be67779SAndroid Build Coastguard Worker     }
1103*6be67779SAndroid Build Coastguard Worker     ptr += enc->minBytesPerChar;
1104*6be67779SAndroid Build Coastguard Worker   }
1105*6be67779SAndroid Build Coastguard Worker   if (ptr == *namePtr) {
1106*6be67779SAndroid Build Coastguard Worker     *nextTokPtr = ptr;
1107*6be67779SAndroid Build Coastguard Worker     return 0;
1108*6be67779SAndroid Build Coastguard Worker   }
1109*6be67779SAndroid Build Coastguard Worker   ptr += enc->minBytesPerChar;
1110*6be67779SAndroid Build Coastguard Worker   c = toAscii(enc, ptr, end);
1111*6be67779SAndroid Build Coastguard Worker   while (isSpace(c)) {
1112*6be67779SAndroid Build Coastguard Worker     ptr += enc->minBytesPerChar;
1113*6be67779SAndroid Build Coastguard Worker     c = toAscii(enc, ptr, end);
1114*6be67779SAndroid Build Coastguard Worker   }
1115*6be67779SAndroid Build Coastguard Worker   if (c != ASCII_QUOT && c != ASCII_APOS) {
1116*6be67779SAndroid Build Coastguard Worker     *nextTokPtr = ptr;
1117*6be67779SAndroid Build Coastguard Worker     return 0;
1118*6be67779SAndroid Build Coastguard Worker   }
1119*6be67779SAndroid Build Coastguard Worker   open = (char)c;
1120*6be67779SAndroid Build Coastguard Worker   ptr += enc->minBytesPerChar;
1121*6be67779SAndroid Build Coastguard Worker   *valPtr = ptr;
1122*6be67779SAndroid Build Coastguard Worker   for (;; ptr += enc->minBytesPerChar) {
1123*6be67779SAndroid Build Coastguard Worker     c = toAscii(enc, ptr, end);
1124*6be67779SAndroid Build Coastguard Worker     if (c == open)
1125*6be67779SAndroid Build Coastguard Worker       break;
1126*6be67779SAndroid Build Coastguard Worker     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
1127*6be67779SAndroid Build Coastguard Worker         && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
1128*6be67779SAndroid Build Coastguard Worker         && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
1129*6be67779SAndroid Build Coastguard Worker       *nextTokPtr = ptr;
1130*6be67779SAndroid Build Coastguard Worker       return 0;
1131*6be67779SAndroid Build Coastguard Worker     }
1132*6be67779SAndroid Build Coastguard Worker   }
1133*6be67779SAndroid Build Coastguard Worker   *nextTokPtr = ptr + enc->minBytesPerChar;
1134*6be67779SAndroid Build Coastguard Worker   return 1;
1135*6be67779SAndroid Build Coastguard Worker }
1136*6be67779SAndroid Build Coastguard Worker 
1137*6be67779SAndroid Build Coastguard Worker static const char KW_version[]
1138*6be67779SAndroid Build Coastguard Worker     = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};
1139*6be67779SAndroid Build Coastguard Worker 
1140*6be67779SAndroid Build Coastguard Worker static const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
1141*6be67779SAndroid Build Coastguard Worker                                    ASCII_i, ASCII_n, ASCII_g, '\0'};
1142*6be67779SAndroid Build Coastguard Worker 
1143*6be67779SAndroid Build Coastguard Worker static const char KW_standalone[]
1144*6be67779SAndroid Build Coastguard Worker     = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
1145*6be67779SAndroid Build Coastguard Worker        ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};
1146*6be67779SAndroid Build Coastguard Worker 
1147*6be67779SAndroid Build Coastguard Worker static const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};
1148*6be67779SAndroid Build Coastguard Worker 
1149*6be67779SAndroid Build Coastguard Worker static const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
1150*6be67779SAndroid Build Coastguard Worker 
1151*6be67779SAndroid Build Coastguard Worker static int
doParseXmlDecl(const ENCODING * (* encodingFinder)(const ENCODING *,const char *,const char *),int isGeneralTextEntity,const ENCODING * enc,const char * ptr,const char * end,const char ** badPtr,const char ** versionPtr,const char ** versionEndPtr,const char ** encodingName,const ENCODING ** encoding,int * standalone)1152*6be67779SAndroid Build Coastguard Worker doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
1153*6be67779SAndroid Build Coastguard Worker                                                  const char *),
1154*6be67779SAndroid Build Coastguard Worker                int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
1155*6be67779SAndroid Build Coastguard Worker                const char *end, const char **badPtr, const char **versionPtr,
1156*6be67779SAndroid Build Coastguard Worker                const char **versionEndPtr, const char **encodingName,
1157*6be67779SAndroid Build Coastguard Worker                const ENCODING **encoding, int *standalone) {
1158*6be67779SAndroid Build Coastguard Worker   const char *val = NULL;
1159*6be67779SAndroid Build Coastguard Worker   const char *name = NULL;
1160*6be67779SAndroid Build Coastguard Worker   const char *nameEnd = NULL;
1161*6be67779SAndroid Build Coastguard Worker   ptr += 5 * enc->minBytesPerChar;
1162*6be67779SAndroid Build Coastguard Worker   end -= 2 * enc->minBytesPerChar;
1163*6be67779SAndroid Build Coastguard Worker   if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
1164*6be67779SAndroid Build Coastguard Worker       || ! name) {
1165*6be67779SAndroid Build Coastguard Worker     *badPtr = ptr;
1166*6be67779SAndroid Build Coastguard Worker     return 0;
1167*6be67779SAndroid Build Coastguard Worker   }
1168*6be67779SAndroid Build Coastguard Worker   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
1169*6be67779SAndroid Build Coastguard Worker     if (! isGeneralTextEntity) {
1170*6be67779SAndroid Build Coastguard Worker       *badPtr = name;
1171*6be67779SAndroid Build Coastguard Worker       return 0;
1172*6be67779SAndroid Build Coastguard Worker     }
1173*6be67779SAndroid Build Coastguard Worker   } else {
1174*6be67779SAndroid Build Coastguard Worker     if (versionPtr)
1175*6be67779SAndroid Build Coastguard Worker       *versionPtr = val;
1176*6be67779SAndroid Build Coastguard Worker     if (versionEndPtr)
1177*6be67779SAndroid Build Coastguard Worker       *versionEndPtr = ptr;
1178*6be67779SAndroid Build Coastguard Worker     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1179*6be67779SAndroid Build Coastguard Worker       *badPtr = ptr;
1180*6be67779SAndroid Build Coastguard Worker       return 0;
1181*6be67779SAndroid Build Coastguard Worker     }
1182*6be67779SAndroid Build Coastguard Worker     if (! name) {
1183*6be67779SAndroid Build Coastguard Worker       if (isGeneralTextEntity) {
1184*6be67779SAndroid Build Coastguard Worker         /* a TextDecl must have an EncodingDecl */
1185*6be67779SAndroid Build Coastguard Worker         *badPtr = ptr;
1186*6be67779SAndroid Build Coastguard Worker         return 0;
1187*6be67779SAndroid Build Coastguard Worker       }
1188*6be67779SAndroid Build Coastguard Worker       return 1;
1189*6be67779SAndroid Build Coastguard Worker     }
1190*6be67779SAndroid Build Coastguard Worker   }
1191*6be67779SAndroid Build Coastguard Worker   if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
1192*6be67779SAndroid Build Coastguard Worker     int c = toAscii(enc, val, end);
1193*6be67779SAndroid Build Coastguard Worker     if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
1194*6be67779SAndroid Build Coastguard Worker       *badPtr = val;
1195*6be67779SAndroid Build Coastguard Worker       return 0;
1196*6be67779SAndroid Build Coastguard Worker     }
1197*6be67779SAndroid Build Coastguard Worker     if (encodingName)
1198*6be67779SAndroid Build Coastguard Worker       *encodingName = val;
1199*6be67779SAndroid Build Coastguard Worker     if (encoding)
1200*6be67779SAndroid Build Coastguard Worker       *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
1201*6be67779SAndroid Build Coastguard Worker     if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
1202*6be67779SAndroid Build Coastguard Worker       *badPtr = ptr;
1203*6be67779SAndroid Build Coastguard Worker       return 0;
1204*6be67779SAndroid Build Coastguard Worker     }
1205*6be67779SAndroid Build Coastguard Worker     if (! name)
1206*6be67779SAndroid Build Coastguard Worker       return 1;
1207*6be67779SAndroid Build Coastguard Worker   }
1208*6be67779SAndroid Build Coastguard Worker   if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
1209*6be67779SAndroid Build Coastguard Worker       || isGeneralTextEntity) {
1210*6be67779SAndroid Build Coastguard Worker     *badPtr = name;
1211*6be67779SAndroid Build Coastguard Worker     return 0;
1212*6be67779SAndroid Build Coastguard Worker   }
1213*6be67779SAndroid Build Coastguard Worker   if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
1214*6be67779SAndroid Build Coastguard Worker     if (standalone)
1215*6be67779SAndroid Build Coastguard Worker       *standalone = 1;
1216*6be67779SAndroid Build Coastguard Worker   } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
1217*6be67779SAndroid Build Coastguard Worker     if (standalone)
1218*6be67779SAndroid Build Coastguard Worker       *standalone = 0;
1219*6be67779SAndroid Build Coastguard Worker   } else {
1220*6be67779SAndroid Build Coastguard Worker     *badPtr = val;
1221*6be67779SAndroid Build Coastguard Worker     return 0;
1222*6be67779SAndroid Build Coastguard Worker   }
1223*6be67779SAndroid Build Coastguard Worker   while (isSpace(toAscii(enc, ptr, end)))
1224*6be67779SAndroid Build Coastguard Worker     ptr += enc->minBytesPerChar;
1225*6be67779SAndroid Build Coastguard Worker   if (ptr != end) {
1226*6be67779SAndroid Build Coastguard Worker     *badPtr = ptr;
1227*6be67779SAndroid Build Coastguard Worker     return 0;
1228*6be67779SAndroid Build Coastguard Worker   }
1229*6be67779SAndroid Build Coastguard Worker   return 1;
1230*6be67779SAndroid Build Coastguard Worker }
1231*6be67779SAndroid Build Coastguard Worker 
1232*6be67779SAndroid Build Coastguard Worker static int FASTCALL
checkCharRefNumber(int result)1233*6be67779SAndroid Build Coastguard Worker checkCharRefNumber(int result) {
1234*6be67779SAndroid Build Coastguard Worker   switch (result >> 8) {
1235*6be67779SAndroid Build Coastguard Worker   case 0xD8:
1236*6be67779SAndroid Build Coastguard Worker   case 0xD9:
1237*6be67779SAndroid Build Coastguard Worker   case 0xDA:
1238*6be67779SAndroid Build Coastguard Worker   case 0xDB:
1239*6be67779SAndroid Build Coastguard Worker   case 0xDC:
1240*6be67779SAndroid Build Coastguard Worker   case 0xDD:
1241*6be67779SAndroid Build Coastguard Worker   case 0xDE:
1242*6be67779SAndroid Build Coastguard Worker   case 0xDF:
1243*6be67779SAndroid Build Coastguard Worker     return -1;
1244*6be67779SAndroid Build Coastguard Worker   case 0:
1245*6be67779SAndroid Build Coastguard Worker     if (latin1_encoding.type[result] == BT_NONXML)
1246*6be67779SAndroid Build Coastguard Worker       return -1;
1247*6be67779SAndroid Build Coastguard Worker     break;
1248*6be67779SAndroid Build Coastguard Worker   case 0xFF:
1249*6be67779SAndroid Build Coastguard Worker     if (result == 0xFFFE || result == 0xFFFF)
1250*6be67779SAndroid Build Coastguard Worker       return -1;
1251*6be67779SAndroid Build Coastguard Worker     break;
1252*6be67779SAndroid Build Coastguard Worker   }
1253*6be67779SAndroid Build Coastguard Worker   return result;
1254*6be67779SAndroid Build Coastguard Worker }
1255*6be67779SAndroid Build Coastguard Worker 
1256*6be67779SAndroid Build Coastguard Worker int FASTCALL
XmlUtf8Encode(int c,char * buf)1257*6be67779SAndroid Build Coastguard Worker XmlUtf8Encode(int c, char *buf) {
1258*6be67779SAndroid Build Coastguard Worker   enum {
1259*6be67779SAndroid Build Coastguard Worker     /* minN is minimum legal resulting value for N byte sequence */
1260*6be67779SAndroid Build Coastguard Worker     min2 = 0x80,
1261*6be67779SAndroid Build Coastguard Worker     min3 = 0x800,
1262*6be67779SAndroid Build Coastguard Worker     min4 = 0x10000
1263*6be67779SAndroid Build Coastguard Worker   };
1264*6be67779SAndroid Build Coastguard Worker 
1265*6be67779SAndroid Build Coastguard Worker   if (c < 0)
1266*6be67779SAndroid Build Coastguard Worker     return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
1267*6be67779SAndroid Build Coastguard Worker   if (c < min2) {
1268*6be67779SAndroid Build Coastguard Worker     buf[0] = (char)(c | UTF8_cval1);
1269*6be67779SAndroid Build Coastguard Worker     return 1;
1270*6be67779SAndroid Build Coastguard Worker   }
1271*6be67779SAndroid Build Coastguard Worker   if (c < min3) {
1272*6be67779SAndroid Build Coastguard Worker     buf[0] = (char)((c >> 6) | UTF8_cval2);
1273*6be67779SAndroid Build Coastguard Worker     buf[1] = (char)((c & 0x3f) | 0x80);
1274*6be67779SAndroid Build Coastguard Worker     return 2;
1275*6be67779SAndroid Build Coastguard Worker   }
1276*6be67779SAndroid Build Coastguard Worker   if (c < min4) {
1277*6be67779SAndroid Build Coastguard Worker     buf[0] = (char)((c >> 12) | UTF8_cval3);
1278*6be67779SAndroid Build Coastguard Worker     buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
1279*6be67779SAndroid Build Coastguard Worker     buf[2] = (char)((c & 0x3f) | 0x80);
1280*6be67779SAndroid Build Coastguard Worker     return 3;
1281*6be67779SAndroid Build Coastguard Worker   }
1282*6be67779SAndroid Build Coastguard Worker   if (c < 0x110000) {
1283*6be67779SAndroid Build Coastguard Worker     buf[0] = (char)((c >> 18) | UTF8_cval4);
1284*6be67779SAndroid Build Coastguard Worker     buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
1285*6be67779SAndroid Build Coastguard Worker     buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
1286*6be67779SAndroid Build Coastguard Worker     buf[3] = (char)((c & 0x3f) | 0x80);
1287*6be67779SAndroid Build Coastguard Worker     return 4;
1288*6be67779SAndroid Build Coastguard Worker   }
1289*6be67779SAndroid Build Coastguard Worker   return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
1290*6be67779SAndroid Build Coastguard Worker }
1291*6be67779SAndroid Build Coastguard Worker 
1292*6be67779SAndroid Build Coastguard Worker int FASTCALL
XmlUtf16Encode(int charNum,unsigned short * buf)1293*6be67779SAndroid Build Coastguard Worker XmlUtf16Encode(int charNum, unsigned short *buf) {
1294*6be67779SAndroid Build Coastguard Worker   if (charNum < 0)
1295*6be67779SAndroid Build Coastguard Worker     return 0;
1296*6be67779SAndroid Build Coastguard Worker   if (charNum < 0x10000) {
1297*6be67779SAndroid Build Coastguard Worker     buf[0] = (unsigned short)charNum;
1298*6be67779SAndroid Build Coastguard Worker     return 1;
1299*6be67779SAndroid Build Coastguard Worker   }
1300*6be67779SAndroid Build Coastguard Worker   if (charNum < 0x110000) {
1301*6be67779SAndroid Build Coastguard Worker     charNum -= 0x10000;
1302*6be67779SAndroid Build Coastguard Worker     buf[0] = (unsigned short)((charNum >> 10) + 0xD800);
1303*6be67779SAndroid Build Coastguard Worker     buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00);
1304*6be67779SAndroid Build Coastguard Worker     return 2;
1305*6be67779SAndroid Build Coastguard Worker   }
1306*6be67779SAndroid Build Coastguard Worker   return 0;
1307*6be67779SAndroid Build Coastguard Worker }
1308*6be67779SAndroid Build Coastguard Worker 
1309*6be67779SAndroid Build Coastguard Worker struct unknown_encoding {
1310*6be67779SAndroid Build Coastguard Worker   struct normal_encoding normal;
1311*6be67779SAndroid Build Coastguard Worker   CONVERTER convert;
1312*6be67779SAndroid Build Coastguard Worker   void *userData;
1313*6be67779SAndroid Build Coastguard Worker   unsigned short utf16[256];
1314*6be67779SAndroid Build Coastguard Worker   char utf8[256][4];
1315*6be67779SAndroid Build Coastguard Worker };
1316*6be67779SAndroid Build Coastguard Worker 
1317*6be67779SAndroid Build Coastguard Worker #define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
1318*6be67779SAndroid Build Coastguard Worker 
1319*6be67779SAndroid Build Coastguard Worker int
XmlSizeOfUnknownEncoding(void)1320*6be67779SAndroid Build Coastguard Worker XmlSizeOfUnknownEncoding(void) {
1321*6be67779SAndroid Build Coastguard Worker   return sizeof(struct unknown_encoding);
1322*6be67779SAndroid Build Coastguard Worker }
1323*6be67779SAndroid Build Coastguard Worker 
1324*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
unknown_isName(const ENCODING * enc,const char * p)1325*6be67779SAndroid Build Coastguard Worker unknown_isName(const ENCODING *enc, const char *p) {
1326*6be67779SAndroid Build Coastguard Worker   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1327*6be67779SAndroid Build Coastguard Worker   int c = uenc->convert(uenc->userData, p);
1328*6be67779SAndroid Build Coastguard Worker   if (c & ~0xFFFF)
1329*6be67779SAndroid Build Coastguard Worker     return 0;
1330*6be67779SAndroid Build Coastguard Worker   return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
1331*6be67779SAndroid Build Coastguard Worker }
1332*6be67779SAndroid Build Coastguard Worker 
1333*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
unknown_isNmstrt(const ENCODING * enc,const char * p)1334*6be67779SAndroid Build Coastguard Worker unknown_isNmstrt(const ENCODING *enc, const char *p) {
1335*6be67779SAndroid Build Coastguard Worker   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1336*6be67779SAndroid Build Coastguard Worker   int c = uenc->convert(uenc->userData, p);
1337*6be67779SAndroid Build Coastguard Worker   if (c & ~0xFFFF)
1338*6be67779SAndroid Build Coastguard Worker     return 0;
1339*6be67779SAndroid Build Coastguard Worker   return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
1340*6be67779SAndroid Build Coastguard Worker }
1341*6be67779SAndroid Build Coastguard Worker 
1342*6be67779SAndroid Build Coastguard Worker static int PTRFASTCALL
unknown_isInvalid(const ENCODING * enc,const char * p)1343*6be67779SAndroid Build Coastguard Worker unknown_isInvalid(const ENCODING *enc, const char *p) {
1344*6be67779SAndroid Build Coastguard Worker   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1345*6be67779SAndroid Build Coastguard Worker   int c = uenc->convert(uenc->userData, p);
1346*6be67779SAndroid Build Coastguard Worker   return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
1347*6be67779SAndroid Build Coastguard Worker }
1348*6be67779SAndroid Build Coastguard Worker 
1349*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
unknown_toUtf8(const ENCODING * enc,const char ** fromP,const char * fromLim,char ** toP,const char * toLim)1350*6be67779SAndroid Build Coastguard Worker unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
1351*6be67779SAndroid Build Coastguard Worker                char **toP, const char *toLim) {
1352*6be67779SAndroid Build Coastguard Worker   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1353*6be67779SAndroid Build Coastguard Worker   char buf[XML_UTF8_ENCODE_MAX];
1354*6be67779SAndroid Build Coastguard Worker   for (;;) {
1355*6be67779SAndroid Build Coastguard Worker     const char *utf8;
1356*6be67779SAndroid Build Coastguard Worker     int n;
1357*6be67779SAndroid Build Coastguard Worker     if (*fromP == fromLim)
1358*6be67779SAndroid Build Coastguard Worker       return XML_CONVERT_COMPLETED;
1359*6be67779SAndroid Build Coastguard Worker     utf8 = uenc->utf8[(unsigned char)**fromP];
1360*6be67779SAndroid Build Coastguard Worker     n = *utf8++;
1361*6be67779SAndroid Build Coastguard Worker     if (n == 0) {
1362*6be67779SAndroid Build Coastguard Worker       int c = uenc->convert(uenc->userData, *fromP);
1363*6be67779SAndroid Build Coastguard Worker       n = XmlUtf8Encode(c, buf);
1364*6be67779SAndroid Build Coastguard Worker       if (n > toLim - *toP)
1365*6be67779SAndroid Build Coastguard Worker         return XML_CONVERT_OUTPUT_EXHAUSTED;
1366*6be67779SAndroid Build Coastguard Worker       utf8 = buf;
1367*6be67779SAndroid Build Coastguard Worker       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1368*6be67779SAndroid Build Coastguard Worker                  - (BT_LEAD2 - 2));
1369*6be67779SAndroid Build Coastguard Worker     } else {
1370*6be67779SAndroid Build Coastguard Worker       if (n > toLim - *toP)
1371*6be67779SAndroid Build Coastguard Worker         return XML_CONVERT_OUTPUT_EXHAUSTED;
1372*6be67779SAndroid Build Coastguard Worker       (*fromP)++;
1373*6be67779SAndroid Build Coastguard Worker     }
1374*6be67779SAndroid Build Coastguard Worker     memcpy(*toP, utf8, n);
1375*6be67779SAndroid Build Coastguard Worker     *toP += n;
1376*6be67779SAndroid Build Coastguard Worker   }
1377*6be67779SAndroid Build Coastguard Worker }
1378*6be67779SAndroid Build Coastguard Worker 
1379*6be67779SAndroid Build Coastguard Worker static enum XML_Convert_Result PTRCALL
unknown_toUtf16(const ENCODING * enc,const char ** fromP,const char * fromLim,unsigned short ** toP,const unsigned short * toLim)1380*6be67779SAndroid Build Coastguard Worker unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
1381*6be67779SAndroid Build Coastguard Worker                 unsigned short **toP, const unsigned short *toLim) {
1382*6be67779SAndroid Build Coastguard Worker   const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
1383*6be67779SAndroid Build Coastguard Worker   while (*fromP < fromLim && *toP < toLim) {
1384*6be67779SAndroid Build Coastguard Worker     unsigned short c = uenc->utf16[(unsigned char)**fromP];
1385*6be67779SAndroid Build Coastguard Worker     if (c == 0) {
1386*6be67779SAndroid Build Coastguard Worker       c = (unsigned short)uenc->convert(uenc->userData, *fromP);
1387*6be67779SAndroid Build Coastguard Worker       *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
1388*6be67779SAndroid Build Coastguard Worker                  - (BT_LEAD2 - 2));
1389*6be67779SAndroid Build Coastguard Worker     } else
1390*6be67779SAndroid Build Coastguard Worker       (*fromP)++;
1391*6be67779SAndroid Build Coastguard Worker     *(*toP)++ = c;
1392*6be67779SAndroid Build Coastguard Worker   }
1393*6be67779SAndroid Build Coastguard Worker 
1394*6be67779SAndroid Build Coastguard Worker   if ((*toP == toLim) && (*fromP < fromLim))
1395*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_OUTPUT_EXHAUSTED;
1396*6be67779SAndroid Build Coastguard Worker   else
1397*6be67779SAndroid Build Coastguard Worker     return XML_CONVERT_COMPLETED;
1398*6be67779SAndroid Build Coastguard Worker }
1399*6be67779SAndroid Build Coastguard Worker 
1400*6be67779SAndroid Build Coastguard Worker ENCODING *
XmlInitUnknownEncoding(void * mem,int * table,CONVERTER convert,void * userData)1401*6be67779SAndroid Build Coastguard Worker XmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
1402*6be67779SAndroid Build Coastguard Worker                        void *userData) {
1403*6be67779SAndroid Build Coastguard Worker   int i;
1404*6be67779SAndroid Build Coastguard Worker   struct unknown_encoding *e = (struct unknown_encoding *)mem;
1405*6be67779SAndroid Build Coastguard Worker   memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
1406*6be67779SAndroid Build Coastguard Worker   for (i = 0; i < 128; i++)
1407*6be67779SAndroid Build Coastguard Worker     if (latin1_encoding.type[i] != BT_OTHER
1408*6be67779SAndroid Build Coastguard Worker         && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
1409*6be67779SAndroid Build Coastguard Worker       return 0;
1410*6be67779SAndroid Build Coastguard Worker   for (i = 0; i < 256; i++) {
1411*6be67779SAndroid Build Coastguard Worker     int c = table[i];
1412*6be67779SAndroid Build Coastguard Worker     if (c == -1) {
1413*6be67779SAndroid Build Coastguard Worker       e->normal.type[i] = BT_MALFORM;
1414*6be67779SAndroid Build Coastguard Worker       /* This shouldn't really get used. */
1415*6be67779SAndroid Build Coastguard Worker       e->utf16[i] = 0xFFFF;
1416*6be67779SAndroid Build Coastguard Worker       e->utf8[i][0] = 1;
1417*6be67779SAndroid Build Coastguard Worker       e->utf8[i][1] = 0;
1418*6be67779SAndroid Build Coastguard Worker     } else if (c < 0) {
1419*6be67779SAndroid Build Coastguard Worker       if (c < -4)
1420*6be67779SAndroid Build Coastguard Worker         return 0;
1421*6be67779SAndroid Build Coastguard Worker       /* Multi-byte sequences need a converter function */
1422*6be67779SAndroid Build Coastguard Worker       if (! convert)
1423*6be67779SAndroid Build Coastguard Worker         return 0;
1424*6be67779SAndroid Build Coastguard Worker       e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
1425*6be67779SAndroid Build Coastguard Worker       e->utf8[i][0] = 0;
1426*6be67779SAndroid Build Coastguard Worker       e->utf16[i] = 0;
1427*6be67779SAndroid Build Coastguard Worker     } else if (c < 0x80) {
1428*6be67779SAndroid Build Coastguard Worker       if (latin1_encoding.type[c] != BT_OTHER
1429*6be67779SAndroid Build Coastguard Worker           && latin1_encoding.type[c] != BT_NONXML && c != i)
1430*6be67779SAndroid Build Coastguard Worker         return 0;
1431*6be67779SAndroid Build Coastguard Worker       e->normal.type[i] = latin1_encoding.type[c];
1432*6be67779SAndroid Build Coastguard Worker       e->utf8[i][0] = 1;
1433*6be67779SAndroid Build Coastguard Worker       e->utf8[i][1] = (char)c;
1434*6be67779SAndroid Build Coastguard Worker       e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
1435*6be67779SAndroid Build Coastguard Worker     } else if (checkCharRefNumber(c) < 0) {
1436*6be67779SAndroid Build Coastguard Worker       e->normal.type[i] = BT_NONXML;
1437*6be67779SAndroid Build Coastguard Worker       /* This shouldn't really get used. */
1438*6be67779SAndroid Build Coastguard Worker       e->utf16[i] = 0xFFFF;
1439*6be67779SAndroid Build Coastguard Worker       e->utf8[i][0] = 1;
1440*6be67779SAndroid Build Coastguard Worker       e->utf8[i][1] = 0;
1441*6be67779SAndroid Build Coastguard Worker     } else {
1442*6be67779SAndroid Build Coastguard Worker       if (c > 0xFFFF)
1443*6be67779SAndroid Build Coastguard Worker         return 0;
1444*6be67779SAndroid Build Coastguard Worker       if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
1445*6be67779SAndroid Build Coastguard Worker         e->normal.type[i] = BT_NMSTRT;
1446*6be67779SAndroid Build Coastguard Worker       else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
1447*6be67779SAndroid Build Coastguard Worker         e->normal.type[i] = BT_NAME;
1448*6be67779SAndroid Build Coastguard Worker       else
1449*6be67779SAndroid Build Coastguard Worker         e->normal.type[i] = BT_OTHER;
1450*6be67779SAndroid Build Coastguard Worker       e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
1451*6be67779SAndroid Build Coastguard Worker       e->utf16[i] = (unsigned short)c;
1452*6be67779SAndroid Build Coastguard Worker     }
1453*6be67779SAndroid Build Coastguard Worker   }
1454*6be67779SAndroid Build Coastguard Worker   e->userData = userData;
1455*6be67779SAndroid Build Coastguard Worker   e->convert = convert;
1456*6be67779SAndroid Build Coastguard Worker   if (convert) {
1457*6be67779SAndroid Build Coastguard Worker     e->normal.isName2 = unknown_isName;
1458*6be67779SAndroid Build Coastguard Worker     e->normal.isName3 = unknown_isName;
1459*6be67779SAndroid Build Coastguard Worker     e->normal.isName4 = unknown_isName;
1460*6be67779SAndroid Build Coastguard Worker     e->normal.isNmstrt2 = unknown_isNmstrt;
1461*6be67779SAndroid Build Coastguard Worker     e->normal.isNmstrt3 = unknown_isNmstrt;
1462*6be67779SAndroid Build Coastguard Worker     e->normal.isNmstrt4 = unknown_isNmstrt;
1463*6be67779SAndroid Build Coastguard Worker     e->normal.isInvalid2 = unknown_isInvalid;
1464*6be67779SAndroid Build Coastguard Worker     e->normal.isInvalid3 = unknown_isInvalid;
1465*6be67779SAndroid Build Coastguard Worker     e->normal.isInvalid4 = unknown_isInvalid;
1466*6be67779SAndroid Build Coastguard Worker   }
1467*6be67779SAndroid Build Coastguard Worker   e->normal.enc.utf8Convert = unknown_toUtf8;
1468*6be67779SAndroid Build Coastguard Worker   e->normal.enc.utf16Convert = unknown_toUtf16;
1469*6be67779SAndroid Build Coastguard Worker   return &(e->normal.enc);
1470*6be67779SAndroid Build Coastguard Worker }
1471*6be67779SAndroid Build Coastguard Worker 
/* Indices of the built-in encodings.

   If this enumeration is changed, getEncodingIndex and encodings
   must also be changed. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC = 1,
  UTF_8_ENC = 2,
  UTF_16_ENC = 3,
  UTF_16BE_ENC = 4,
  UTF_16LE_ENC = 5,
  /* must match encodingNames up to here */
  NO_ENC = 6
};
1485*6be67779SAndroid Build Coastguard Worker 
1486*6be67779SAndroid Build Coastguard Worker static const char KW_ISO_8859_1[]
1487*6be67779SAndroid Build Coastguard Worker     = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
1488*6be67779SAndroid Build Coastguard Worker        ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
1489*6be67779SAndroid Build Coastguard Worker static const char KW_US_ASCII[]
1490*6be67779SAndroid Build Coastguard Worker     = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
1491*6be67779SAndroid Build Coastguard Worker        ASCII_C, ASCII_I, ASCII_I,     '\0'};
1492*6be67779SAndroid Build Coastguard Worker static const char KW_UTF_8[]
1493*6be67779SAndroid Build Coastguard Worker     = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
1494*6be67779SAndroid Build Coastguard Worker static const char KW_UTF_16[]
1495*6be67779SAndroid Build Coastguard Worker     = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
1496*6be67779SAndroid Build Coastguard Worker static const char KW_UTF_16BE[]
1497*6be67779SAndroid Build Coastguard Worker     = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
1498*6be67779SAndroid Build Coastguard Worker        ASCII_6, ASCII_B, ASCII_E, '\0'};
1499*6be67779SAndroid Build Coastguard Worker static const char KW_UTF_16LE[]
1500*6be67779SAndroid Build Coastguard Worker     = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
1501*6be67779SAndroid Build Coastguard Worker        ASCII_6, ASCII_L, ASCII_E, '\0'};
1502*6be67779SAndroid Build Coastguard Worker 
1503*6be67779SAndroid Build Coastguard Worker static int FASTCALL
getEncodingIndex(const char * name)1504*6be67779SAndroid Build Coastguard Worker getEncodingIndex(const char *name) {
1505*6be67779SAndroid Build Coastguard Worker   static const char *const encodingNames[] = {
1506*6be67779SAndroid Build Coastguard Worker       KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
1507*6be67779SAndroid Build Coastguard Worker   };
1508*6be67779SAndroid Build Coastguard Worker   int i;
1509*6be67779SAndroid Build Coastguard Worker   if (name == NULL)
1510*6be67779SAndroid Build Coastguard Worker     return NO_ENC;
1511*6be67779SAndroid Build Coastguard Worker   for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
1512*6be67779SAndroid Build Coastguard Worker     if (streqci(name, encodingNames[i]))
1513*6be67779SAndroid Build Coastguard Worker       return i;
1514*6be67779SAndroid Build Coastguard Worker   return UNKNOWN_ENC;
1515*6be67779SAndroid Build Coastguard Worker }
1516*6be67779SAndroid Build Coastguard Worker 
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.

   INIT_ENC_INDEX(enc) reads that index back as an int;
   SET_INIT_ENC_INDEX(enc, i) stores `i` converted to char.  Both macro
   arguments are parenthesized so that a compound expression passed as `i`
   (e.g. `x >> 8`) is converted as a whole rather than having the cast bind
   to its first operand only.
*/

#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
1523*6be67779SAndroid Build Coastguard Worker 
1524*6be67779SAndroid Build Coastguard Worker /* This is what detects the encoding.  encodingTable maps from
1525*6be67779SAndroid Build Coastguard Worker    encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
1526*6be67779SAndroid Build Coastguard Worker    the external (protocol) specified encoding; state is
1527*6be67779SAndroid Build Coastguard Worker    XML_CONTENT_STATE if we're parsing an external text entity, and
1528*6be67779SAndroid Build Coastguard Worker    XML_PROLOG_STATE otherwise.
1529*6be67779SAndroid Build Coastguard Worker */
1530*6be67779SAndroid Build Coastguard Worker 
1531*6be67779SAndroid Build Coastguard Worker static int
initScan(const ENCODING * const * encodingTable,const INIT_ENCODING * enc,int state,const char * ptr,const char * end,const char ** nextTokPtr)1532*6be67779SAndroid Build Coastguard Worker initScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
1533*6be67779SAndroid Build Coastguard Worker          int state, const char *ptr, const char *end, const char **nextTokPtr) {
1534*6be67779SAndroid Build Coastguard Worker   const ENCODING **encPtr;
1535*6be67779SAndroid Build Coastguard Worker 
1536*6be67779SAndroid Build Coastguard Worker   if (ptr >= end)
1537*6be67779SAndroid Build Coastguard Worker     return XML_TOK_NONE;
1538*6be67779SAndroid Build Coastguard Worker   encPtr = enc->encPtr;
1539*6be67779SAndroid Build Coastguard Worker   if (ptr + 1 == end) {
1540*6be67779SAndroid Build Coastguard Worker     /* only a single byte available for auto-detection */
1541*6be67779SAndroid Build Coastguard Worker #ifndef XML_DTD /* FIXME */
1542*6be67779SAndroid Build Coastguard Worker     /* a well-formed document entity must have more than one byte */
1543*6be67779SAndroid Build Coastguard Worker     if (state != XML_CONTENT_STATE)
1544*6be67779SAndroid Build Coastguard Worker       return XML_TOK_PARTIAL;
1545*6be67779SAndroid Build Coastguard Worker #endif
1546*6be67779SAndroid Build Coastguard Worker     /* so we're parsing an external text entity... */
1547*6be67779SAndroid Build Coastguard Worker     /* if UTF-16 was externally specified, then we need at least 2 bytes */
1548*6be67779SAndroid Build Coastguard Worker     switch (INIT_ENC_INDEX(enc)) {
1549*6be67779SAndroid Build Coastguard Worker     case UTF_16_ENC:
1550*6be67779SAndroid Build Coastguard Worker     case UTF_16LE_ENC:
1551*6be67779SAndroid Build Coastguard Worker     case UTF_16BE_ENC:
1552*6be67779SAndroid Build Coastguard Worker       return XML_TOK_PARTIAL;
1553*6be67779SAndroid Build Coastguard Worker     }
1554*6be67779SAndroid Build Coastguard Worker     switch ((unsigned char)*ptr) {
1555*6be67779SAndroid Build Coastguard Worker     case 0xFE:
1556*6be67779SAndroid Build Coastguard Worker     case 0xFF:
1557*6be67779SAndroid Build Coastguard Worker     case 0xEF: /* possibly first byte of UTF-8 BOM */
1558*6be67779SAndroid Build Coastguard Worker       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
1559*6be67779SAndroid Build Coastguard Worker         break;
1560*6be67779SAndroid Build Coastguard Worker       /* fall through */
1561*6be67779SAndroid Build Coastguard Worker     case 0x00:
1562*6be67779SAndroid Build Coastguard Worker     case 0x3C:
1563*6be67779SAndroid Build Coastguard Worker       return XML_TOK_PARTIAL;
1564*6be67779SAndroid Build Coastguard Worker     }
1565*6be67779SAndroid Build Coastguard Worker   } else {
1566*6be67779SAndroid Build Coastguard Worker     switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
1567*6be67779SAndroid Build Coastguard Worker     case 0xFEFF:
1568*6be67779SAndroid Build Coastguard Worker       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
1569*6be67779SAndroid Build Coastguard Worker         break;
1570*6be67779SAndroid Build Coastguard Worker       *nextTokPtr = ptr + 2;
1571*6be67779SAndroid Build Coastguard Worker       *encPtr = encodingTable[UTF_16BE_ENC];
1572*6be67779SAndroid Build Coastguard Worker       return XML_TOK_BOM;
1573*6be67779SAndroid Build Coastguard Worker     /* 00 3C is handled in the default case */
1574*6be67779SAndroid Build Coastguard Worker     case 0x3C00:
1575*6be67779SAndroid Build Coastguard Worker       if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
1576*6be67779SAndroid Build Coastguard Worker            || INIT_ENC_INDEX(enc) == UTF_16_ENC)
1577*6be67779SAndroid Build Coastguard Worker           && state == XML_CONTENT_STATE)
1578*6be67779SAndroid Build Coastguard Worker         break;
1579*6be67779SAndroid Build Coastguard Worker       *encPtr = encodingTable[UTF_16LE_ENC];
1580*6be67779SAndroid Build Coastguard Worker       return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1581*6be67779SAndroid Build Coastguard Worker     case 0xFFFE:
1582*6be67779SAndroid Build Coastguard Worker       if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
1583*6be67779SAndroid Build Coastguard Worker         break;
1584*6be67779SAndroid Build Coastguard Worker       *nextTokPtr = ptr + 2;
1585*6be67779SAndroid Build Coastguard Worker       *encPtr = encodingTable[UTF_16LE_ENC];
1586*6be67779SAndroid Build Coastguard Worker       return XML_TOK_BOM;
1587*6be67779SAndroid Build Coastguard Worker     case 0xEFBB:
1588*6be67779SAndroid Build Coastguard Worker       /* Maybe a UTF-8 BOM (EF BB BF) */
1589*6be67779SAndroid Build Coastguard Worker       /* If there's an explicitly specified (external) encoding
1590*6be67779SAndroid Build Coastguard Worker          of ISO-8859-1 or some flavour of UTF-16
1591*6be67779SAndroid Build Coastguard Worker          and this is an external text entity,
1592*6be67779SAndroid Build Coastguard Worker          don't look for the BOM,
1593*6be67779SAndroid Build Coastguard Worker          because it might be a legal data.
1594*6be67779SAndroid Build Coastguard Worker       */
1595*6be67779SAndroid Build Coastguard Worker       if (state == XML_CONTENT_STATE) {
1596*6be67779SAndroid Build Coastguard Worker         int e = INIT_ENC_INDEX(enc);
1597*6be67779SAndroid Build Coastguard Worker         if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
1598*6be67779SAndroid Build Coastguard Worker             || e == UTF_16_ENC)
1599*6be67779SAndroid Build Coastguard Worker           break;
1600*6be67779SAndroid Build Coastguard Worker       }
1601*6be67779SAndroid Build Coastguard Worker       if (ptr + 2 == end)
1602*6be67779SAndroid Build Coastguard Worker         return XML_TOK_PARTIAL;
1603*6be67779SAndroid Build Coastguard Worker       if ((unsigned char)ptr[2] == 0xBF) {
1604*6be67779SAndroid Build Coastguard Worker         *nextTokPtr = ptr + 3;
1605*6be67779SAndroid Build Coastguard Worker         *encPtr = encodingTable[UTF_8_ENC];
1606*6be67779SAndroid Build Coastguard Worker         return XML_TOK_BOM;
1607*6be67779SAndroid Build Coastguard Worker       }
1608*6be67779SAndroid Build Coastguard Worker       break;
1609*6be67779SAndroid Build Coastguard Worker     default:
1610*6be67779SAndroid Build Coastguard Worker       if (ptr[0] == '\0') {
1611*6be67779SAndroid Build Coastguard Worker         /* 0 isn't a legal data character. Furthermore a document
1612*6be67779SAndroid Build Coastguard Worker            entity can only start with ASCII characters.  So the only
1613*6be67779SAndroid Build Coastguard Worker            way this can fail to be big-endian UTF-16 is if it's an
1614*6be67779SAndroid Build Coastguard Worker            external parsed general entity that's labelled as
1615*6be67779SAndroid Build Coastguard Worker            UTF-16LE.
1616*6be67779SAndroid Build Coastguard Worker         */
1617*6be67779SAndroid Build Coastguard Worker         if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
1618*6be67779SAndroid Build Coastguard Worker           break;
1619*6be67779SAndroid Build Coastguard Worker         *encPtr = encodingTable[UTF_16BE_ENC];
1620*6be67779SAndroid Build Coastguard Worker         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1621*6be67779SAndroid Build Coastguard Worker       } else if (ptr[1] == '\0') {
1622*6be67779SAndroid Build Coastguard Worker         /* We could recover here in the case:
1623*6be67779SAndroid Build Coastguard Worker             - parsing an external entity
1624*6be67779SAndroid Build Coastguard Worker             - second byte is 0
1625*6be67779SAndroid Build Coastguard Worker             - no externally specified encoding
1626*6be67779SAndroid Build Coastguard Worker             - no encoding declaration
1627*6be67779SAndroid Build Coastguard Worker            by assuming UTF-16LE.  But we don't, because this would mean when
1628*6be67779SAndroid Build Coastguard Worker            presented just with a single byte, we couldn't reliably determine
1629*6be67779SAndroid Build Coastguard Worker            whether we needed further bytes.
1630*6be67779SAndroid Build Coastguard Worker         */
1631*6be67779SAndroid Build Coastguard Worker         if (state == XML_CONTENT_STATE)
1632*6be67779SAndroid Build Coastguard Worker           break;
1633*6be67779SAndroid Build Coastguard Worker         *encPtr = encodingTable[UTF_16LE_ENC];
1634*6be67779SAndroid Build Coastguard Worker         return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1635*6be67779SAndroid Build Coastguard Worker       }
1636*6be67779SAndroid Build Coastguard Worker       break;
1637*6be67779SAndroid Build Coastguard Worker     }
1638*6be67779SAndroid Build Coastguard Worker   }
1639*6be67779SAndroid Build Coastguard Worker   *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
1640*6be67779SAndroid Build Coastguard Worker   return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
1641*6be67779SAndroid Build Coastguard Worker }
1642*6be67779SAndroid Build Coastguard Worker 
/* First inclusion of xmltok_ns.c.  NS(x) and ns(x) both expand to x
   unchanged, so any identifier the included file wraps in these macros
   keeps its plain spelling.  XML_TOK_NS_C is defined only for the
   duration of the include, and all three macros are removed afterwards
   so they can be redefined for a later inclusion. */
1643*6be67779SAndroid Build Coastguard Worker #define NS(x) x
1644*6be67779SAndroid Build Coastguard Worker #define ns(x) x
1645*6be67779SAndroid Build Coastguard Worker #define XML_TOK_NS_C
1646*6be67779SAndroid Build Coastguard Worker #include "xmltok_ns.c"
1647*6be67779SAndroid Build Coastguard Worker #undef XML_TOK_NS_C
1648*6be67779SAndroid Build Coastguard Worker #undef NS
1649*6be67779SAndroid Build Coastguard Worker #undef ns
1650*6be67779SAndroid Build Coastguard Worker 
1651*6be67779SAndroid Build Coastguard Worker #ifdef XML_NS
1652*6be67779SAndroid Build Coastguard Worker 
/* Inclusion of xmltok_ns.c for XML_NS builds.  Here NS(x) token-pastes an
   "NS" suffix onto x and ns(x) pastes an "_ns" suffix, so identifiers the
   included file wraps in these macros get suffixed spellings distinct from
   the plain ones.  XML_TOK_NS_C is again defined only around the include,
   and NS/ns are undefined once it is done. */
1653*6be67779SAndroid Build Coastguard Worker #  define NS(x) x##NS
1654*6be67779SAndroid Build Coastguard Worker #  define ns(x) x##_ns
1655*6be67779SAndroid Build Coastguard Worker 
1656*6be67779SAndroid Build Coastguard Worker #  define XML_TOK_NS_C
1657*6be67779SAndroid Build Coastguard Worker #  include "xmltok_ns.c"
1658*6be67779SAndroid Build Coastguard Worker #  undef XML_TOK_NS_C
1659*6be67779SAndroid Build Coastguard Worker 
1660*6be67779SAndroid Build Coastguard Worker #  undef NS
1661*6be67779SAndroid Build Coastguard Worker #  undef ns
1662*6be67779SAndroid Build Coastguard Worker 
1663*6be67779SAndroid Build Coastguard Worker ENCODING *
XmlInitUnknownEncodingNS(void * mem,int * table,CONVERTER convert,void * userData)1664*6be67779SAndroid Build Coastguard Worker XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
1665*6be67779SAndroid Build Coastguard Worker                          void *userData) {
1666*6be67779SAndroid Build Coastguard Worker   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
1667*6be67779SAndroid Build Coastguard Worker   if (enc)
1668*6be67779SAndroid Build Coastguard Worker     ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
1669*6be67779SAndroid Build Coastguard Worker   return enc;
1670*6be67779SAndroid Build Coastguard Worker }
1671*6be67779SAndroid Build Coastguard Worker 
1672*6be67779SAndroid Build Coastguard Worker #endif /* XML_NS */
1673