xref: /btstack/3rd-party/yxml/yxml.c (revision 630ffdd469bbec3276322f46b93e6cfdfcb21c27)
1 /* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */
2 
3 /* Copyright (c) 2013-2014 Yoran Heling
4 
5   Permission is hereby granted, free of charge, to any person obtaining
6   a copy of this software and associated documentation files (the
7   "Software"), to deal in the Software without restriction, including
8   without limitation the rights to use, copy, modify, merge, publish,
9   distribute, sublicense, and/or sell copies of the Software, and to
10   permit persons to whom the Software is furnished to do so, subject to
11   the following conditions:
12 
13   The above copyright notice and this permission notice shall be included
14   in all copies or substantial portions of the Software.
15 
16   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24 
25 #include <yxml.h>
26 #include <string.h>
27 
/* States of the parser's state machine, dispatched on in yxml_parse().
 * The enumerators are kept in their original (alphabetical) order so that
 * their numeric values do not change. */
typedef enum {
	/* Matching the remainder of a fixed literal pointed to by x->string. */
	YXMLS_string = 0,
	/* Attribute: name, '=', opening quote, value, reference inside value. */
	YXMLS_attr0,
	YXMLS_attr1,
	YXMLS_attr2,
	YXMLS_attr3,
	YXMLS_attr4,
	/* CDATA section body and its "]]>" terminator. */
	YXMLS_cd0,
	YXMLS_cd1,
	YXMLS_cd2,
	/* Comment: opening dashes, body, closing "-->". */
	YXMLS_comment0,
	YXMLS_comment1,
	YXMLS_comment2,
	YXMLS_comment3,
	YXMLS_comment4,
	/* DOCTYPE declaration, including quoted strings and nested markup. */
	YXMLS_dt0,
	YXMLS_dt1,
	YXMLS_dt2,
	YXMLS_dt3,
	YXMLS_dt4,
	/* Start tag: element name, attribute list, "/>" of an empty element. */
	YXMLS_elem0,
	YXMLS_elem1,
	YXMLS_elem2,
	YXMLS_elem3,
	/* "encoding" pseudo-attribute of the XML declaration. */
	YXMLS_enc0,
	YXMLS_enc1,
	YXMLS_enc2,
	YXMLS_enc3,
	/* End tag: name and optional trailing whitespace. */
	YXMLS_etag0,
	YXMLS_etag1,
	YXMLS_etag2,
	/* Very first byte of the document. */
	YXMLS_init,
	/* Directly after '<' (le*), "<!" (lee*) or "<?" (leq0). */
	YXMLS_le0,
	YXMLS_le1,
	YXMLS_le2,
	YXMLS_le3,
	YXMLS_lee1,
	YXMLS_lee2,
	YXMLS_leq0,
	/* Between markup: misc2 is element content, misc2a a reference within
	 * content, misc3 follows the end of the root element. */
	YXMLS_misc0,
	YXMLS_misc1,
	YXMLS_misc2,
	YXMLS_misc2a,
	YXMLS_misc3,
	/* Processing instruction: target name, content, closing "?>". */
	YXMLS_pi0,
	YXMLS_pi1,
	YXMLS_pi2,
	YXMLS_pi3,
	YXMLS_pi4,
	/* "standalone" pseudo-attribute of the XML declaration. */
	YXMLS_std0,
	YXMLS_std1,
	YXMLS_std2,
	YXMLS_std3,
	/* "version" pseudo-attribute of the XML declaration. */
	YXMLS_ver0,
	YXMLS_ver1,
	YXMLS_ver2,
	YXMLS_ver3,
	/* XML declaration ("<?xml ... ?>") skeleton. */
	YXMLS_xmldecl0,
	YXMLS_xmldecl1,
	YXMLS_xmldecl2,
	YXMLS_xmldecl3,
	YXMLS_xmldecl4,
	YXMLS_xmldecl5,
	YXMLS_xmldecl6,
	YXMLS_xmldecl7,
	YXMLS_xmldecl8,
	YXMLS_xmldecl9
} yxml_state_t;
96 
97 
/* Character-class predicates used by the state machine.
 *
 * Every use of the parameter is parenthesized so that an arbitrary expression
 * can be passed, and the range checks convert to unsigned explicitly so that
 * the "subtract and compare" trick also works when the argument is a plain
 * int or char. The argument may be evaluated more than once, so it must not
 * have side effects. */
#define yxml_isChar(c) 1
/* 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */
#define yxml_isSP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0a)
#define yxml_isAlpha(c) ((((unsigned)(c))|32u)-'a' < 26u)
#define yxml_isNum(c) (((unsigned)(c))-'0' < 10u)
#define yxml_isHex(c) (yxml_isNum(c) || (((unsigned)(c))|32u)-'a' < 6u)
#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || (c) == '.' || (c) == '_' || (c) == '-')
#define yxml_isNameStart(c) (yxml_isAlpha(c) || (c) == ':' || (c) == '_' || (c) >= 128)
#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || (c) == '-' || (c) == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
#define yxml_isAttValue(c) (yxml_isChar(c) && (c) != x->quote && (c) != '<' && (c) != '&')
/* Anything between '&' and ';', the yxml_ref* functions will do further
 * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but
 * this parser doesn't understand entities with '.', ':', etc, anyway.  */
#define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || (c) == '#')

/* Pack five byte values into one 64-bit integer (a is the most significant),
 * so that a short name can be compared with a single integer comparison. */
#define INTFROM5CHARS(a, b, c, d, e) ((((uint64_t)(a))<<32) | (((uint64_t)(b))<<24) | (((uint64_t)(c))<<16) | (((uint64_t)(d))<<8) | (uint64_t)(e))
115 
116 
/* Store the byte value ch (0<=ch<=255) into *dest.
 * A plain assignment is avoided because char may be signed, and converting
 * an out-of-range unsigned value to a signed type is implementation defined
 * in C. Copying the object representation of an unsigned char sidesteps
 * that; compilers reduce the memcpy to a single byte store. */
static inline void yxml_setchar(char *dest, unsigned ch) {
	const unsigned char octet = (unsigned char)ch;
	memcpy(dest, &octet, sizeof octet);
}
126 
127 
/* Encode the code point ch as UTF-8 into dest and terminate it with '\0'.
 * Between one and four bytes are written before the terminator, so dest must
 * have room for at least 5 bytes. */
static void yxml_setutf8(char *dest, unsigned ch) {
	unsigned trail; /* number of continuation bytes after the lead byte */
	unsigned lead;  /* marker bits of the lead byte */

	if(ch <= 0x007F) {
		trail = 0;
		lead = 0x00;
	} else if(ch <= 0x07FF) {
		trail = 1;
		lead = 0xC0;
	} else if(ch <= 0xFFFF) {
		trail = 2;
		lead = 0xE0;
	} else {
		trail = 3;
		lead = 0xF0;
	}

	/* The lead byte carries the most significant bits, every continuation
	 * byte the next lower group of six bits. */
	yxml_setchar(dest++, lead | (ch >> (6*trail)));
	while(trail--)
		yxml_setchar(dest++, 0x80 | ((ch >> (6*trail)) & 0x3F));
	*dest = 0;
}
148 
149 
150 static inline yxml_ret_t yxml_datacontent(yxml_t *x, unsigned ch) {
151 	yxml_setchar(x->data, ch);
152 	x->data[1] = 0;
153 	return YXML_CONTENT;
154 }
155 
156 
157 static inline yxml_ret_t yxml_datapi1(yxml_t *x, unsigned ch) {
158 	yxml_setchar(x->data, ch);
159 	x->data[1] = 0;
160 	return YXML_PICONTENT;
161 }
162 
163 
164 static inline yxml_ret_t yxml_datapi2(yxml_t *x, unsigned ch) {
165 	x->data[0] = '?';
166 	yxml_setchar(x->data+1, ch);
167 	x->data[2] = 0;
168 	return YXML_PICONTENT;
169 }
170 
171 
172 static inline yxml_ret_t yxml_datacd1(yxml_t *x, unsigned ch) {
173 	x->data[0] = ']';
174 	yxml_setchar(x->data+1, ch);
175 	x->data[2] = 0;
176 	return YXML_CONTENT;
177 }
178 
179 
180 static inline yxml_ret_t yxml_datacd2(yxml_t *x, unsigned ch) {
181 	x->data[0] = ']';
182 	x->data[1] = ']';
183 	yxml_setchar(x->data+2, ch);
184 	x->data[3] = 0;
185 	return YXML_CONTENT;
186 }
187 
188 
189 static inline yxml_ret_t yxml_dataattr(yxml_t *x, unsigned ch) {
190 	/* Normalize attribute values according to the XML spec section 3.3.3. */
191 	yxml_setchar(x->data, ch == 0x9 || ch == 0xa ? 0x20 : ch);
192 	x->data[1] = 0;
193 	return YXML_ATTRVAL;
194 }
195 
196 
197 static yxml_ret_t yxml_pushstack(yxml_t *x, char **res, unsigned ch) {
198 	if(x->stacklen+2 >= x->stacksize)
199 		return YXML_ESTACK;
200 	x->stacklen++;
201 	*res = (char *)x->stack+x->stacklen;
202 	x->stack[x->stacklen] = ch;
203 	x->stacklen++;
204 	x->stack[x->stacklen] = 0;
205 	return YXML_OK;
206 }
207 
208 
209 static yxml_ret_t yxml_pushstackc(yxml_t *x, unsigned ch) {
210 	if(x->stacklen+1 >= x->stacksize)
211 		return YXML_ESTACK;
212 	x->stack[x->stacklen] = ch;
213 	x->stacklen++;
214 	x->stack[x->stacklen] = 0;
215 	return YXML_OK;
216 }
217 
218 
219 static void yxml_popstack(yxml_t *x) {
220 	do
221 		x->stacklen--;
222 	while(x->stack[x->stacklen]);
223 }
224 
225 
226 static inline yxml_ret_t yxml_elemstart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); }
227 static inline yxml_ret_t yxml_elemname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
228 static inline yxml_ret_t yxml_elemnameend(yxml_t *x, unsigned ch) { (void) x; (void) ch; return YXML_ELEMSTART; }
229 
230 
231 /* Also used in yxml_elemcloseend(), since this function just removes the last
232  * element from the stack and returns ELEMEND. */
233 static yxml_ret_t yxml_selfclose(yxml_t *x, unsigned ch) {
234 	(void) ch;
235 	yxml_popstack(x);
236 	if(x->stacklen) {
237 		x->elem = (char *)x->stack+x->stacklen-1;
238 		while(*(x->elem-1))
239 			x->elem--;
240 		return YXML_ELEMEND;
241 	}
242 	x->elem = (char *)x->stack;
243 	x->state = YXMLS_misc3;
244 	return YXML_ELEMEND;
245 }
246 
247 
248 static inline yxml_ret_t yxml_elemclose(yxml_t *x, unsigned ch) {
249 	if(*((unsigned char *)x->elem) != ch)
250 		return YXML_ECLOSE;
251 	x->elem++;
252 	return YXML_OK;
253 }
254 
255 
256 static inline yxml_ret_t yxml_elemcloseend(yxml_t *x, unsigned ch) {
257 	if(*x->elem)
258 		return YXML_ECLOSE;
259 	return yxml_selfclose(x, ch);
260 }
261 
262 
263 static inline yxml_ret_t yxml_attrstart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); }
264 static inline yxml_ret_t yxml_attrname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
265 static inline yxml_ret_t yxml_attrnameend(yxml_t *x, unsigned ch) { (void) x; (void) ch; return YXML_ATTRSTART; }
266 static inline yxml_ret_t yxml_attrvalend (yxml_t *x, unsigned ch) { (void) ch; yxml_popstack(x); return YXML_ATTREND; }
267 
268 
269 static inline yxml_ret_t yxml_pistart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); }
270 static inline yxml_ret_t yxml_piname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
271 static inline yxml_ret_t yxml_piabort  (yxml_t *x, unsigned ch) { (void) x; (void) ch; yxml_popstack(x); return YXML_OK; }
272 static inline yxml_ret_t yxml_pinameend(yxml_t *x, unsigned ch) { (void) ch;
273 	return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART;
274 }
275 static inline yxml_ret_t yxml_pivalend (yxml_t *x, unsigned ch) { (void) ch; yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; }
276 
277 
278 static inline yxml_ret_t yxml_refstart(yxml_t *x, unsigned ch) {
279 	(void) ch;
280 	memset(x->data, 0, sizeof(x->data));
281 	x->reflen = 0;
282 	return YXML_OK;
283 }
284 
285 
286 static yxml_ret_t yxml_ref(yxml_t *x, unsigned ch) {
287 	if(x->reflen >= sizeof(x->data)-1)
288 		return YXML_EREF;
289 	yxml_setchar(x->data+x->reflen, ch);
290 	x->reflen++;
291 	return YXML_OK;
292 }
293 
294 
295 static yxml_ret_t yxml_refend(yxml_t *x, yxml_ret_t ret) {
296 	unsigned char *r = (unsigned char *)x->data;
297 	unsigned ch = 0;
298 	if(*r == '#') {
299 		if(r[1] == 'x')
300 			for(r += 2; yxml_isHex((unsigned)*r); r++)
301 				ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10);
302 		else
303 			for(r++; yxml_isNum((unsigned)*r); r++)
304 				ch = (ch*10) + (*r-'0');
305 		if(*r)
306 			ch = 0;
307 	} else {
308 		uint64_t i = INTFROM5CHARS(r[0], r[1], r[2], r[3], r[4]);
309 		ch =
310 			i == INTFROM5CHARS('l','t', 0,  0, 0) ? '<' :
311 			i == INTFROM5CHARS('g','t', 0,  0, 0) ? '>' :
312 			i == INTFROM5CHARS('a','m','p', 0, 0) ? '&' :
313 			i == INTFROM5CHARS('a','p','o','s',0) ? '\'':
314 			i == INTFROM5CHARS('q','u','o','t',0) ? '"' : 0;
315 	}
316 
317 	/* Codepoints not allowed in the XML 1.1 definition of a Char */
318 	if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xDFFF) < 0x7FF)
319 		return YXML_EREF;
320 	yxml_setutf8(x->data, ch);
321 	return ret;
322 }
323 
324 
325 static inline yxml_ret_t yxml_refcontent(yxml_t *x, unsigned ch) { (void) ch; return yxml_refend(x, YXML_CONTENT); }
326 static inline yxml_ret_t yxml_refattrval(yxml_t *x, unsigned ch) { (void) ch; return yxml_refend(x, YXML_ATTRVAL); }
327 
328 
329 void yxml_init(yxml_t *x, void *stack, size_t stacksize) {
330 	memset(x, 0, sizeof(*x));
331 	x->line = 1;
332 	x->stack = stack;
333 	x->stacksize = stacksize;
334 	*x->stack = 0;
335 	x->elem = x->pi = x->attr = (char *)x->stack;
336 	x->state = YXMLS_init;
337 }
338 
339 
340 yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
341 	/* Ensure that characters are in the range of 0..255 rather than -126..125.
342 	 * All character comparisons are done with positive integers. */
343 	unsigned ch = (unsigned)(_ch+256) & 0xff;
344 	if(!ch)
345 		return YXML_ESYN;
346 	x->total++;
347 
348 	/* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and
349 	 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds
350 	 * some non-ASCII character sequences to this list, but we can only handle
351 	 * ASCII here without making assumptions about the input encoding. */
352 	if(x->ignore == ch) {
353 		x->ignore = 0;
354 		return YXML_OK;
355 	}
356 	x->ignore = (ch == 0xd) * 0xa;
357 	if(ch == 0xa || ch == 0xd) {
358 		ch = 0xa;
359 		x->line++;
360 		x->byte = 0;
361 	}
362 	x->byte++;
363 
364 	switch((yxml_state_t)x->state) {
365 	case YXMLS_string:
366 		if(ch == *x->string) {
367 			x->string++;
368 			if(!*x->string)
369 				x->state = x->nextstate;
370 			return YXML_OK;
371 		}
372 		break;
373 	case YXMLS_attr0:
374 		if(yxml_isName(ch))
375 			return yxml_attrname(x, ch);
376 		if(yxml_isSP(ch)) {
377 			x->state = YXMLS_attr1;
378 			return yxml_attrnameend(x, ch);
379 		}
380 		if(ch == (unsigned char)'=') {
381 			x->state = YXMLS_attr2;
382 			return yxml_attrnameend(x, ch);
383 		}
384 		break;
385 	case YXMLS_attr1:
386 		if(yxml_isSP(ch))
387 			return YXML_OK;
388 		if(ch == (unsigned char)'=') {
389 			x->state = YXMLS_attr2;
390 			return YXML_OK;
391 		}
392 		break;
393 	case YXMLS_attr2:
394 		if(yxml_isSP(ch))
395 			return YXML_OK;
396 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
397 			x->state = YXMLS_attr3;
398 			x->quote = ch;
399 			return YXML_OK;
400 		}
401 		break;
402 	case YXMLS_attr3:
403 		if(yxml_isAttValue(ch))
404 			return yxml_dataattr(x, ch);
405 		if(ch == (unsigned char)'&') {
406 			x->state = YXMLS_attr4;
407 			return yxml_refstart(x, ch);
408 		}
409 		if(x->quote == ch) {
410 			x->state = YXMLS_elem2;
411 			return yxml_attrvalend(x, ch);
412 		}
413 		break;
414 	case YXMLS_attr4:
415 		if(yxml_isRef(ch))
416 			return yxml_ref(x, ch);
417 		if(ch == (unsigned char)'\x3b') {
418 			x->state = YXMLS_attr3;
419 			return yxml_refattrval(x, ch);
420 		}
421 		break;
422 	case YXMLS_cd0:
423 		if(ch == (unsigned char)']') {
424 			x->state = YXMLS_cd1;
425 			return YXML_OK;
426 		}
427 		if(yxml_isChar(ch))
428 			return yxml_datacontent(x, ch);
429 		break;
430 	case YXMLS_cd1:
431 		if(ch == (unsigned char)']') {
432 			x->state = YXMLS_cd2;
433 			return YXML_OK;
434 		}
435 		if(yxml_isChar(ch)) {
436 			x->state = YXMLS_cd0;
437 			return yxml_datacd1(x, ch);
438 		}
439 		break;
440 	case YXMLS_cd2:
441 		if(ch == (unsigned char)']')
442 			return yxml_datacontent(x, ch);
443 		if(ch == (unsigned char)'>') {
444 			x->state = YXMLS_misc2;
445 			return YXML_OK;
446 		}
447 		if(yxml_isChar(ch)) {
448 			x->state = YXMLS_cd0;
449 			return yxml_datacd2(x, ch);
450 		}
451 		break;
452 	case YXMLS_comment0:
453 		if(ch == (unsigned char)'-') {
454 			x->state = YXMLS_comment1;
455 			return YXML_OK;
456 		}
457 		break;
458 	case YXMLS_comment1:
459 		if(ch == (unsigned char)'-') {
460 			x->state = YXMLS_comment2;
461 			return YXML_OK;
462 		}
463 		break;
464 	case YXMLS_comment2:
465 		if(ch == (unsigned char)'-') {
466 			x->state = YXMLS_comment3;
467 			return YXML_OK;
468 		}
469 		if(yxml_isChar(ch))
470 			return YXML_OK;
471 		break;
472 	case YXMLS_comment3:
473 		if(ch == (unsigned char)'-') {
474 			x->state = YXMLS_comment4;
475 			return YXML_OK;
476 		}
477 		if(yxml_isChar(ch)) {
478 			x->state = YXMLS_comment2;
479 			return YXML_OK;
480 		}
481 		break;
482 	case YXMLS_comment4:
483 		if(ch == (unsigned char)'>') {
484 			x->state = x->nextstate;
485 			return YXML_OK;
486 		}
487 		break;
488 	case YXMLS_dt0:
489 		if(ch == (unsigned char)'>') {
490 			x->state = YXMLS_misc1;
491 			return YXML_OK;
492 		}
493 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
494 			x->state = YXMLS_dt1;
495 			x->quote = ch;
496 			x->nextstate = YXMLS_dt0;
497 			return YXML_OK;
498 		}
499 		if(ch == (unsigned char)'<') {
500 			x->state = YXMLS_dt2;
501 			return YXML_OK;
502 		}
503 		if(yxml_isChar(ch))
504 			return YXML_OK;
505 		break;
506 	case YXMLS_dt1:
507 		if(x->quote == ch) {
508 			x->state = x->nextstate;
509 			return YXML_OK;
510 		}
511 		if(yxml_isChar(ch))
512 			return YXML_OK;
513 		break;
514 	case YXMLS_dt2:
515 		if(ch == (unsigned char)'?') {
516 			x->state = YXMLS_pi0;
517 			x->nextstate = YXMLS_dt0;
518 			return YXML_OK;
519 		}
520 		if(ch == (unsigned char)'!') {
521 			x->state = YXMLS_dt3;
522 			return YXML_OK;
523 		}
524 		break;
525 	case YXMLS_dt3:
526 		if(ch == (unsigned char)'-') {
527 			x->state = YXMLS_comment1;
528 			x->nextstate = YXMLS_dt0;
529 			return YXML_OK;
530 		}
531 		if(yxml_isChar(ch)) {
532 			x->state = YXMLS_dt4;
533 			return YXML_OK;
534 		}
535 		break;
536 	case YXMLS_dt4:
537 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
538 			x->state = YXMLS_dt1;
539 			x->quote = ch;
540 			x->nextstate = YXMLS_dt4;
541 			return YXML_OK;
542 		}
543 		if(ch == (unsigned char)'>') {
544 			x->state = YXMLS_dt0;
545 			return YXML_OK;
546 		}
547 		if(yxml_isChar(ch))
548 			return YXML_OK;
549 		break;
550 	case YXMLS_elem0:
551 		if(yxml_isName(ch))
552 			return yxml_elemname(x, ch);
553 		if(yxml_isSP(ch)) {
554 			x->state = YXMLS_elem1;
555 			return yxml_elemnameend(x, ch);
556 		}
557 		if(ch == (unsigned char)'/') {
558 			x->state = YXMLS_elem3;
559 			return yxml_elemnameend(x, ch);
560 		}
561 		if(ch == (unsigned char)'>') {
562 			x->state = YXMLS_misc2;
563 			return yxml_elemnameend(x, ch);
564 		}
565 		break;
566 	case YXMLS_elem1:
567 		if(yxml_isSP(ch))
568 			return YXML_OK;
569 		if(ch == (unsigned char)'/') {
570 			x->state = YXMLS_elem3;
571 			return YXML_OK;
572 		}
573 		if(ch == (unsigned char)'>') {
574 			x->state = YXMLS_misc2;
575 			return YXML_OK;
576 		}
577 		if(yxml_isNameStart(ch)) {
578 			x->state = YXMLS_attr0;
579 			return yxml_attrstart(x, ch);
580 		}
581 		break;
582 	case YXMLS_elem2:
583 		if(yxml_isSP(ch)) {
584 			x->state = YXMLS_elem1;
585 			return YXML_OK;
586 		}
587 		if(ch == (unsigned char)'/') {
588 			x->state = YXMLS_elem3;
589 			return YXML_OK;
590 		}
591 		if(ch == (unsigned char)'>') {
592 			x->state = YXMLS_misc2;
593 			return YXML_OK;
594 		}
595 		break;
596 	case YXMLS_elem3:
597 		if(ch == (unsigned char)'>') {
598 			x->state = YXMLS_misc2;
599 			return yxml_selfclose(x, ch);
600 		}
601 		break;
602 	case YXMLS_enc0:
603 		if(yxml_isSP(ch))
604 			return YXML_OK;
605 		if(ch == (unsigned char)'=') {
606 			x->state = YXMLS_enc1;
607 			return YXML_OK;
608 		}
609 		break;
610 	case YXMLS_enc1:
611 		if(yxml_isSP(ch))
612 			return YXML_OK;
613 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
614 			x->state = YXMLS_enc2;
615 			x->quote = ch;
616 			return YXML_OK;
617 		}
618 		break;
619 	case YXMLS_enc2:
620 		if(yxml_isAlpha(ch)) {
621 			x->state = YXMLS_enc3;
622 			return YXML_OK;
623 		}
624 		break;
625 	case YXMLS_enc3:
626 		if(yxml_isEncName(ch))
627 			return YXML_OK;
628 		if(x->quote == ch) {
629 			x->state = YXMLS_xmldecl6;
630 			return YXML_OK;
631 		}
632 		break;
633 	case YXMLS_etag0:
634 		if(yxml_isNameStart(ch)) {
635 			x->state = YXMLS_etag1;
636 			return yxml_elemclose(x, ch);
637 		}
638 		break;
639 	case YXMLS_etag1:
640 		if(yxml_isName(ch))
641 			return yxml_elemclose(x, ch);
642 		if(yxml_isSP(ch)) {
643 			x->state = YXMLS_etag2;
644 			return yxml_elemcloseend(x, ch);
645 		}
646 		if(ch == (unsigned char)'>') {
647 			x->state = YXMLS_misc2;
648 			return yxml_elemcloseend(x, ch);
649 		}
650 		break;
651 	case YXMLS_etag2:
652 		if(yxml_isSP(ch))
653 			return YXML_OK;
654 		if(ch == (unsigned char)'>') {
655 			x->state = YXMLS_misc2;
656 			return YXML_OK;
657 		}
658 		break;
659 	case YXMLS_init:
660 		if(ch == (unsigned char)'\xef') {
661 			x->state = YXMLS_string;
662 			x->nextstate = YXMLS_misc0;
663 			x->string = (unsigned char *)"\xbb\xbf";
664 			return YXML_OK;
665 		}
666 		if(yxml_isSP(ch)) {
667 			x->state = YXMLS_misc0;
668 			return YXML_OK;
669 		}
670 		if(ch == (unsigned char)'<') {
671 			x->state = YXMLS_le0;
672 			return YXML_OK;
673 		}
674 		break;
675 	case YXMLS_le0:
676 		if(ch == (unsigned char)'!') {
677 			x->state = YXMLS_lee1;
678 			return YXML_OK;
679 		}
680 		if(ch == (unsigned char)'?') {
681 			x->state = YXMLS_leq0;
682 			return YXML_OK;
683 		}
684 		if(yxml_isNameStart(ch)) {
685 			x->state = YXMLS_elem0;
686 			return yxml_elemstart(x, ch);
687 		}
688 		break;
689 	case YXMLS_le1:
690 		if(ch == (unsigned char)'!') {
691 			x->state = YXMLS_lee1;
692 			return YXML_OK;
693 		}
694 		if(ch == (unsigned char)'?') {
695 			x->state = YXMLS_pi0;
696 			x->nextstate = YXMLS_misc1;
697 			return YXML_OK;
698 		}
699 		if(yxml_isNameStart(ch)) {
700 			x->state = YXMLS_elem0;
701 			return yxml_elemstart(x, ch);
702 		}
703 		break;
704 	case YXMLS_le2:
705 		if(ch == (unsigned char)'!') {
706 			x->state = YXMLS_lee2;
707 			return YXML_OK;
708 		}
709 		if(ch == (unsigned char)'?') {
710 			x->state = YXMLS_pi0;
711 			x->nextstate = YXMLS_misc2;
712 			return YXML_OK;
713 		}
714 		if(ch == (unsigned char)'/') {
715 			x->state = YXMLS_etag0;
716 			return YXML_OK;
717 		}
718 		if(yxml_isNameStart(ch)) {
719 			x->state = YXMLS_elem0;
720 			return yxml_elemstart(x, ch);
721 		}
722 		break;
723 	case YXMLS_le3:
724 		if(ch == (unsigned char)'!') {
725 			x->state = YXMLS_comment0;
726 			x->nextstate = YXMLS_misc3;
727 			return YXML_OK;
728 		}
729 		if(ch == (unsigned char)'?') {
730 			x->state = YXMLS_pi0;
731 			x->nextstate = YXMLS_misc3;
732 			return YXML_OK;
733 		}
734 		break;
735 	case YXMLS_lee1:
736 		if(ch == (unsigned char)'-') {
737 			x->state = YXMLS_comment1;
738 			x->nextstate = YXMLS_misc1;
739 			return YXML_OK;
740 		}
741 		if(ch == (unsigned char)'D') {
742 			x->state = YXMLS_string;
743 			x->nextstate = YXMLS_dt0;
744 			x->string = (unsigned char *)"OCTYPE";
745 			return YXML_OK;
746 		}
747 		break;
748 	case YXMLS_lee2:
749 		if(ch == (unsigned char)'-') {
750 			x->state = YXMLS_comment1;
751 			x->nextstate = YXMLS_misc2;
752 			return YXML_OK;
753 		}
754 		if(ch == (unsigned char)'[') {
755 			x->state = YXMLS_string;
756 			x->nextstate = YXMLS_cd0;
757 			x->string = (unsigned char *)"CDATA[";
758 			return YXML_OK;
759 		}
760 		break;
761 	case YXMLS_leq0:
762 		if(ch == (unsigned char)'x') {
763 			x->state = YXMLS_xmldecl0;
764 			x->nextstate = YXMLS_misc1;
765 			return yxml_pistart(x, ch);
766 		}
767 		if(yxml_isNameStart(ch)) {
768 			x->state = YXMLS_pi1;
769 			x->nextstate = YXMLS_misc1;
770 			return yxml_pistart(x, ch);
771 		}
772 		break;
773 	case YXMLS_misc0:
774 		if(yxml_isSP(ch))
775 			return YXML_OK;
776 		if(ch == (unsigned char)'<') {
777 			x->state = YXMLS_le0;
778 			return YXML_OK;
779 		}
780 		break;
781 	case YXMLS_misc1:
782 		if(yxml_isSP(ch))
783 			return YXML_OK;
784 		if(ch == (unsigned char)'<') {
785 			x->state = YXMLS_le1;
786 			return YXML_OK;
787 		}
788 		break;
789 	case YXMLS_misc2:
790 		if(ch == (unsigned char)'<') {
791 			x->state = YXMLS_le2;
792 			return YXML_OK;
793 		}
794 		if(ch == (unsigned char)'&') {
795 			x->state = YXMLS_misc2a;
796 			return yxml_refstart(x, ch);
797 		}
798 		if(yxml_isChar(ch))
799 			return yxml_datacontent(x, ch);
800 		break;
801 	case YXMLS_misc2a:
802 		if(yxml_isRef(ch))
803 			return yxml_ref(x, ch);
804 		if(ch == (unsigned char)'\x3b') {
805 			x->state = YXMLS_misc2;
806 			return yxml_refcontent(x, ch);
807 		}
808 		break;
809 	case YXMLS_misc3:
810 		if(yxml_isSP(ch))
811 			return YXML_OK;
812 		if(ch == (unsigned char)'<') {
813 			x->state = YXMLS_le3;
814 			return YXML_OK;
815 		}
816 		break;
817 	case YXMLS_pi0:
818 		if(yxml_isNameStart(ch)) {
819 			x->state = YXMLS_pi1;
820 			return yxml_pistart(x, ch);
821 		}
822 		break;
823 	case YXMLS_pi1:
824 		if(yxml_isName(ch))
825 			return yxml_piname(x, ch);
826 		if(ch == (unsigned char)'?') {
827 			x->state = YXMLS_pi4;
828 			return yxml_pinameend(x, ch);
829 		}
830 		if(yxml_isSP(ch)) {
831 			x->state = YXMLS_pi2;
832 			return yxml_pinameend(x, ch);
833 		}
834 		break;
835 	case YXMLS_pi2:
836 		if(ch == (unsigned char)'?') {
837 			x->state = YXMLS_pi3;
838 			return YXML_OK;
839 		}
840 		if(yxml_isChar(ch))
841 			return yxml_datapi1(x, ch);
842 		break;
843 	case YXMLS_pi3:
844 		if(ch == (unsigned char)'>') {
845 			x->state = x->nextstate;
846 			return yxml_pivalend(x, ch);
847 		}
848 		if(yxml_isChar(ch)) {
849 			x->state = YXMLS_pi2;
850 			return yxml_datapi2(x, ch);
851 		}
852 		break;
853 	case YXMLS_pi4:
854 		if(ch == (unsigned char)'>') {
855 			x->state = x->nextstate;
856 			return yxml_pivalend(x, ch);
857 		}
858 		break;
859 	case YXMLS_std0:
860 		if(yxml_isSP(ch))
861 			return YXML_OK;
862 		if(ch == (unsigned char)'=') {
863 			x->state = YXMLS_std1;
864 			return YXML_OK;
865 		}
866 		break;
867 	case YXMLS_std1:
868 		if(yxml_isSP(ch))
869 			return YXML_OK;
870 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
871 			x->state = YXMLS_std2;
872 			x->quote = ch;
873 			return YXML_OK;
874 		}
875 		break;
876 	case YXMLS_std2:
877 		if(ch == (unsigned char)'y') {
878 			x->state = YXMLS_string;
879 			x->nextstate = YXMLS_std3;
880 			x->string = (unsigned char *)"es";
881 			return YXML_OK;
882 		}
883 		if(ch == (unsigned char)'n') {
884 			x->state = YXMLS_string;
885 			x->nextstate = YXMLS_std3;
886 			x->string = (unsigned char *)"o";
887 			return YXML_OK;
888 		}
889 		break;
890 	case YXMLS_std3:
891 		if(x->quote == ch) {
892 			x->state = YXMLS_xmldecl8;
893 			return YXML_OK;
894 		}
895 		break;
896 	case YXMLS_ver0:
897 		if(yxml_isSP(ch))
898 			return YXML_OK;
899 		if(ch == (unsigned char)'=') {
900 			x->state = YXMLS_ver1;
901 			return YXML_OK;
902 		}
903 		break;
904 	case YXMLS_ver1:
905 		if(yxml_isSP(ch))
906 			return YXML_OK;
907 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
908 			x->state = YXMLS_string;
909 			x->quote = ch;
910 			x->nextstate = YXMLS_ver2;
911 			x->string = (unsigned char *)"1.";
912 			return YXML_OK;
913 		}
914 		break;
915 	case YXMLS_ver2:
916 		if(yxml_isNum(ch)) {
917 			x->state = YXMLS_ver3;
918 			return YXML_OK;
919 		}
920 		break;
921 	case YXMLS_ver3:
922 		if(yxml_isNum(ch))
923 			return YXML_OK;
924 		if(x->quote == ch) {
925 			x->state = YXMLS_xmldecl4;
926 			return YXML_OK;
927 		}
928 		break;
929 	case YXMLS_xmldecl0:
930 		if(ch == (unsigned char)'m') {
931 			x->state = YXMLS_xmldecl1;
932 			return yxml_piname(x, ch);
933 		}
934 		if(yxml_isName(ch)) {
935 			x->state = YXMLS_pi1;
936 			return yxml_piname(x, ch);
937 		}
938 		if(ch == (unsigned char)'?') {
939 			x->state = YXMLS_pi4;
940 			return yxml_pinameend(x, ch);
941 		}
942 		if(yxml_isSP(ch)) {
943 			x->state = YXMLS_pi2;
944 			return yxml_pinameend(x, ch);
945 		}
946 		break;
947 	case YXMLS_xmldecl1:
948 		if(ch == (unsigned char)'l') {
949 			x->state = YXMLS_xmldecl2;
950 			return yxml_piname(x, ch);
951 		}
952 		if(yxml_isName(ch)) {
953 			x->state = YXMLS_pi1;
954 			return yxml_piname(x, ch);
955 		}
956 		if(ch == (unsigned char)'?') {
957 			x->state = YXMLS_pi4;
958 			return yxml_pinameend(x, ch);
959 		}
960 		if(yxml_isSP(ch)) {
961 			x->state = YXMLS_pi2;
962 			return yxml_pinameend(x, ch);
963 		}
964 		break;
965 	case YXMLS_xmldecl2:
966 		if(yxml_isSP(ch)) {
967 			x->state = YXMLS_xmldecl3;
968 			return yxml_piabort(x, ch);
969 		}
970 		if(yxml_isName(ch)) {
971 			x->state = YXMLS_pi1;
972 			return yxml_piname(x, ch);
973 		}
974 		break;
975 	case YXMLS_xmldecl3:
976 		if(yxml_isSP(ch))
977 			return YXML_OK;
978 		if(ch == (unsigned char)'v') {
979 			x->state = YXMLS_string;
980 			x->nextstate = YXMLS_ver0;
981 			x->string = (unsigned char *)"ersion";
982 			return YXML_OK;
983 		}
984 		break;
985 	case YXMLS_xmldecl4:
986 		if(yxml_isSP(ch)) {
987 			x->state = YXMLS_xmldecl5;
988 			return YXML_OK;
989 		}
990 		if(ch == (unsigned char)'?') {
991 			x->state = YXMLS_xmldecl9;
992 			return YXML_OK;
993 		}
994 		break;
995 	case YXMLS_xmldecl5:
996 		if(yxml_isSP(ch))
997 			return YXML_OK;
998 		if(ch == (unsigned char)'?') {
999 			x->state = YXMLS_xmldecl9;
1000 			return YXML_OK;
1001 		}
1002 		if(ch == (unsigned char)'e') {
1003 			x->state = YXMLS_string;
1004 			x->nextstate = YXMLS_enc0;
1005 			x->string = (unsigned char *)"ncoding";
1006 			return YXML_OK;
1007 		}
1008 		if(ch == (unsigned char)'s') {
1009 			x->state = YXMLS_string;
1010 			x->nextstate = YXMLS_std0;
1011 			x->string = (unsigned char *)"tandalone";
1012 			return YXML_OK;
1013 		}
1014 		break;
1015 	case YXMLS_xmldecl6:
1016 		if(yxml_isSP(ch)) {
1017 			x->state = YXMLS_xmldecl7;
1018 			return YXML_OK;
1019 		}
1020 		if(ch == (unsigned char)'?') {
1021 			x->state = YXMLS_xmldecl9;
1022 			return YXML_OK;
1023 		}
1024 		break;
1025 	case YXMLS_xmldecl7:
1026 		if(yxml_isSP(ch))
1027 			return YXML_OK;
1028 		if(ch == (unsigned char)'?') {
1029 			x->state = YXMLS_xmldecl9;
1030 			return YXML_OK;
1031 		}
1032 		if(ch == (unsigned char)'s') {
1033 			x->state = YXMLS_string;
1034 			x->nextstate = YXMLS_std0;
1035 			x->string = (unsigned char *)"tandalone";
1036 			return YXML_OK;
1037 		}
1038 		break;
1039 	case YXMLS_xmldecl8:
1040 		if(yxml_isSP(ch))
1041 			return YXML_OK;
1042 		if(ch == (unsigned char)'?') {
1043 			x->state = YXMLS_xmldecl9;
1044 			return YXML_OK;
1045 		}
1046 		break;
1047 	case YXMLS_xmldecl9:
1048 		if(ch == (unsigned char)'>') {
1049 			x->state = YXMLS_misc1;
1050 			return YXML_OK;
1051 		}
1052 		break;
1053 	}
1054 	return YXML_ESYN;
1055 }
1056 
1057 
1058 yxml_ret_t yxml_eof(yxml_t *x) {
1059 	if(x->state != YXMLS_misc3)
1060 		return YXML_EEOF;
1061 	return YXML_OK;
1062 }
1063 
1064 
1065 /* vim: set noet sw=4 ts=4: */
1066