xref: /btstack/3rd-party/yxml/yxml.c (revision 6420389d1a158883e8ea2b4bc8a58cdca363bf96)
1 /* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */
2 
3 /* Copyright (c) 2013-2014 Yoran Heling
4 
5   Permission is hereby granted, free of charge, to any person obtaining
6   a copy of this software and associated documentation files (the
7   "Software"), to deal in the Software without restriction, including
8   without limitation the rights to use, copy, modify, merge, publish,
9   distribute, sublicense, and/or sell copies of the Software, and to
10   permit persons to whom the Software is furnished to do so, subject to
11   the following conditions:
12 
13   The above copyright notice and this permission notice shall be included
14   in all copies or substantial portions of the Software.
15 
16   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19   IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20   CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22   SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24 
25 #include <yxml.h>
26 #include <string.h>
27 
/* States of the parser state machine driven by yxml_parse(). The current
 * state lives in x->state; x->nextstate holds the state to continue with
 * after a shared sub-sequence (literal string, comment, PI) has finished.
 * The enumerator order is kept exactly as generated. */
typedef enum {
	/* Matching the literal pointed to by x->string */
	YXMLS_string,
	/* Attribute name and value */
	YXMLS_attr0, YXMLS_attr1, YXMLS_attr2, YXMLS_attr3, YXMLS_attr4,
	/* CDATA section */
	YXMLS_cd0, YXMLS_cd1, YXMLS_cd2,
	/* Comment */
	YXMLS_comment0, YXMLS_comment1, YXMLS_comment2, YXMLS_comment3, YXMLS_comment4,
	/* DOCTYPE declaration */
	YXMLS_dt0, YXMLS_dt1, YXMLS_dt2, YXMLS_dt3, YXMLS_dt4,
	/* Element start tag */
	YXMLS_elem0, YXMLS_elem1, YXMLS_elem2, YXMLS_elem3,
	/* encoding="..." in the XML declaration */
	YXMLS_enc0, YXMLS_enc1, YXMLS_enc2, YXMLS_enc3,
	/* Element end tag */
	YXMLS_etag0, YXMLS_etag1, YXMLS_etag2,
	/* Initial state, set by yxml_init() */
	YXMLS_init,
	/* Directly after a '<' */
	YXMLS_le0, YXMLS_le1, YXMLS_le2, YXMLS_le3,
	/* Directly after "<!" */
	YXMLS_lee1, YXMLS_lee2,
	/* Directly after "<?" at the start of the document */
	YXMLS_leq0,
	/* Between markup; misc2 is element content, misc2a a reference in
	 * content, misc3 the state that yxml_eof() accepts */
	YXMLS_misc0, YXMLS_misc1, YXMLS_misc2, YXMLS_misc2a, YXMLS_misc3,
	/* Processing instruction */
	YXMLS_pi0, YXMLS_pi1, YXMLS_pi2, YXMLS_pi3, YXMLS_pi4,
	/* standalone="..." in the XML declaration */
	YXMLS_std0, YXMLS_std1, YXMLS_std2, YXMLS_std3,
	/* version="..." in the XML declaration */
	YXMLS_ver0, YXMLS_ver1, YXMLS_ver2, YXMLS_ver3,
	/* XML declaration */
	YXMLS_xmldecl0, YXMLS_xmldecl1, YXMLS_xmldecl2, YXMLS_xmldecl3, YXMLS_xmldecl4,
	YXMLS_xmldecl5, YXMLS_xmldecl6, YXMLS_xmldecl7, YXMLS_xmldecl8, YXMLS_xmldecl9
} yxml_state_t;
96 
97 
/* Character-class predicates used by the state machine.
 *
 * Every macro argument is parenthesized so that an expression argument cannot
 * change the meaning of the expansion. The range checks convert the argument
 * to unsigned first: a value below the start of the range then wraps to a
 * large number and fails the '<' test, also when the argument is a signed
 * int. Arguments may be evaluated more than once, so they must not have side
 * effects. */
#define yxml_isChar(c) 1
/* 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */
#define yxml_isSP(c) ((c) == 0x20 || (c) == 0x09 || (c) == 0x0a)
#define yxml_isAlpha(c) (((unsigned)(c)|32)-'a' < 26u)
#define yxml_isNum(c) ((unsigned)(c)-'0' < 10u)
#define yxml_isHex(c) (yxml_isNum(c) || ((unsigned)(c)|32)-'a' < 6u)
#define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || (c) == '.' || (c) == '_' || (c) == '-')
#define yxml_isNameStart(c) (yxml_isAlpha(c) || (c) == ':' || (c) == '_' || (c) >= 128)
#define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || (c) == '-' || (c) == '.')
/* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */
#define yxml_isAttValue(c) (yxml_isChar(c) && (c) != x->quote && (c) != '<' && (c) != '&')
/* Anything between '&' and ';', the yxml_ref* functions will do further
 * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but
 * this parser doesn't understand entities with '.', ':', etc, anyway.  */
#define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || (c) == '#')

/* Pack five byte values into one 64-bit integer (a in bits 32..39, e in bits
 * 0..7) so that a short name can be compared with a single '=='. */
#define INTFROM5CHARS(a, b, c, d, e) ((((uint64_t)(a))<<32) | (((uint64_t)(b))<<24) | (((uint64_t)(c))<<16) | (((uint64_t)(d))<<8) | (uint64_t)(e))
115 
116 
/* Store the low 8 bits of ch in *dest.
 * A plain assignment to a char is avoided because char may be signed, and
 * converting an out-of-range value to a signed type is implementation
 * defined in C. Writing through an unsigned char lvalue stores the byte
 * without any such conversion. */
static inline void yxml_setchar(char *dest, unsigned ch) {
	*(unsigned char *)dest = (unsigned char)ch;
}
126 
127 
/* Write the UTF-8 encoding of ch (any valid unicode point) to dest, followed
 * by a terminating '\0'. Up to four encoded bytes plus the terminator are
 * written, so dest must have room for at least 5 bytes. */
static void yxml_setutf8(char *dest, unsigned ch) {
	unsigned lead;  /* value of the first byte */
	unsigned tail;  /* number of continuation bytes that follow it */

	if(ch <= 0x007F) {
		lead = ch;
		tail = 0;
	} else if(ch <= 0x07FF) {
		lead = 0xC0 | (ch>>6);
		tail = 1;
	} else if(ch <= 0xFFFF) {
		lead = 0xE0 | (ch>>12);
		tail = 2;
	} else {
		lead = 0xF0 | (ch>>18);
		tail = 3;
	}

	yxml_setchar(dest++, lead);
	/* Each continuation byte carries 6 bits, most significant group first. */
	while(tail--)
		yxml_setchar(dest++, 0x80 | ((ch>>(6*tail)) & 0x3F));
	*dest = 0;
}
148 
149 
150 static inline yxml_ret_t yxml_datacontent(yxml_t *x, unsigned ch) {
151 	yxml_setchar(x->data, ch);
152 	x->data[1] = 0;
153 	return YXML_CONTENT;
154 }
155 
156 
157 static inline yxml_ret_t yxml_datapi1(yxml_t *x, unsigned ch) {
158 	yxml_setchar(x->data, ch);
159 	x->data[1] = 0;
160 	return YXML_PICONTENT;
161 }
162 
163 
164 static inline yxml_ret_t yxml_datapi2(yxml_t *x, unsigned ch) {
165 	x->data[0] = '?';
166 	yxml_setchar(x->data+1, ch);
167 	x->data[2] = 0;
168 	return YXML_PICONTENT;
169 }
170 
171 
172 static inline yxml_ret_t yxml_datacd1(yxml_t *x, unsigned ch) {
173 	x->data[0] = ']';
174 	yxml_setchar(x->data+1, ch);
175 	x->data[2] = 0;
176 	return YXML_CONTENT;
177 }
178 
179 
180 static inline yxml_ret_t yxml_datacd2(yxml_t *x, unsigned ch) {
181 	x->data[0] = ']';
182 	x->data[1] = ']';
183 	yxml_setchar(x->data+2, ch);
184 	x->data[3] = 0;
185 	return YXML_CONTENT;
186 }
187 
188 
189 static inline yxml_ret_t yxml_dataattr(yxml_t *x, unsigned ch) {
190 	/* Normalize attribute values according to the XML spec section 3.3.3. */
191 	yxml_setchar(x->data, ch == 0x9 || ch == 0xa ? 0x20 : ch);
192 	x->data[1] = 0;
193 	return YXML_ATTRVAL;
194 }
195 
196 
197 static yxml_ret_t yxml_pushstack(yxml_t *x, char **res, unsigned ch) {
198 	if(x->stacklen+2 >= x->stacksize)
199 		return YXML_ESTACK;
200 	x->stacklen++;
201 	*res = (char *)x->stack+x->stacklen;
202 	x->stack[x->stacklen] = ch;
203 	x->stacklen++;
204 	x->stack[x->stacklen] = 0;
205 	return YXML_OK;
206 }
207 
208 
209 static yxml_ret_t yxml_pushstackc(yxml_t *x, unsigned ch) {
210 	if(x->stacklen+1 >= x->stacksize)
211 		return YXML_ESTACK;
212 	x->stack[x->stacklen] = ch;
213 	x->stacklen++;
214 	x->stack[x->stacklen] = 0;
215 	return YXML_OK;
216 }
217 
218 
219 static void yxml_popstack(yxml_t *x) {
220 	do
221 		x->stacklen--;
222 	while(x->stack[x->stacklen]);
223 }
224 
225 
226 static inline yxml_ret_t yxml_elemstart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); }
227 static inline yxml_ret_t yxml_elemname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
228 static inline yxml_ret_t yxml_elemnameend(yxml_t *x, unsigned ch) { return YXML_ELEMSTART; }
229 
230 
231 /* Also used in yxml_elemcloseend(), since this function just removes the last
232  * element from the stack and returns ELEMEND. */
233 static yxml_ret_t yxml_selfclose(yxml_t *x, unsigned ch) {
234 	yxml_popstack(x);
235 	if(x->stacklen) {
236 		x->elem = (char *)x->stack+x->stacklen-1;
237 		while(*(x->elem-1))
238 			x->elem--;
239 		return YXML_ELEMEND;
240 	}
241 	x->elem = (char *)x->stack;
242 	x->state = YXMLS_misc3;
243 	return YXML_ELEMEND;
244 }
245 
246 
247 static inline yxml_ret_t yxml_elemclose(yxml_t *x, unsigned ch) {
248 	if(*((unsigned char *)x->elem) != ch)
249 		return YXML_ECLOSE;
250 	x->elem++;
251 	return YXML_OK;
252 }
253 
254 
255 static inline yxml_ret_t yxml_elemcloseend(yxml_t *x, unsigned ch) {
256 	if(*x->elem)
257 		return YXML_ECLOSE;
258 	return yxml_selfclose(x, ch);
259 }
260 
261 
262 static inline yxml_ret_t yxml_attrstart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); }
263 static inline yxml_ret_t yxml_attrname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
264 static inline yxml_ret_t yxml_attrnameend(yxml_t *x, unsigned ch) { return YXML_ATTRSTART; }
265 static inline yxml_ret_t yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_ATTREND; }
266 
267 
268 static inline yxml_ret_t yxml_pistart  (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); }
269 static inline yxml_ret_t yxml_piname   (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); }
270 static inline yxml_ret_t yxml_piabort  (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_OK; }
271 static inline yxml_ret_t yxml_pinameend(yxml_t *x, unsigned ch) {
272 	return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART;
273 }
274 static inline yxml_ret_t yxml_pivalend (yxml_t *x, unsigned ch) { yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; }
275 
276 
277 static inline yxml_ret_t yxml_refstart(yxml_t *x, unsigned ch) {
278 	memset(x->data, 0, sizeof(x->data));
279 	x->reflen = 0;
280 	return YXML_OK;
281 }
282 
283 
284 static yxml_ret_t yxml_ref(yxml_t *x, unsigned ch) {
285 	if(x->reflen >= sizeof(x->data)-1)
286 		return YXML_EREF;
287 	yxml_setchar(x->data+x->reflen, ch);
288 	x->reflen++;
289 	return YXML_OK;
290 }
291 
292 
293 static yxml_ret_t yxml_refend(yxml_t *x, yxml_ret_t ret) {
294 	unsigned char *r = (unsigned char *)x->data;
295 	unsigned ch = 0;
296 	if(*r == '#') {
297 		if(r[1] == 'x')
298 			for(r += 2; yxml_isHex((unsigned)*r); r++)
299 				ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10);
300 		else
301 			for(r++; yxml_isNum((unsigned)*r); r++)
302 				ch = (ch*10) + (*r-'0');
303 		if(*r)
304 			ch = 0;
305 	} else {
306 		uint64_t i = INTFROM5CHARS(r[0], r[1], r[2], r[3], r[4]);
307 		ch =
308 			i == INTFROM5CHARS('l','t', 0,  0, 0) ? '<' :
309 			i == INTFROM5CHARS('g','t', 0,  0, 0) ? '>' :
310 			i == INTFROM5CHARS('a','m','p', 0, 0) ? '&' :
311 			i == INTFROM5CHARS('a','p','o','s',0) ? '\'':
312 			i == INTFROM5CHARS('q','u','o','t',0) ? '"' : 0;
313 	}
314 
315 	/* Codepoints not allowed in the XML 1.1 definition of a Char */
316 	if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xDFFF) < 0x7FF)
317 		return YXML_EREF;
318 	yxml_setutf8(x->data, ch);
319 	return ret;
320 }
321 
322 
323 static inline yxml_ret_t yxml_refcontent(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_CONTENT); }
324 static inline yxml_ret_t yxml_refattrval(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_ATTRVAL); }
325 
326 
327 void yxml_init(yxml_t *x, void *stack, size_t stacksize) {
328 	memset(x, 0, sizeof(*x));
329 	x->line = 1;
330 	x->stack = stack;
331 	x->stacksize = stacksize;
332 	*x->stack = 0;
333 	x->elem = x->pi = x->attr = (char *)x->stack;
334 	x->state = YXMLS_init;
335 }
336 
337 
338 yxml_ret_t yxml_parse(yxml_t *x, int _ch) {
339 	/* Ensure that characters are in the range of 0..255 rather than -126..125.
340 	 * All character comparisons are done with positive integers. */
341 	unsigned ch = (unsigned)(_ch+256) & 0xff;
342 	if(!ch)
343 		return YXML_ESYN;
344 	x->total++;
345 
346 	/* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and
347 	 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds
348 	 * some non-ASCII character sequences to this list, but we can only handle
349 	 * ASCII here without making assumptions about the input encoding. */
350 	if(x->ignore == ch) {
351 		x->ignore = 0;
352 		return YXML_OK;
353 	}
354 	x->ignore = (ch == 0xd) * 0xa;
355 	if(ch == 0xa || ch == 0xd) {
356 		ch = 0xa;
357 		x->line++;
358 		x->byte = 0;
359 	}
360 	x->byte++;
361 
362 	switch((yxml_state_t)x->state) {
363 	case YXMLS_string:
364 		if(ch == *x->string) {
365 			x->string++;
366 			if(!*x->string)
367 				x->state = x->nextstate;
368 			return YXML_OK;
369 		}
370 		break;
371 	case YXMLS_attr0:
372 		if(yxml_isName(ch))
373 			return yxml_attrname(x, ch);
374 		if(yxml_isSP(ch)) {
375 			x->state = YXMLS_attr1;
376 			return yxml_attrnameend(x, ch);
377 		}
378 		if(ch == (unsigned char)'=') {
379 			x->state = YXMLS_attr2;
380 			return yxml_attrnameend(x, ch);
381 		}
382 		break;
383 	case YXMLS_attr1:
384 		if(yxml_isSP(ch))
385 			return YXML_OK;
386 		if(ch == (unsigned char)'=') {
387 			x->state = YXMLS_attr2;
388 			return YXML_OK;
389 		}
390 		break;
391 	case YXMLS_attr2:
392 		if(yxml_isSP(ch))
393 			return YXML_OK;
394 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
395 			x->state = YXMLS_attr3;
396 			x->quote = ch;
397 			return YXML_OK;
398 		}
399 		break;
400 	case YXMLS_attr3:
401 		if(yxml_isAttValue(ch))
402 			return yxml_dataattr(x, ch);
403 		if(ch == (unsigned char)'&') {
404 			x->state = YXMLS_attr4;
405 			return yxml_refstart(x, ch);
406 		}
407 		if(x->quote == ch) {
408 			x->state = YXMLS_elem2;
409 			return yxml_attrvalend(x, ch);
410 		}
411 		break;
412 	case YXMLS_attr4:
413 		if(yxml_isRef(ch))
414 			return yxml_ref(x, ch);
415 		if(ch == (unsigned char)'\x3b') {
416 			x->state = YXMLS_attr3;
417 			return yxml_refattrval(x, ch);
418 		}
419 		break;
420 	case YXMLS_cd0:
421 		if(ch == (unsigned char)']') {
422 			x->state = YXMLS_cd1;
423 			return YXML_OK;
424 		}
425 		if(yxml_isChar(ch))
426 			return yxml_datacontent(x, ch);
427 		break;
428 	case YXMLS_cd1:
429 		if(ch == (unsigned char)']') {
430 			x->state = YXMLS_cd2;
431 			return YXML_OK;
432 		}
433 		if(yxml_isChar(ch)) {
434 			x->state = YXMLS_cd0;
435 			return yxml_datacd1(x, ch);
436 		}
437 		break;
438 	case YXMLS_cd2:
439 		if(ch == (unsigned char)']')
440 			return yxml_datacontent(x, ch);
441 		if(ch == (unsigned char)'>') {
442 			x->state = YXMLS_misc2;
443 			return YXML_OK;
444 		}
445 		if(yxml_isChar(ch)) {
446 			x->state = YXMLS_cd0;
447 			return yxml_datacd2(x, ch);
448 		}
449 		break;
450 	case YXMLS_comment0:
451 		if(ch == (unsigned char)'-') {
452 			x->state = YXMLS_comment1;
453 			return YXML_OK;
454 		}
455 		break;
456 	case YXMLS_comment1:
457 		if(ch == (unsigned char)'-') {
458 			x->state = YXMLS_comment2;
459 			return YXML_OK;
460 		}
461 		break;
462 	case YXMLS_comment2:
463 		if(ch == (unsigned char)'-') {
464 			x->state = YXMLS_comment3;
465 			return YXML_OK;
466 		}
467 		if(yxml_isChar(ch))
468 			return YXML_OK;
469 		break;
470 	case YXMLS_comment3:
471 		if(ch == (unsigned char)'-') {
472 			x->state = YXMLS_comment4;
473 			return YXML_OK;
474 		}
475 		if(yxml_isChar(ch)) {
476 			x->state = YXMLS_comment2;
477 			return YXML_OK;
478 		}
479 		break;
480 	case YXMLS_comment4:
481 		if(ch == (unsigned char)'>') {
482 			x->state = x->nextstate;
483 			return YXML_OK;
484 		}
485 		break;
486 	case YXMLS_dt0:
487 		if(ch == (unsigned char)'>') {
488 			x->state = YXMLS_misc1;
489 			return YXML_OK;
490 		}
491 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
492 			x->state = YXMLS_dt1;
493 			x->quote = ch;
494 			x->nextstate = YXMLS_dt0;
495 			return YXML_OK;
496 		}
497 		if(ch == (unsigned char)'<') {
498 			x->state = YXMLS_dt2;
499 			return YXML_OK;
500 		}
501 		if(yxml_isChar(ch))
502 			return YXML_OK;
503 		break;
504 	case YXMLS_dt1:
505 		if(x->quote == ch) {
506 			x->state = x->nextstate;
507 			return YXML_OK;
508 		}
509 		if(yxml_isChar(ch))
510 			return YXML_OK;
511 		break;
512 	case YXMLS_dt2:
513 		if(ch == (unsigned char)'?') {
514 			x->state = YXMLS_pi0;
515 			x->nextstate = YXMLS_dt0;
516 			return YXML_OK;
517 		}
518 		if(ch == (unsigned char)'!') {
519 			x->state = YXMLS_dt3;
520 			return YXML_OK;
521 		}
522 		break;
523 	case YXMLS_dt3:
524 		if(ch == (unsigned char)'-') {
525 			x->state = YXMLS_comment1;
526 			x->nextstate = YXMLS_dt0;
527 			return YXML_OK;
528 		}
529 		if(yxml_isChar(ch)) {
530 			x->state = YXMLS_dt4;
531 			return YXML_OK;
532 		}
533 		break;
534 	case YXMLS_dt4:
535 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
536 			x->state = YXMLS_dt1;
537 			x->quote = ch;
538 			x->nextstate = YXMLS_dt4;
539 			return YXML_OK;
540 		}
541 		if(ch == (unsigned char)'>') {
542 			x->state = YXMLS_dt0;
543 			return YXML_OK;
544 		}
545 		if(yxml_isChar(ch))
546 			return YXML_OK;
547 		break;
548 	case YXMLS_elem0:
549 		if(yxml_isName(ch))
550 			return yxml_elemname(x, ch);
551 		if(yxml_isSP(ch)) {
552 			x->state = YXMLS_elem1;
553 			return yxml_elemnameend(x, ch);
554 		}
555 		if(ch == (unsigned char)'/') {
556 			x->state = YXMLS_elem3;
557 			return yxml_elemnameend(x, ch);
558 		}
559 		if(ch == (unsigned char)'>') {
560 			x->state = YXMLS_misc2;
561 			return yxml_elemnameend(x, ch);
562 		}
563 		break;
564 	case YXMLS_elem1:
565 		if(yxml_isSP(ch))
566 			return YXML_OK;
567 		if(ch == (unsigned char)'/') {
568 			x->state = YXMLS_elem3;
569 			return YXML_OK;
570 		}
571 		if(ch == (unsigned char)'>') {
572 			x->state = YXMLS_misc2;
573 			return YXML_OK;
574 		}
575 		if(yxml_isNameStart(ch)) {
576 			x->state = YXMLS_attr0;
577 			return yxml_attrstart(x, ch);
578 		}
579 		break;
580 	case YXMLS_elem2:
581 		if(yxml_isSP(ch)) {
582 			x->state = YXMLS_elem1;
583 			return YXML_OK;
584 		}
585 		if(ch == (unsigned char)'/') {
586 			x->state = YXMLS_elem3;
587 			return YXML_OK;
588 		}
589 		if(ch == (unsigned char)'>') {
590 			x->state = YXMLS_misc2;
591 			return YXML_OK;
592 		}
593 		break;
594 	case YXMLS_elem3:
595 		if(ch == (unsigned char)'>') {
596 			x->state = YXMLS_misc2;
597 			return yxml_selfclose(x, ch);
598 		}
599 		break;
600 	case YXMLS_enc0:
601 		if(yxml_isSP(ch))
602 			return YXML_OK;
603 		if(ch == (unsigned char)'=') {
604 			x->state = YXMLS_enc1;
605 			return YXML_OK;
606 		}
607 		break;
608 	case YXMLS_enc1:
609 		if(yxml_isSP(ch))
610 			return YXML_OK;
611 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
612 			x->state = YXMLS_enc2;
613 			x->quote = ch;
614 			return YXML_OK;
615 		}
616 		break;
617 	case YXMLS_enc2:
618 		if(yxml_isAlpha(ch)) {
619 			x->state = YXMLS_enc3;
620 			return YXML_OK;
621 		}
622 		break;
623 	case YXMLS_enc3:
624 		if(yxml_isEncName(ch))
625 			return YXML_OK;
626 		if(x->quote == ch) {
627 			x->state = YXMLS_xmldecl6;
628 			return YXML_OK;
629 		}
630 		break;
631 	case YXMLS_etag0:
632 		if(yxml_isNameStart(ch)) {
633 			x->state = YXMLS_etag1;
634 			return yxml_elemclose(x, ch);
635 		}
636 		break;
637 	case YXMLS_etag1:
638 		if(yxml_isName(ch))
639 			return yxml_elemclose(x, ch);
640 		if(yxml_isSP(ch)) {
641 			x->state = YXMLS_etag2;
642 			return yxml_elemcloseend(x, ch);
643 		}
644 		if(ch == (unsigned char)'>') {
645 			x->state = YXMLS_misc2;
646 			return yxml_elemcloseend(x, ch);
647 		}
648 		break;
649 	case YXMLS_etag2:
650 		if(yxml_isSP(ch))
651 			return YXML_OK;
652 		if(ch == (unsigned char)'>') {
653 			x->state = YXMLS_misc2;
654 			return YXML_OK;
655 		}
656 		break;
657 	case YXMLS_init:
658 		if(ch == (unsigned char)'\xef') {
659 			x->state = YXMLS_string;
660 			x->nextstate = YXMLS_misc0;
661 			x->string = (unsigned char *)"\xbb\xbf";
662 			return YXML_OK;
663 		}
664 		if(yxml_isSP(ch)) {
665 			x->state = YXMLS_misc0;
666 			return YXML_OK;
667 		}
668 		if(ch == (unsigned char)'<') {
669 			x->state = YXMLS_le0;
670 			return YXML_OK;
671 		}
672 		break;
673 	case YXMLS_le0:
674 		if(ch == (unsigned char)'!') {
675 			x->state = YXMLS_lee1;
676 			return YXML_OK;
677 		}
678 		if(ch == (unsigned char)'?') {
679 			x->state = YXMLS_leq0;
680 			return YXML_OK;
681 		}
682 		if(yxml_isNameStart(ch)) {
683 			x->state = YXMLS_elem0;
684 			return yxml_elemstart(x, ch);
685 		}
686 		break;
687 	case YXMLS_le1:
688 		if(ch == (unsigned char)'!') {
689 			x->state = YXMLS_lee1;
690 			return YXML_OK;
691 		}
692 		if(ch == (unsigned char)'?') {
693 			x->state = YXMLS_pi0;
694 			x->nextstate = YXMLS_misc1;
695 			return YXML_OK;
696 		}
697 		if(yxml_isNameStart(ch)) {
698 			x->state = YXMLS_elem0;
699 			return yxml_elemstart(x, ch);
700 		}
701 		break;
702 	case YXMLS_le2:
703 		if(ch == (unsigned char)'!') {
704 			x->state = YXMLS_lee2;
705 			return YXML_OK;
706 		}
707 		if(ch == (unsigned char)'?') {
708 			x->state = YXMLS_pi0;
709 			x->nextstate = YXMLS_misc2;
710 			return YXML_OK;
711 		}
712 		if(ch == (unsigned char)'/') {
713 			x->state = YXMLS_etag0;
714 			return YXML_OK;
715 		}
716 		if(yxml_isNameStart(ch)) {
717 			x->state = YXMLS_elem0;
718 			return yxml_elemstart(x, ch);
719 		}
720 		break;
721 	case YXMLS_le3:
722 		if(ch == (unsigned char)'!') {
723 			x->state = YXMLS_comment0;
724 			x->nextstate = YXMLS_misc3;
725 			return YXML_OK;
726 		}
727 		if(ch == (unsigned char)'?') {
728 			x->state = YXMLS_pi0;
729 			x->nextstate = YXMLS_misc3;
730 			return YXML_OK;
731 		}
732 		break;
733 	case YXMLS_lee1:
734 		if(ch == (unsigned char)'-') {
735 			x->state = YXMLS_comment1;
736 			x->nextstate = YXMLS_misc1;
737 			return YXML_OK;
738 		}
739 		if(ch == (unsigned char)'D') {
740 			x->state = YXMLS_string;
741 			x->nextstate = YXMLS_dt0;
742 			x->string = (unsigned char *)"OCTYPE";
743 			return YXML_OK;
744 		}
745 		break;
746 	case YXMLS_lee2:
747 		if(ch == (unsigned char)'-') {
748 			x->state = YXMLS_comment1;
749 			x->nextstate = YXMLS_misc2;
750 			return YXML_OK;
751 		}
752 		if(ch == (unsigned char)'[') {
753 			x->state = YXMLS_string;
754 			x->nextstate = YXMLS_cd0;
755 			x->string = (unsigned char *)"CDATA[";
756 			return YXML_OK;
757 		}
758 		break;
759 	case YXMLS_leq0:
760 		if(ch == (unsigned char)'x') {
761 			x->state = YXMLS_xmldecl0;
762 			x->nextstate = YXMLS_misc1;
763 			return yxml_pistart(x, ch);
764 		}
765 		if(yxml_isNameStart(ch)) {
766 			x->state = YXMLS_pi1;
767 			x->nextstate = YXMLS_misc1;
768 			return yxml_pistart(x, ch);
769 		}
770 		break;
771 	case YXMLS_misc0:
772 		if(yxml_isSP(ch))
773 			return YXML_OK;
774 		if(ch == (unsigned char)'<') {
775 			x->state = YXMLS_le0;
776 			return YXML_OK;
777 		}
778 		break;
779 	case YXMLS_misc1:
780 		if(yxml_isSP(ch))
781 			return YXML_OK;
782 		if(ch == (unsigned char)'<') {
783 			x->state = YXMLS_le1;
784 			return YXML_OK;
785 		}
786 		break;
787 	case YXMLS_misc2:
788 		if(ch == (unsigned char)'<') {
789 			x->state = YXMLS_le2;
790 			return YXML_OK;
791 		}
792 		if(ch == (unsigned char)'&') {
793 			x->state = YXMLS_misc2a;
794 			return yxml_refstart(x, ch);
795 		}
796 		if(yxml_isChar(ch))
797 			return yxml_datacontent(x, ch);
798 		break;
799 	case YXMLS_misc2a:
800 		if(yxml_isRef(ch))
801 			return yxml_ref(x, ch);
802 		if(ch == (unsigned char)'\x3b') {
803 			x->state = YXMLS_misc2;
804 			return yxml_refcontent(x, ch);
805 		}
806 		break;
807 	case YXMLS_misc3:
808 		if(yxml_isSP(ch))
809 			return YXML_OK;
810 		if(ch == (unsigned char)'<') {
811 			x->state = YXMLS_le3;
812 			return YXML_OK;
813 		}
814 		break;
815 	case YXMLS_pi0:
816 		if(yxml_isNameStart(ch)) {
817 			x->state = YXMLS_pi1;
818 			return yxml_pistart(x, ch);
819 		}
820 		break;
821 	case YXMLS_pi1:
822 		if(yxml_isName(ch))
823 			return yxml_piname(x, ch);
824 		if(ch == (unsigned char)'?') {
825 			x->state = YXMLS_pi4;
826 			return yxml_pinameend(x, ch);
827 		}
828 		if(yxml_isSP(ch)) {
829 			x->state = YXMLS_pi2;
830 			return yxml_pinameend(x, ch);
831 		}
832 		break;
833 	case YXMLS_pi2:
834 		if(ch == (unsigned char)'?') {
835 			x->state = YXMLS_pi3;
836 			return YXML_OK;
837 		}
838 		if(yxml_isChar(ch))
839 			return yxml_datapi1(x, ch);
840 		break;
841 	case YXMLS_pi3:
842 		if(ch == (unsigned char)'>') {
843 			x->state = x->nextstate;
844 			return yxml_pivalend(x, ch);
845 		}
846 		if(yxml_isChar(ch)) {
847 			x->state = YXMLS_pi2;
848 			return yxml_datapi2(x, ch);
849 		}
850 		break;
851 	case YXMLS_pi4:
852 		if(ch == (unsigned char)'>') {
853 			x->state = x->nextstate;
854 			return yxml_pivalend(x, ch);
855 		}
856 		break;
857 	case YXMLS_std0:
858 		if(yxml_isSP(ch))
859 			return YXML_OK;
860 		if(ch == (unsigned char)'=') {
861 			x->state = YXMLS_std1;
862 			return YXML_OK;
863 		}
864 		break;
865 	case YXMLS_std1:
866 		if(yxml_isSP(ch))
867 			return YXML_OK;
868 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
869 			x->state = YXMLS_std2;
870 			x->quote = ch;
871 			return YXML_OK;
872 		}
873 		break;
874 	case YXMLS_std2:
875 		if(ch == (unsigned char)'y') {
876 			x->state = YXMLS_string;
877 			x->nextstate = YXMLS_std3;
878 			x->string = (unsigned char *)"es";
879 			return YXML_OK;
880 		}
881 		if(ch == (unsigned char)'n') {
882 			x->state = YXMLS_string;
883 			x->nextstate = YXMLS_std3;
884 			x->string = (unsigned char *)"o";
885 			return YXML_OK;
886 		}
887 		break;
888 	case YXMLS_std3:
889 		if(x->quote == ch) {
890 			x->state = YXMLS_xmldecl8;
891 			return YXML_OK;
892 		}
893 		break;
894 	case YXMLS_ver0:
895 		if(yxml_isSP(ch))
896 			return YXML_OK;
897 		if(ch == (unsigned char)'=') {
898 			x->state = YXMLS_ver1;
899 			return YXML_OK;
900 		}
901 		break;
902 	case YXMLS_ver1:
903 		if(yxml_isSP(ch))
904 			return YXML_OK;
905 		if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') {
906 			x->state = YXMLS_string;
907 			x->quote = ch;
908 			x->nextstate = YXMLS_ver2;
909 			x->string = (unsigned char *)"1.";
910 			return YXML_OK;
911 		}
912 		break;
913 	case YXMLS_ver2:
914 		if(yxml_isNum(ch)) {
915 			x->state = YXMLS_ver3;
916 			return YXML_OK;
917 		}
918 		break;
919 	case YXMLS_ver3:
920 		if(yxml_isNum(ch))
921 			return YXML_OK;
922 		if(x->quote == ch) {
923 			x->state = YXMLS_xmldecl4;
924 			return YXML_OK;
925 		}
926 		break;
927 	case YXMLS_xmldecl0:
928 		if(ch == (unsigned char)'m') {
929 			x->state = YXMLS_xmldecl1;
930 			return yxml_piname(x, ch);
931 		}
932 		if(yxml_isName(ch)) {
933 			x->state = YXMLS_pi1;
934 			return yxml_piname(x, ch);
935 		}
936 		if(ch == (unsigned char)'?') {
937 			x->state = YXMLS_pi4;
938 			return yxml_pinameend(x, ch);
939 		}
940 		if(yxml_isSP(ch)) {
941 			x->state = YXMLS_pi2;
942 			return yxml_pinameend(x, ch);
943 		}
944 		break;
945 	case YXMLS_xmldecl1:
946 		if(ch == (unsigned char)'l') {
947 			x->state = YXMLS_xmldecl2;
948 			return yxml_piname(x, ch);
949 		}
950 		if(yxml_isName(ch)) {
951 			x->state = YXMLS_pi1;
952 			return yxml_piname(x, ch);
953 		}
954 		if(ch == (unsigned char)'?') {
955 			x->state = YXMLS_pi4;
956 			return yxml_pinameend(x, ch);
957 		}
958 		if(yxml_isSP(ch)) {
959 			x->state = YXMLS_pi2;
960 			return yxml_pinameend(x, ch);
961 		}
962 		break;
963 	case YXMLS_xmldecl2:
964 		if(yxml_isSP(ch)) {
965 			x->state = YXMLS_xmldecl3;
966 			return yxml_piabort(x, ch);
967 		}
968 		if(yxml_isName(ch)) {
969 			x->state = YXMLS_pi1;
970 			return yxml_piname(x, ch);
971 		}
972 		break;
973 	case YXMLS_xmldecl3:
974 		if(yxml_isSP(ch))
975 			return YXML_OK;
976 		if(ch == (unsigned char)'v') {
977 			x->state = YXMLS_string;
978 			x->nextstate = YXMLS_ver0;
979 			x->string = (unsigned char *)"ersion";
980 			return YXML_OK;
981 		}
982 		break;
983 	case YXMLS_xmldecl4:
984 		if(yxml_isSP(ch)) {
985 			x->state = YXMLS_xmldecl5;
986 			return YXML_OK;
987 		}
988 		if(ch == (unsigned char)'?') {
989 			x->state = YXMLS_xmldecl9;
990 			return YXML_OK;
991 		}
992 		break;
993 	case YXMLS_xmldecl5:
994 		if(yxml_isSP(ch))
995 			return YXML_OK;
996 		if(ch == (unsigned char)'?') {
997 			x->state = YXMLS_xmldecl9;
998 			return YXML_OK;
999 		}
1000 		if(ch == (unsigned char)'e') {
1001 			x->state = YXMLS_string;
1002 			x->nextstate = YXMLS_enc0;
1003 			x->string = (unsigned char *)"ncoding";
1004 			return YXML_OK;
1005 		}
1006 		if(ch == (unsigned char)'s') {
1007 			x->state = YXMLS_string;
1008 			x->nextstate = YXMLS_std0;
1009 			x->string = (unsigned char *)"tandalone";
1010 			return YXML_OK;
1011 		}
1012 		break;
1013 	case YXMLS_xmldecl6:
1014 		if(yxml_isSP(ch)) {
1015 			x->state = YXMLS_xmldecl7;
1016 			return YXML_OK;
1017 		}
1018 		if(ch == (unsigned char)'?') {
1019 			x->state = YXMLS_xmldecl9;
1020 			return YXML_OK;
1021 		}
1022 		break;
1023 	case YXMLS_xmldecl7:
1024 		if(yxml_isSP(ch))
1025 			return YXML_OK;
1026 		if(ch == (unsigned char)'?') {
1027 			x->state = YXMLS_xmldecl9;
1028 			return YXML_OK;
1029 		}
1030 		if(ch == (unsigned char)'s') {
1031 			x->state = YXMLS_string;
1032 			x->nextstate = YXMLS_std0;
1033 			x->string = (unsigned char *)"tandalone";
1034 			return YXML_OK;
1035 		}
1036 		break;
1037 	case YXMLS_xmldecl8:
1038 		if(yxml_isSP(ch))
1039 			return YXML_OK;
1040 		if(ch == (unsigned char)'?') {
1041 			x->state = YXMLS_xmldecl9;
1042 			return YXML_OK;
1043 		}
1044 		break;
1045 	case YXMLS_xmldecl9:
1046 		if(ch == (unsigned char)'>') {
1047 			x->state = YXMLS_misc1;
1048 			return YXML_OK;
1049 		}
1050 		break;
1051 	}
1052 	return YXML_ESYN;
1053 }
1054 
1055 
1056 yxml_ret_t yxml_eof(yxml_t *x) {
1057 	if(x->state != YXMLS_misc3)
1058 		return YXML_EEOF;
1059 	return YXML_OK;
1060 }
1061 
1062 
1063 /* vim: set noet sw=4 ts=4: */
1064