1 /* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */ 2 3 /* Copyright (c) 2013-2014 Yoran Heling 4 5 Permission is hereby granted, free of charge, to any person obtaining 6 a copy of this software and associated documentation files (the 7 "Software"), to deal in the Software without restriction, including 8 without limitation the rights to use, copy, modify, merge, publish, 9 distribute, sublicense, and/or sell copies of the Software, and to 10 permit persons to whom the Software is furnished to do so, subject to 11 the following conditions: 12 13 The above copyright notice and this permission notice shall be included 14 in all copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25 #include <yxml.h> 26 #include <string.h> 27 28 typedef enum { 29 YXMLS_string, 30 YXMLS_attr0, 31 YXMLS_attr1, 32 YXMLS_attr2, 33 YXMLS_attr3, 34 YXMLS_attr4, 35 YXMLS_cd0, 36 YXMLS_cd1, 37 YXMLS_cd2, 38 YXMLS_comment0, 39 YXMLS_comment1, 40 YXMLS_comment2, 41 YXMLS_comment3, 42 YXMLS_comment4, 43 YXMLS_dt0, 44 YXMLS_dt1, 45 YXMLS_dt2, 46 YXMLS_dt3, 47 YXMLS_dt4, 48 YXMLS_elem0, 49 YXMLS_elem1, 50 YXMLS_elem2, 51 YXMLS_elem3, 52 YXMLS_enc0, 53 YXMLS_enc1, 54 YXMLS_enc2, 55 YXMLS_enc3, 56 YXMLS_etag0, 57 YXMLS_etag1, 58 YXMLS_etag2, 59 YXMLS_init, 60 YXMLS_le0, 61 YXMLS_le1, 62 YXMLS_le2, 63 YXMLS_le3, 64 YXMLS_lee1, 65 YXMLS_lee2, 66 YXMLS_leq0, 67 YXMLS_misc0, 68 YXMLS_misc1, 69 YXMLS_misc2, 70 YXMLS_misc2a, 71 YXMLS_misc3, 72 YXMLS_pi0, 73 YXMLS_pi1, 74 YXMLS_pi2, 75 YXMLS_pi3, 76 YXMLS_pi4, 77 YXMLS_std0, 78 YXMLS_std1, 79 YXMLS_std2, 80 YXMLS_std3, 81 YXMLS_ver0, 82 YXMLS_ver1, 83 YXMLS_ver2, 84 YXMLS_ver3, 85 YXMLS_xmldecl0, 86 YXMLS_xmldecl1, 87 YXMLS_xmldecl2, 88 YXMLS_xmldecl3, 89 YXMLS_xmldecl4, 90 YXMLS_xmldecl5, 91 YXMLS_xmldecl6, 92 YXMLS_xmldecl7, 93 YXMLS_xmldecl8, 94 YXMLS_xmldecl9 95 } yxml_state_t; 96 97 98 #define yxml_isChar(c) 1 99 /* 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */ 100 #define yxml_isSP(c) (c == 0x20 || c == 0x09 || c == 0x0a) 101 #define yxml_isAlpha(c) ((c|32)-'a' < 26) 102 #define yxml_isNum(c) (c-'0' < 10) 103 #define yxml_isHex(c) (yxml_isNum(c) || (c|32)-'a' < 6) 104 #define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || c == '.' || c == '_' || c == '-') 105 #define yxml_isNameStart(c) (yxml_isAlpha(c) || c == ':' || c == '_' || c >= 128) 106 #define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || c == '-' || c == '.') 107 /* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */ 108 #define yxml_isAttValue(c) (yxml_isChar(c) && c != x->quote && c != '<' && c != '&') 109 /* Anything between '&' and ';', the yxml_ref* functions will do further 110 * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but 111 * this parser doesn't understand entities with '.', ':', etc, anwyay. */ 112 #define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || c == '#') 113 114 #define INTFROM5CHARS(a, b, c, d, e) ((((uint64_t)(a))<<32) | (((uint64_t)(b))<<24) | (((uint64_t)(c))<<16) | (((uint64_t)(d))<<8) | (uint64_t)(e)) 115 116 117 /* Set the given char value to ch (0<=ch<=255). 118 * This can't be done with simple assignment because char may be signed, and 119 * unsigned-to-signed overflow is implementation defined in C. This function 120 * /looks/ inefficient, but gcc compiles it down to a single movb instruction 121 * on x86, even with -O0. */ 122 static inline void yxml_setchar(char *dest, unsigned ch) { 123 unsigned char _ch = ch; 124 memcpy(dest, &_ch, 1); 125 } 126 127 128 /* Similar to yxml_setchar(), but will convert ch (any valid unicode point) to 129 * UTF-8 and appends a '\0'. dest must have room for at least 5 bytes. */ 130 static void yxml_setutf8(char *dest, unsigned ch) { 131 if(ch <= 0x007F) 132 yxml_setchar(dest++, ch); 133 else if(ch <= 0x07FF) { 134 yxml_setchar(dest++, 0xC0 | (ch>>6)); 135 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 136 } else if(ch <= 0xFFFF) { 137 yxml_setchar(dest++, 0xE0 | (ch>>12)); 138 yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); 139 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 140 } else { 141 yxml_setchar(dest++, 0xF0 | (ch>>18)); 142 yxml_setchar(dest++, 0x80 | ((ch>>12) & 0x3F)); 143 yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); 144 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 145 } 146 *dest = 0; 147 } 148 149 150 static inline yxml_ret_t yxml_datacontent(yxml_t *x, unsigned ch) { 151 yxml_setchar(x->data, ch); 152 x->data[1] = 0; 153 return YXML_CONTENT; 154 } 155 156 157 static inline yxml_ret_t yxml_datapi1(yxml_t *x, unsigned ch) { 158 yxml_setchar(x->data, ch); 159 x->data[1] = 0; 160 return YXML_PICONTENT; 161 } 162 163 164 static inline yxml_ret_t yxml_datapi2(yxml_t *x, unsigned ch) { 165 x->data[0] = '?'; 166 yxml_setchar(x->data+1, ch); 167 x->data[2] = 0; 168 return YXML_PICONTENT; 169 } 170 171 172 static inline yxml_ret_t yxml_datacd1(yxml_t *x, unsigned ch) { 173 x->data[0] = ']'; 174 yxml_setchar(x->data+1, ch); 175 x->data[2] = 0; 176 return YXML_CONTENT; 177 } 178 179 180 static inline yxml_ret_t yxml_datacd2(yxml_t *x, unsigned ch) { 181 x->data[0] = ']'; 182 x->data[1] = ']'; 183 yxml_setchar(x->data+2, ch); 184 x->data[3] = 0; 185 return YXML_CONTENT; 186 } 187 188 189 static inline yxml_ret_t yxml_dataattr(yxml_t *x, unsigned ch) { 190 /* Normalize attribute values according to the XML spec section 3.3.3. */ 191 yxml_setchar(x->data, ch == 0x9 || ch == 0xa ? 0x20 : ch); 192 x->data[1] = 0; 193 return YXML_ATTRVAL; 194 } 195 196 197 static yxml_ret_t yxml_pushstack(yxml_t *x, char **res, unsigned ch) { 198 if(x->stacklen+2 >= x->stacksize) 199 return YXML_ESTACK; 200 x->stacklen++; 201 *res = (char *)x->stack+x->stacklen; 202 x->stack[x->stacklen] = ch; 203 x->stacklen++; 204 x->stack[x->stacklen] = 0; 205 return YXML_OK; 206 } 207 208 209 static yxml_ret_t yxml_pushstackc(yxml_t *x, unsigned ch) { 210 if(x->stacklen+1 >= x->stacksize) 211 return YXML_ESTACK; 212 x->stack[x->stacklen] = ch; 213 x->stacklen++; 214 x->stack[x->stacklen] = 0; 215 return YXML_OK; 216 } 217 218 219 static void yxml_popstack(yxml_t *x) { 220 do 221 x->stacklen--; 222 while(x->stack[x->stacklen]); 223 } 224 225 226 static inline yxml_ret_t yxml_elemstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); } 227 static inline yxml_ret_t yxml_elemname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 228 static inline yxml_ret_t yxml_elemnameend(yxml_t *x, unsigned ch) { (void) x; (void) ch; return YXML_ELEMSTART; } 229 230 231 /* Also used in yxml_elemcloseend(), since this function just removes the last 232 * element from the stack and returns ELEMEND. */ 233 static yxml_ret_t yxml_selfclose(yxml_t *x, unsigned ch) { 234 (void) ch; 235 yxml_popstack(x); 236 if(x->stacklen) { 237 x->elem = (char *)x->stack+x->stacklen-1; 238 while(*(x->elem-1)) 239 x->elem--; 240 return YXML_ELEMEND; 241 } 242 x->elem = (char *)x->stack; 243 x->state = YXMLS_misc3; 244 return YXML_ELEMEND; 245 } 246 247 248 static inline yxml_ret_t yxml_elemclose(yxml_t *x, unsigned ch) { 249 if(*((unsigned char *)x->elem) != ch) 250 return YXML_ECLOSE; 251 x->elem++; 252 return YXML_OK; 253 } 254 255 256 static inline yxml_ret_t yxml_elemcloseend(yxml_t *x, unsigned ch) { 257 if(*x->elem) 258 return YXML_ECLOSE; 259 return yxml_selfclose(x, ch); 260 } 261 262 263 static inline yxml_ret_t yxml_attrstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); } 264 static inline yxml_ret_t yxml_attrname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 265 static inline yxml_ret_t yxml_attrnameend(yxml_t *x, unsigned ch) { (void) x; (void) ch; return YXML_ATTRSTART; } 266 static inline yxml_ret_t yxml_attrvalend (yxml_t *x, unsigned ch) { (void) ch; yxml_popstack(x); return YXML_ATTREND; } 267 268 269 static inline yxml_ret_t yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); } 270 static inline yxml_ret_t yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 271 static inline yxml_ret_t yxml_piabort (yxml_t *x, unsigned ch) { (void) x; (void) ch; yxml_popstack(x); return YXML_OK; } 272 static inline yxml_ret_t yxml_pinameend(yxml_t *x, unsigned ch) { (void) ch; 273 return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART; 274 } 275 static inline yxml_ret_t yxml_pivalend (yxml_t *x, unsigned ch) { (void) ch; yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; } 276 277 278 static inline yxml_ret_t yxml_refstart(yxml_t *x, unsigned ch) { 279 (void) ch; 280 memset(x->data, 0, sizeof(x->data)); 281 x->reflen = 0; 282 return YXML_OK; 283 } 284 285 286 static yxml_ret_t yxml_ref(yxml_t *x, unsigned ch) { 287 if(x->reflen >= sizeof(x->data)-1) 288 return YXML_EREF; 289 yxml_setchar(x->data+x->reflen, ch); 290 x->reflen++; 291 return YXML_OK; 292 } 293 294 295 static yxml_ret_t yxml_refend(yxml_t *x, yxml_ret_t ret) { 296 unsigned char *r = (unsigned char *)x->data; 297 unsigned ch = 0; 298 if(*r == '#') { 299 if(r[1] == 'x') 300 for(r += 2; yxml_isHex((unsigned)*r); r++) 301 ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10); 302 else 303 for(r++; yxml_isNum((unsigned)*r); r++) 304 ch = (ch*10) + (*r-'0'); 305 if(*r) 306 ch = 0; 307 } else { 308 uint64_t i = INTFROM5CHARS(r[0], r[1], r[2], r[3], r[4]); 309 ch = 310 i == INTFROM5CHARS('l','t', 0, 0, 0) ? '<' : 311 i == INTFROM5CHARS('g','t', 0, 0, 0) ? '>' : 312 i == INTFROM5CHARS('a','m','p', 0, 0) ? '&' : 313 i == INTFROM5CHARS('a','p','o','s',0) ? '\'': 314 i == INTFROM5CHARS('q','u','o','t',0) ? '"' : 0; 315 } 316 317 /* Codepoints not allowed in the XML 1.1 definition of a Char */ 318 if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xDFFF) < 0x7FF) 319 return YXML_EREF; 320 yxml_setutf8(x->data, ch); 321 return ret; 322 } 323 324 325 static inline yxml_ret_t yxml_refcontent(yxml_t *x, unsigned ch) { (void) ch; return yxml_refend(x, YXML_CONTENT); } 326 static inline yxml_ret_t yxml_refattrval(yxml_t *x, unsigned ch) { (void) ch; return yxml_refend(x, YXML_ATTRVAL); } 327 328 329 void yxml_init(yxml_t *x, void *stack, size_t stacksize) { 330 memset(x, 0, sizeof(*x)); 331 x->line = 1; 332 x->stack = stack; 333 x->stacksize = stacksize; 334 *x->stack = 0; 335 x->elem = x->pi = x->attr = (char *)x->stack; 336 x->state = YXMLS_init; 337 } 338 339 340 yxml_ret_t yxml_parse(yxml_t *x, int _ch) { 341 /* Ensure that characters are in the range of 0..255 rather than -126..125. 342 * All character comparisons are done with positive integers. */ 343 unsigned ch = (unsigned)(_ch+256) & 0xff; 344 if(!ch) 345 return YXML_ESYN; 346 x->total++; 347 348 /* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and 349 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds 350 * some non-ASCII character sequences to this list, but we can only handle 351 * ASCII here without making assumptions about the input encoding. */ 352 if(x->ignore == ch) { 353 x->ignore = 0; 354 return YXML_OK; 355 } 356 x->ignore = (ch == 0xd) * 0xa; 357 if(ch == 0xa || ch == 0xd) { 358 ch = 0xa; 359 x->line++; 360 x->byte = 0; 361 } 362 x->byte++; 363 364 switch((yxml_state_t)x->state) { 365 case YXMLS_string: 366 if(ch == *x->string) { 367 x->string++; 368 if(!*x->string) 369 x->state = x->nextstate; 370 return YXML_OK; 371 } 372 break; 373 case YXMLS_attr0: 374 if(yxml_isName(ch)) 375 return yxml_attrname(x, ch); 376 if(yxml_isSP(ch)) { 377 x->state = YXMLS_attr1; 378 return yxml_attrnameend(x, ch); 379 } 380 if(ch == (unsigned char)'=') { 381 x->state = YXMLS_attr2; 382 return yxml_attrnameend(x, ch); 383 } 384 break; 385 case YXMLS_attr1: 386 if(yxml_isSP(ch)) 387 return YXML_OK; 388 if(ch == (unsigned char)'=') { 389 x->state = YXMLS_attr2; 390 return YXML_OK; 391 } 392 break; 393 case YXMLS_attr2: 394 if(yxml_isSP(ch)) 395 return YXML_OK; 396 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 397 x->state = YXMLS_attr3; 398 x->quote = ch; 399 return YXML_OK; 400 } 401 break; 402 case YXMLS_attr3: 403 if(yxml_isAttValue(ch)) 404 return yxml_dataattr(x, ch); 405 if(ch == (unsigned char)'&') { 406 x->state = YXMLS_attr4; 407 return yxml_refstart(x, ch); 408 } 409 if(x->quote == ch) { 410 x->state = YXMLS_elem2; 411 return yxml_attrvalend(x, ch); 412 } 413 break; 414 case YXMLS_attr4: 415 if(yxml_isRef(ch)) 416 return yxml_ref(x, ch); 417 if(ch == (unsigned char)'\x3b') { 418 x->state = YXMLS_attr3; 419 return yxml_refattrval(x, ch); 420 } 421 break; 422 case YXMLS_cd0: 423 if(ch == (unsigned char)']') { 424 x->state = YXMLS_cd1; 425 return YXML_OK; 426 } 427 if(yxml_isChar(ch)) 428 return yxml_datacontent(x, ch); 429 break; 430 case YXMLS_cd1: 431 if(ch == (unsigned char)']') { 432 x->state = YXMLS_cd2; 433 return YXML_OK; 434 } 435 if(yxml_isChar(ch)) { 436 x->state = YXMLS_cd0; 437 return yxml_datacd1(x, ch); 438 } 439 break; 440 case YXMLS_cd2: 441 if(ch == (unsigned char)']') 442 return yxml_datacontent(x, ch); 443 if(ch == (unsigned char)'>') { 444 x->state = YXMLS_misc2; 445 return YXML_OK; 446 } 447 if(yxml_isChar(ch)) { 448 x->state = YXMLS_cd0; 449 return yxml_datacd2(x, ch); 450 } 451 break; 452 case YXMLS_comment0: 453 if(ch == (unsigned char)'-') { 454 x->state = YXMLS_comment1; 455 return YXML_OK; 456 } 457 break; 458 case YXMLS_comment1: 459 if(ch == (unsigned char)'-') { 460 x->state = YXMLS_comment2; 461 return YXML_OK; 462 } 463 break; 464 case YXMLS_comment2: 465 if(ch == (unsigned char)'-') { 466 x->state = YXMLS_comment3; 467 return YXML_OK; 468 } 469 if(yxml_isChar(ch)) 470 return YXML_OK; 471 break; 472 case YXMLS_comment3: 473 if(ch == (unsigned char)'-') { 474 x->state = YXMLS_comment4; 475 return YXML_OK; 476 } 477 if(yxml_isChar(ch)) { 478 x->state = YXMLS_comment2; 479 return YXML_OK; 480 } 481 break; 482 case YXMLS_comment4: 483 if(ch == (unsigned char)'>') { 484 x->state = x->nextstate; 485 return YXML_OK; 486 } 487 break; 488 case YXMLS_dt0: 489 if(ch == (unsigned char)'>') { 490 x->state = YXMLS_misc1; 491 return YXML_OK; 492 } 493 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 494 x->state = YXMLS_dt1; 495 x->quote = ch; 496 x->nextstate = YXMLS_dt0; 497 return YXML_OK; 498 } 499 if(ch == (unsigned char)'<') { 500 x->state = YXMLS_dt2; 501 return YXML_OK; 502 } 503 if(yxml_isChar(ch)) 504 return YXML_OK; 505 break; 506 case YXMLS_dt1: 507 if(x->quote == ch) { 508 x->state = x->nextstate; 509 return YXML_OK; 510 } 511 if(yxml_isChar(ch)) 512 return YXML_OK; 513 break; 514 case YXMLS_dt2: 515 if(ch == (unsigned char)'?') { 516 x->state = YXMLS_pi0; 517 x->nextstate = YXMLS_dt0; 518 return YXML_OK; 519 } 520 if(ch == (unsigned char)'!') { 521 x->state = YXMLS_dt3; 522 return YXML_OK; 523 } 524 break; 525 case YXMLS_dt3: 526 if(ch == (unsigned char)'-') { 527 x->state = YXMLS_comment1; 528 x->nextstate = YXMLS_dt0; 529 return YXML_OK; 530 } 531 if(yxml_isChar(ch)) { 532 x->state = YXMLS_dt4; 533 return YXML_OK; 534 } 535 break; 536 case YXMLS_dt4: 537 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 538 x->state = YXMLS_dt1; 539 x->quote = ch; 540 x->nextstate = YXMLS_dt4; 541 return YXML_OK; 542 } 543 if(ch == (unsigned char)'>') { 544 x->state = YXMLS_dt0; 545 return YXML_OK; 546 } 547 if(yxml_isChar(ch)) 548 return YXML_OK; 549 break; 550 case YXMLS_elem0: 551 if(yxml_isName(ch)) 552 return yxml_elemname(x, ch); 553 if(yxml_isSP(ch)) { 554 x->state = YXMLS_elem1; 555 return yxml_elemnameend(x, ch); 556 } 557 if(ch == (unsigned char)'/') { 558 x->state = YXMLS_elem3; 559 return yxml_elemnameend(x, ch); 560 } 561 if(ch == (unsigned char)'>') { 562 x->state = YXMLS_misc2; 563 return yxml_elemnameend(x, ch); 564 } 565 break; 566 case YXMLS_elem1: 567 if(yxml_isSP(ch)) 568 return YXML_OK; 569 if(ch == (unsigned char)'/') { 570 x->state = YXMLS_elem3; 571 return YXML_OK; 572 } 573 if(ch == (unsigned char)'>') { 574 x->state = YXMLS_misc2; 575 return YXML_OK; 576 } 577 if(yxml_isNameStart(ch)) { 578 x->state = YXMLS_attr0; 579 return yxml_attrstart(x, ch); 580 } 581 break; 582 case YXMLS_elem2: 583 if(yxml_isSP(ch)) { 584 x->state = YXMLS_elem1; 585 return YXML_OK; 586 } 587 if(ch == (unsigned char)'/') { 588 x->state = YXMLS_elem3; 589 return YXML_OK; 590 } 591 if(ch == (unsigned char)'>') { 592 x->state = YXMLS_misc2; 593 return YXML_OK; 594 } 595 break; 596 case YXMLS_elem3: 597 if(ch == (unsigned char)'>') { 598 x->state = YXMLS_misc2; 599 return yxml_selfclose(x, ch); 600 } 601 break; 602 case YXMLS_enc0: 603 if(yxml_isSP(ch)) 604 return YXML_OK; 605 if(ch == (unsigned char)'=') { 606 x->state = YXMLS_enc1; 607 return YXML_OK; 608 } 609 break; 610 case YXMLS_enc1: 611 if(yxml_isSP(ch)) 612 return YXML_OK; 613 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 614 x->state = YXMLS_enc2; 615 x->quote = ch; 616 return YXML_OK; 617 } 618 break; 619 case YXMLS_enc2: 620 if(yxml_isAlpha(ch)) { 621 x->state = YXMLS_enc3; 622 return YXML_OK; 623 } 624 break; 625 case YXMLS_enc3: 626 if(yxml_isEncName(ch)) 627 return YXML_OK; 628 if(x->quote == ch) { 629 x->state = YXMLS_xmldecl6; 630 return YXML_OK; 631 } 632 break; 633 case YXMLS_etag0: 634 if(yxml_isNameStart(ch)) { 635 x->state = YXMLS_etag1; 636 return yxml_elemclose(x, ch); 637 } 638 break; 639 case YXMLS_etag1: 640 if(yxml_isName(ch)) 641 return yxml_elemclose(x, ch); 642 if(yxml_isSP(ch)) { 643 x->state = YXMLS_etag2; 644 return yxml_elemcloseend(x, ch); 645 } 646 if(ch == (unsigned char)'>') { 647 x->state = YXMLS_misc2; 648 return yxml_elemcloseend(x, ch); 649 } 650 break; 651 case YXMLS_etag2: 652 if(yxml_isSP(ch)) 653 return YXML_OK; 654 if(ch == (unsigned char)'>') { 655 x->state = YXMLS_misc2; 656 return YXML_OK; 657 } 658 break; 659 case YXMLS_init: 660 if(ch == (unsigned char)'\xef') { 661 x->state = YXMLS_string; 662 x->nextstate = YXMLS_misc0; 663 x->string = (unsigned char *)"\xbb\xbf"; 664 return YXML_OK; 665 } 666 if(yxml_isSP(ch)) { 667 x->state = YXMLS_misc0; 668 return YXML_OK; 669 } 670 if(ch == (unsigned char)'<') { 671 x->state = YXMLS_le0; 672 return YXML_OK; 673 } 674 break; 675 case YXMLS_le0: 676 if(ch == (unsigned char)'!') { 677 x->state = YXMLS_lee1; 678 return YXML_OK; 679 } 680 if(ch == (unsigned char)'?') { 681 x->state = YXMLS_leq0; 682 return YXML_OK; 683 } 684 if(yxml_isNameStart(ch)) { 685 x->state = YXMLS_elem0; 686 return yxml_elemstart(x, ch); 687 } 688 break; 689 case YXMLS_le1: 690 if(ch == (unsigned char)'!') { 691 x->state = YXMLS_lee1; 692 return YXML_OK; 693 } 694 if(ch == (unsigned char)'?') { 695 x->state = YXMLS_pi0; 696 x->nextstate = YXMLS_misc1; 697 return YXML_OK; 698 } 699 if(yxml_isNameStart(ch)) { 700 x->state = YXMLS_elem0; 701 return yxml_elemstart(x, ch); 702 } 703 break; 704 case YXMLS_le2: 705 if(ch == (unsigned char)'!') { 706 x->state = YXMLS_lee2; 707 return YXML_OK; 708 } 709 if(ch == (unsigned char)'?') { 710 x->state = YXMLS_pi0; 711 x->nextstate = YXMLS_misc2; 712 return YXML_OK; 713 } 714 if(ch == (unsigned char)'/') { 715 x->state = YXMLS_etag0; 716 return YXML_OK; 717 } 718 if(yxml_isNameStart(ch)) { 719 x->state = YXMLS_elem0; 720 return yxml_elemstart(x, ch); 721 } 722 break; 723 case YXMLS_le3: 724 if(ch == (unsigned char)'!') { 725 x->state = YXMLS_comment0; 726 x->nextstate = YXMLS_misc3; 727 return YXML_OK; 728 } 729 if(ch == (unsigned char)'?') { 730 x->state = YXMLS_pi0; 731 x->nextstate = YXMLS_misc3; 732 return YXML_OK; 733 } 734 break; 735 case YXMLS_lee1: 736 if(ch == (unsigned char)'-') { 737 x->state = YXMLS_comment1; 738 x->nextstate = YXMLS_misc1; 739 return YXML_OK; 740 } 741 if(ch == (unsigned char)'D') { 742 x->state = YXMLS_string; 743 x->nextstate = YXMLS_dt0; 744 x->string = (unsigned char *)"OCTYPE"; 745 return YXML_OK; 746 } 747 break; 748 case YXMLS_lee2: 749 if(ch == (unsigned char)'-') { 750 x->state = YXMLS_comment1; 751 x->nextstate = YXMLS_misc2; 752 return YXML_OK; 753 } 754 if(ch == (unsigned char)'[') { 755 x->state = YXMLS_string; 756 x->nextstate = YXMLS_cd0; 757 x->string = (unsigned char *)"CDATA["; 758 return YXML_OK; 759 } 760 break; 761 case YXMLS_leq0: 762 if(ch == (unsigned char)'x') { 763 x->state = YXMLS_xmldecl0; 764 x->nextstate = YXMLS_misc1; 765 return yxml_pistart(x, ch); 766 } 767 if(yxml_isNameStart(ch)) { 768 x->state = YXMLS_pi1; 769 x->nextstate = YXMLS_misc1; 770 return yxml_pistart(x, ch); 771 } 772 break; 773 case YXMLS_misc0: 774 if(yxml_isSP(ch)) 775 return YXML_OK; 776 if(ch == (unsigned char)'<') { 777 x->state = YXMLS_le0; 778 return YXML_OK; 779 } 780 break; 781 case YXMLS_misc1: 782 if(yxml_isSP(ch)) 783 return YXML_OK; 784 if(ch == (unsigned char)'<') { 785 x->state = YXMLS_le1; 786 return YXML_OK; 787 } 788 break; 789 case YXMLS_misc2: 790 if(ch == (unsigned char)'<') { 791 x->state = YXMLS_le2; 792 return YXML_OK; 793 } 794 if(ch == (unsigned char)'&') { 795 x->state = YXMLS_misc2a; 796 return yxml_refstart(x, ch); 797 } 798 if(yxml_isChar(ch)) 799 return yxml_datacontent(x, ch); 800 break; 801 case YXMLS_misc2a: 802 if(yxml_isRef(ch)) 803 return yxml_ref(x, ch); 804 if(ch == (unsigned char)'\x3b') { 805 x->state = YXMLS_misc2; 806 return yxml_refcontent(x, ch); 807 } 808 break; 809 case YXMLS_misc3: 810 if(yxml_isSP(ch)) 811 return YXML_OK; 812 if(ch == (unsigned char)'<') { 813 x->state = YXMLS_le3; 814 return YXML_OK; 815 } 816 break; 817 case YXMLS_pi0: 818 if(yxml_isNameStart(ch)) { 819 x->state = YXMLS_pi1; 820 return yxml_pistart(x, ch); 821 } 822 break; 823 case YXMLS_pi1: 824 if(yxml_isName(ch)) 825 return yxml_piname(x, ch); 826 if(ch == (unsigned char)'?') { 827 x->state = YXMLS_pi4; 828 return yxml_pinameend(x, ch); 829 } 830 if(yxml_isSP(ch)) { 831 x->state = YXMLS_pi2; 832 return yxml_pinameend(x, ch); 833 } 834 break; 835 case YXMLS_pi2: 836 if(ch == (unsigned char)'?') { 837 x->state = YXMLS_pi3; 838 return YXML_OK; 839 } 840 if(yxml_isChar(ch)) 841 return yxml_datapi1(x, ch); 842 break; 843 case YXMLS_pi3: 844 if(ch == (unsigned char)'>') { 845 x->state = x->nextstate; 846 return yxml_pivalend(x, ch); 847 } 848 if(yxml_isChar(ch)) { 849 x->state = YXMLS_pi2; 850 return yxml_datapi2(x, ch); 851 } 852 break; 853 case YXMLS_pi4: 854 if(ch == (unsigned char)'>') { 855 x->state = x->nextstate; 856 return yxml_pivalend(x, ch); 857 } 858 break; 859 case YXMLS_std0: 860 if(yxml_isSP(ch)) 861 return YXML_OK; 862 if(ch == (unsigned char)'=') { 863 x->state = YXMLS_std1; 864 return YXML_OK; 865 } 866 break; 867 case YXMLS_std1: 868 if(yxml_isSP(ch)) 869 return YXML_OK; 870 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 871 x->state = YXMLS_std2; 872 x->quote = ch; 873 return YXML_OK; 874 } 875 break; 876 case YXMLS_std2: 877 if(ch == (unsigned char)'y') { 878 x->state = YXMLS_string; 879 x->nextstate = YXMLS_std3; 880 x->string = (unsigned char *)"es"; 881 return YXML_OK; 882 } 883 if(ch == (unsigned char)'n') { 884 x->state = YXMLS_string; 885 x->nextstate = YXMLS_std3; 886 x->string = (unsigned char *)"o"; 887 return YXML_OK; 888 } 889 break; 890 case YXMLS_std3: 891 if(x->quote == ch) { 892 x->state = YXMLS_xmldecl8; 893 return YXML_OK; 894 } 895 break; 896 case YXMLS_ver0: 897 if(yxml_isSP(ch)) 898 return YXML_OK; 899 if(ch == (unsigned char)'=') { 900 x->state = YXMLS_ver1; 901 return YXML_OK; 902 } 903 break; 904 case YXMLS_ver1: 905 if(yxml_isSP(ch)) 906 return YXML_OK; 907 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 908 x->state = YXMLS_string; 909 x->quote = ch; 910 x->nextstate = YXMLS_ver2; 911 x->string = (unsigned char *)"1."; 912 return YXML_OK; 913 } 914 break; 915 case YXMLS_ver2: 916 if(yxml_isNum(ch)) { 917 x->state = YXMLS_ver3; 918 return YXML_OK; 919 } 920 break; 921 case YXMLS_ver3: 922 if(yxml_isNum(ch)) 923 return YXML_OK; 924 if(x->quote == ch) { 925 x->state = YXMLS_xmldecl4; 926 return YXML_OK; 927 } 928 break; 929 case YXMLS_xmldecl0: 930 if(ch == (unsigned char)'m') { 931 x->state = YXMLS_xmldecl1; 932 return yxml_piname(x, ch); 933 } 934 if(yxml_isName(ch)) { 935 x->state = YXMLS_pi1; 936 return yxml_piname(x, ch); 937 } 938 if(ch == (unsigned char)'?') { 939 x->state = YXMLS_pi4; 940 return yxml_pinameend(x, ch); 941 } 942 if(yxml_isSP(ch)) { 943 x->state = YXMLS_pi2; 944 return yxml_pinameend(x, ch); 945 } 946 break; 947 case YXMLS_xmldecl1: 948 if(ch == (unsigned char)'l') { 949 x->state = YXMLS_xmldecl2; 950 return yxml_piname(x, ch); 951 } 952 if(yxml_isName(ch)) { 953 x->state = YXMLS_pi1; 954 return yxml_piname(x, ch); 955 } 956 if(ch == (unsigned char)'?') { 957 x->state = YXMLS_pi4; 958 return yxml_pinameend(x, ch); 959 } 960 if(yxml_isSP(ch)) { 961 x->state = YXMLS_pi2; 962 return yxml_pinameend(x, ch); 963 } 964 break; 965 case YXMLS_xmldecl2: 966 if(yxml_isSP(ch)) { 967 x->state = YXMLS_xmldecl3; 968 return yxml_piabort(x, ch); 969 } 970 if(yxml_isName(ch)) { 971 x->state = YXMLS_pi1; 972 return yxml_piname(x, ch); 973 } 974 break; 975 case YXMLS_xmldecl3: 976 if(yxml_isSP(ch)) 977 return YXML_OK; 978 if(ch == (unsigned char)'v') { 979 x->state = YXMLS_string; 980 x->nextstate = YXMLS_ver0; 981 x->string = (unsigned char *)"ersion"; 982 return YXML_OK; 983 } 984 break; 985 case YXMLS_xmldecl4: 986 if(yxml_isSP(ch)) { 987 x->state = YXMLS_xmldecl5; 988 return YXML_OK; 989 } 990 if(ch == (unsigned char)'?') { 991 x->state = YXMLS_xmldecl9; 992 return YXML_OK; 993 } 994 break; 995 case YXMLS_xmldecl5: 996 if(yxml_isSP(ch)) 997 return YXML_OK; 998 if(ch == (unsigned char)'?') { 999 x->state = YXMLS_xmldecl9; 1000 return YXML_OK; 1001 } 1002 if(ch == (unsigned char)'e') { 1003 x->state = YXMLS_string; 1004 x->nextstate = YXMLS_enc0; 1005 x->string = (unsigned char *)"ncoding"; 1006 return YXML_OK; 1007 } 1008 if(ch == (unsigned char)'s') { 1009 x->state = YXMLS_string; 1010 x->nextstate = YXMLS_std0; 1011 x->string = (unsigned char *)"tandalone"; 1012 return YXML_OK; 1013 } 1014 break; 1015 case YXMLS_xmldecl6: 1016 if(yxml_isSP(ch)) { 1017 x->state = YXMLS_xmldecl7; 1018 return YXML_OK; 1019 } 1020 if(ch == (unsigned char)'?') { 1021 x->state = YXMLS_xmldecl9; 1022 return YXML_OK; 1023 } 1024 break; 1025 case YXMLS_xmldecl7: 1026 if(yxml_isSP(ch)) 1027 return YXML_OK; 1028 if(ch == (unsigned char)'?') { 1029 x->state = YXMLS_xmldecl9; 1030 return YXML_OK; 1031 } 1032 if(ch == (unsigned char)'s') { 1033 x->state = YXMLS_string; 1034 x->nextstate = YXMLS_std0; 1035 x->string = (unsigned char *)"tandalone"; 1036 return YXML_OK; 1037 } 1038 break; 1039 case YXMLS_xmldecl8: 1040 if(yxml_isSP(ch)) 1041 return YXML_OK; 1042 if(ch == (unsigned char)'?') { 1043 x->state = YXMLS_xmldecl9; 1044 return YXML_OK; 1045 } 1046 break; 1047 case YXMLS_xmldecl9: 1048 if(ch == (unsigned char)'>') { 1049 x->state = YXMLS_misc1; 1050 return YXML_OK; 1051 } 1052 break; 1053 } 1054 return YXML_ESYN; 1055 } 1056 1057 1058 yxml_ret_t yxml_eof(yxml_t *x) { 1059 if(x->state != YXMLS_misc3) 1060 return YXML_EEOF; 1061 return YXML_OK; 1062 } 1063 1064 1065 /* vim: set noet sw=4 ts=4: */ 1066