1 /* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT EDIT! */ 2 3 /* Copyright (c) 2013-2014 Yoran Heling 4 5 Permission is hereby granted, free of charge, to any person obtaining 6 a copy of this software and associated documentation files (the 7 "Software"), to deal in the Software without restriction, including 8 without limitation the rights to use, copy, modify, merge, publish, 9 distribute, sublicense, and/or sell copies of the Software, and to 10 permit persons to whom the Software is furnished to do so, subject to 11 the following conditions: 12 13 The above copyright notice and this permission notice shall be included 14 in all copies or substantial portions of the Software. 15 16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25 #include <yxml.h> 26 #include <string.h> 27 28 typedef enum { 29 YXMLS_string, 30 YXMLS_attr0, 31 YXMLS_attr1, 32 YXMLS_attr2, 33 YXMLS_attr3, 34 YXMLS_attr4, 35 YXMLS_cd0, 36 YXMLS_cd1, 37 YXMLS_cd2, 38 YXMLS_comment0, 39 YXMLS_comment1, 40 YXMLS_comment2, 41 YXMLS_comment3, 42 YXMLS_comment4, 43 YXMLS_dt0, 44 YXMLS_dt1, 45 YXMLS_dt2, 46 YXMLS_dt3, 47 YXMLS_dt4, 48 YXMLS_elem0, 49 YXMLS_elem1, 50 YXMLS_elem2, 51 YXMLS_elem3, 52 YXMLS_enc0, 53 YXMLS_enc1, 54 YXMLS_enc2, 55 YXMLS_enc3, 56 YXMLS_etag0, 57 YXMLS_etag1, 58 YXMLS_etag2, 59 YXMLS_init, 60 YXMLS_le0, 61 YXMLS_le1, 62 YXMLS_le2, 63 YXMLS_le3, 64 YXMLS_lee1, 65 YXMLS_lee2, 66 YXMLS_leq0, 67 YXMLS_misc0, 68 YXMLS_misc1, 69 YXMLS_misc2, 70 YXMLS_misc2a, 71 YXMLS_misc3, 72 YXMLS_pi0, 73 YXMLS_pi1, 74 YXMLS_pi2, 75 YXMLS_pi3, 76 YXMLS_pi4, 77 YXMLS_std0, 78 YXMLS_std1, 79 YXMLS_std2, 80 YXMLS_std3, 81 YXMLS_ver0, 82 YXMLS_ver1, 83 YXMLS_ver2, 84 YXMLS_ver3, 85 YXMLS_xmldecl0, 86 YXMLS_xmldecl1, 87 YXMLS_xmldecl2, 88 YXMLS_xmldecl3, 89 YXMLS_xmldecl4, 90 YXMLS_xmldecl5, 91 YXMLS_xmldecl6, 92 YXMLS_xmldecl7, 93 YXMLS_xmldecl8, 94 YXMLS_xmldecl9 95 } yxml_state_t; 96 97 98 #define yxml_isChar(c) 1 99 /* 0xd should be part of SP, too, but yxml_parse() already normalizes that into 0xa */ 100 #define yxml_isSP(c) (c == 0x20 || c == 0x09 || c == 0x0a) 101 #define yxml_isAlpha(c) ((c|32)-'a' < 26) 102 #define yxml_isNum(c) (c-'0' < 10) 103 #define yxml_isHex(c) (yxml_isNum(c) || (c|32)-'a' < 6) 104 #define yxml_isEncName(c) (yxml_isAlpha(c) || yxml_isNum(c) || c == '.' || c == '_' || c == '-') 105 #define yxml_isNameStart(c) (yxml_isAlpha(c) || c == ':' || c == '_' || c >= 128) 106 #define yxml_isName(c) (yxml_isNameStart(c) || yxml_isNum(c) || c == '-' || c == '.') 107 /* XXX: The valid characters are dependent on the quote char, hence the access to x->quote */ 108 #define yxml_isAttValue(c) (yxml_isChar(c) && c != x->quote && c != '<' && c != '&') 109 /* Anything between '&' and ';', the yxml_ref* functions will do further 110 * validation. Strictly speaking, this is "yxml_isName(c) || c == '#'", but 111 * this parser doesn't understand entities with '.', ':', etc, anwyay. */ 112 #define yxml_isRef(c) (yxml_isNum(c) || yxml_isAlpha(c) || c == '#') 113 114 #define INTFROM5CHARS(a, b, c, d, e) ((((uint64_t)(a))<<32) | (((uint64_t)(b))<<24) | (((uint64_t)(c))<<16) | (((uint64_t)(d))<<8) | (uint64_t)(e)) 115 116 117 /* Set the given char value to ch (0<=ch<=255). 118 * This can't be done with simple assignment because char may be signed, and 119 * unsigned-to-signed overflow is implementation defined in C. This function 120 * /looks/ inefficient, but gcc compiles it down to a single movb instruction 121 * on x86, even with -O0. */ 122 static inline void yxml_setchar(char *dest, unsigned ch) { 123 unsigned char _ch = ch; 124 memcpy(dest, &_ch, 1); 125 } 126 127 128 /* Similar to yxml_setchar(), but will convert ch (any valid unicode point) to 129 * UTF-8 and appends a '\0'. dest must have room for at least 5 bytes. */ 130 static void yxml_setutf8(char *dest, unsigned ch) { 131 if(ch <= 0x007F) 132 yxml_setchar(dest++, ch); 133 else if(ch <= 0x07FF) { 134 yxml_setchar(dest++, 0xC0 | (ch>>6)); 135 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 136 } else if(ch <= 0xFFFF) { 137 yxml_setchar(dest++, 0xE0 | (ch>>12)); 138 yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); 139 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 140 } else { 141 yxml_setchar(dest++, 0xF0 | (ch>>18)); 142 yxml_setchar(dest++, 0x80 | ((ch>>12) & 0x3F)); 143 yxml_setchar(dest++, 0x80 | ((ch>>6) & 0x3F)); 144 yxml_setchar(dest++, 0x80 | (ch & 0x3F)); 145 } 146 *dest = 0; 147 } 148 149 150 static inline yxml_ret_t yxml_datacontent(yxml_t *x, unsigned ch) { 151 yxml_setchar(x->data, ch); 152 x->data[1] = 0; 153 return YXML_CONTENT; 154 } 155 156 157 static inline yxml_ret_t yxml_datapi1(yxml_t *x, unsigned ch) { 158 yxml_setchar(x->data, ch); 159 x->data[1] = 0; 160 return YXML_PICONTENT; 161 } 162 163 164 static inline yxml_ret_t yxml_datapi2(yxml_t *x, unsigned ch) { 165 x->data[0] = '?'; 166 yxml_setchar(x->data+1, ch); 167 x->data[2] = 0; 168 return YXML_PICONTENT; 169 } 170 171 172 static inline yxml_ret_t yxml_datacd1(yxml_t *x, unsigned ch) { 173 x->data[0] = ']'; 174 yxml_setchar(x->data+1, ch); 175 x->data[2] = 0; 176 return YXML_CONTENT; 177 } 178 179 180 static inline yxml_ret_t yxml_datacd2(yxml_t *x, unsigned ch) { 181 x->data[0] = ']'; 182 x->data[1] = ']'; 183 yxml_setchar(x->data+2, ch); 184 x->data[3] = 0; 185 return YXML_CONTENT; 186 } 187 188 189 static inline yxml_ret_t yxml_dataattr(yxml_t *x, unsigned ch) { 190 /* Normalize attribute values according to the XML spec section 3.3.3. */ 191 yxml_setchar(x->data, ch == 0x9 || ch == 0xa ? 0x20 : ch); 192 x->data[1] = 0; 193 return YXML_ATTRVAL; 194 } 195 196 197 static yxml_ret_t yxml_pushstack(yxml_t *x, char **res, unsigned ch) { 198 if(x->stacklen+2 >= x->stacksize) 199 return YXML_ESTACK; 200 x->stacklen++; 201 *res = (char *)x->stack+x->stacklen; 202 x->stack[x->stacklen] = ch; 203 x->stacklen++; 204 x->stack[x->stacklen] = 0; 205 return YXML_OK; 206 } 207 208 209 static yxml_ret_t yxml_pushstackc(yxml_t *x, unsigned ch) { 210 if(x->stacklen+1 >= x->stacksize) 211 return YXML_ESTACK; 212 x->stack[x->stacklen] = ch; 213 x->stacklen++; 214 x->stack[x->stacklen] = 0; 215 return YXML_OK; 216 } 217 218 219 static void yxml_popstack(yxml_t *x) { 220 do 221 x->stacklen--; 222 while(x->stack[x->stacklen]); 223 } 224 225 226 static inline yxml_ret_t yxml_elemstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->elem, ch); } 227 static inline yxml_ret_t yxml_elemname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 228 static inline yxml_ret_t yxml_elemnameend(yxml_t *x, unsigned ch) { return YXML_ELEMSTART; } 229 230 231 /* Also used in yxml_elemcloseend(), since this function just removes the last 232 * element from the stack and returns ELEMEND. */ 233 static yxml_ret_t yxml_selfclose(yxml_t *x, unsigned ch) { 234 yxml_popstack(x); 235 if(x->stacklen) { 236 x->elem = (char *)x->stack+x->stacklen-1; 237 while(*(x->elem-1)) 238 x->elem--; 239 return YXML_ELEMEND; 240 } 241 x->elem = (char *)x->stack; 242 x->state = YXMLS_misc3; 243 return YXML_ELEMEND; 244 } 245 246 247 static inline yxml_ret_t yxml_elemclose(yxml_t *x, unsigned ch) { 248 if(*((unsigned char *)x->elem) != ch) 249 return YXML_ECLOSE; 250 x->elem++; 251 return YXML_OK; 252 } 253 254 255 static inline yxml_ret_t yxml_elemcloseend(yxml_t *x, unsigned ch) { 256 if(*x->elem) 257 return YXML_ECLOSE; 258 return yxml_selfclose(x, ch); 259 } 260 261 262 static inline yxml_ret_t yxml_attrstart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->attr, ch); } 263 static inline yxml_ret_t yxml_attrname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 264 static inline yxml_ret_t yxml_attrnameend(yxml_t *x, unsigned ch) { return YXML_ATTRSTART; } 265 static inline yxml_ret_t yxml_attrvalend (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_ATTREND; } 266 267 268 static inline yxml_ret_t yxml_pistart (yxml_t *x, unsigned ch) { return yxml_pushstack(x, &x->pi, ch); } 269 static inline yxml_ret_t yxml_piname (yxml_t *x, unsigned ch) { return yxml_pushstackc(x, ch); } 270 static inline yxml_ret_t yxml_piabort (yxml_t *x, unsigned ch) { yxml_popstack(x); return YXML_OK; } 271 static inline yxml_ret_t yxml_pinameend(yxml_t *x, unsigned ch) { 272 return (x->pi[0]|32) == 'x' && (x->pi[1]|32) == 'm' && (x->pi[2]|32) == 'l' && !x->pi[3] ? YXML_ESYN : YXML_PISTART; 273 } 274 static inline yxml_ret_t yxml_pivalend (yxml_t *x, unsigned ch) { yxml_popstack(x); x->pi = (char *)x->stack; return YXML_PIEND; } 275 276 277 static inline yxml_ret_t yxml_refstart(yxml_t *x, unsigned ch) { 278 memset(x->data, 0, sizeof(x->data)); 279 x->reflen = 0; 280 return YXML_OK; 281 } 282 283 284 static yxml_ret_t yxml_ref(yxml_t *x, unsigned ch) { 285 if(x->reflen >= sizeof(x->data)-1) 286 return YXML_EREF; 287 yxml_setchar(x->data+x->reflen, ch); 288 x->reflen++; 289 return YXML_OK; 290 } 291 292 293 static yxml_ret_t yxml_refend(yxml_t *x, yxml_ret_t ret) { 294 unsigned char *r = (unsigned char *)x->data; 295 unsigned ch = 0; 296 if(*r == '#') { 297 if(r[1] == 'x') 298 for(r += 2; yxml_isHex((unsigned)*r); r++) 299 ch = (ch<<4) + (*r <= '9' ? *r-'0' : (*r|32)-'a' + 10); 300 else 301 for(r++; yxml_isNum((unsigned)*r); r++) 302 ch = (ch*10) + (*r-'0'); 303 if(*r) 304 ch = 0; 305 } else { 306 uint64_t i = INTFROM5CHARS(r[0], r[1], r[2], r[3], r[4]); 307 ch = 308 i == INTFROM5CHARS('l','t', 0, 0, 0) ? '<' : 309 i == INTFROM5CHARS('g','t', 0, 0, 0) ? '>' : 310 i == INTFROM5CHARS('a','m','p', 0, 0) ? '&' : 311 i == INTFROM5CHARS('a','p','o','s',0) ? '\'': 312 i == INTFROM5CHARS('q','u','o','t',0) ? '"' : 0; 313 } 314 315 /* Codepoints not allowed in the XML 1.1 definition of a Char */ 316 if(!ch || ch > 0x10FFFF || ch == 0xFFFE || ch == 0xFFFF || (ch-0xDFFF) < 0x7FF) 317 return YXML_EREF; 318 yxml_setutf8(x->data, ch); 319 return ret; 320 } 321 322 323 static inline yxml_ret_t yxml_refcontent(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_CONTENT); } 324 static inline yxml_ret_t yxml_refattrval(yxml_t *x, unsigned ch) { return yxml_refend(x, YXML_ATTRVAL); } 325 326 327 void yxml_init(yxml_t *x, void *stack, size_t stacksize) { 328 memset(x, 0, sizeof(*x)); 329 x->line = 1; 330 x->stack = stack; 331 x->stacksize = stacksize; 332 *x->stack = 0; 333 x->elem = x->pi = x->attr = (char *)x->stack; 334 x->state = YXMLS_init; 335 } 336 337 338 yxml_ret_t yxml_parse(yxml_t *x, int _ch) { 339 /* Ensure that characters are in the range of 0..255 rather than -126..125. 340 * All character comparisons are done with positive integers. */ 341 unsigned ch = (unsigned)(_ch+256) & 0xff; 342 if(!ch) 343 return YXML_ESYN; 344 x->total++; 345 346 /* End-of-Line normalization, "\rX", "\r\n" and "\n" are recognized and 347 * normalized to a single '\n' as per XML 1.0 section 2.11. XML 1.1 adds 348 * some non-ASCII character sequences to this list, but we can only handle 349 * ASCII here without making assumptions about the input encoding. */ 350 if(x->ignore == ch) { 351 x->ignore = 0; 352 return YXML_OK; 353 } 354 x->ignore = (ch == 0xd) * 0xa; 355 if(ch == 0xa || ch == 0xd) { 356 ch = 0xa; 357 x->line++; 358 x->byte = 0; 359 } 360 x->byte++; 361 362 switch((yxml_state_t)x->state) { 363 case YXMLS_string: 364 if(ch == *x->string) { 365 x->string++; 366 if(!*x->string) 367 x->state = x->nextstate; 368 return YXML_OK; 369 } 370 break; 371 case YXMLS_attr0: 372 if(yxml_isName(ch)) 373 return yxml_attrname(x, ch); 374 if(yxml_isSP(ch)) { 375 x->state = YXMLS_attr1; 376 return yxml_attrnameend(x, ch); 377 } 378 if(ch == (unsigned char)'=') { 379 x->state = YXMLS_attr2; 380 return yxml_attrnameend(x, ch); 381 } 382 break; 383 case YXMLS_attr1: 384 if(yxml_isSP(ch)) 385 return YXML_OK; 386 if(ch == (unsigned char)'=') { 387 x->state = YXMLS_attr2; 388 return YXML_OK; 389 } 390 break; 391 case YXMLS_attr2: 392 if(yxml_isSP(ch)) 393 return YXML_OK; 394 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 395 x->state = YXMLS_attr3; 396 x->quote = ch; 397 return YXML_OK; 398 } 399 break; 400 case YXMLS_attr3: 401 if(yxml_isAttValue(ch)) 402 return yxml_dataattr(x, ch); 403 if(ch == (unsigned char)'&') { 404 x->state = YXMLS_attr4; 405 return yxml_refstart(x, ch); 406 } 407 if(x->quote == ch) { 408 x->state = YXMLS_elem2; 409 return yxml_attrvalend(x, ch); 410 } 411 break; 412 case YXMLS_attr4: 413 if(yxml_isRef(ch)) 414 return yxml_ref(x, ch); 415 if(ch == (unsigned char)'\x3b') { 416 x->state = YXMLS_attr3; 417 return yxml_refattrval(x, ch); 418 } 419 break; 420 case YXMLS_cd0: 421 if(ch == (unsigned char)']') { 422 x->state = YXMLS_cd1; 423 return YXML_OK; 424 } 425 if(yxml_isChar(ch)) 426 return yxml_datacontent(x, ch); 427 break; 428 case YXMLS_cd1: 429 if(ch == (unsigned char)']') { 430 x->state = YXMLS_cd2; 431 return YXML_OK; 432 } 433 if(yxml_isChar(ch)) { 434 x->state = YXMLS_cd0; 435 return yxml_datacd1(x, ch); 436 } 437 break; 438 case YXMLS_cd2: 439 if(ch == (unsigned char)']') 440 return yxml_datacontent(x, ch); 441 if(ch == (unsigned char)'>') { 442 x->state = YXMLS_misc2; 443 return YXML_OK; 444 } 445 if(yxml_isChar(ch)) { 446 x->state = YXMLS_cd0; 447 return yxml_datacd2(x, ch); 448 } 449 break; 450 case YXMLS_comment0: 451 if(ch == (unsigned char)'-') { 452 x->state = YXMLS_comment1; 453 return YXML_OK; 454 } 455 break; 456 case YXMLS_comment1: 457 if(ch == (unsigned char)'-') { 458 x->state = YXMLS_comment2; 459 return YXML_OK; 460 } 461 break; 462 case YXMLS_comment2: 463 if(ch == (unsigned char)'-') { 464 x->state = YXMLS_comment3; 465 return YXML_OK; 466 } 467 if(yxml_isChar(ch)) 468 return YXML_OK; 469 break; 470 case YXMLS_comment3: 471 if(ch == (unsigned char)'-') { 472 x->state = YXMLS_comment4; 473 return YXML_OK; 474 } 475 if(yxml_isChar(ch)) { 476 x->state = YXMLS_comment2; 477 return YXML_OK; 478 } 479 break; 480 case YXMLS_comment4: 481 if(ch == (unsigned char)'>') { 482 x->state = x->nextstate; 483 return YXML_OK; 484 } 485 break; 486 case YXMLS_dt0: 487 if(ch == (unsigned char)'>') { 488 x->state = YXMLS_misc1; 489 return YXML_OK; 490 } 491 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 492 x->state = YXMLS_dt1; 493 x->quote = ch; 494 x->nextstate = YXMLS_dt0; 495 return YXML_OK; 496 } 497 if(ch == (unsigned char)'<') { 498 x->state = YXMLS_dt2; 499 return YXML_OK; 500 } 501 if(yxml_isChar(ch)) 502 return YXML_OK; 503 break; 504 case YXMLS_dt1: 505 if(x->quote == ch) { 506 x->state = x->nextstate; 507 return YXML_OK; 508 } 509 if(yxml_isChar(ch)) 510 return YXML_OK; 511 break; 512 case YXMLS_dt2: 513 if(ch == (unsigned char)'?') { 514 x->state = YXMLS_pi0; 515 x->nextstate = YXMLS_dt0; 516 return YXML_OK; 517 } 518 if(ch == (unsigned char)'!') { 519 x->state = YXMLS_dt3; 520 return YXML_OK; 521 } 522 break; 523 case YXMLS_dt3: 524 if(ch == (unsigned char)'-') { 525 x->state = YXMLS_comment1; 526 x->nextstate = YXMLS_dt0; 527 return YXML_OK; 528 } 529 if(yxml_isChar(ch)) { 530 x->state = YXMLS_dt4; 531 return YXML_OK; 532 } 533 break; 534 case YXMLS_dt4: 535 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 536 x->state = YXMLS_dt1; 537 x->quote = ch; 538 x->nextstate = YXMLS_dt4; 539 return YXML_OK; 540 } 541 if(ch == (unsigned char)'>') { 542 x->state = YXMLS_dt0; 543 return YXML_OK; 544 } 545 if(yxml_isChar(ch)) 546 return YXML_OK; 547 break; 548 case YXMLS_elem0: 549 if(yxml_isName(ch)) 550 return yxml_elemname(x, ch); 551 if(yxml_isSP(ch)) { 552 x->state = YXMLS_elem1; 553 return yxml_elemnameend(x, ch); 554 } 555 if(ch == (unsigned char)'/') { 556 x->state = YXMLS_elem3; 557 return yxml_elemnameend(x, ch); 558 } 559 if(ch == (unsigned char)'>') { 560 x->state = YXMLS_misc2; 561 return yxml_elemnameend(x, ch); 562 } 563 break; 564 case YXMLS_elem1: 565 if(yxml_isSP(ch)) 566 return YXML_OK; 567 if(ch == (unsigned char)'/') { 568 x->state = YXMLS_elem3; 569 return YXML_OK; 570 } 571 if(ch == (unsigned char)'>') { 572 x->state = YXMLS_misc2; 573 return YXML_OK; 574 } 575 if(yxml_isNameStart(ch)) { 576 x->state = YXMLS_attr0; 577 return yxml_attrstart(x, ch); 578 } 579 break; 580 case YXMLS_elem2: 581 if(yxml_isSP(ch)) { 582 x->state = YXMLS_elem1; 583 return YXML_OK; 584 } 585 if(ch == (unsigned char)'/') { 586 x->state = YXMLS_elem3; 587 return YXML_OK; 588 } 589 if(ch == (unsigned char)'>') { 590 x->state = YXMLS_misc2; 591 return YXML_OK; 592 } 593 break; 594 case YXMLS_elem3: 595 if(ch == (unsigned char)'>') { 596 x->state = YXMLS_misc2; 597 return yxml_selfclose(x, ch); 598 } 599 break; 600 case YXMLS_enc0: 601 if(yxml_isSP(ch)) 602 return YXML_OK; 603 if(ch == (unsigned char)'=') { 604 x->state = YXMLS_enc1; 605 return YXML_OK; 606 } 607 break; 608 case YXMLS_enc1: 609 if(yxml_isSP(ch)) 610 return YXML_OK; 611 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 612 x->state = YXMLS_enc2; 613 x->quote = ch; 614 return YXML_OK; 615 } 616 break; 617 case YXMLS_enc2: 618 if(yxml_isAlpha(ch)) { 619 x->state = YXMLS_enc3; 620 return YXML_OK; 621 } 622 break; 623 case YXMLS_enc3: 624 if(yxml_isEncName(ch)) 625 return YXML_OK; 626 if(x->quote == ch) { 627 x->state = YXMLS_xmldecl6; 628 return YXML_OK; 629 } 630 break; 631 case YXMLS_etag0: 632 if(yxml_isNameStart(ch)) { 633 x->state = YXMLS_etag1; 634 return yxml_elemclose(x, ch); 635 } 636 break; 637 case YXMLS_etag1: 638 if(yxml_isName(ch)) 639 return yxml_elemclose(x, ch); 640 if(yxml_isSP(ch)) { 641 x->state = YXMLS_etag2; 642 return yxml_elemcloseend(x, ch); 643 } 644 if(ch == (unsigned char)'>') { 645 x->state = YXMLS_misc2; 646 return yxml_elemcloseend(x, ch); 647 } 648 break; 649 case YXMLS_etag2: 650 if(yxml_isSP(ch)) 651 return YXML_OK; 652 if(ch == (unsigned char)'>') { 653 x->state = YXMLS_misc2; 654 return YXML_OK; 655 } 656 break; 657 case YXMLS_init: 658 if(ch == (unsigned char)'\xef') { 659 x->state = YXMLS_string; 660 x->nextstate = YXMLS_misc0; 661 x->string = (unsigned char *)"\xbb\xbf"; 662 return YXML_OK; 663 } 664 if(yxml_isSP(ch)) { 665 x->state = YXMLS_misc0; 666 return YXML_OK; 667 } 668 if(ch == (unsigned char)'<') { 669 x->state = YXMLS_le0; 670 return YXML_OK; 671 } 672 break; 673 case YXMLS_le0: 674 if(ch == (unsigned char)'!') { 675 x->state = YXMLS_lee1; 676 return YXML_OK; 677 } 678 if(ch == (unsigned char)'?') { 679 x->state = YXMLS_leq0; 680 return YXML_OK; 681 } 682 if(yxml_isNameStart(ch)) { 683 x->state = YXMLS_elem0; 684 return yxml_elemstart(x, ch); 685 } 686 break; 687 case YXMLS_le1: 688 if(ch == (unsigned char)'!') { 689 x->state = YXMLS_lee1; 690 return YXML_OK; 691 } 692 if(ch == (unsigned char)'?') { 693 x->state = YXMLS_pi0; 694 x->nextstate = YXMLS_misc1; 695 return YXML_OK; 696 } 697 if(yxml_isNameStart(ch)) { 698 x->state = YXMLS_elem0; 699 return yxml_elemstart(x, ch); 700 } 701 break; 702 case YXMLS_le2: 703 if(ch == (unsigned char)'!') { 704 x->state = YXMLS_lee2; 705 return YXML_OK; 706 } 707 if(ch == (unsigned char)'?') { 708 x->state = YXMLS_pi0; 709 x->nextstate = YXMLS_misc2; 710 return YXML_OK; 711 } 712 if(ch == (unsigned char)'/') { 713 x->state = YXMLS_etag0; 714 return YXML_OK; 715 } 716 if(yxml_isNameStart(ch)) { 717 x->state = YXMLS_elem0; 718 return yxml_elemstart(x, ch); 719 } 720 break; 721 case YXMLS_le3: 722 if(ch == (unsigned char)'!') { 723 x->state = YXMLS_comment0; 724 x->nextstate = YXMLS_misc3; 725 return YXML_OK; 726 } 727 if(ch == (unsigned char)'?') { 728 x->state = YXMLS_pi0; 729 x->nextstate = YXMLS_misc3; 730 return YXML_OK; 731 } 732 break; 733 case YXMLS_lee1: 734 if(ch == (unsigned char)'-') { 735 x->state = YXMLS_comment1; 736 x->nextstate = YXMLS_misc1; 737 return YXML_OK; 738 } 739 if(ch == (unsigned char)'D') { 740 x->state = YXMLS_string; 741 x->nextstate = YXMLS_dt0; 742 x->string = (unsigned char *)"OCTYPE"; 743 return YXML_OK; 744 } 745 break; 746 case YXMLS_lee2: 747 if(ch == (unsigned char)'-') { 748 x->state = YXMLS_comment1; 749 x->nextstate = YXMLS_misc2; 750 return YXML_OK; 751 } 752 if(ch == (unsigned char)'[') { 753 x->state = YXMLS_string; 754 x->nextstate = YXMLS_cd0; 755 x->string = (unsigned char *)"CDATA["; 756 return YXML_OK; 757 } 758 break; 759 case YXMLS_leq0: 760 if(ch == (unsigned char)'x') { 761 x->state = YXMLS_xmldecl0; 762 x->nextstate = YXMLS_misc1; 763 return yxml_pistart(x, ch); 764 } 765 if(yxml_isNameStart(ch)) { 766 x->state = YXMLS_pi1; 767 x->nextstate = YXMLS_misc1; 768 return yxml_pistart(x, ch); 769 } 770 break; 771 case YXMLS_misc0: 772 if(yxml_isSP(ch)) 773 return YXML_OK; 774 if(ch == (unsigned char)'<') { 775 x->state = YXMLS_le0; 776 return YXML_OK; 777 } 778 break; 779 case YXMLS_misc1: 780 if(yxml_isSP(ch)) 781 return YXML_OK; 782 if(ch == (unsigned char)'<') { 783 x->state = YXMLS_le1; 784 return YXML_OK; 785 } 786 break; 787 case YXMLS_misc2: 788 if(ch == (unsigned char)'<') { 789 x->state = YXMLS_le2; 790 return YXML_OK; 791 } 792 if(ch == (unsigned char)'&') { 793 x->state = YXMLS_misc2a; 794 return yxml_refstart(x, ch); 795 } 796 if(yxml_isChar(ch)) 797 return yxml_datacontent(x, ch); 798 break; 799 case YXMLS_misc2a: 800 if(yxml_isRef(ch)) 801 return yxml_ref(x, ch); 802 if(ch == (unsigned char)'\x3b') { 803 x->state = YXMLS_misc2; 804 return yxml_refcontent(x, ch); 805 } 806 break; 807 case YXMLS_misc3: 808 if(yxml_isSP(ch)) 809 return YXML_OK; 810 if(ch == (unsigned char)'<') { 811 x->state = YXMLS_le3; 812 return YXML_OK; 813 } 814 break; 815 case YXMLS_pi0: 816 if(yxml_isNameStart(ch)) { 817 x->state = YXMLS_pi1; 818 return yxml_pistart(x, ch); 819 } 820 break; 821 case YXMLS_pi1: 822 if(yxml_isName(ch)) 823 return yxml_piname(x, ch); 824 if(ch == (unsigned char)'?') { 825 x->state = YXMLS_pi4; 826 return yxml_pinameend(x, ch); 827 } 828 if(yxml_isSP(ch)) { 829 x->state = YXMLS_pi2; 830 return yxml_pinameend(x, ch); 831 } 832 break; 833 case YXMLS_pi2: 834 if(ch == (unsigned char)'?') { 835 x->state = YXMLS_pi3; 836 return YXML_OK; 837 } 838 if(yxml_isChar(ch)) 839 return yxml_datapi1(x, ch); 840 break; 841 case YXMLS_pi3: 842 if(ch == (unsigned char)'>') { 843 x->state = x->nextstate; 844 return yxml_pivalend(x, ch); 845 } 846 if(yxml_isChar(ch)) { 847 x->state = YXMLS_pi2; 848 return yxml_datapi2(x, ch); 849 } 850 break; 851 case YXMLS_pi4: 852 if(ch == (unsigned char)'>') { 853 x->state = x->nextstate; 854 return yxml_pivalend(x, ch); 855 } 856 break; 857 case YXMLS_std0: 858 if(yxml_isSP(ch)) 859 return YXML_OK; 860 if(ch == (unsigned char)'=') { 861 x->state = YXMLS_std1; 862 return YXML_OK; 863 } 864 break; 865 case YXMLS_std1: 866 if(yxml_isSP(ch)) 867 return YXML_OK; 868 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 869 x->state = YXMLS_std2; 870 x->quote = ch; 871 return YXML_OK; 872 } 873 break; 874 case YXMLS_std2: 875 if(ch == (unsigned char)'y') { 876 x->state = YXMLS_string; 877 x->nextstate = YXMLS_std3; 878 x->string = (unsigned char *)"es"; 879 return YXML_OK; 880 } 881 if(ch == (unsigned char)'n') { 882 x->state = YXMLS_string; 883 x->nextstate = YXMLS_std3; 884 x->string = (unsigned char *)"o"; 885 return YXML_OK; 886 } 887 break; 888 case YXMLS_std3: 889 if(x->quote == ch) { 890 x->state = YXMLS_xmldecl8; 891 return YXML_OK; 892 } 893 break; 894 case YXMLS_ver0: 895 if(yxml_isSP(ch)) 896 return YXML_OK; 897 if(ch == (unsigned char)'=') { 898 x->state = YXMLS_ver1; 899 return YXML_OK; 900 } 901 break; 902 case YXMLS_ver1: 903 if(yxml_isSP(ch)) 904 return YXML_OK; 905 if(ch == (unsigned char)'\'' || ch == (unsigned char)'"') { 906 x->state = YXMLS_string; 907 x->quote = ch; 908 x->nextstate = YXMLS_ver2; 909 x->string = (unsigned char *)"1."; 910 return YXML_OK; 911 } 912 break; 913 case YXMLS_ver2: 914 if(yxml_isNum(ch)) { 915 x->state = YXMLS_ver3; 916 return YXML_OK; 917 } 918 break; 919 case YXMLS_ver3: 920 if(yxml_isNum(ch)) 921 return YXML_OK; 922 if(x->quote == ch) { 923 x->state = YXMLS_xmldecl4; 924 return YXML_OK; 925 } 926 break; 927 case YXMLS_xmldecl0: 928 if(ch == (unsigned char)'m') { 929 x->state = YXMLS_xmldecl1; 930 return yxml_piname(x, ch); 931 } 932 if(yxml_isName(ch)) { 933 x->state = YXMLS_pi1; 934 return yxml_piname(x, ch); 935 } 936 if(ch == (unsigned char)'?') { 937 x->state = YXMLS_pi4; 938 return yxml_pinameend(x, ch); 939 } 940 if(yxml_isSP(ch)) { 941 x->state = YXMLS_pi2; 942 return yxml_pinameend(x, ch); 943 } 944 break; 945 case YXMLS_xmldecl1: 946 if(ch == (unsigned char)'l') { 947 x->state = YXMLS_xmldecl2; 948 return yxml_piname(x, ch); 949 } 950 if(yxml_isName(ch)) { 951 x->state = YXMLS_pi1; 952 return yxml_piname(x, ch); 953 } 954 if(ch == (unsigned char)'?') { 955 x->state = YXMLS_pi4; 956 return yxml_pinameend(x, ch); 957 } 958 if(yxml_isSP(ch)) { 959 x->state = YXMLS_pi2; 960 return yxml_pinameend(x, ch); 961 } 962 break; 963 case YXMLS_xmldecl2: 964 if(yxml_isSP(ch)) { 965 x->state = YXMLS_xmldecl3; 966 return yxml_piabort(x, ch); 967 } 968 if(yxml_isName(ch)) { 969 x->state = YXMLS_pi1; 970 return yxml_piname(x, ch); 971 } 972 break; 973 case YXMLS_xmldecl3: 974 if(yxml_isSP(ch)) 975 return YXML_OK; 976 if(ch == (unsigned char)'v') { 977 x->state = YXMLS_string; 978 x->nextstate = YXMLS_ver0; 979 x->string = (unsigned char *)"ersion"; 980 return YXML_OK; 981 } 982 break; 983 case YXMLS_xmldecl4: 984 if(yxml_isSP(ch)) { 985 x->state = YXMLS_xmldecl5; 986 return YXML_OK; 987 } 988 if(ch == (unsigned char)'?') { 989 x->state = YXMLS_xmldecl9; 990 return YXML_OK; 991 } 992 break; 993 case YXMLS_xmldecl5: 994 if(yxml_isSP(ch)) 995 return YXML_OK; 996 if(ch == (unsigned char)'?') { 997 x->state = YXMLS_xmldecl9; 998 return YXML_OK; 999 } 1000 if(ch == (unsigned char)'e') { 1001 x->state = YXMLS_string; 1002 x->nextstate = YXMLS_enc0; 1003 x->string = (unsigned char *)"ncoding"; 1004 return YXML_OK; 1005 } 1006 if(ch == (unsigned char)'s') { 1007 x->state = YXMLS_string; 1008 x->nextstate = YXMLS_std0; 1009 x->string = (unsigned char *)"tandalone"; 1010 return YXML_OK; 1011 } 1012 break; 1013 case YXMLS_xmldecl6: 1014 if(yxml_isSP(ch)) { 1015 x->state = YXMLS_xmldecl7; 1016 return YXML_OK; 1017 } 1018 if(ch == (unsigned char)'?') { 1019 x->state = YXMLS_xmldecl9; 1020 return YXML_OK; 1021 } 1022 break; 1023 case YXMLS_xmldecl7: 1024 if(yxml_isSP(ch)) 1025 return YXML_OK; 1026 if(ch == (unsigned char)'?') { 1027 x->state = YXMLS_xmldecl9; 1028 return YXML_OK; 1029 } 1030 if(ch == (unsigned char)'s') { 1031 x->state = YXMLS_string; 1032 x->nextstate = YXMLS_std0; 1033 x->string = (unsigned char *)"tandalone"; 1034 return YXML_OK; 1035 } 1036 break; 1037 case YXMLS_xmldecl8: 1038 if(yxml_isSP(ch)) 1039 return YXML_OK; 1040 if(ch == (unsigned char)'?') { 1041 x->state = YXMLS_xmldecl9; 1042 return YXML_OK; 1043 } 1044 break; 1045 case YXMLS_xmldecl9: 1046 if(ch == (unsigned char)'>') { 1047 x->state = YXMLS_misc1; 1048 return YXML_OK; 1049 } 1050 break; 1051 } 1052 return YXML_ESYN; 1053 } 1054 1055 1056 yxml_ret_t yxml_eof(yxml_t *x) { 1057 if(x->state != YXMLS_misc3) 1058 return YXML_EEOF; 1059 return YXML_OK; 1060 } 1061 1062 1063 /* vim: set noet sw=4 ts=4: */ 1064