1 /* 2 __ __ _ 3 ___\ \/ /_ __ __ _| |_ 4 / _ \\ /| '_ \ / _` | __| 5 | __// \| |_) | (_| | |_ 6 \___/_/\_\ .__/ \__,_|\__| 7 |_| XML parser 8 9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10 Copyright (c) 2000-2017 Expat development team 11 Licensed under the MIT license: 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to permit 18 persons to whom the Software is furnished to do so, subject to the 19 following conditions: 20 21 The above copyright notice and this permission notice shall be included 22 in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30 USE OR OTHER DEALINGS IN THE SOFTWARE. 31 */ 32 33 #ifndef Expat_INCLUDED 34 #define Expat_INCLUDED 1 35 36 #include <stdlib.h> 37 #include "expat_external.h" 38 39 #ifdef __cplusplus 40 extern "C" { 41 #endif 42 /* XML_Parser is a pointer to a struct that is only forward-declared here, so code using this header treats it as an opaque handle. */ 43 struct XML_ParserStruct; 44 typedef struct XML_ParserStruct *XML_Parser; 45 /* Boolean type of this API; XML_TRUE and XML_FALSE are the values 1 and 0 cast to XML_Bool. */ 46 typedef unsigned char XML_Bool; 47 #define XML_TRUE ((XML_Bool)1) 48 #define XML_FALSE ((XML_Bool)0) 49 50 /* The XML_Status enum gives the possible return values for several 51 API functions. 
The preprocessor #defines are included so this 52 stanza can be added to code that still needs to support older 53 versions of Expat 1.95.x: 54 55 #ifndef XML_STATUS_OK 56 #define XML_STATUS_OK 1 57 #define XML_STATUS_ERROR 0 58 #endif 59 60 Otherwise, the #define hackery is quite ugly and would have been 61 dropped. 62 */ 63 enum XML_Status { 64 XML_STATUS_ERROR = 0, 65 #define XML_STATUS_ERROR XML_STATUS_ERROR 66 XML_STATUS_OK = 1, 67 #define XML_STATUS_OK XML_STATUS_OK 68 XML_STATUS_SUSPENDED = 2 69 #define XML_STATUS_SUSPENDED XML_STATUS_SUSPENDED 70 }; 71 /* Error codes, reported through XML_GetErrorCode; XML_UseForeignDTD also returns one directly. XML_ERROR_NONE is the first enumerator and is therefore 0. Later additions are appended at the end, as the "Added in" markers show (presumably so that existing numeric values never change). */ 72 enum XML_Error { 73 XML_ERROR_NONE, 74 XML_ERROR_NO_MEMORY, 75 XML_ERROR_SYNTAX, 76 XML_ERROR_NO_ELEMENTS, 77 XML_ERROR_INVALID_TOKEN, 78 XML_ERROR_UNCLOSED_TOKEN, 79 XML_ERROR_PARTIAL_CHAR, 80 XML_ERROR_TAG_MISMATCH, 81 XML_ERROR_DUPLICATE_ATTRIBUTE, 82 XML_ERROR_JUNK_AFTER_DOC_ELEMENT, 83 XML_ERROR_PARAM_ENTITY_REF, 84 XML_ERROR_UNDEFINED_ENTITY, 85 XML_ERROR_RECURSIVE_ENTITY_REF, 86 XML_ERROR_ASYNC_ENTITY, 87 XML_ERROR_BAD_CHAR_REF, 88 XML_ERROR_BINARY_ENTITY_REF, 89 XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, 90 XML_ERROR_MISPLACED_XML_PI, 91 XML_ERROR_UNKNOWN_ENCODING, 92 XML_ERROR_INCORRECT_ENCODING, 93 XML_ERROR_UNCLOSED_CDATA_SECTION, 94 XML_ERROR_EXTERNAL_ENTITY_HANDLING, 95 XML_ERROR_NOT_STANDALONE, 96 XML_ERROR_UNEXPECTED_STATE, 97 XML_ERROR_ENTITY_DECLARED_IN_PE, 98 XML_ERROR_FEATURE_REQUIRES_XML_DTD, 99 XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING, 100 /* Added in 1.95.7. */ 101 XML_ERROR_UNBOUND_PREFIX, 102 /* Added in 1.95.8. */ 103 XML_ERROR_UNDECLARING_PREFIX, 104 XML_ERROR_INCOMPLETE_PE, 105 XML_ERROR_XML_DECL, 106 XML_ERROR_TEXT_DECL, 107 XML_ERROR_PUBLICID, 108 XML_ERROR_SUSPENDED, 109 XML_ERROR_NOT_SUSPENDED, 110 XML_ERROR_ABORTED, 111 XML_ERROR_FINISHED, 112 XML_ERROR_SUSPEND_PE, 113 /* Added in 2.0. */ 114 XML_ERROR_RESERVED_PREFIX_XML, 115 XML_ERROR_RESERVED_PREFIX_XMLNS, 116 XML_ERROR_RESERVED_NAMESPACE_URI, 117 /* Added in 2.2.1. 
*/ 118 XML_ERROR_INVALID_ARGUMENT, 119 /* Added in 2.3.0. */ 120 XML_ERROR_NO_BUFFER 121 }; 122 /* Node kinds of a content model; how each kind uses the XML_Content fields is described further below. The values start at 1. */ 123 enum XML_Content_Type { 124 XML_CTYPE_EMPTY = 1, 125 XML_CTYPE_ANY, 126 XML_CTYPE_MIXED, 127 XML_CTYPE_NAME, 128 XML_CTYPE_CHOICE, 129 XML_CTYPE_SEQ 130 }; 131 /* Quantifier attached to a content-model node. NOTE(review): OPT, REP and PLUS presumably correspond to the DTD operators '?', '*' and '+' - confirm. */ 132 enum XML_Content_Quant { 133 XML_CQUANT_NONE, 134 XML_CQUANT_OPT, 135 XML_CQUANT_REP, 136 XML_CQUANT_PLUS 137 }; 138 139 /* If type == XML_CTYPE_EMPTY or XML_CTYPE_ANY, then quant will be 140 XML_CQUANT_NONE, and the other fields will be zero or NULL. 141 If type == XML_CTYPE_MIXED, then quant will be NONE or REP and 142 numchildren will contain the number of elements that may be mixed in 143 and children will point to an array of XML_Content cells that will be 144 all of XML_CTYPE_NAME type with no quantification. 145 146 If type == XML_CTYPE_NAME, then the name field points to the name, and 147 the numchildren field will be zero and children will be NULL. The 148 quant field indicates any quantifier placed on the name. 149 150 CHOICE and SEQ will have name NULL, the number of children in 151 numchildren and children will point, recursively, to an array 152 of XML_Content cells. 153 154 The EMPTY, ANY, and MIXED types will only occur at top level. 155 */ 156 157 typedef struct XML_cp XML_Content; 158 159 struct XML_cp { 160 enum XML_Content_Type type; 161 enum XML_Content_Quant quant; 162 XML_Char *name; 163 unsigned int numchildren; 164 XML_Content *children; 165 }; 166 167 /* This is called for an element declaration. See above for 168 description of the model argument. It's the responsibility of the 169 application code receiving model (not of the parser) to free it when finished with it. 170 */ 171 typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, 172 const XML_Char *name, 173 XML_Content *model); 174 175 XMLPARSEAPI(void) 176 XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl); 177 178 /* The Attlist declaration handler is called for *each* attribute. 
So 179 a single Attlist declaration with multiple attributes declared will 180 generate multiple calls to this handler. The "default" parameter 181 may be NULL in the case of the "#IMPLIED" or "#REQUIRED" 182 keyword. The "isrequired" parameter will be true and the default 183 value will be NULL in the case of "#REQUIRED". If "isrequired" is 184 true and default is non-NULL, then this is a "#FIXED" default. 185 */ /* In the signature below, the "default" value described above is the dflt argument. */ 186 typedef void(XMLCALL *XML_AttlistDeclHandler)( 187 void *userData, const XML_Char *elname, const XML_Char *attname, 188 const XML_Char *att_type, const XML_Char *dflt, int isrequired); 189 190 XMLPARSEAPI(void) 191 XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl); 192 193 /* The XML declaration handler is called for *both* XML declarations 194 and text declarations. The way to distinguish is that the version 195 parameter will be NULL for text declarations. The encoding 196 parameter may be NULL for XML declarations. The standalone 197 parameter will be -1, 0, or 1 indicating respectively that there 198 was no standalone parameter in the declaration, that it was given 199 as no, or that it was given as yes. 200 */ 201 typedef void(XMLCALL *XML_XmlDeclHandler)(void *userData, 202 const XML_Char *version, 203 const XML_Char *encoding, 204 int standalone); 205 206 XMLPARSEAPI(void) 207 XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler xmldecl); 208 /* Set of allocator callbacks whose signatures match malloc, realloc and free. A pointer to such a suite is the memsuite argument of XML_ParserCreate_MM. */ 209 typedef struct { 210 void *(*malloc_fcn)(size_t size); 211 void *(*realloc_fcn)(void *ptr, size_t size); 212 void (*free_fcn)(void *ptr); 213 } XML_Memory_Handling_Suite; 214 215 /* Constructs a new parser; encoding is the encoding specified by the 216 external protocol or NULL if there is none specified. 217 */ 218 XMLPARSEAPI(XML_Parser) 219 XML_ParserCreate(const XML_Char *encoding); 220 221 /* Constructs a new parser and namespace processor. 
Element type 222 names and attribute names that belong to a namespace will be 223 expanded; unprefixed attribute names are never expanded; unprefixed 224 element type names are expanded only if there is a default 225 namespace. The expanded name is the concatenation of the namespace 226 URI, the namespace separator character, and the local part of the 227 name. If the namespace separator is '\0' then the namespace URI 228 and the local part will be concatenated without any separator. 229 It is a programming error to use the separator '\0' with namespace 230 triplets (see XML_SetReturnNSTriplet). 231 */ 232 XMLPARSEAPI(XML_Parser) 233 XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); 234 235 /* Constructs a new parser using the memory management suite referred to 236 by memsuite. If memsuite is NULL, then use the standard library memory 237 suite. If namespaceSeparator is non-NULL it creates a parser with 238 namespace processing as described above. The character pointed at 239 will serve as the namespace separator. Note that, unlike in XML_ParserCreateNS, namespaceSeparator is a pointer here, not a character. 240 241 All further memory operations used for the created parser will come from 242 the given suite. 243 */ 244 XMLPARSEAPI(XML_Parser) 245 XML_ParserCreate_MM(const XML_Char *encoding, 246 const XML_Memory_Handling_Suite *memsuite, 247 const XML_Char *namespaceSeparator); 248 249 /* Prepare a parser object to be re-used. This is particularly 250 valuable when memory allocation overhead is disproportionately high, 251 such as when a large number of small documents need to be parsed. 252 All handlers are cleared from the parser, except for the 253 unknownEncodingHandler. The parser's external state is re-initialized 254 except for the values of ns and ns_triplets. 255 256 Added in Expat 1.95.3. 257 */ 258 XMLPARSEAPI(XML_Bool) 259 XML_ParserReset(XML_Parser parser, const XML_Char *encoding); 260 261 /* atts is array of name/value pairs, terminated by 0; 262 names and values are 0 terminated. 
263 */ 264 typedef void(XMLCALL *XML_StartElementHandler)(void *userData, 265 const XML_Char *name, 266 const XML_Char **atts); 267 268 typedef void(XMLCALL *XML_EndElementHandler)(void *userData, 269 const XML_Char *name); 270 271 /* s is not 0 terminated; its length is passed in len. */ 272 typedef void(XMLCALL *XML_CharacterDataHandler)(void *userData, 273 const XML_Char *s, int len); 274 275 /* target and data are 0 terminated */ 276 typedef void(XMLCALL *XML_ProcessingInstructionHandler)(void *userData, 277 const XML_Char *target, 278 const XML_Char *data); 279 280 /* data is 0 terminated */ 281 typedef void(XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); 282 /* Handlers for the start and for the end of a CDATA section; they receive only userData. */ 283 typedef void(XMLCALL *XML_StartCdataSectionHandler)(void *userData); 284 typedef void(XMLCALL *XML_EndCdataSectionHandler)(void *userData); 285 286 /* This is called for any characters in the XML document for which 287 there is no applicable handler. This includes both characters that 288 are part of markup which is of a kind that is not reported 289 (comments, markup declarations), and characters that are part of a 290 construct which could be reported but for which no handler has been 291 supplied. The characters are passed exactly as they were in the XML 292 document except that they will be encoded in UTF-8 or UTF-16. 293 Line boundaries are not normalized. Note that a byte order mark 294 character is not passed to the default handler. There are no 295 guarantees about how characters are divided between calls to the 296 default handler: for example, a comment might be split between 297 multiple calls. 298 */ 299 typedef void(XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, 300 int len); 301 302 /* This is called for the start of the DOCTYPE declaration, before 303 any DTD or internal subset is parsed. 
304 */ 305 typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, 306 const XML_Char *doctypeName, 307 const XML_Char *sysid, 308 const XML_Char *pubid, 309 int has_internal_subset); 310 311 /* This is called for the end of the DOCTYPE declaration, when the 312 closing > is encountered, but after processing any external 313 subset. 314 */ 315 typedef void(XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); 316 317 /* This is called for entity declarations. The is_parameter_entity 318 argument will be non-zero if the entity is a parameter entity, zero 319 otherwise. 320 321 For internal entities (<!ENTITY foo "bar">), value will 322 be non-NULL and systemId, publicId, and notationName will be NULL. 323 The value string is NOT null-terminated; the length is provided in 324 the value_length argument. Since it is legal to have zero-length 325 values, do not use value_length to test for internal entities; test whether value is non-NULL instead. 326 327 For external entities, value will be NULL and systemId will be 328 non-NULL. The publicId argument will be NULL unless a public 329 identifier was provided. The notationName argument will have a 330 non-NULL value only for unparsed entity declarations. 331 332 Note that is_parameter_entity can't be changed to XML_Bool, since 333 that would break binary compatibility. 334 */ 335 typedef void(XMLCALL *XML_EntityDeclHandler)( 336 void *userData, const XML_Char *entityName, int is_parameter_entity, 337 const XML_Char *value, int value_length, const XML_Char *base, 338 const XML_Char *systemId, const XML_Char *publicId, 339 const XML_Char *notationName); 340 341 XMLPARSEAPI(void) 342 XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); 343 344 /* OBSOLETE -- OBSOLETE -- OBSOLETE 345 This handler has been superseded by the EntityDeclHandler above. 346 It is provided here for backward compatibility. 347 348 This is called for a declaration of an unparsed (NDATA) entity. 349 The base argument is whatever was set by XML_SetBase. 
The 350 entityName, systemId and notationName arguments will never be 351 NULL. The other arguments may be. 352 */ 353 typedef void(XMLCALL *XML_UnparsedEntityDeclHandler)( 354 void *userData, const XML_Char *entityName, const XML_Char *base, 355 const XML_Char *systemId, const XML_Char *publicId, 356 const XML_Char *notationName); 357 358 /* This is called for a declaration of a notation. The base argument is 359 whatever was set by XML_SetBase. The notationName will never be 360 NULL. The other arguments can be. 361 */ 362 typedef void(XMLCALL *XML_NotationDeclHandler)(void *userData, 363 const XML_Char *notationName, 364 const XML_Char *base, 365 const XML_Char *systemId, 366 const XML_Char *publicId); 367 368 /* When namespace processing is enabled, these are called once for 369 each namespace declaration. The calls to the start and end element 370 handlers occur between the calls to the start and end namespace 371 declaration handlers. For an xmlns attribute, prefix will be 372 NULL. For an xmlns="" attribute, uri will be NULL. 373 */ 374 typedef void(XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, 375 const XML_Char *prefix, 376 const XML_Char *uri); 377 378 typedef void(XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, 379 const XML_Char *prefix); 380 381 /* This is called if the document is not standalone, that is, it has an 382 external subset or a reference to a parameter entity, but does not 383 have standalone="yes". If this handler returns XML_STATUS_ERROR, 384 then processing will not continue, and the parser will return an 385 XML_ERROR_NOT_STANDALONE error. 386 If parameter entity parsing is enabled, then in addition to the 387 conditions above this handler will only be called if the referenced 388 entity was actually read. 389 */ 390 typedef int(XMLCALL *XML_NotStandaloneHandler)(void *userData); 391 392 /* This is called for a reference to an external parsed general 393 entity. The referenced entity is not automatically parsed. 
The 394 application can parse it immediately or later using 395 XML_ExternalEntityParserCreate. 396 397 The parser argument is the parser parsing the entity containing the 398 reference; it can be passed as the parser argument to 399 XML_ExternalEntityParserCreate. The systemId argument is the 400 system identifier as specified in the entity declaration; it will 401 not be NULL. 402 403 The base argument is the system identifier that should be used as 404 the base for resolving systemId if systemId was relative; this is 405 set by XML_SetBase; it may be NULL. 406 407 The publicId argument is the public identifier as specified in the 408 entity declaration, or NULL if none was specified; the whitespace 409 in the public identifier will have been normalized as required by 410 the XML spec. 411 412 The context argument specifies the parsing context in the format 413 expected by the context argument to XML_ExternalEntityParserCreate; 414 context is valid only until the handler returns, so if the 415 referenced entity is to be parsed later, it must be copied. 416 context is NULL only when the entity is a parameter entity. 417 418 The handler should return XML_STATUS_ERROR if processing should not 419 continue because of a fatal error in the handling of the external 420 entity. In this case the calling parser will return an 421 XML_ERROR_EXTERNAL_ENTITY_HANDLING error. 422 423 Note that unlike other handlers the first argument is the parser, 424 not userData. 425 */ /* Two exceptions are documented elsewhere in this header: per the XML_UseForeignDTD comment, the handler is called with systemId == NULL for an assumed external subset; and per the XML_SetExternalEntityRefHandlerArg comment, a non-NULL arg set there is passed as the first argument instead of the parser. */ 426 typedef int(XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser parser, 427 const XML_Char *context, 428 const XML_Char *base, 429 const XML_Char *systemId, 430 const XML_Char *publicId); 431 432 /* This is called in two situations: 433 1) An entity reference is encountered for which no declaration 434 has been read *and* this is not an error. 435 2) An internal entity reference is read, but not expanded, because 436 XML_SetDefaultHandler has been called. 
437 Note: skipped parameter entities in declarations and skipped general 438 entities in attribute values cannot be reported, because 439 the event would be out of sync with the reporting of the 440 declarations or attribute values 441 */ /* NOTE(review): is_parameter_entity is presumably non-zero for a parameter entity and zero otherwise, as documented for XML_EntityDeclHandler - confirm. */ 442 typedef void(XMLCALL *XML_SkippedEntityHandler)(void *userData, 443 const XML_Char *entityName, 444 int is_parameter_entity); 445 446 /* This structure is filled in by the XML_UnknownEncodingHandler to 447 provide information to the parser about encodings that are unknown 448 to the parser. 449 450 The map[b] member gives information about byte sequences whose 451 first byte is b. 452 453 If map[b] is c where c is >= 0, then b by itself encodes the 454 Unicode scalar value c. 455 456 If map[b] is -1, then the byte sequence is malformed. 457 458 If map[b] is -n, where n >= 2, then b is the first byte of an 459 n-byte sequence that encodes a single Unicode scalar value. 460 461 The data member will be passed as the first argument to the convert 462 function. 463 464 The convert function is used to convert multibyte sequences; s will 465 point to a n-byte sequence where map[(unsigned char)*s] == -n. The 466 convert function must return the Unicode scalar value represented 467 by this byte sequence or -1 if the byte sequence is malformed. 468 469 The convert function may be NULL if the encoding is a single-byte 470 encoding, that is if map[b] >= -1 for all bytes b. 471 472 When the parser is finished with the encoding, then if release is 473 not NULL, it will call release passing it the data member; once 474 release has been called, the convert function will not be called 475 again. 476 477 Expat places certain restrictions on the encodings that are supported 478 using this mechanism. 479 480 1. 
Every ASCII character that can appear in a well-formed XML document, 481 other than the characters 482 483 $@\^`{}~ 484 485 must be represented by a single byte, and that byte must be the 486 same byte that represents that character in ASCII. 487 488 2. No character may require more than 4 bytes to encode. 489 490 3. All characters encoded must have Unicode scalar values <= 491 0xFFFF, (i.e., characters that would be encoded by surrogates in 492 UTF-16 are not allowed). Note that this restriction doesn't 493 apply to the built-in support for UTF-8 and UTF-16. 494 495 4. No Unicode character may be encoded by more than one distinct 496 sequence of bytes. 497 */ /* map has 256 entries, one for every possible value of a first byte. Note that convert receives the raw input as a const char pointer, not as XML_Char. */ 498 typedef struct { 499 int map[256]; 500 void *data; 501 int(XMLCALL *convert)(void *data, const char *s); 502 void(XMLCALL *release)(void *data); 503 } XML_Encoding; 504 505 /* This is called for an encoding that is unknown to the parser. 506 507 The encodingHandlerData argument is that which was passed as the 508 second argument to XML_SetUnknownEncodingHandler. 509 510 The name argument gives the name of the encoding as specified in 511 the encoding declaration. 512 513 If the callback can provide information about the encoding, it must 514 fill in the XML_Encoding structure, and return XML_STATUS_OK. 515 Otherwise it must return XML_STATUS_ERROR. 516 517 If info does not describe a suitable encoding, then the parser will 518 return an XML_ERROR_UNKNOWN_ENCODING error. 
519 */ 520 typedef int(XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, 521 const XML_Char *name, 522 XML_Encoding *info); 523 /* Handler setters. The combined variants (XML_SetElementHandler, XML_SetCdataSectionHandler) take a start and an end handler together; the XML_SetStart... and XML_SetEnd... variants take just one of the two. All of them return nothing. */ 524 XMLPARSEAPI(void) 525 XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, 526 XML_EndElementHandler end); 527 528 XMLPARSEAPI(void) 529 XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler handler); 530 531 XMLPARSEAPI(void) 532 XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler handler); 533 534 XMLPARSEAPI(void) 535 XML_SetCharacterDataHandler(XML_Parser parser, 536 XML_CharacterDataHandler handler); 537 538 XMLPARSEAPI(void) 539 XML_SetProcessingInstructionHandler(XML_Parser parser, 540 XML_ProcessingInstructionHandler handler); 541 XMLPARSEAPI(void) 542 XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); 543 544 XMLPARSEAPI(void) 545 XML_SetCdataSectionHandler(XML_Parser parser, 546 XML_StartCdataSectionHandler start, 547 XML_EndCdataSectionHandler end); 548 549 XMLPARSEAPI(void) 550 XML_SetStartCdataSectionHandler(XML_Parser parser, 551 XML_StartCdataSectionHandler start); 552 553 XMLPARSEAPI(void) 554 XML_SetEndCdataSectionHandler(XML_Parser parser, 555 XML_EndCdataSectionHandler end); 556 557 /* This sets the default handler and also inhibits expansion of 558 internal entities. These entity references will be passed to the 559 default handler, or to the skipped entity handler, if one is set. 560 */ 561 XMLPARSEAPI(void) 562 XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); 563 564 /* This sets the default handler but does not inhibit expansion of 565 internal entities. The entity reference will not be passed to the 566 default handler. 
567 */ 568 XMLPARSEAPI(void) 569 XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); 570 /* The setters below all follow one pattern: each takes the parser plus one handler (or a start/end pair) of the matching XML_...Handler type and returns nothing. */ 571 XMLPARSEAPI(void) 572 XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, 573 XML_EndDoctypeDeclHandler end); 574 575 XMLPARSEAPI(void) 576 XML_SetStartDoctypeDeclHandler(XML_Parser parser, 577 XML_StartDoctypeDeclHandler start); 578 579 XMLPARSEAPI(void) 580 XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end); 581 /* XML_UnparsedEntityDeclHandler is marked OBSOLETE where it is declared; that comment names the EntityDeclHandler (set with XML_SetEntityDeclHandler) as its replacement. */ 582 XMLPARSEAPI(void) 583 XML_SetUnparsedEntityDeclHandler(XML_Parser parser, 584 XML_UnparsedEntityDeclHandler handler); 585 586 XMLPARSEAPI(void) 587 XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); 588 589 XMLPARSEAPI(void) 590 XML_SetNamespaceDeclHandler(XML_Parser parser, 591 XML_StartNamespaceDeclHandler start, 592 XML_EndNamespaceDeclHandler end); 593 594 XMLPARSEAPI(void) 595 XML_SetStartNamespaceDeclHandler(XML_Parser parser, 596 XML_StartNamespaceDeclHandler start); 597 598 XMLPARSEAPI(void) 599 XML_SetEndNamespaceDeclHandler(XML_Parser parser, 600 XML_EndNamespaceDeclHandler end); 601 602 XMLPARSEAPI(void) 603 XML_SetNotStandaloneHandler(XML_Parser parser, 604 XML_NotStandaloneHandler handler); 605 606 XMLPARSEAPI(void) 607 XML_SetExternalEntityRefHandler(XML_Parser parser, 608 XML_ExternalEntityRefHandler handler); 609 610 /* If a non-NULL value for arg is specified here, then it will be 611 passed as the first argument to the external entity ref handler 612 instead of the parser object. 
613 */ 614 XMLPARSEAPI(void) 615 XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg); 616 617 XMLPARSEAPI(void) 618 XML_SetSkippedEntityHandler(XML_Parser parser, 619 XML_SkippedEntityHandler handler); 620 /* encodingHandlerData is what the handler later receives as its encodingHandlerData argument (see the XML_UnknownEncodingHandler comment). */ 621 XMLPARSEAPI(void) 622 XML_SetUnknownEncodingHandler(XML_Parser parser, 623 XML_UnknownEncodingHandler handler, 624 void *encodingHandlerData); 625 626 /* This can be called within a handler for a start element, end 627 element, processing instruction or character data. It causes the 628 corresponding markup to be passed to the default handler. 629 */ 630 XMLPARSEAPI(void) 631 XML_DefaultCurrent(XML_Parser parser); 632 633 /* If do_nst is non-zero, and namespace processing is in effect, and 634 a name has a prefix (i.e. an explicit namespace qualifier) then 635 that name is returned as a triplet in a single string separated by 636 the separator character specified when the parser was created: URI 637 + sep + local_name + sep + prefix. 638 639 If do_nst is zero, then namespace information is returned in the 640 default manner (URI + sep + local_name) whether or not the name 641 has a prefix. 642 643 Note: Calling XML_SetReturnNSTriplet after XML_Parse or 644 XML_ParseBuffer has no effect. 645 */ 646 647 XMLPARSEAPI(void) 648 XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); 649 650 /* This value is passed as the userData argument to callbacks (unless XML_UseParserAsHandlerArg has been called, in which case the parser is passed instead). */ 651 XMLPARSEAPI(void) 652 XML_SetUserData(XML_Parser parser, void *userData); 653 654 /* Returns the last value set by XML_SetUserData or NULL. This is a macro, not a function: it reads a void pointer stored at the address parser points to, so it relies on the user data pointer being the first thing in the parser struct (whose definition is not part of this header). parser is dereferenced unconditionally and therefore must not be NULL. */ 655 #define XML_GetUserData(parser) (*(void **)(parser)) 656 657 /* This is equivalent to supplying an encoding argument to 658 XML_ParserCreate. On success XML_SetEncoding returns non-zero, 659 zero otherwise. 660 Note: Calling XML_SetEncoding after XML_Parse or XML_ParseBuffer 661 has no effect and returns XML_STATUS_ERROR. 
662 */ 663 XMLPARSEAPI(enum XML_Status) 664 XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); 665 666 /* If this function is called, then the parser will be passed as the 667 first argument to callbacks instead of userData. The userData will 668 still be accessible using XML_GetUserData. 669 */ 670 XMLPARSEAPI(void) 671 XML_UseParserAsHandlerArg(XML_Parser parser); 672 673 /* If useDTD == XML_TRUE is passed to this function, then the parser 674 will assume that there is an external subset, even if none is 675 specified in the document. In such a case the parser will call the 676 externalEntityRefHandler with a value of NULL for the systemId 677 argument (the publicId and context arguments will be NULL as well). 678 Note: For the purpose of checking WFC: Entity Declared, passing 679 useDTD == XML_TRUE will make the parser behave as if the document 680 had a DTD with an external subset. 681 Note: If this function is called, then this must be done before 682 the first call to XML_Parse or XML_ParseBuffer, since it will 683 have no effect after that; when called too late it returns 684 XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING. 685 Note: If the document does not have a DOCTYPE declaration at all, 686 then startDoctypeDeclHandler and endDoctypeDeclHandler will not 687 be called, despite an external subset being parsed. 688 Note: If XML_DTD is not defined when Expat is compiled, returns 689 XML_ERROR_FEATURE_REQUIRES_XML_DTD. 690 Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. 691 */ 692 XMLPARSEAPI(enum XML_Error) 693 XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); 694 695 /* Sets the base to be used for resolving relative URIs in system 696 identifiers in declarations. Resolving relative identifiers is 697 left to the application: this value will be passed through as the 698 base argument to the XML_ExternalEntityRefHandler, 699 XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base 700 argument will be copied. 
Returns XML_STATUS_ERROR if out of memory, 701 XML_STATUS_OK otherwise. 702 */ 703 XMLPARSEAPI(enum XML_Status) 704 XML_SetBase(XML_Parser parser, const XML_Char *base); 705 /* NOTE(review): presumably returns the base previously given to XML_SetBase; the result is const, so it must not be modified through this pointer - confirm the details against the implementation. */ 706 XMLPARSEAPI(const XML_Char *) 707 XML_GetBase(XML_Parser parser); 708 709 /* Returns the number of attribute/value pairs passed in the last call 710 to the XML_StartElementHandler that were specified in the start-tag 711 rather than defaulted. Each attribute/value pair counts as 2; thus 712 this corresponds to an index into the atts array passed to the 713 XML_StartElementHandler. Returns -1 if parser == NULL. 714 */ 715 XMLPARSEAPI(int) 716 XML_GetSpecifiedAttributeCount(XML_Parser parser); 717 718 /* Returns the index of the ID attribute passed in the last call to 719 XML_StartElementHandler, or -1 if there is no ID attribute or 720 parser == NULL. Each attribute/value pair counts as 2; thus this 721 corresponds to an index into the atts array passed to the 722 XML_StartElementHandler. 723 */ 724 XMLPARSEAPI(int) 725 XML_GetIdAttributeIndex(XML_Parser parser); 726 727 #ifdef XML_ATTR_INFO 728 /* Source file byte offsets for the start and end of attribute names and values. 729 The value indices are exclusive of surrounding quotes; thus in a UTF-8 source 730 file an attribute value of "blah" will yield: 731 info->valueEnd - info->valueStart = 4 bytes. This type and XML_GetAttributeInfo are only declared when XML_ATTR_INFO is defined. 732 */ 733 typedef struct { 734 XML_Index nameStart; /* Offset to beginning of the attribute name. */ 735 XML_Index nameEnd; /* Offset after the attribute name's last byte. */ 736 XML_Index valueStart; /* Offset to beginning of the attribute value. */ 737 XML_Index valueEnd; /* Offset after the attribute value's last byte. */ 738 } XML_AttrInfo; 739 740 /* Returns an array of XML_AttrInfo structures for the attribute/value pairs 741 passed in last call to the XML_StartElementHandler that were specified 742 in the start-tag rather than defaulted. 
Each attribute/value pair counts 743 as 1; thus the number of entries in the array is 744 XML_GetSpecifiedAttributeCount(parser) / 2. 745 */ 746 XMLPARSEAPI(const XML_AttrInfo *) 747 XML_GetAttributeInfo(XML_Parser parser); 748 #endif 749 750 /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is 751 detected. The last call to XML_Parse must have isFinal true; len 752 may be zero for this call (or any other). 753 754 Though the return values for these functions have always been 755 described as a Boolean value, the implementation, at least for the 756 1.95.x series, has always returned exactly one of the XML_Status 757 values. 758 */ 759 XMLPARSEAPI(enum XML_Status) 760 XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); 761 /* NOTE(review): XML_GetBuffer returns an untyped pointer for a requested length and XML_ParseBuffer takes no data pointer, so presumably the application fills the buffer obtained from XML_GetBuffer and then calls XML_ParseBuffer with the number of bytes written - confirm against the Expat reference. */ 762 XMLPARSEAPI(void *) 763 XML_GetBuffer(XML_Parser parser, int len); 764 765 XMLPARSEAPI(enum XML_Status) 766 XML_ParseBuffer(XML_Parser parser, int len, int isFinal); 767 768 /* Stops parsing, causing XML_Parse() or XML_ParseBuffer() to return. 769 Must be called from within a call-back handler, except when aborting 770 (resumable = 0) an already suspended parser. Some call-backs may 771 still follow because they would otherwise get lost. Examples: 772 - endElementHandler() for empty elements when stopped in 773 startElementHandler(), 774 - endNameSpaceDeclHandler() when stopped in endElementHandler(), 775 and possibly others. 776 777 Can be called from most handlers, including DTD related call-backs, 778 except when parsing an external parameter entity and resumable != 0. 779 Returns XML_STATUS_OK when successful, XML_STATUS_ERROR otherwise. 780 Possible error codes: 781 - XML_ERROR_SUSPENDED: when suspending an already suspended parser. 782 - XML_ERROR_FINISHED: when the parser has already finished. 783 - XML_ERROR_SUSPEND_PE: when suspending while parsing an external PE. 784 785 When resumable != 0 (true) then parsing is suspended, that is, 786 XML_Parse() and XML_ParseBuffer() return XML_STATUS_SUSPENDED. 
787 Otherwise, parsing is aborted, that is, XML_Parse() and XML_ParseBuffer() 788 return XML_STATUS_ERROR with error code XML_ERROR_ABORTED. 789 790 *Note*: 791 This will be applied to the current parser instance only, that is, if 792 there is a parent parser then it will continue parsing when the 793 externalEntityRefHandler() returns. It is up to the implementation of 794 the externalEntityRefHandler() to call XML_StopParser() on the parent 795 parser (recursively), if one wants to stop parsing altogether. 796 797 When suspended, parsing can be resumed by calling XML_ResumeParser(). 798 */ 799 XMLPARSEAPI(enum XML_Status) 800 XML_StopParser(XML_Parser parser, XML_Bool resumable); 801 802 /* Resumes parsing after it has been suspended with XML_StopParser(). 803 Must not be called from within a handler call-back. Returns same 804 status codes as XML_Parse() or XML_ParseBuffer(). 805 Additional error code XML_ERROR_NOT_SUSPENDED possible. 806 807 *Note*: 808 This must be called on the most deeply nested child parser instance 809 first, and on its parent parser only after the child parser has finished, 810 to be applied recursively until the document entity's parser is restarted. 811 That is, the parent parser will not resume by itself and it is up to the 812 application to call XML_ResumeParser() on it at the appropriate moment. 813 */ 814 XMLPARSEAPI(enum XML_Status) 815 XML_ResumeParser(XML_Parser parser); 816 /* Parser states, reported in the parsing member of XML_ParsingStatus. XML_INITIALIZED is the first enumerator and therefore 0. */ 817 enum XML_Parsing { XML_INITIALIZED, XML_PARSING, XML_FINISHED, XML_SUSPENDED }; 818 /* Result structure filled in by XML_GetParsingStatus: the parser state plus a flag about the final buffer. */ 819 typedef struct { 820 enum XML_Parsing parsing; 821 XML_Bool finalBuffer; 822 } XML_ParsingStatus; 823 824 /* Returns status of parser with respect to being initialized, parsing, 825 finished, or suspended and processing the final buffer. 
XXX XML_Parse() and XML_ParseBuffer() should return XML_ParsingStatus,
   XXX with XML_FINISHED_OK or XML_FINISHED_ERROR replacing XML_FINISHED
*/
XMLPARSEAPI(void)
XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status);

/* Creates an XML_Parser object that can parse an external general
   entity; context is a '\0'-terminated string specifying the parse
   context; encoding is a '\0'-terminated string giving the name of
   the externally specified encoding, or NULL if there is no
   externally specified encoding. The context string consists of a
   sequence of tokens separated by formfeeds (\f); a token consisting
   of a name specifies that the general entity of the name is open; a
   token of the form prefix=uri specifies the namespace for a
   particular prefix; a token of the form =uri specifies the default
   namespace. This can be called at any point after the first call to
   an ExternalEntityRefHandler so long as the parser has not yet
   been freed. The new parser is completely independent and may
   safely be used in a separate thread. The handlers and userData are
   initialized from the parser argument. Returns NULL if out of memory.
   Otherwise returns a new XML_Parser object.
*/
XMLPARSEAPI(XML_Parser)
XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context,
                               const XML_Char *encoding);

/* Modes accepted by XML_SetParamEntityParsing(). */
enum XML_ParamEntityParsing {
  XML_PARAM_ENTITY_PARSING_NEVER,
  XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE,
  XML_PARAM_ENTITY_PARSING_ALWAYS
};

/* Controls parsing of parameter entities (including the external DTD
   subset). If parsing of parameter entities is enabled, then
   references to external parameter entities (including the external
   DTD subset) will be passed to the handler set with
   XML_SetExternalEntityRefHandler. The context passed will be 0.
Unlike external general entities, external parameter entities can
   only be parsed synchronously. If the external parameter entity is
   to be parsed, it must be parsed during the call to the external
   entity ref handler: the complete sequence of
   XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and
   XML_ParserFree calls must be made during this call. After
   XML_ExternalEntityParserCreate has been called to create the parser
   for the external parameter entity (context must be 0 for this
   call), it is illegal to make any calls on the old parser until
   XML_ParserFree has been called on the newly created parser.
   If the library has been compiled without support for parameter
   entity parsing (i.e. without XML_DTD being defined), then
   XML_SetParamEntityParsing will return 0 if parsing of parameter
   entities is requested; otherwise it will return non-zero.
   Note: If XML_SetParamEntityParsing is called after XML_Parse or
      XML_ParseBuffer, then it has no effect and will always return 0.
   Note: If parser == NULL, the function will do nothing and return 0.
*/
XMLPARSEAPI(int)
XML_SetParamEntityParsing(XML_Parser parser,
                          enum XML_ParamEntityParsing parsing);

/* Sets the hash salt to use for internal hash calculations.
   Helps in preventing DoS attacks based on predicting hash
   function behavior. This must be called before parsing is started.
   Returns 1 if successful, 0 when called after parsing has started.
   Note: If parser == NULL, the function will do nothing and return 0.
*/
XMLPARSEAPI(int)
XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt);

/* If XML_Parse or XML_ParseBuffer has returned XML_STATUS_ERROR, then
   XML_GetErrorCode returns information about the error as an
   enum XML_Error value; XML_ErrorString() returns a string describing
   such a code.
*/
XMLPARSEAPI(enum XML_Error)
XML_GetErrorCode(XML_Parser parser);

/* These functions return information about the current parse
   location.
They may be called from any callback called to report
   some parse event; in this case the location is the location of the
   first of the sequence of characters that generated the event. When
   called from callbacks generated by declarations in the document
   prologue, the location identified isn't as neatly defined, but will
   be within the relevant markup. When called outside of the callback
   functions, the position indicated will be just past the last parse
   event (regardless of whether there was an associated callback).

   They may also be called after returning from a call to XML_Parse
   or XML_ParseBuffer. If the return value is XML_STATUS_ERROR then
   the location is the location of the character at which the error
   was detected; otherwise the location is the location of the last
   parse event, as described above.

   Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber
   return 0 to indicate an error.
   Note: XML_GetCurrentByteIndex returns -1 to indicate an error.
*/
XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser);   /* 0 on error */
XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); /* 0 on error */
XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser);   /* -1 on error */

/* Returns the number of bytes in the current event.
   Returns 0 if the event is in an internal entity.
*/
XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser);

/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets
   the integer pointed to by offset to the offset within this buffer
   of the current parse position, and sets the integer pointed to by size
   to the size of this buffer (the number of input bytes). Otherwise
   returns a NULL pointer. Also returns a NULL pointer if a parse isn't
   active.

   NOTE: The character pointer returned should not be used outside
   the handler that makes the call.
*/
XMLPARSEAPI(const char *)
XML_GetInputContext(XML_Parser parser, int *offset, int *size);

/* For backwards compatibility with previous versions: the older
   XML_GetError* names are plain aliases for the XML_GetCurrent*
   functions.
*/
#define XML_GetErrorLineNumber XML_GetCurrentLineNumber
#define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber
#define XML_GetErrorByteIndex XML_GetCurrentByteIndex

/* Frees the content model passed to the element declaration handler. */
XMLPARSEAPI(void)
XML_FreeContentModel(XML_Parser parser, XML_Content *model);

/* Exposing the memory handling functions used in Expat.
   NOTE(review): XML_ATTR_MALLOC and XML_ATTR_ALLOC_SIZE(n) are defined
   outside this excerpt; presumably they expand to compiler allocation
   attributes, with n naming the position of the size parameter (2 for
   XML_MemMalloc, 3 for XML_MemRealloc) -- confirm against their
   definitions.
*/
XMLPARSEAPI(void *)
XML_ATTR_MALLOC
XML_ATTR_ALLOC_SIZE(2)
XML_MemMalloc(XML_Parser parser, size_t size);

XMLPARSEAPI(void *)
XML_ATTR_ALLOC_SIZE(3)
XML_MemRealloc(XML_Parser parser, void *ptr, size_t size);

XMLPARSEAPI(void)
XML_MemFree(XML_Parser parser, void *ptr);

/* Frees memory used by the parser. */
XMLPARSEAPI(void)
XML_ParserFree(XML_Parser parser);

/* Returns a string describing the error. */
XMLPARSEAPI(const XML_LChar *)
XML_ErrorString(enum XML_Error code);

/* Returns a string containing the version number of this expat. */
XMLPARSEAPI(const XML_LChar *)
XML_ExpatVersion(void);

/* Numeric version components, as returned by XML_ExpatVersionInfo(). */
typedef struct {
  int major;
  int minor;
  int micro;
} XML_Expat_Version;

/* Returns an XML_Expat_Version structure containing numeric version
   number information for this version of expat.
*/
XMLPARSEAPI(XML_Expat_Version)
XML_ExpatVersionInfo(void);

/* Added in Expat 1.95.5.
*/
enum XML_FeatureEnum {
  XML_FEATURE_END = 0,
  XML_FEATURE_UNICODE,
  XML_FEATURE_UNICODE_WCHAR_T,
  XML_FEATURE_DTD,
  XML_FEATURE_CONTEXT_BYTES,
  XML_FEATURE_MIN_SIZE,
  XML_FEATURE_SIZEOF_XML_CHAR,
  XML_FEATURE_SIZEOF_XML_LCHAR,
  XML_FEATURE_NS,
  XML_FEATURE_LARGE_SIZE,
  XML_FEATURE_ATTR_INFO
  /* Additional features must be added to the end of this enum. */
};

/* One entry of the list returned by XML_GetFeatureList(): the feature
   identifier, its name, and an associated numeric value.
   NOTE(review): the meaning of value per feature is not stated in this
   header -- confirm against the Expat reference documentation.
*/
typedef struct {
  enum XML_FeatureEnum feature;
  const XML_LChar *name;
  long int value;
} XML_Feature;

/* Returns a pointer to XML_Feature entries describing this build.
   NOTE(review): presumably the list is terminated by an entry whose
   feature is XML_FEATURE_END -- confirm before iterating.
*/
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);

/* Expat follows the semantic versioning convention.
   See https://semver.org.
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 3
#define XML_MICRO_VERSION 0

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* not Expat_INCLUDED */