1 /* 2 * Summary: internals routines and limits exported by the parser. 3 * Description: this module exports a number of internal parsing routines 4 * they are not really all intended for applications but 5 * can prove useful doing low level processing. 6 * 7 * Copy: See Copyright for the status of this software. 8 * 9 * Author: Daniel Veillard 10 */ 11 12 #ifndef __XML_PARSER_INTERNALS_H__ 13 #define __XML_PARSER_INTERNALS_H__ 14 15 #include <libxml/xmlversion.h> 16 #include <libxml/parser.h> 17 #include <libxml/HTMLparser.h> 18 #include <libxml/chvalid.h> 19 #include <libxml/SAX2.h> 20 21 #ifdef __cplusplus 22 extern "C" { 23 #endif 24 25 /** 26 * xmlParserMaxDepth: 27 * 28 * DEPRECATED: has no effect 29 * 30 * arbitrary depth limit for the XML documents that we allow to 31 * process. This is not a limitation of the parser but a safety 32 * boundary feature, use XML_PARSE_HUGE option to override it. 33 */ 34 XML_DEPRECATED 35 XMLPUBVAR const unsigned int xmlParserMaxDepth; 36 37 /** 38 * XML_MAX_TEXT_LENGTH: 39 * 40 * Maximum size allowed for a single text node when building a tree. 41 * This is not a limitation of the parser but a safety boundary feature, 42 * use XML_PARSE_HUGE option to override it. 43 * Introduced in 2.9.0 44 */ 45 #define XML_MAX_TEXT_LENGTH 10000000 46 47 /** 48 * XML_MAX_HUGE_LENGTH: 49 * 50 * Maximum size allowed when XML_PARSE_HUGE is set. 51 */ 52 #define XML_MAX_HUGE_LENGTH 1000000000 53 54 /** 55 * XML_MAX_NAME_LENGTH: 56 * 57 * Maximum size allowed for a markup identifier. 58 * This is not a limitation of the parser but a safety boundary feature, 59 * use XML_PARSE_HUGE option to override it. 60 * Note that with the use of parsing dictionaries overriding the limit 61 * may result in more runtime memory usage in face of "unfriendly' content 62 * Introduced in 2.9.0 63 */ 64 #define XML_MAX_NAME_LENGTH 50000 65 66 /** 67 * XML_MAX_DICTIONARY_LIMIT: 68 * 69 * Maximum size allowed by the parser for a dictionary by default 70 * This is not a limitation of the parser but a safety boundary feature, 71 * use XML_PARSE_HUGE option to override it. 72 * Introduced in 2.9.0 73 */ 74 #define XML_MAX_DICTIONARY_LIMIT 100000000 75 76 /** 77 * XML_MAX_LOOKUP_LIMIT: 78 * 79 * Maximum size allowed by the parser for ahead lookup 80 * This is an upper boundary enforced by the parser to avoid bad 81 * behaviour on "unfriendly' content 82 * Introduced in 2.9.0 83 */ 84 #define XML_MAX_LOOKUP_LIMIT 10000000 85 86 /** 87 * XML_MAX_NAMELEN: 88 * 89 * Identifiers can be longer, but this will be more costly 90 * at runtime. 91 */ 92 #define XML_MAX_NAMELEN 100 93 94 /** 95 * INPUT_CHUNK: 96 * 97 * The parser tries to always have that amount of input ready. 98 * One of the point is providing context when reporting errors. 99 */ 100 #define INPUT_CHUNK 250 101 102 /************************************************************************ 103 * * 104 * UNICODE version of the macros. * 105 * * 106 ************************************************************************/ 107 /** 108 * IS_BYTE_CHAR: 109 * @c: an byte value (int) 110 * 111 * Macro to check the following production in the XML spec: 112 * 113 * [2] Char ::= #x9 | #xA | #xD | [#x20...] 114 * any byte character in the accepted range 115 */ 116 #define IS_BYTE_CHAR(c) xmlIsChar_ch(c) 117 118 /** 119 * IS_CHAR: 120 * @c: an UNICODE value (int) 121 * 122 * Macro to check the following production in the XML spec: 123 * 124 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] 125 * | [#x10000-#x10FFFF] 126 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. 127 */ 128 #define IS_CHAR(c) xmlIsCharQ(c) 129 130 /** 131 * IS_CHAR_CH: 132 * @c: an xmlChar (usually an unsigned char) 133 * 134 * Behaves like IS_CHAR on single-byte value 135 */ 136 #define IS_CHAR_CH(c) xmlIsChar_ch(c) 137 138 /** 139 * IS_BLANK: 140 * @c: an UNICODE value (int) 141 * 142 * Macro to check the following production in the XML spec: 143 * 144 * [3] S ::= (#x20 | #x9 | #xD | #xA)+ 145 */ 146 #define IS_BLANK(c) xmlIsBlankQ(c) 147 148 /** 149 * IS_BLANK_CH: 150 * @c: an xmlChar value (normally unsigned char) 151 * 152 * Behaviour same as IS_BLANK 153 */ 154 #define IS_BLANK_CH(c) xmlIsBlank_ch(c) 155 156 /** 157 * IS_BASECHAR: 158 * @c: an UNICODE value (int) 159 * 160 * Macro to check the following production in the XML spec: 161 * 162 * [85] BaseChar ::= ... long list see REC ... 163 */ 164 #define IS_BASECHAR(c) xmlIsBaseCharQ(c) 165 166 /** 167 * IS_DIGIT: 168 * @c: an UNICODE value (int) 169 * 170 * Macro to check the following production in the XML spec: 171 * 172 * [88] Digit ::= ... long list see REC ... 173 */ 174 #define IS_DIGIT(c) xmlIsDigitQ(c) 175 176 /** 177 * IS_DIGIT_CH: 178 * @c: an xmlChar value (usually an unsigned char) 179 * 180 * Behaves like IS_DIGIT but with a single byte argument 181 */ 182 #define IS_DIGIT_CH(c) xmlIsDigit_ch(c) 183 184 /** 185 * IS_COMBINING: 186 * @c: an UNICODE value (int) 187 * 188 * Macro to check the following production in the XML spec: 189 * 190 * [87] CombiningChar ::= ... long list see REC ... 191 */ 192 #define IS_COMBINING(c) xmlIsCombiningQ(c) 193 194 /** 195 * IS_COMBINING_CH: 196 * @c: an xmlChar (usually an unsigned char) 197 * 198 * Always false (all combining chars > 0xff) 199 */ 200 #define IS_COMBINING_CH(c) 0 201 202 /** 203 * IS_EXTENDER: 204 * @c: an UNICODE value (int) 205 * 206 * Macro to check the following production in the XML spec: 207 * 208 * 209 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | 210 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | 211 * [#x309D-#x309E] | [#x30FC-#x30FE] 212 */ 213 #define IS_EXTENDER(c) xmlIsExtenderQ(c) 214 215 /** 216 * IS_EXTENDER_CH: 217 * @c: an xmlChar value (usually an unsigned char) 218 * 219 * Behaves like IS_EXTENDER but with a single-byte argument 220 */ 221 #define IS_EXTENDER_CH(c) xmlIsExtender_ch(c) 222 223 /** 224 * IS_IDEOGRAPHIC: 225 * @c: an UNICODE value (int) 226 * 227 * Macro to check the following production in the XML spec: 228 * 229 * 230 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] 231 */ 232 #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c) 233 234 /** 235 * IS_LETTER: 236 * @c: an UNICODE value (int) 237 * 238 * Macro to check the following production in the XML spec: 239 * 240 * 241 * [84] Letter ::= BaseChar | Ideographic 242 */ 243 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) 244 245 /** 246 * IS_LETTER_CH: 247 * @c: an xmlChar value (normally unsigned char) 248 * 249 * Macro behaves like IS_LETTER, but only check base chars 250 * 251 */ 252 #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) 253 254 /** 255 * IS_ASCII_LETTER: 256 * @c: an xmlChar value 257 * 258 * Macro to check [a-zA-Z] 259 * 260 */ 261 #define IS_ASCII_LETTER(c) ((0x61 <= ((c) | 0x20)) && \ 262 (((c) | 0x20) <= 0x7a)) 263 264 /** 265 * IS_ASCII_DIGIT: 266 * @c: an xmlChar value 267 * 268 * Macro to check [0-9] 269 * 270 */ 271 #define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) 272 273 /** 274 * IS_PUBIDCHAR: 275 * @c: an UNICODE value (int) 276 * 277 * Macro to check the following production in the XML spec: 278 * 279 * 280 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 281 */ 282 #define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c) 283 284 /** 285 * IS_PUBIDCHAR_CH: 286 * @c: an xmlChar value (normally unsigned char) 287 * 288 * Same as IS_PUBIDCHAR but for single-byte value 289 */ 290 #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c) 291 292 /** 293 * Global variables used for predefined strings. 294 */ 295 XMLPUBVAR const xmlChar xmlStringText[]; 296 XMLPUBVAR const xmlChar xmlStringTextNoenc[]; 297 XMLPUBVAR const xmlChar xmlStringComment[]; 298 299 XML_DEPRECATED 300 XMLPUBFUN int xmlIsLetter (int c); 301 302 /** 303 * Parser context. 304 */ 305 XMLPUBFUN xmlParserCtxtPtr 306 xmlCreateFileParserCtxt (const char *filename); 307 XMLPUBFUN xmlParserCtxtPtr 308 xmlCreateURLParserCtxt (const char *filename, 309 int options); 310 XMLPUBFUN xmlParserCtxtPtr 311 xmlCreateMemoryParserCtxt(const char *buffer, 312 int size); 313 XMLPUBFUN xmlParserCtxtPtr 314 xmlCreateEntityParserCtxt(const xmlChar *URL, 315 const xmlChar *ID, 316 const xmlChar *base); 317 XMLPUBFUN void 318 xmlCtxtErrMemory (xmlParserCtxtPtr ctxt); 319 XMLPUBFUN int 320 xmlSwitchEncoding (xmlParserCtxtPtr ctxt, 321 xmlCharEncoding enc); 322 XMLPUBFUN int 323 xmlSwitchEncodingName (xmlParserCtxtPtr ctxt, 324 const char *encoding); 325 XMLPUBFUN int 326 xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, 327 xmlCharEncodingHandlerPtr handler); 328 XML_DEPRECATED 329 XMLPUBFUN int 330 xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt, 331 xmlParserInputPtr input, 332 xmlCharEncodingHandlerPtr handler); 333 334 /** 335 * Input Streams. 336 */ 337 XMLPUBFUN xmlParserInputPtr 338 xmlNewStringInputStream (xmlParserCtxtPtr ctxt, 339 const xmlChar *buffer); 340 XML_DEPRECATED 341 XMLPUBFUN xmlParserInputPtr 342 xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, 343 xmlEntityPtr entity); 344 XMLPUBFUN int 345 xmlPushInput (xmlParserCtxtPtr ctxt, 346 xmlParserInputPtr input); 347 XMLPUBFUN xmlChar 348 xmlPopInput (xmlParserCtxtPtr ctxt); 349 XMLPUBFUN void 350 xmlFreeInputStream (xmlParserInputPtr input); 351 XMLPUBFUN xmlParserInputPtr 352 xmlNewInputFromFile (xmlParserCtxtPtr ctxt, 353 const char *filename); 354 XMLPUBFUN xmlParserInputPtr 355 xmlNewInputStream (xmlParserCtxtPtr ctxt); 356 357 /** 358 * Namespaces. 359 */ 360 XMLPUBFUN xmlChar * 361 xmlSplitQName (xmlParserCtxtPtr ctxt, 362 const xmlChar *name, 363 xmlChar **prefix); 364 365 /** 366 * Generic production rules. 367 */ 368 XML_DEPRECATED 369 XMLPUBFUN const xmlChar * 370 xmlParseName (xmlParserCtxtPtr ctxt); 371 XML_DEPRECATED 372 XMLPUBFUN xmlChar * 373 xmlParseNmtoken (xmlParserCtxtPtr ctxt); 374 XML_DEPRECATED 375 XMLPUBFUN xmlChar * 376 xmlParseEntityValue (xmlParserCtxtPtr ctxt, 377 xmlChar **orig); 378 XML_DEPRECATED 379 XMLPUBFUN xmlChar * 380 xmlParseAttValue (xmlParserCtxtPtr ctxt); 381 XML_DEPRECATED 382 XMLPUBFUN xmlChar * 383 xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); 384 XML_DEPRECATED 385 XMLPUBFUN xmlChar * 386 xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); 387 XML_DEPRECATED 388 XMLPUBFUN void 389 xmlParseCharData (xmlParserCtxtPtr ctxt, 390 int cdata); 391 XML_DEPRECATED 392 XMLPUBFUN xmlChar * 393 xmlParseExternalID (xmlParserCtxtPtr ctxt, 394 xmlChar **publicID, 395 int strict); 396 XML_DEPRECATED 397 XMLPUBFUN void 398 xmlParseComment (xmlParserCtxtPtr ctxt); 399 XML_DEPRECATED 400 XMLPUBFUN const xmlChar * 401 xmlParsePITarget (xmlParserCtxtPtr ctxt); 402 XML_DEPRECATED 403 XMLPUBFUN void 404 xmlParsePI (xmlParserCtxtPtr ctxt); 405 XML_DEPRECATED 406 XMLPUBFUN void 407 xmlParseNotationDecl (xmlParserCtxtPtr ctxt); 408 XML_DEPRECATED 409 XMLPUBFUN void 410 xmlParseEntityDecl (xmlParserCtxtPtr ctxt); 411 XML_DEPRECATED 412 XMLPUBFUN int 413 xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, 414 xmlChar **value); 415 XML_DEPRECATED 416 XMLPUBFUN xmlEnumerationPtr 417 xmlParseNotationType (xmlParserCtxtPtr ctxt); 418 XML_DEPRECATED 419 XMLPUBFUN xmlEnumerationPtr 420 xmlParseEnumerationType (xmlParserCtxtPtr ctxt); 421 XML_DEPRECATED 422 XMLPUBFUN int 423 xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, 424 xmlEnumerationPtr *tree); 425 XML_DEPRECATED 426 XMLPUBFUN int 427 xmlParseAttributeType (xmlParserCtxtPtr ctxt, 428 xmlEnumerationPtr *tree); 429 XML_DEPRECATED 430 XMLPUBFUN void 431 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); 432 XML_DEPRECATED 433 XMLPUBFUN xmlElementContentPtr 434 xmlParseElementMixedContentDecl 435 (xmlParserCtxtPtr ctxt, 436 int inputchk); 437 XML_DEPRECATED 438 XMLPUBFUN xmlElementContentPtr 439 xmlParseElementChildrenContentDecl 440 (xmlParserCtxtPtr ctxt, 441 int inputchk); 442 XML_DEPRECATED 443 XMLPUBFUN int 444 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, 445 const xmlChar *name, 446 xmlElementContentPtr *result); 447 XML_DEPRECATED 448 XMLPUBFUN int 449 xmlParseElementDecl (xmlParserCtxtPtr ctxt); 450 XML_DEPRECATED 451 XMLPUBFUN void 452 xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); 453 XML_DEPRECATED 454 XMLPUBFUN int 455 xmlParseCharRef (xmlParserCtxtPtr ctxt); 456 XML_DEPRECATED 457 XMLPUBFUN xmlEntityPtr 458 xmlParseEntityRef (xmlParserCtxtPtr ctxt); 459 XML_DEPRECATED 460 XMLPUBFUN void 461 xmlParseReference (xmlParserCtxtPtr ctxt); 462 XML_DEPRECATED 463 XMLPUBFUN void 464 xmlParsePEReference (xmlParserCtxtPtr ctxt); 465 XML_DEPRECATED 466 XMLPUBFUN void 467 xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); 468 #ifdef LIBXML_SAX1_ENABLED 469 XML_DEPRECATED 470 XMLPUBFUN const xmlChar * 471 xmlParseAttribute (xmlParserCtxtPtr ctxt, 472 xmlChar **value); 473 XML_DEPRECATED 474 XMLPUBFUN const xmlChar * 475 xmlParseStartTag (xmlParserCtxtPtr ctxt); 476 XML_DEPRECATED 477 XMLPUBFUN void 478 xmlParseEndTag (xmlParserCtxtPtr ctxt); 479 #endif /* LIBXML_SAX1_ENABLED */ 480 XML_DEPRECATED 481 XMLPUBFUN void 482 xmlParseCDSect (xmlParserCtxtPtr ctxt); 483 XMLPUBFUN void 484 xmlParseContent (xmlParserCtxtPtr ctxt); 485 XML_DEPRECATED 486 XMLPUBFUN void 487 xmlParseElement (xmlParserCtxtPtr ctxt); 488 XML_DEPRECATED 489 XMLPUBFUN xmlChar * 490 xmlParseVersionNum (xmlParserCtxtPtr ctxt); 491 XML_DEPRECATED 492 XMLPUBFUN xmlChar * 493 xmlParseVersionInfo (xmlParserCtxtPtr ctxt); 494 XML_DEPRECATED 495 XMLPUBFUN xmlChar * 496 xmlParseEncName (xmlParserCtxtPtr ctxt); 497 XML_DEPRECATED 498 XMLPUBFUN const xmlChar * 499 xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); 500 XML_DEPRECATED 501 XMLPUBFUN int 502 xmlParseSDDecl (xmlParserCtxtPtr ctxt); 503 XML_DEPRECATED 504 XMLPUBFUN void 505 xmlParseXMLDecl (xmlParserCtxtPtr ctxt); 506 XML_DEPRECATED 507 XMLPUBFUN void 508 xmlParseTextDecl (xmlParserCtxtPtr ctxt); 509 XML_DEPRECATED 510 XMLPUBFUN void 511 xmlParseMisc (xmlParserCtxtPtr ctxt); 512 XMLPUBFUN void 513 xmlParseExternalSubset (xmlParserCtxtPtr ctxt, 514 const xmlChar *ExternalID, 515 const xmlChar *SystemID); 516 /** 517 * XML_SUBSTITUTE_NONE: 518 * 519 * If no entities need to be substituted. 520 */ 521 #define XML_SUBSTITUTE_NONE 0 522 /** 523 * XML_SUBSTITUTE_REF: 524 * 525 * Whether general entities need to be substituted. 526 */ 527 #define XML_SUBSTITUTE_REF 1 528 /** 529 * XML_SUBSTITUTE_PEREF: 530 * 531 * Whether parameter entities need to be substituted. 532 */ 533 #define XML_SUBSTITUTE_PEREF 2 534 /** 535 * XML_SUBSTITUTE_BOTH: 536 * 537 * Both general and parameter entities need to be substituted. 538 */ 539 #define XML_SUBSTITUTE_BOTH 3 540 541 XML_DEPRECATED 542 XMLPUBFUN xmlChar * 543 xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, 544 const xmlChar *str, 545 int what, 546 xmlChar end, 547 xmlChar end2, 548 xmlChar end3); 549 XML_DEPRECATED 550 XMLPUBFUN xmlChar * 551 xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt, 552 const xmlChar *str, 553 int len, 554 int what, 555 xmlChar end, 556 xmlChar end2, 557 xmlChar end3); 558 559 /* 560 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP. 561 */ 562 XML_DEPRECATED 563 XMLPUBFUN int nodePush (xmlParserCtxtPtr ctxt, 564 xmlNodePtr value); 565 XML_DEPRECATED 566 XMLPUBFUN xmlNodePtr nodePop (xmlParserCtxtPtr ctxt); 567 XMLPUBFUN int inputPush (xmlParserCtxtPtr ctxt, 568 xmlParserInputPtr value); 569 XMLPUBFUN xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt); 570 XML_DEPRECATED 571 XMLPUBFUN const xmlChar * namePop (xmlParserCtxtPtr ctxt); 572 XML_DEPRECATED 573 XMLPUBFUN int namePush (xmlParserCtxtPtr ctxt, 574 const xmlChar *value); 575 576 /* 577 * other commodities shared between parser.c and parserInternals. 578 */ 579 XML_DEPRECATED 580 XMLPUBFUN int xmlSkipBlankChars (xmlParserCtxtPtr ctxt); 581 XML_DEPRECATED 582 XMLPUBFUN int xmlStringCurrentChar (xmlParserCtxtPtr ctxt, 583 const xmlChar *cur, 584 int *len); 585 XML_DEPRECATED 586 XMLPUBFUN void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); 587 XML_DEPRECATED 588 XMLPUBFUN int xmlCheckLanguageID (const xmlChar *lang); 589 590 /* 591 * Really core function shared with HTML parser. 592 */ 593 XML_DEPRECATED 594 XMLPUBFUN int xmlCurrentChar (xmlParserCtxtPtr ctxt, 595 int *len); 596 XMLPUBFUN int xmlCopyCharMultiByte (xmlChar *out, 597 int val); 598 XML_DEPRECATED 599 XMLPUBFUN int xmlCopyChar (int len, 600 xmlChar *out, 601 int val); 602 XML_DEPRECATED 603 XMLPUBFUN void xmlNextChar (xmlParserCtxtPtr ctxt); 604 XML_DEPRECATED 605 XMLPUBFUN void xmlParserInputShrink (xmlParserInputPtr in); 606 607 #ifdef __cplusplus 608 } 609 #endif 610 #endif /* __XML_PARSER_INTERNALS_H__ */ 611