xref: /aosp_15_r20/external/libxml2/include/libxml/parserInternals.h (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /*
2  * Summary: internal routines and limits exported by the parser.
3  * Description: this module exports a number of internal parsing routines.
4  *              They are not really all intended for applications but
5  *              can prove useful for low-level processing.
6  *
7  * Copy: See Copyright for the status of this software.
8  *
9  * Author: Daniel Veillard
10  */
11 
12 #ifndef __XML_PARSER_INTERNALS_H__
13 #define __XML_PARSER_INTERNALS_H__
14 
15 #include <libxml/xmlversion.h>
16 #include <libxml/parser.h>
17 #include <libxml/HTMLparser.h>
18 #include <libxml/chvalid.h>
19 #include <libxml/SAX2.h>
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 /**
26  * xmlParserMaxDepth:
27  *
28  * DEPRECATED: has no effect
29  *
30  * Arbitrary depth limit for the XML documents that we allow to be
31  * processed. This is not a limitation of the parser but a safety
32  * boundary feature; use the XML_PARSE_HUGE option to override it.
33  */
34 XML_DEPRECATED
35 XMLPUBVAR const unsigned int xmlParserMaxDepth;
36 
37 /**
38  * XML_MAX_TEXT_LENGTH:
39  *
40  * Maximum size allowed for a single text node when building a tree.
41  * This is not a limitation of the parser but a safety boundary feature;
42  * use the XML_PARSE_HUGE option to override it.
43  * Introduced in 2.9.0.
44  */
45 #define XML_MAX_TEXT_LENGTH 10000000
46 
47 /**
48  * XML_MAX_HUGE_LENGTH:
49  *
50  * Maximum size allowed when the XML_PARSE_HUGE option is set.
51  */
52 #define XML_MAX_HUGE_LENGTH 1000000000
53 
54 /**
55  * XML_MAX_NAME_LENGTH:
56  *
57  * Maximum size allowed for a markup identifier.
58  * This is not a limitation of the parser but a safety boundary feature;
59  * use the XML_PARSE_HUGE option to override it.
60  * Note that with the use of parsing dictionaries, overriding the limit
61  * may result in more runtime memory usage in the face of "unfriendly" content.
62  * Introduced in 2.9.0.
63  */
64 #define XML_MAX_NAME_LENGTH 50000
65 
66 /**
67  * XML_MAX_DICTIONARY_LIMIT:
68  *
69  * Maximum size allowed by the parser for a dictionary by default.
70  * This is not a limitation of the parser but a safety boundary feature;
71  * use the XML_PARSE_HUGE option to override it.
72  * Introduced in 2.9.0.
73  */
74 #define XML_MAX_DICTIONARY_LIMIT 100000000
75 
76 /**
77  * XML_MAX_LOOKUP_LIMIT:
78  *
79  * Maximum size allowed by the parser for lookahead.
80  * This is an upper boundary enforced by the parser to avoid bad
81  * behaviour on "unfriendly" content.
82  * Introduced in 2.9.0.
83  */
84 #define XML_MAX_LOOKUP_LIMIT 10000000
85 
86 /**
87  * XML_MAX_NAMELEN:
88  *
89  * Identifiers can be longer than this, but handling them will be more
90  * costly at runtime.
91  */
92 #define XML_MAX_NAMELEN 100
93 
94 /**
95  * INPUT_CHUNK:
96  *
97  * The parser tries to always have that amount of input ready.
98  * One of the points is to provide context when reporting errors.
99  */
100 #define INPUT_CHUNK	250
101 
102 /************************************************************************
103  *									*
104  * UNICODE version of the macros.					*
105  *									*
106  ************************************************************************/
107 /**
108  * IS_BYTE_CHAR:
109  * @c:  a byte value (int)
110  *
111  * Macro to check the following production in the XML spec:
112  *
113  * [2] Char ::= #x9 | #xA | #xD | [#x20...]
114  * any byte character in the accepted range
115  */
116 #define IS_BYTE_CHAR(c)	 xmlIsChar_ch(c)
117 
118 /**
119  * IS_CHAR:
120  * @c:  a Unicode value (int)
121  *
122  * Macro to check the following production in the XML spec:
123  *
124  * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
125  *                  | [#x10000-#x10FFFF]
126  * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
127  */
128 #define IS_CHAR(c)   xmlIsCharQ(c)
129 
130 /**
131  * IS_CHAR_CH:
132  * @c: an xmlChar (usually an unsigned char)
133  *
134  * Behaves like IS_CHAR on a single-byte value
135  */
136 #define IS_CHAR_CH(c)  xmlIsChar_ch(c)
137 
138 /**
139  * IS_BLANK:
140  * @c:  a Unicode value (int)
141  *
142  * Macro to check the following production in the XML spec:
143  *
144  * [3] S ::= (#x20 | #x9 | #xD | #xA)+
145  */
146 #define IS_BLANK(c)  xmlIsBlankQ(c)
147 
148 /**
149  * IS_BLANK_CH:
150  * @c:  an xmlChar value (normally unsigned char)
151  *
152  * Behaves like IS_BLANK but with a single-byte argument
153  */
154 #define IS_BLANK_CH(c)  xmlIsBlank_ch(c)
155 
156 /**
157  * IS_BASECHAR:
158  * @c:  a Unicode value (int)
159  *
160  * Macro to check the following production in the XML spec:
161  *
162  * [85] BaseChar ::= ... long list see REC ...
163  */
164 #define IS_BASECHAR(c) xmlIsBaseCharQ(c)
165 
166 /**
167  * IS_DIGIT:
168  * @c:  a Unicode value (int)
169  *
170  * Macro to check the following production in the XML spec:
171  *
172  * [88] Digit ::= ... long list see REC ...
173  */
174 #define IS_DIGIT(c) xmlIsDigitQ(c)
175 
176 /**
177  * IS_DIGIT_CH:
178  * @c:  an xmlChar value (usually an unsigned char)
179  *
180  * Behaves like IS_DIGIT but with a single-byte argument
181  */
182 #define IS_DIGIT_CH(c)  xmlIsDigit_ch(c)
183 
184 /**
185  * IS_COMBINING:
186  * @c:  a Unicode value (int)
187  *
188  * Macro to check the following production in the XML spec:
189  *
190  * [87] CombiningChar ::= ... long list see REC ...
191  */
192 #define IS_COMBINING(c) xmlIsCombiningQ(c)
193 
194 /**
195  * IS_COMBINING_CH:
196  * @c:  an xmlChar (usually an unsigned char)
197  *
198  * Always false (all combining chars > 0xff); @c is not evaluated
199  */
200 #define IS_COMBINING_CH(c) 0
201 
202 /**
203  * IS_EXTENDER:
204  * @c:  a Unicode value (int)
205  *
206  * Macro to check the following production in the XML spec:
207  *
208  *
209  * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
210  *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
211  *                   [#x309D-#x309E] | [#x30FC-#x30FE]
212  */
213 #define IS_EXTENDER(c) xmlIsExtenderQ(c)
214 
215 /**
216  * IS_EXTENDER_CH:
217  * @c:  an xmlChar value (usually an unsigned char)
218  *
219  * Behaves like IS_EXTENDER but with a single-byte argument (xmlIsExtender_ch)
220  */
221 #define IS_EXTENDER_CH(c)  xmlIsExtender_ch(c)
222 
223 /**
224  * IS_IDEOGRAPHIC:
225  * @c:  a Unicode value (int)
226  *
227  * Macro to check the following production in the XML spec:
228  *
229  *
230  * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
231  */
232 #define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)
233 
234 /**
235  * IS_LETTER:
236  * @c:  a Unicode value (int)
237  *
238  * Macro to check the following production in the XML spec
239  * (@c is expanded into both sub-checks; avoid arguments with side effects):
240  *
241  * [84] Letter ::= BaseChar | Ideographic
242  */
243 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
244 
245 /**
246  * IS_LETTER_CH:
247  * @c:  an xmlChar value (normally unsigned char)
248  *
249  * Macro behaves like IS_LETTER, but only checks base chars
250  *
251  */
252 #define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)
253 
254 /**
255  * IS_ASCII_LETTER:
256  * @c: an xmlChar value
257  *
258  * Macro to check [a-zA-Z]; OR-ing with 0x20 folds upper case onto [a-z].
259  * Note: @c may be evaluated twice; avoid arguments with side effects.
260  */
261 #define IS_ASCII_LETTER(c)	((0x61 <= ((c) | 0x20)) && \
262                                  (((c) | 0x20) <= 0x7a))
263 
264 /**
265  * IS_ASCII_DIGIT:
266  * @c: an xmlChar value
267  *
268  * Macro to check [0-9]
269  * Note: @c may be evaluated twice; avoid arguments with side effects.
270  */
271 #define IS_ASCII_DIGIT(c)	((0x30 <= (c)) && ((c) <= 0x39))
272 
273 /**
274  * IS_PUBIDCHAR:
275  * @c:  a Unicode value (int)
276  *
277  * Macro to check the following production in the XML spec:
278  *
279  *
280  * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
281  */
282 #define IS_PUBIDCHAR(c)	xmlIsPubidCharQ(c)
283 
284 /**
285  * IS_PUBIDCHAR_CH:
286  * @c:  an xmlChar value (normally unsigned char)
287  *
288  * Same as IS_PUBIDCHAR but for a single-byte value
289  */
290 #define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)
291 
292 /**
293  * Global variables used for predefined strings.
294  */
295 XMLPUBVAR const xmlChar xmlStringText[];
296 XMLPUBVAR const xmlChar xmlStringTextNoenc[];
297 XMLPUBVAR const xmlChar xmlStringComment[];
298 
299 XML_DEPRECATED
300 XMLPUBFUN int                   xmlIsLetter     (int c);
301 
302 /**
303  * Parser context.
304  */
305 XMLPUBFUN xmlParserCtxtPtr
306 			xmlCreateFileParserCtxt	(const char *filename);
307 XMLPUBFUN xmlParserCtxtPtr
308 			xmlCreateURLParserCtxt	(const char *filename,
309 						 int options);
310 XMLPUBFUN xmlParserCtxtPtr
311 			xmlCreateMemoryParserCtxt(const char *buffer,
312 						 int size);
313 XMLPUBFUN xmlParserCtxtPtr
314 			xmlCreateEntityParserCtxt(const xmlChar *URL,
315 						 const xmlChar *ID,
316 						 const xmlChar *base);
317 XMLPUBFUN void
318 			xmlCtxtErrMemory	(xmlParserCtxtPtr ctxt);
319 XMLPUBFUN int
320 			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
321 						 xmlCharEncoding enc);
322 XMLPUBFUN int
323 			xmlSwitchEncodingName	(xmlParserCtxtPtr ctxt,
324 						 const char *encoding);
325 XMLPUBFUN int
326 			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
327 					 xmlCharEncodingHandlerPtr handler);
328 XML_DEPRECATED
329 XMLPUBFUN int
330 			xmlSwitchInputEncoding	(xmlParserCtxtPtr ctxt,
331 						 xmlParserInputPtr input,
332 					 xmlCharEncodingHandlerPtr handler);
333 
334 /**
335  * Input Streams.
336  */
337 XMLPUBFUN xmlParserInputPtr
338 			xmlNewStringInputStream	(xmlParserCtxtPtr ctxt,
339 						 const xmlChar *buffer);
340 XML_DEPRECATED
341 XMLPUBFUN xmlParserInputPtr
342 			xmlNewEntityInputStream	(xmlParserCtxtPtr ctxt,
343 						 xmlEntityPtr entity);
344 XMLPUBFUN int
345 			xmlPushInput		(xmlParserCtxtPtr ctxt,
346 						 xmlParserInputPtr input);
347 XMLPUBFUN xmlChar
348 			xmlPopInput		(xmlParserCtxtPtr ctxt);
349 XMLPUBFUN void
350 			xmlFreeInputStream	(xmlParserInputPtr input);
351 XMLPUBFUN xmlParserInputPtr
352 			xmlNewInputFromFile	(xmlParserCtxtPtr ctxt,
353 						 const char *filename);
354 XMLPUBFUN xmlParserInputPtr
355 			xmlNewInputStream	(xmlParserCtxtPtr ctxt);
356 
357 /**
358  * Namespaces.
359  */
360 XMLPUBFUN xmlChar *
361 			xmlSplitQName		(xmlParserCtxtPtr ctxt,
362 						 const xmlChar *name,
363 						 xmlChar **prefix);
364 
365 /**
366  * Generic production rules.
367  */
368 XML_DEPRECATED
369 XMLPUBFUN const xmlChar *
370 			xmlParseName		(xmlParserCtxtPtr ctxt);
371 XML_DEPRECATED
372 XMLPUBFUN xmlChar *
373 			xmlParseNmtoken		(xmlParserCtxtPtr ctxt);
374 XML_DEPRECATED
375 XMLPUBFUN xmlChar *
376 			xmlParseEntityValue	(xmlParserCtxtPtr ctxt,
377 						 xmlChar **orig);
378 XML_DEPRECATED
379 XMLPUBFUN xmlChar *
380 			xmlParseAttValue	(xmlParserCtxtPtr ctxt);
381 XML_DEPRECATED
382 XMLPUBFUN xmlChar *
383 			xmlParseSystemLiteral	(xmlParserCtxtPtr ctxt);
384 XML_DEPRECATED
385 XMLPUBFUN xmlChar *
386 			xmlParsePubidLiteral	(xmlParserCtxtPtr ctxt);
387 XML_DEPRECATED
388 XMLPUBFUN void
389 			xmlParseCharData	(xmlParserCtxtPtr ctxt,
390 						 int cdata);
391 XML_DEPRECATED
392 XMLPUBFUN xmlChar *
393 			xmlParseExternalID	(xmlParserCtxtPtr ctxt,
394 						 xmlChar **publicID,
395 						 int strict);
396 XML_DEPRECATED
397 XMLPUBFUN void
398 			xmlParseComment		(xmlParserCtxtPtr ctxt);
399 XML_DEPRECATED
400 XMLPUBFUN const xmlChar *
401 			xmlParsePITarget	(xmlParserCtxtPtr ctxt);
402 XML_DEPRECATED
403 XMLPUBFUN void
404 			xmlParsePI		(xmlParserCtxtPtr ctxt);
405 XML_DEPRECATED
406 XMLPUBFUN void
407 			xmlParseNotationDecl	(xmlParserCtxtPtr ctxt);
408 XML_DEPRECATED
409 XMLPUBFUN void
410 			xmlParseEntityDecl	(xmlParserCtxtPtr ctxt);
411 XML_DEPRECATED
412 XMLPUBFUN int
413 			xmlParseDefaultDecl	(xmlParserCtxtPtr ctxt,
414 						 xmlChar **value);
415 XML_DEPRECATED
416 XMLPUBFUN xmlEnumerationPtr
417 			xmlParseNotationType	(xmlParserCtxtPtr ctxt);
418 XML_DEPRECATED
419 XMLPUBFUN xmlEnumerationPtr
420 			xmlParseEnumerationType	(xmlParserCtxtPtr ctxt);
421 XML_DEPRECATED
422 XMLPUBFUN int
423 			xmlParseEnumeratedType	(xmlParserCtxtPtr ctxt,
424 						 xmlEnumerationPtr *tree);
425 XML_DEPRECATED
426 XMLPUBFUN int
427 			xmlParseAttributeType	(xmlParserCtxtPtr ctxt,
428 						 xmlEnumerationPtr *tree);
429 XML_DEPRECATED
430 XMLPUBFUN void
431 			xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
432 XML_DEPRECATED
433 XMLPUBFUN xmlElementContentPtr
434 			xmlParseElementMixedContentDecl
435 						(xmlParserCtxtPtr ctxt,
436 						 int inputchk);
437 XML_DEPRECATED
438 XMLPUBFUN xmlElementContentPtr
439 			xmlParseElementChildrenContentDecl
440 						(xmlParserCtxtPtr ctxt,
441 						 int inputchk);
442 XML_DEPRECATED
443 XMLPUBFUN int
444 			xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
445 						 const xmlChar *name,
446 						 xmlElementContentPtr *result);
447 XML_DEPRECATED
448 XMLPUBFUN int
449 			xmlParseElementDecl	(xmlParserCtxtPtr ctxt);
450 XML_DEPRECATED
451 XMLPUBFUN void
452 			xmlParseMarkupDecl	(xmlParserCtxtPtr ctxt);
453 XML_DEPRECATED
454 XMLPUBFUN int
455 			xmlParseCharRef		(xmlParserCtxtPtr ctxt);
456 XML_DEPRECATED
457 XMLPUBFUN xmlEntityPtr
458 			xmlParseEntityRef	(xmlParserCtxtPtr ctxt);
459 XML_DEPRECATED
460 XMLPUBFUN void
461 			xmlParseReference	(xmlParserCtxtPtr ctxt);
462 XML_DEPRECATED
463 XMLPUBFUN void
464 			xmlParsePEReference	(xmlParserCtxtPtr ctxt);
465 XML_DEPRECATED
466 XMLPUBFUN void
467 			xmlParseDocTypeDecl	(xmlParserCtxtPtr ctxt);
468 #ifdef LIBXML_SAX1_ENABLED
469 XML_DEPRECATED
470 XMLPUBFUN const xmlChar *
471 			xmlParseAttribute	(xmlParserCtxtPtr ctxt,
472 						 xmlChar **value);
473 XML_DEPRECATED
474 XMLPUBFUN const xmlChar *
475 			xmlParseStartTag	(xmlParserCtxtPtr ctxt);
476 XML_DEPRECATED
477 XMLPUBFUN void
478 			xmlParseEndTag		(xmlParserCtxtPtr ctxt);
479 #endif /* LIBXML_SAX1_ENABLED */
480 XML_DEPRECATED
481 XMLPUBFUN void
482 			xmlParseCDSect		(xmlParserCtxtPtr ctxt);
483 XMLPUBFUN void
484 			xmlParseContent		(xmlParserCtxtPtr ctxt);
485 XML_DEPRECATED
486 XMLPUBFUN void
487 			xmlParseElement		(xmlParserCtxtPtr ctxt);
488 XML_DEPRECATED
489 XMLPUBFUN xmlChar *
490 			xmlParseVersionNum	(xmlParserCtxtPtr ctxt);
491 XML_DEPRECATED
492 XMLPUBFUN xmlChar *
493 			xmlParseVersionInfo	(xmlParserCtxtPtr ctxt);
494 XML_DEPRECATED
495 XMLPUBFUN xmlChar *
496 			xmlParseEncName		(xmlParserCtxtPtr ctxt);
497 XML_DEPRECATED
498 XMLPUBFUN const xmlChar *
499 			xmlParseEncodingDecl	(xmlParserCtxtPtr ctxt);
500 XML_DEPRECATED
501 XMLPUBFUN int
502 			xmlParseSDDecl		(xmlParserCtxtPtr ctxt);
503 XML_DEPRECATED
504 XMLPUBFUN void
505 			xmlParseXMLDecl		(xmlParserCtxtPtr ctxt);
506 XML_DEPRECATED
507 XMLPUBFUN void
508 			xmlParseTextDecl	(xmlParserCtxtPtr ctxt);
509 XML_DEPRECATED
510 XMLPUBFUN void
511 			xmlParseMisc		(xmlParserCtxtPtr ctxt);
512 XMLPUBFUN void
513 			xmlParseExternalSubset	(xmlParserCtxtPtr ctxt,
514 						 const xmlChar *ExternalID,
515 						 const xmlChar *SystemID);
516 /**
517  * XML_SUBSTITUTE_NONE:
518  *
519  * No entities need to be substituted.
520  */
521 #define XML_SUBSTITUTE_NONE	0
522 /**
523  * XML_SUBSTITUTE_REF:
524  *
525  * General entities need to be substituted.
526  */
527 #define XML_SUBSTITUTE_REF	1
528 /**
529  * XML_SUBSTITUTE_PEREF:
530  *
531  * Parameter entities need to be substituted.
532  */
533 #define XML_SUBSTITUTE_PEREF	2
534 /**
535  * XML_SUBSTITUTE_BOTH:
536  *
537  * Both general and parameter entities need to be substituted (REF | PEREF).
538  */
539 #define XML_SUBSTITUTE_BOTH	3
540 
541 XML_DEPRECATED
542 XMLPUBFUN xmlChar *
543 		xmlStringDecodeEntities		(xmlParserCtxtPtr ctxt,
544 						 const xmlChar *str,
545 						 int what,
546 						 xmlChar end,
547 						 xmlChar  end2,
548 						 xmlChar end3);
549 XML_DEPRECATED
550 XMLPUBFUN xmlChar *
551 		xmlStringLenDecodeEntities	(xmlParserCtxtPtr ctxt,
552 						 const xmlChar *str,
553 						 int len,
554 						 int what,
555 						 xmlChar end,
556 						 xmlChar  end2,
557 						 xmlChar end3);
558 
559 /*
560  * Generated by MACROS at the top of parser.c, cf. PUSH_AND_POP.
561  */
562 XML_DEPRECATED
563 XMLPUBFUN int			nodePush		(xmlParserCtxtPtr ctxt,
564 						 xmlNodePtr value);
565 XML_DEPRECATED
566 XMLPUBFUN xmlNodePtr		nodePop			(xmlParserCtxtPtr ctxt);
567 XMLPUBFUN int			inputPush		(xmlParserCtxtPtr ctxt,
568 						 xmlParserInputPtr value);
569 XMLPUBFUN xmlParserInputPtr	inputPop		(xmlParserCtxtPtr ctxt);
570 XML_DEPRECATED
571 XMLPUBFUN const xmlChar *	namePop			(xmlParserCtxtPtr ctxt);
572 XML_DEPRECATED
573 XMLPUBFUN int			namePush		(xmlParserCtxtPtr ctxt,
574 						 const xmlChar *value);
575 
576 /*
577  * other commodities shared between parser.c and parserInternals.
578  */
579 XML_DEPRECATED
580 XMLPUBFUN int			xmlSkipBlankChars	(xmlParserCtxtPtr ctxt);
581 XML_DEPRECATED
582 XMLPUBFUN int			xmlStringCurrentChar	(xmlParserCtxtPtr ctxt,
583 						 const xmlChar *cur,
584 						 int *len);
585 XML_DEPRECATED
586 XMLPUBFUN void			xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
587 XML_DEPRECATED
588 XMLPUBFUN int			xmlCheckLanguageID	(const xmlChar *lang);
589 
590 /*
591  * Really core function shared with HTML parser.
592  */
593 XML_DEPRECATED
594 XMLPUBFUN int			xmlCurrentChar		(xmlParserCtxtPtr ctxt,
595 						 int *len);
596 XMLPUBFUN int		xmlCopyCharMultiByte	(xmlChar *out,
597 						 int val);
598 XML_DEPRECATED
599 XMLPUBFUN int			xmlCopyChar		(int len,
600 						 xmlChar *out,
601 						 int val);
602 XML_DEPRECATED
603 XMLPUBFUN void			xmlNextChar		(xmlParserCtxtPtr ctxt);
604 XML_DEPRECATED
605 XMLPUBFUN void			xmlParserInputShrink	(xmlParserInputPtr in);
606 
607 #ifdef __cplusplus
608 }
609 #endif
610 #endif /* __XML_PARSER_INTERNALS_H__ */
611