xref: /aosp_15_r20/external/libxml2/parser.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1 /*
2  * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3  *            implemented on top of the SAX interfaces
4  *
5  * References:
6  *   The XML specification:
7  *     http://www.w3.org/TR/REC-xml
8  *   Original 1.0 version:
9  *     http://www.w3.org/TR/1998/REC-xml-19980210
10  *   XML second edition working draft
11  *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12  *
13  * Okay this is a big file, the parser core is around 7000 lines, then it
14  * is followed by the progressive parser top routines, then the various
15  * high level APIs to call the parser and a few miscellaneous functions.
16  * A number of helper functions and deprecated ones have been moved to
17  * parserInternals.c to reduce this file size.
18  * As much as possible the functions are associated with their relative
19  * production in the XML specification. A few productions defining the
20  * different ranges of character are actually implanted either in
21  * parserInternals.h or parserInternals.c
22  * The DOM tree build is realized from the default SAX callbacks in
23  * the module SAX2.c.
24  * The routines doing the validation checks are in valid.c and called either
25  * from the SAX callbacks or as standalone functions using a preparsed
26  * document.
27  *
28  * See Copyright for the status of this software.
29  *
30  * [email protected]
31  */
32 
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37 
38 #define IN_LIBXML
39 #include "libxml.h"
40 
41 #if defined(_WIN32)
42 #define XML_DIR_SEP '\\'
43 #else
44 #define XML_DIR_SEP '/'
45 #endif
46 
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #include <libxml/HTMLparser.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69 
70 #include "private/buf.h"
71 #include "private/dict.h"
72 #include "private/entities.h"
73 #include "private/error.h"
74 #include "private/html.h"
75 #include "private/io.h"
76 #include "private/parser.h"
77 
78 #define NS_INDEX_EMPTY  INT_MAX
79 #define NS_INDEX_XML    (INT_MAX - 1)
80 #define URI_HASH_EMPTY  0xD943A04E
81 #define URI_HASH_XML    0xF0451F02
82 
83 #ifndef STDIN_FILENO
84   #define STDIN_FILENO 0
85 #endif
86 
87 struct _xmlStartTag {
88     const xmlChar *prefix;
89     const xmlChar *URI;
90     int line;
91     int nsNr;
92 };
93 
94 typedef struct {
95     void *saxData;
96     unsigned prefixHashValue;
97     unsigned uriHashValue;
98     unsigned elementId;
99     int oldIndex;
100 } xmlParserNsExtra;
101 
102 typedef struct {
103     unsigned hashValue;
104     int index;
105 } xmlParserNsBucket;
106 
107 struct _xmlParserNsData {
108     xmlParserNsExtra *extra;
109 
110     unsigned hashSize;
111     unsigned hashElems;
112     xmlParserNsBucket *hash;
113 
114     unsigned elementId;
115     int defaultNsIndex;
116     int minNsIndex;
117 };
118 
119 static int
120 xmlParseElementStart(xmlParserCtxtPtr ctxt);
121 
122 static void
123 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
124 
125 static xmlEntityPtr
126 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
127 
128 static const xmlChar *
129 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
130 
131 /************************************************************************
132  *									*
133  *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
134  *									*
135  ************************************************************************/
136 
137 #define XML_PARSER_BIG_ENTITY 1000
138 #define XML_PARSER_LOT_ENTITY 5000
139 
140 /*
141  * Constants for protection against abusive entity expansion
142  * ("billion laughs").
143  */
144 
145 /*
146  * A certain amount of entity expansion which is always allowed.
147  */
148 #define XML_PARSER_ALLOWED_EXPANSION 1000000
149 
150 /*
151  * Fixed cost for each entity reference. This crudely models processing time
152  * as well to protect, for example, against exponential expansion of empty
153  * or very short entities.
154  */
155 #define XML_ENT_FIXED_COST 20
156 
157 /**
158  * xmlParserMaxDepth:
159  *
160  * arbitrary depth limit for the XML documents that we allow to
161  * process. This is not a limitation of the parser but a safety
162  * boundary feature. It can be disabled with the XML_PARSE_HUGE
163  * parser option.
164  */
165 const unsigned int xmlParserMaxDepth = 256;
166 
167 
168 
169 #define XML_PARSER_BIG_BUFFER_SIZE 300
170 #define XML_PARSER_BUFFER_SIZE 100
171 #define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
172 
173 /**
174  * XML_PARSER_CHUNK_SIZE
175  *
176  * When calling GROW that's the minimal amount of data
177  * the parser expected to have received. It is not a hard
178  * limit but an optimization when reading strings like Names
179  * It is not strictly needed as long as inputs available characters
180  * are followed by 0, which should be provided by the I/O level
181  */
182 #define XML_PARSER_CHUNK_SIZE 100
183 
184 /**
185  * xmlParserVersion:
186  *
187  * Constant string describing the internal version of the library
188  */
189 const char *const
190 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
191 
192 /*
193  * List of XML prefixed PI allowed by W3C specs
194  */
195 
196 static const char* const xmlW3CPIs[] = {
197     "xml-stylesheet",
198     "xml-model",
199     NULL
200 };
201 
202 
203 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
204 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
205                                               const xmlChar **str);
206 
207 static void
208 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
209 
210 static int
211 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
212 
213 /************************************************************************
214  *									*
215  *		Some factorized error routines				*
216  *									*
217  ************************************************************************/
218 
219 static void
xmlErrMemory(xmlParserCtxtPtr ctxt)220 xmlErrMemory(xmlParserCtxtPtr ctxt) {
221     xmlCtxtErrMemory(ctxt);
222 }
223 
224 /**
225  * xmlErrAttributeDup:
226  * @ctxt:  an XML parser context
227  * @prefix:  the attribute prefix
228  * @localname:  the attribute localname
229  *
230  * Handle a redefinition of attribute error
231  */
232 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)233 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
234                    const xmlChar * localname)
235 {
236     if (prefix == NULL)
237         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
238                    XML_ERR_FATAL, localname, NULL, NULL, 0,
239                    "Attribute %s redefined\n", localname);
240     else
241         xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
242                    XML_ERR_FATAL, prefix, localname, NULL, 0,
243                    "Attribute %s:%s redefined\n", prefix, localname);
244 }
245 
246 /**
247  * xmlFatalErrMsg:
248  * @ctxt:  an XML parser context
249  * @error:  the error number
250  * @msg:  the error message
251  *
252  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
253  */
254 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)255 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
256                const char *msg)
257 {
258     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
259                NULL, NULL, NULL, 0, "%s", msg);
260 }
261 
262 /**
263  * xmlWarningMsg:
264  * @ctxt:  an XML parser context
265  * @error:  the error number
266  * @msg:  the error message
267  * @str1:  extra data
268  * @str2:  extra data
269  *
270  * Handle a warning.
271  */
272 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)273 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
274               const char *msg, const xmlChar *str1, const xmlChar *str2)
275 {
276     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
277                str1, str2, NULL, 0, msg, str1, str2);
278 }
279 
280 /**
281  * xmlValidityError:
282  * @ctxt:  an XML parser context
283  * @error:  the error number
284  * @msg:  the error message
285  * @str1:  extra data
286  *
287  * Handle a validity error.
288  */
289 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)290 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
291               const char *msg, const xmlChar *str1, const xmlChar *str2)
292 {
293     ctxt->valid = 0;
294 
295     xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
296                str1, str2, NULL, 0, msg, str1, str2);
297 }
298 
299 /**
300  * xmlFatalErrMsgInt:
301  * @ctxt:  an XML parser context
302  * @error:  the error number
303  * @msg:  the error message
304  * @val:  an integer value
305  *
306  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
307  */
308 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)309 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
310                   const char *msg, int val)
311 {
312     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
313                NULL, NULL, NULL, val, msg, val);
314 }
315 
316 /**
317  * xmlFatalErrMsgStrIntStr:
318  * @ctxt:  an XML parser context
319  * @error:  the error number
320  * @msg:  the error message
321  * @str1:  an string info
322  * @val:  an integer value
323  * @str2:  an string info
324  *
325  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
326  */
327 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)328 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
329                   const char *msg, const xmlChar *str1, int val,
330 		  const xmlChar *str2)
331 {
332     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
333                str1, str2, NULL, val, msg, str1, val, str2);
334 }
335 
336 /**
337  * xmlFatalErrMsgStr:
338  * @ctxt:  an XML parser context
339  * @error:  the error number
340  * @msg:  the error message
341  * @val:  a string value
342  *
343  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
344  */
345 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)346 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347                   const char *msg, const xmlChar * val)
348 {
349     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350                val, NULL, NULL, 0, msg, val);
351 }
352 
353 /**
354  * xmlErrMsgStr:
355  * @ctxt:  an XML parser context
356  * @error:  the error number
357  * @msg:  the error message
358  * @val:  a string value
359  *
360  * Handle a non fatal parser error
361  */
362 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)363 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
364                   const char *msg, const xmlChar * val)
365 {
366     xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
367                val, NULL, NULL, 0, msg, val);
368 }
369 
370 /**
371  * xmlNsErr:
372  * @ctxt:  an XML parser context
373  * @error:  the error number
374  * @msg:  the message
375  * @info1:  extra information string
376  * @info2:  extra information string
377  *
378  * Handle a fatal parser error, i.e. violating Well-Formedness constraints
379  */
380 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)381 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
382          const char *msg,
383          const xmlChar * info1, const xmlChar * info2,
384          const xmlChar * info3)
385 {
386     ctxt->nsWellFormed = 0;
387 
388     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
389                info1, info2, info3, 0, msg, info1, info2, info3);
390 }
391 
392 /**
393  * xmlNsWarn
394  * @ctxt:  an XML parser context
395  * @error:  the error number
396  * @msg:  the message
397  * @info1:  extra information string
398  * @info2:  extra information string
399  *
400  * Handle a namespace warning error
401  */
402 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)403 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
404          const char *msg,
405          const xmlChar * info1, const xmlChar * info2,
406          const xmlChar * info3)
407 {
408     xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
409                info1, info2, info3, 0, msg, info1, info2, info3);
410 }
411 
412 static void
xmlSaturatedAdd(unsigned long * dst,unsigned long val)413 xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
414     if (val > ULONG_MAX - *dst)
415         *dst = ULONG_MAX;
416     else
417         *dst += val;
418 }
419 
420 static void
xmlSaturatedAddSizeT(unsigned long * dst,unsigned long val)421 xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
422     if (val > ULONG_MAX - *dst)
423         *dst = ULONG_MAX;
424     else
425         *dst += val;
426 }
427 
428 /**
429  * xmlParserEntityCheck:
430  * @ctxt:  parser context
431  * @extra:  sum of unexpanded entity sizes
432  *
433  * Check for non-linear entity expansion behaviour.
434  *
435  * In some cases like xmlExpandEntityInAttValue, this function is called
436  * for each, possibly nested entity and its unexpanded content length.
437  *
438  * In other cases like xmlParseReference, it's only called for each
439  * top-level entity with its unexpanded content length plus the sum of
440  * the unexpanded content lengths (plus fixed cost) of all nested
441  * entities.
442  *
443  * Summing the unexpanded lengths also adds the length of the reference.
444  * This is by design. Taking the length of the entity name into account
445  * discourages attacks that try to waste CPU time with abusively long
446  * entity names. See test/recurse/lol6.xml for example. Each call also
447  * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
448  * short entities.
449  *
450  * Returns 1 on error, 0 on success.
451  */
452 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)453 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
454 {
455     unsigned long consumed;
456     unsigned long *expandedSize;
457     xmlParserInputPtr input = ctxt->input;
458     xmlEntityPtr entity = input->entity;
459 
460     if ((entity) && (entity->flags & XML_ENT_CHECKED))
461         return(0);
462 
463     /*
464      * Compute total consumed bytes so far, including input streams of
465      * external entities.
466      */
467     consumed = input->consumed;
468     xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
469     xmlSaturatedAdd(&consumed, ctxt->sizeentities);
470 
471     if (entity)
472         expandedSize = &entity->expandedSize;
473     else
474         expandedSize = &ctxt->sizeentcopy;
475 
476     /*
477      * Add extra cost and some fixed cost.
478      */
479     xmlSaturatedAdd(expandedSize, extra);
480     xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
481 
482     /*
483      * It's important to always use saturation arithmetic when tracking
484      * entity sizes to make the size checks reliable. If "sizeentcopy"
485      * overflows, we have to abort.
486      */
487     if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
488         ((*expandedSize >= ULONG_MAX) ||
489          (*expandedSize / ctxt->maxAmpl > consumed))) {
490         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
491                        "Maximum entity amplification factor exceeded, see "
492                        "xmlCtxtSetMaxAmplification.\n");
493         xmlHaltParser(ctxt);
494         return(1);
495     }
496 
497     return(0);
498 }
499 
500 /************************************************************************
501  *									*
502  *		Library wide options					*
503  *									*
504  ************************************************************************/
505 
506 /**
507   * xmlHasFeature:
508   * @feature: the feature to be examined
509   *
510   * Examines if the library has been compiled with a given feature.
511   *
512   * Returns a non-zero value if the feature exist, otherwise zero.
513   * Returns zero (0) if the feature does not exist or an unknown
514   * unknown feature is requested, non-zero otherwise.
515   */
516 int
xmlHasFeature(xmlFeature feature)517 xmlHasFeature(xmlFeature feature)
518 {
519     switch (feature) {
520 	case XML_WITH_THREAD:
521 #ifdef LIBXML_THREAD_ENABLED
522 	    return(1);
523 #else
524 	    return(0);
525 #endif
526         case XML_WITH_TREE:
527             return(1);
528         case XML_WITH_OUTPUT:
529 #ifdef LIBXML_OUTPUT_ENABLED
530             return(1);
531 #else
532             return(0);
533 #endif
534         case XML_WITH_PUSH:
535 #ifdef LIBXML_PUSH_ENABLED
536             return(1);
537 #else
538             return(0);
539 #endif
540         case XML_WITH_READER:
541 #ifdef LIBXML_READER_ENABLED
542             return(1);
543 #else
544             return(0);
545 #endif
546         case XML_WITH_PATTERN:
547 #ifdef LIBXML_PATTERN_ENABLED
548             return(1);
549 #else
550             return(0);
551 #endif
552         case XML_WITH_WRITER:
553 #ifdef LIBXML_WRITER_ENABLED
554             return(1);
555 #else
556             return(0);
557 #endif
558         case XML_WITH_SAX1:
559 #ifdef LIBXML_SAX1_ENABLED
560             return(1);
561 #else
562             return(0);
563 #endif
564         case XML_WITH_HTTP:
565 #ifdef LIBXML_HTTP_ENABLED
566             return(1);
567 #else
568             return(0);
569 #endif
570         case XML_WITH_VALID:
571 #ifdef LIBXML_VALID_ENABLED
572             return(1);
573 #else
574             return(0);
575 #endif
576         case XML_WITH_HTML:
577 #ifdef LIBXML_HTML_ENABLED
578             return(1);
579 #else
580             return(0);
581 #endif
582         case XML_WITH_LEGACY:
583 #ifdef LIBXML_LEGACY_ENABLED
584             return(1);
585 #else
586             return(0);
587 #endif
588         case XML_WITH_C14N:
589 #ifdef LIBXML_C14N_ENABLED
590             return(1);
591 #else
592             return(0);
593 #endif
594         case XML_WITH_CATALOG:
595 #ifdef LIBXML_CATALOG_ENABLED
596             return(1);
597 #else
598             return(0);
599 #endif
600         case XML_WITH_XPATH:
601 #ifdef LIBXML_XPATH_ENABLED
602             return(1);
603 #else
604             return(0);
605 #endif
606         case XML_WITH_XPTR:
607 #ifdef LIBXML_XPTR_ENABLED
608             return(1);
609 #else
610             return(0);
611 #endif
612         case XML_WITH_XINCLUDE:
613 #ifdef LIBXML_XINCLUDE_ENABLED
614             return(1);
615 #else
616             return(0);
617 #endif
618         case XML_WITH_ICONV:
619 #ifdef LIBXML_ICONV_ENABLED
620             return(1);
621 #else
622             return(0);
623 #endif
624         case XML_WITH_ISO8859X:
625 #ifdef LIBXML_ISO8859X_ENABLED
626             return(1);
627 #else
628             return(0);
629 #endif
630         case XML_WITH_UNICODE:
631 #ifdef LIBXML_UNICODE_ENABLED
632             return(1);
633 #else
634             return(0);
635 #endif
636         case XML_WITH_REGEXP:
637 #ifdef LIBXML_REGEXP_ENABLED
638             return(1);
639 #else
640             return(0);
641 #endif
642         case XML_WITH_AUTOMATA:
643 #ifdef LIBXML_REGEXP_ENABLED
644             return(1);
645 #else
646             return(0);
647 #endif
648         case XML_WITH_EXPR:
649 #ifdef LIBXML_EXPR_ENABLED
650             return(1);
651 #else
652             return(0);
653 #endif
654         case XML_WITH_SCHEMAS:
655 #ifdef LIBXML_SCHEMAS_ENABLED
656             return(1);
657 #else
658             return(0);
659 #endif
660         case XML_WITH_SCHEMATRON:
661 #ifdef LIBXML_SCHEMATRON_ENABLED
662             return(1);
663 #else
664             return(0);
665 #endif
666         case XML_WITH_MODULES:
667 #ifdef LIBXML_MODULES_ENABLED
668             return(1);
669 #else
670             return(0);
671 #endif
672         case XML_WITH_DEBUG:
673 #ifdef LIBXML_DEBUG_ENABLED
674             return(1);
675 #else
676             return(0);
677 #endif
678         case XML_WITH_DEBUG_MEM:
679             return(0);
680         case XML_WITH_ZLIB:
681 #ifdef LIBXML_ZLIB_ENABLED
682             return(1);
683 #else
684             return(0);
685 #endif
686         case XML_WITH_LZMA:
687 #ifdef LIBXML_LZMA_ENABLED
688             return(1);
689 #else
690             return(0);
691 #endif
692         case XML_WITH_ICU:
693 #ifdef LIBXML_ICU_ENABLED
694             return(1);
695 #else
696             return(0);
697 #endif
698         default:
699 	    break;
700      }
701      return(0);
702 }
703 
704 /************************************************************************
705  *									*
706  *			Simple string buffer				*
707  *									*
708  ************************************************************************/
709 
710 typedef struct {
711     xmlChar *mem;
712     unsigned size;
713     unsigned cap; /* size < cap */
714     unsigned max; /* size <= max */
715     xmlParserErrors code;
716 } xmlSBuf;
717 
718 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)719 xmlSBufInit(xmlSBuf *buf, unsigned max) {
720     buf->mem = NULL;
721     buf->size = 0;
722     buf->cap = 0;
723     buf->max = max;
724     buf->code = XML_ERR_OK;
725 }
726 
727 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)728 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
729     xmlChar *mem;
730     unsigned cap;
731 
732     if (len >= UINT_MAX / 2 - buf->size) {
733         if (buf->code == XML_ERR_OK)
734             buf->code = XML_ERR_RESOURCE_LIMIT;
735         return(-1);
736     }
737 
738     cap = (buf->size + len) * 2;
739     if (cap < 240)
740         cap = 240;
741 
742     mem = xmlRealloc(buf->mem, cap);
743     if (mem == NULL) {
744         buf->code = XML_ERR_NO_MEMORY;
745         return(-1);
746     }
747 
748     buf->mem = mem;
749     buf->cap = cap;
750 
751     return(0);
752 }
753 
754 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)755 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
756     if (buf->max - buf->size < len) {
757         if (buf->code == XML_ERR_OK)
758             buf->code = XML_ERR_RESOURCE_LIMIT;
759         return;
760     }
761 
762     if (buf->cap - buf->size <= len) {
763         if (xmlSBufGrow(buf, len) < 0)
764             return;
765     }
766 
767     if (len > 0)
768         memcpy(buf->mem + buf->size, str, len);
769     buf->size += len;
770 }
771 
772 static void
xmlSBufAddCString(xmlSBuf * buf,const char * str,unsigned len)773 xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
774     xmlSBufAddString(buf, (const xmlChar *) str, len);
775 }
776 
777 static void
xmlSBufAddChar(xmlSBuf * buf,int c)778 xmlSBufAddChar(xmlSBuf *buf, int c) {
779     xmlChar *end;
780 
781     if (buf->max - buf->size < 4) {
782         if (buf->code == XML_ERR_OK)
783             buf->code = XML_ERR_RESOURCE_LIMIT;
784         return;
785     }
786 
787     if (buf->cap - buf->size <= 4) {
788         if (xmlSBufGrow(buf, 4) < 0)
789             return;
790     }
791 
792     end = buf->mem + buf->size;
793 
794     if (c < 0x80) {
795         *end = (xmlChar) c;
796         buf->size += 1;
797     } else {
798         buf->size += xmlCopyCharMultiByte(end, c);
799     }
800 }
801 
802 static void
xmlSBufAddReplChar(xmlSBuf * buf)803 xmlSBufAddReplChar(xmlSBuf *buf) {
804     xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
805 }
806 
807 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)808 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809     if (buf->code == XML_ERR_NO_MEMORY)
810         xmlCtxtErrMemory(ctxt);
811     else
812         xmlFatalErr(ctxt, buf->code, errMsg);
813 }
814 
815 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)816 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
817               const char *errMsg) {
818     if (buf->mem == NULL) {
819         buf->mem = xmlMalloc(1);
820         if (buf->mem == NULL) {
821             buf->code = XML_ERR_NO_MEMORY;
822         } else {
823             buf->mem[0] = 0;
824         }
825     } else {
826         buf->mem[buf->size] = 0;
827     }
828 
829     if (buf->code == XML_ERR_OK) {
830         if (sizeOut != NULL)
831             *sizeOut = buf->size;
832         return(buf->mem);
833     }
834 
835     xmlSBufReportError(buf, ctxt, errMsg);
836 
837     xmlFree(buf->mem);
838 
839     if (sizeOut != NULL)
840         *sizeOut = 0;
841     return(NULL);
842 }
843 
844 static void
xmlSBufCleanup(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)845 xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
846     if (buf->code != XML_ERR_OK)
847         xmlSBufReportError(buf, ctxt, errMsg);
848 
849     xmlFree(buf->mem);
850 }
851 
852 static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * errMsg)853 xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
854                     const char *errMsg) {
855     int c = str[0];
856     int c1 = str[1];
857 
858     if ((c1 & 0xC0) != 0x80)
859         goto encoding_error;
860 
861     if (c < 0xE0) {
862         /* 2-byte sequence */
863         if (c < 0xC2)
864             goto encoding_error;
865 
866         return(2);
867     } else {
868         int c2 = str[2];
869 
870         if ((c2 & 0xC0) != 0x80)
871             goto encoding_error;
872 
873         if (c < 0xF0) {
874             /* 3-byte sequence */
875             if (c == 0xE0) {
876                 /* overlong */
877                 if (c1 < 0xA0)
878                     goto encoding_error;
879             } else if (c == 0xED) {
880                 /* surrogate */
881                 if (c1 >= 0xA0)
882                     goto encoding_error;
883             } else if (c == 0xEF) {
884                 /* U+FFFE and U+FFFF are invalid Chars */
885                 if ((c1 == 0xBF) && (c2 >= 0xBE))
886                     xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
887             }
888 
889             return(3);
890         } else {
891             /* 4-byte sequence */
892             if ((str[3] & 0xC0) != 0x80)
893                 goto encoding_error;
894             if (c == 0xF0) {
895                 /* overlong */
896                 if (c1 < 0x90)
897                     goto encoding_error;
898             } else if (c >= 0xF4) {
899                 /* greater than 0x10FFFF */
900                 if ((c > 0xF4) || (c1 >= 0x90))
901                     goto encoding_error;
902             }
903 
904             return(4);
905         }
906     }
907 
908 encoding_error:
909     /* Only report the first error */
910     if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
911         xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
912         ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
913     }
914 
915     return(0);
916 }
917 
918 /************************************************************************
919  *									*
920  *		SAX2 defaulted attributes handling			*
921  *									*
922  ************************************************************************/
923 
924 /**
925  * xmlCtxtInitializeLate:
926  * @ctxt:  an XML parser context
927  *
928  * Final initialization of the parser context before starting to parse.
929  *
930  * This accounts for users modifying struct members of parser context
931  * directly.
932  */
933 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)934 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
935     xmlSAXHandlerPtr sax;
936 
937     /* Avoid unused variable warning if features are disabled. */
938     (void) sax;
939 
940     /*
941      * Changing the SAX struct directly is still widespread practice
942      * in internal and external code.
943      */
944     if (ctxt == NULL) return;
945     sax = ctxt->sax;
946 #ifdef LIBXML_SAX1_ENABLED
947     /*
948      * Only enable SAX2 if there SAX2 element handlers, except when there
949      * are no element handlers at all.
950      */
951     if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
952         (sax) &&
953         (sax->initialized == XML_SAX2_MAGIC) &&
954         ((sax->startElementNs != NULL) ||
955          (sax->endElementNs != NULL) ||
956          ((sax->startElement == NULL) && (sax->endElement == NULL))))
957         ctxt->sax2 = 1;
958 #else
959     ctxt->sax2 = 1;
960 #endif /* LIBXML_SAX1_ENABLED */
961 
962     /*
963      * Some users replace the dictionary directly in the context struct.
964      * We really need an API function to do that cleanly.
965      */
966     ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
967     ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
968     ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
969     if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
970 		(ctxt->str_xml_ns == NULL)) {
971         xmlErrMemory(ctxt);
972     }
973 
974     xmlDictSetLimit(ctxt->dict,
975                     (ctxt->options & XML_PARSE_HUGE) ?
976                         0 :
977                         XML_MAX_DICTIONARY_LIMIT);
978 }
979 
980 typedef struct {
981     xmlHashedString prefix;
982     xmlHashedString name;
983     xmlHashedString value;
984     const xmlChar *valueEnd;
985     int external;
986     int expandedSize;
987 } xmlDefAttr;
988 
989 typedef struct _xmlDefAttrs xmlDefAttrs;
990 typedef xmlDefAttrs *xmlDefAttrsPtr;
991 struct _xmlDefAttrs {
992     int nbAttrs;	/* number of defaulted attributes on that element */
993     int maxAttrs;       /* the size of the array */
994 #if __STDC_VERSION__ >= 199901L
995     /* Using a C99 flexible array member avoids UBSan errors. */
996     xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
997 #else
998     xmlDefAttr attrs[1];
999 #endif
1000 };
1001 
1002 /**
1003  * xmlAttrNormalizeSpace:
1004  * @src: the source string
1005  * @dst: the target string
1006  *
1007  * Normalize the space in non CDATA attribute values:
1008  * If the attribute type is not CDATA, then the XML processor MUST further
1009  * process the normalized attribute value by discarding any leading and
1010  * trailing space (#x20) characters, and by replacing sequences of space
1011  * (#x20) characters by a single space (#x20) character.
1012  * Note that the size of dst need to be at least src, and if one doesn't need
1013  * to preserve dst (and it doesn't come from a dictionary or read-only) then
1014  * passing src as dst is just fine.
1015  *
1016  * Returns a pointer to the normalized value (dst) or NULL if no conversion
1017  *         is needed.
1018  */
1019 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1020 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1021 {
1022     if ((src == NULL) || (dst == NULL))
1023         return(NULL);
1024 
1025     while (*src == 0x20) src++;
1026     while (*src != 0) {
1027 	if (*src == 0x20) {
1028 	    while (*src == 0x20) src++;
1029 	    if (*src != 0)
1030 		*dst++ = 0x20;
1031 	} else {
1032 	    *dst++ = *src++;
1033 	}
1034     }
1035     *dst = 0;
1036     if (dst == src)
1037        return(NULL);
1038     return(dst);
1039 }
1040 
1041 /**
1042  * xmlAddDefAttrs:
1043  * @ctxt:  an XML parser context
1044  * @fullname:  the element fullname
1045  * @fullattr:  the attribute fullname
1046  * @value:  the attribute value
1047  *
1048  * Add a defaulted attribute for an element
1049  */
1050 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1051 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1052                const xmlChar *fullname,
1053                const xmlChar *fullattr,
1054                const xmlChar *value) {
1055     xmlDefAttrsPtr defaults;
1056     xmlDefAttr *attr;
1057     int len, expandedSize;
1058     xmlHashedString name;
1059     xmlHashedString prefix;
1060     xmlHashedString hvalue;
1061     const xmlChar *localname;
1062 
1063     /*
1064      * Allows to detect attribute redefinitions
1065      */
1066     if (ctxt->attsSpecial != NULL) {
1067         if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1068 	    return;
1069     }
1070 
1071     if (ctxt->attsDefault == NULL) {
1072         ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1073 	if (ctxt->attsDefault == NULL)
1074 	    goto mem_error;
1075     }
1076 
1077     /*
1078      * split the element name into prefix:localname , the string found
1079      * are within the DTD and then not associated to namespace names.
1080      */
1081     localname = xmlSplitQName3(fullname, &len);
1082     if (localname == NULL) {
1083         name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1084 	prefix.name = NULL;
1085     } else {
1086         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1087 	prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1088         if (prefix.name == NULL)
1089             goto mem_error;
1090     }
1091     if (name.name == NULL)
1092         goto mem_error;
1093 
1094     /*
1095      * make sure there is some storage
1096      */
1097     defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1098     if ((defaults == NULL) ||
1099         (defaults->nbAttrs >= defaults->maxAttrs)) {
1100         xmlDefAttrsPtr temp;
1101         int newSize;
1102 
1103         newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1104         temp = xmlRealloc(defaults,
1105                           sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1106 	if (temp == NULL)
1107 	    goto mem_error;
1108         if (defaults == NULL)
1109             temp->nbAttrs = 0;
1110 	temp->maxAttrs = newSize;
1111         defaults = temp;
1112 	if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1113 	                        defaults, NULL) < 0) {
1114 	    xmlFree(defaults);
1115 	    goto mem_error;
1116 	}
1117     }
1118 
1119     /*
1120      * Split the attribute name into prefix:localname , the string found
1121      * are within the DTD and hen not associated to namespace names.
1122      */
1123     localname = xmlSplitQName3(fullattr, &len);
1124     if (localname == NULL) {
1125         name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1126 	prefix.name = NULL;
1127     } else {
1128         name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1129 	prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1130         if (prefix.name == NULL)
1131             goto mem_error;
1132     }
1133     if (name.name == NULL)
1134         goto mem_error;
1135 
1136     /* intern the string and precompute the end */
1137     len = strlen((const char *) value);
1138     hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1139     if (hvalue.name == NULL)
1140         goto mem_error;
1141 
1142     expandedSize = strlen((const char *) name.name);
1143     if (prefix.name != NULL)
1144         expandedSize += strlen((const char *) prefix.name);
1145     expandedSize += len;
1146 
1147     attr = &defaults->attrs[defaults->nbAttrs++];
1148     attr->name = name;
1149     attr->prefix = prefix;
1150     attr->value = hvalue;
1151     attr->valueEnd = hvalue.name + len;
1152     attr->external = PARSER_EXTERNAL(ctxt);
1153     attr->expandedSize = expandedSize;
1154 
1155     return;
1156 
1157 mem_error:
1158     xmlErrMemory(ctxt);
1159 }
1160 
1161 /**
1162  * xmlAddSpecialAttr:
1163  * @ctxt:  an XML parser context
1164  * @fullname:  the element fullname
1165  * @fullattr:  the attribute fullname
1166  * @type:  the attribute type
1167  *
1168  * Register this attribute type
1169  */
1170 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1171 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1172 		  const xmlChar *fullname,
1173 		  const xmlChar *fullattr,
1174 		  int type)
1175 {
1176     if (ctxt->attsSpecial == NULL) {
1177         ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1178 	if (ctxt->attsSpecial == NULL)
1179 	    goto mem_error;
1180     }
1181 
1182     if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1183                     (void *) (ptrdiff_t) type) < 0)
1184         goto mem_error;
1185     return;
1186 
1187 mem_error:
1188     xmlErrMemory(ctxt);
1189 }
1190 
1191 /**
1192  * xmlCleanSpecialAttrCallback:
1193  *
1194  * Removes CDATA attributes from the special attribute table
1195  */
1196 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1197 xmlCleanSpecialAttrCallback(void *payload, void *data,
1198                             const xmlChar *fullname, const xmlChar *fullattr,
1199                             const xmlChar *unused ATTRIBUTE_UNUSED) {
1200     xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1201 
1202     if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1203         xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1204     }
1205 }
1206 
1207 /**
1208  * xmlCleanSpecialAttr:
1209  * @ctxt:  an XML parser context
1210  *
1211  * Trim the list of attributes defined to remove all those of type
1212  * CDATA as they are not special. This call should be done when finishing
1213  * to parse the DTD and before starting to parse the document root.
1214  */
1215 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1216 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1217 {
1218     if (ctxt->attsSpecial == NULL)
1219         return;
1220 
1221     xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1222 
1223     if (xmlHashSize(ctxt->attsSpecial) == 0) {
1224         xmlHashFree(ctxt->attsSpecial, NULL);
1225         ctxt->attsSpecial = NULL;
1226     }
1227 }
1228 
1229 /**
1230  * xmlCheckLanguageID:
1231  * @lang:  pointer to the string value
1232  *
1233  * DEPRECATED: Internal function, do not use.
1234  *
1235  * Checks that the value conforms to the LanguageID production:
1236  *
1237  * NOTE: this is somewhat deprecated, those productions were removed from
1238  *       the XML Second edition.
1239  *
1240  * [33] LanguageID ::= Langcode ('-' Subcode)*
1241  * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1242  * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1243  * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1244  * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1245  * [38] Subcode ::= ([a-z] | [A-Z])+
1246  *
1247  * The current REC reference the successors of RFC 1766, currently 5646
1248  *
1249  * http://www.rfc-editor.org/rfc/rfc5646.txt
1250  * langtag       = language
1251  *                 ["-" script]
1252  *                 ["-" region]
1253  *                 *("-" variant)
1254  *                 *("-" extension)
1255  *                 ["-" privateuse]
1256  * language      = 2*3ALPHA            ; shortest ISO 639 code
1257  *                 ["-" extlang]       ; sometimes followed by
1258  *                                     ; extended language subtags
1259  *               / 4ALPHA              ; or reserved for future use
1260  *               / 5*8ALPHA            ; or registered language subtag
1261  *
1262  * extlang       = 3ALPHA              ; selected ISO 639 codes
1263  *                 *2("-" 3ALPHA)      ; permanently reserved
1264  *
1265  * script        = 4ALPHA              ; ISO 15924 code
1266  *
1267  * region        = 2ALPHA              ; ISO 3166-1 code
1268  *               / 3DIGIT              ; UN M.49 code
1269  *
1270  * variant       = 5*8alphanum         ; registered variants
1271  *               / (DIGIT 3alphanum)
1272  *
1273  * extension     = singleton 1*("-" (2*8alphanum))
1274  *
1275  *                                     ; Single alphanumerics
1276  *                                     ; "x" reserved for private use
1277  * singleton     = DIGIT               ; 0 - 9
1278  *               / %x41-57             ; A - W
1279  *               / %x59-5A             ; Y - Z
1280  *               / %x61-77             ; a - w
1281  *               / %x79-7A             ; y - z
1282  *
1283  * it sounds right to still allow Irregular i-xxx IANA and user codes too
1284  * The parser below doesn't try to cope with extension or privateuse
1285  * that could be added but that's not interoperable anyway
1286  *
1287  * Returns 1 if correct 0 otherwise
1288  **/
1289 int
xmlCheckLanguageID(const xmlChar * lang)1290 xmlCheckLanguageID(const xmlChar * lang)
1291 {
1292     const xmlChar *cur = lang, *nxt;
1293 
1294     if (cur == NULL)
1295         return (0);
1296     if (((cur[0] == 'i') && (cur[1] == '-')) ||
1297         ((cur[0] == 'I') && (cur[1] == '-')) ||
1298         ((cur[0] == 'x') && (cur[1] == '-')) ||
1299         ((cur[0] == 'X') && (cur[1] == '-'))) {
1300         /*
1301          * Still allow IANA code and user code which were coming
1302          * from the previous version of the XML-1.0 specification
1303          * it's deprecated but we should not fail
1304          */
1305         cur += 2;
1306         while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1307                ((cur[0] >= 'a') && (cur[0] <= 'z')))
1308             cur++;
1309         return(cur[0] == 0);
1310     }
1311     nxt = cur;
1312     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1313            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1314            nxt++;
1315     if (nxt - cur >= 4) {
1316         /*
1317          * Reserved
1318          */
1319         if ((nxt - cur > 8) || (nxt[0] != 0))
1320             return(0);
1321         return(1);
1322     }
1323     if (nxt - cur < 2)
1324         return(0);
1325     /* we got an ISO 639 code */
1326     if (nxt[0] == 0)
1327         return(1);
1328     if (nxt[0] != '-')
1329         return(0);
1330 
1331     nxt++;
1332     cur = nxt;
1333     /* now we can have extlang or script or region or variant */
1334     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1335         goto region_m49;
1336 
1337     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1338            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1339            nxt++;
1340     if (nxt - cur == 4)
1341         goto script;
1342     if (nxt - cur == 2)
1343         goto region;
1344     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1345         goto variant;
1346     if (nxt - cur != 3)
1347         return(0);
1348     /* we parsed an extlang */
1349     if (nxt[0] == 0)
1350         return(1);
1351     if (nxt[0] != '-')
1352         return(0);
1353 
1354     nxt++;
1355     cur = nxt;
1356     /* now we can have script or region or variant */
1357     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1358         goto region_m49;
1359 
1360     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1361            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1362            nxt++;
1363     if (nxt - cur == 2)
1364         goto region;
1365     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1366         goto variant;
1367     if (nxt - cur != 4)
1368         return(0);
1369     /* we parsed a script */
1370 script:
1371     if (nxt[0] == 0)
1372         return(1);
1373     if (nxt[0] != '-')
1374         return(0);
1375 
1376     nxt++;
1377     cur = nxt;
1378     /* now we can have region or variant */
1379     if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1380         goto region_m49;
1381 
1382     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1383            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1384            nxt++;
1385 
1386     if ((nxt - cur >= 5) && (nxt - cur <= 8))
1387         goto variant;
1388     if (nxt - cur != 2)
1389         return(0);
1390     /* we parsed a region */
1391 region:
1392     if (nxt[0] == 0)
1393         return(1);
1394     if (nxt[0] != '-')
1395         return(0);
1396 
1397     nxt++;
1398     cur = nxt;
1399     /* now we can just have a variant */
1400     while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1401            ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1402            nxt++;
1403 
1404     if ((nxt - cur < 5) || (nxt - cur > 8))
1405         return(0);
1406 
1407     /* we parsed a variant */
1408 variant:
1409     if (nxt[0] == 0)
1410         return(1);
1411     if (nxt[0] != '-')
1412         return(0);
1413     /* extensions and private use subtags not checked */
1414     return (1);
1415 
1416 region_m49:
1417     if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1418         ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1419         nxt += 3;
1420         goto region;
1421     }
1422     return(0);
1423 }
1424 
1425 /************************************************************************
1426  *									*
1427  *		Parser stacks related functions and macros		*
1428  *									*
1429  ************************************************************************/
1430 
1431 static xmlChar *
1432 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1433 
1434 /**
1435  * xmlParserNsCreate:
1436  *
1437  * Create a new namespace database.
1438  *
1439  * Returns the new obejct.
1440  */
1441 xmlParserNsData *
xmlParserNsCreate(void)1442 xmlParserNsCreate(void) {
1443     xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1444 
1445     if (nsdb == NULL)
1446         return(NULL);
1447     memset(nsdb, 0, sizeof(*nsdb));
1448     nsdb->defaultNsIndex = INT_MAX;
1449 
1450     return(nsdb);
1451 }
1452 
1453 /**
1454  * xmlParserNsFree:
1455  * @nsdb: namespace database
1456  *
1457  * Free a namespace database.
1458  */
1459 void
xmlParserNsFree(xmlParserNsData * nsdb)1460 xmlParserNsFree(xmlParserNsData *nsdb) {
1461     if (nsdb == NULL)
1462         return;
1463 
1464     xmlFree(nsdb->extra);
1465     xmlFree(nsdb->hash);
1466     xmlFree(nsdb);
1467 }
1468 
1469 /**
1470  * xmlParserNsReset:
1471  * @nsdb: namespace database
1472  *
1473  * Reset a namespace database.
1474  */
1475 static void
xmlParserNsReset(xmlParserNsData * nsdb)1476 xmlParserNsReset(xmlParserNsData *nsdb) {
1477     if (nsdb == NULL)
1478         return;
1479 
1480     nsdb->hashElems = 0;
1481     nsdb->elementId = 0;
1482     nsdb->defaultNsIndex = INT_MAX;
1483 
1484     if (nsdb->hash)
1485         memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1486 }
1487 
1488 /**
1489  * xmlParserStartElement:
1490  * @nsdb: namespace database
1491  *
1492  * Signal that a new element has started.
1493  *
1494  * Returns 0 on success, -1 if the element counter overflowed.
1495  */
1496 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1497 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1498     if (nsdb->elementId == UINT_MAX)
1499         return(-1);
1500     nsdb->elementId++;
1501 
1502     return(0);
1503 }
1504 
1505 /**
1506  * xmlParserNsLookup:
1507  * @ctxt: parser context
1508  * @prefix: namespace prefix
1509  * @bucketPtr: optional bucket (return value)
1510  *
1511  * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1512  * be set to the matching bucket, or the first empty bucket if no match
1513  * was found.
1514  *
1515  * Returns the namespace index on success, INT_MAX if no namespace was
1516  * found.
1517  */
1518 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1519 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1520                   xmlParserNsBucket **bucketPtr) {
1521     xmlParserNsBucket *bucket, *tombstone;
1522     unsigned index, hashValue;
1523 
1524     if (prefix->name == NULL)
1525         return(ctxt->nsdb->defaultNsIndex);
1526 
1527     if (ctxt->nsdb->hashSize == 0)
1528         return(INT_MAX);
1529 
1530     hashValue = prefix->hashValue;
1531     index = hashValue & (ctxt->nsdb->hashSize - 1);
1532     bucket = &ctxt->nsdb->hash[index];
1533     tombstone = NULL;
1534 
1535     while (bucket->hashValue) {
1536         if (bucket->index == INT_MAX) {
1537             if (tombstone == NULL)
1538                 tombstone = bucket;
1539         } else if (bucket->hashValue == hashValue) {
1540             if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1541                 if (bucketPtr != NULL)
1542                     *bucketPtr = bucket;
1543                 return(bucket->index);
1544             }
1545         }
1546 
1547         index++;
1548         bucket++;
1549         if (index == ctxt->nsdb->hashSize) {
1550             index = 0;
1551             bucket = ctxt->nsdb->hash;
1552         }
1553     }
1554 
1555     if (bucketPtr != NULL)
1556         *bucketPtr = tombstone ? tombstone : bucket;
1557     return(INT_MAX);
1558 }
1559 
1560 /**
1561  * xmlParserNsLookupUri:
1562  * @ctxt: parser context
1563  * @prefix: namespace prefix
1564  *
1565  * Lookup namespace URI with given prefix.
1566  *
1567  * Returns the namespace URI on success, NULL if no namespace was found.
1568  */
1569 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1570 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1571     const xmlChar *ret;
1572     int nsIndex;
1573 
1574     if (prefix->name == ctxt->str_xml)
1575         return(ctxt->str_xml_ns);
1576 
1577     /*
1578      * minNsIndex is used when building an entity tree. We must
1579      * ignore namespaces declared outside the entity.
1580      */
1581     nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1582     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1583         return(NULL);
1584 
1585     ret = ctxt->nsTab[nsIndex * 2 + 1];
1586     if (ret[0] == 0)
1587         ret = NULL;
1588     return(ret);
1589 }
1590 
1591 /**
1592  * xmlParserNsLookupSax:
1593  * @ctxt: parser context
1594  * @prefix: namespace prefix
1595  *
1596  * Lookup extra data for the given prefix. This returns data stored
1597  * with xmlParserNsUdpateSax.
1598  *
1599  * Returns the data on success, NULL if no namespace was found.
1600  */
1601 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1602 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1603     xmlHashedString hprefix;
1604     int nsIndex;
1605 
1606     if (prefix == ctxt->str_xml)
1607         return(NULL);
1608 
1609     hprefix.name = prefix;
1610     if (prefix != NULL)
1611         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1612     else
1613         hprefix.hashValue = 0;
1614     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1615     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1616         return(NULL);
1617 
1618     return(ctxt->nsdb->extra[nsIndex].saxData);
1619 }
1620 
1621 /**
1622  * xmlParserNsUpdateSax:
1623  * @ctxt: parser context
1624  * @prefix: namespace prefix
1625  * @saxData: extra data for SAX handler
1626  *
1627  * Sets or updates extra data for the given prefix. This value will be
1628  * returned by xmlParserNsLookupSax as long as the namespace with the
1629  * given prefix is in scope.
1630  *
1631  * Returns the data on success, NULL if no namespace was found.
1632  */
1633 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1634 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1635                      void *saxData) {
1636     xmlHashedString hprefix;
1637     int nsIndex;
1638 
1639     if (prefix == ctxt->str_xml)
1640         return(-1);
1641 
1642     hprefix.name = prefix;
1643     if (prefix != NULL)
1644         hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1645     else
1646         hprefix.hashValue = 0;
1647     nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1648     if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1649         return(-1);
1650 
1651     ctxt->nsdb->extra[nsIndex].saxData = saxData;
1652     return(0);
1653 }
1654 
1655 /**
1656  * xmlParserNsGrow:
1657  * @ctxt: parser context
1658  *
1659  * Grows the namespace tables.
1660  *
1661  * Returns 0 on success, -1 if a memory allocation failed.
1662  */
1663 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1664 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1665     const xmlChar **table;
1666     xmlParserNsExtra *extra;
1667     int newSize;
1668 
1669     if (ctxt->nsMax > INT_MAX / 2)
1670         goto error;
1671     newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1672 
1673     table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1674     if (table == NULL)
1675         goto error;
1676     ctxt->nsTab = table;
1677 
1678     extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1679     if (extra == NULL)
1680         goto error;
1681     ctxt->nsdb->extra = extra;
1682 
1683     ctxt->nsMax = newSize;
1684     return(0);
1685 
1686 error:
1687     xmlErrMemory(ctxt);
1688     return(-1);
1689 }
1690 
1691 /**
1692  * xmlParserNsPush:
1693  * @ctxt: parser context
1694  * @prefix: prefix with hash value
1695  * @uri: uri with hash value
1696  * @saxData: extra data for SAX handler
1697  * @defAttr: whether the namespace comes from a default attribute
1698  *
1699  * Push a new namespace on the table.
1700  *
1701  * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1702  * -1 if a memory allocation failed.
1703  */
1704 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1705 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1706                 const xmlHashedString *uri, void *saxData, int defAttr) {
1707     xmlParserNsBucket *bucket = NULL;
1708     xmlParserNsExtra *extra;
1709     const xmlChar **ns;
1710     unsigned hashValue, nsIndex, oldIndex;
1711 
1712     if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1713         return(0);
1714 
1715     if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1716         xmlErrMemory(ctxt);
1717         return(-1);
1718     }
1719 
1720     /*
1721      * Default namespace and 'xml' namespace
1722      */
1723     if ((prefix == NULL) || (prefix->name == NULL)) {
1724         oldIndex = ctxt->nsdb->defaultNsIndex;
1725 
1726         if (oldIndex != INT_MAX) {
1727             extra = &ctxt->nsdb->extra[oldIndex];
1728 
1729             if (extra->elementId == ctxt->nsdb->elementId) {
1730                 if (defAttr == 0)
1731                     xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1732                 return(0);
1733             }
1734 
1735             if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1736                 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1737                 return(0);
1738         }
1739 
1740         ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1741         goto populate_entry;
1742     }
1743 
1744     /*
1745      * Hash table lookup
1746      */
1747     oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1748     if (oldIndex != INT_MAX) {
1749         extra = &ctxt->nsdb->extra[oldIndex];
1750 
1751         /*
1752          * Check for duplicate definitions on the same element.
1753          */
1754         if (extra->elementId == ctxt->nsdb->elementId) {
1755             if (defAttr == 0)
1756                 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1757             return(0);
1758         }
1759 
1760         if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1761             (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1762             return(0);
1763 
1764         bucket->index = ctxt->nsNr;
1765         goto populate_entry;
1766     }
1767 
1768     /*
1769      * Insert new bucket
1770      */
1771 
1772     hashValue = prefix->hashValue;
1773 
1774     /*
1775      * Grow hash table, 50% fill factor
1776      */
1777     if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1778         xmlParserNsBucket *newHash;
1779         unsigned newSize, i, index;
1780 
1781         if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1782             xmlErrMemory(ctxt);
1783             return(-1);
1784         }
1785         newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1786         newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1787         if (newHash == NULL) {
1788             xmlErrMemory(ctxt);
1789             return(-1);
1790         }
1791         memset(newHash, 0, newSize * sizeof(newHash[0]));
1792 
1793         for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1794             unsigned hv = ctxt->nsdb->hash[i].hashValue;
1795             unsigned newIndex;
1796 
1797             if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1798                 continue;
1799             newIndex = hv & (newSize - 1);
1800 
1801             while (newHash[newIndex].hashValue != 0) {
1802                 newIndex++;
1803                 if (newIndex == newSize)
1804                     newIndex = 0;
1805             }
1806 
1807             newHash[newIndex] = ctxt->nsdb->hash[i];
1808         }
1809 
1810         xmlFree(ctxt->nsdb->hash);
1811         ctxt->nsdb->hash = newHash;
1812         ctxt->nsdb->hashSize = newSize;
1813 
1814         /*
1815          * Relookup
1816          */
1817         index = hashValue & (newSize - 1);
1818 
1819         while (newHash[index].hashValue != 0) {
1820             index++;
1821             if (index == newSize)
1822                 index = 0;
1823         }
1824 
1825         bucket = &newHash[index];
1826     }
1827 
1828     bucket->hashValue = hashValue;
1829     bucket->index = ctxt->nsNr;
1830     ctxt->nsdb->hashElems++;
1831     oldIndex = INT_MAX;
1832 
1833 populate_entry:
1834     nsIndex = ctxt->nsNr;
1835 
1836     ns = &ctxt->nsTab[nsIndex * 2];
1837     ns[0] = prefix ? prefix->name : NULL;
1838     ns[1] = uri->name;
1839 
1840     extra = &ctxt->nsdb->extra[nsIndex];
1841     extra->saxData = saxData;
1842     extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1843     extra->uriHashValue = uri->hashValue;
1844     extra->elementId = ctxt->nsdb->elementId;
1845     extra->oldIndex = oldIndex;
1846 
1847     ctxt->nsNr++;
1848 
1849     return(1);
1850 }
1851 
1852 /**
1853  * xmlParserNsPop:
1854  * @ctxt: an XML parser context
1855  * @nr:  the number to pop
1856  *
1857  * Pops the top @nr namespaces and restores the hash table.
1858  *
1859  * Returns the number of namespaces popped.
1860  */
1861 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1862 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1863 {
1864     int i;
1865 
1866     /* assert(nr <= ctxt->nsNr); */
1867 
1868     for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1869         const xmlChar *prefix = ctxt->nsTab[i * 2];
1870         xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1871 
1872         if (prefix == NULL) {
1873             ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1874         } else {
1875             xmlHashedString hprefix;
1876             xmlParserNsBucket *bucket = NULL;
1877 
1878             hprefix.name = prefix;
1879             hprefix.hashValue = extra->prefixHashValue;
1880             xmlParserNsLookup(ctxt, &hprefix, &bucket);
1881             /* assert(bucket && bucket->hashValue); */
1882             bucket->index = extra->oldIndex;
1883         }
1884     }
1885 
1886     ctxt->nsNr -= nr;
1887     return(nr);
1888 }
1889 
1890 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1891 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1892     const xmlChar **atts;
1893     unsigned *attallocs;
1894     int maxatts;
1895 
1896     if (nr + 5 > ctxt->maxatts) {
1897 	maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1898 	atts = (const xmlChar **) xmlMalloc(
1899 				     maxatts * sizeof(const xmlChar *));
1900 	if (atts == NULL) goto mem_error;
1901 	attallocs = xmlRealloc(ctxt->attallocs,
1902                                (maxatts / 5) * sizeof(attallocs[0]));
1903 	if (attallocs == NULL) {
1904             xmlFree(atts);
1905             goto mem_error;
1906         }
1907         if (ctxt->maxatts > 0)
1908             memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1909         xmlFree(ctxt->atts);
1910 	ctxt->atts = atts;
1911 	ctxt->attallocs = attallocs;
1912 	ctxt->maxatts = maxatts;
1913     }
1914     return(ctxt->maxatts);
1915 mem_error:
1916     xmlErrMemory(ctxt);
1917     return(-1);
1918 }
1919 
1920 /**
1921  * inputPush:
1922  * @ctxt:  an XML parser context
1923  * @value:  the parser input
1924  *
1925  * Pushes a new parser input on top of the input stack
1926  *
1927  * Returns -1 in case of error, the index in the stack otherwise
1928  */
1929 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1930 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1931 {
1932     char *directory = NULL;
1933 
1934     if ((ctxt == NULL) || (value == NULL))
1935         return(-1);
1936 
1937     if (ctxt->inputNr >= ctxt->inputMax) {
1938         size_t newSize = ctxt->inputMax * 2;
1939         xmlParserInputPtr *tmp;
1940 
1941         tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1942                                                newSize * sizeof(*tmp));
1943         if (tmp == NULL) {
1944             xmlErrMemory(ctxt);
1945             return (-1);
1946         }
1947         ctxt->inputTab = tmp;
1948         ctxt->inputMax = newSize;
1949     }
1950 
1951     if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1952         directory = xmlParserGetDirectory(value->filename);
1953         if (directory == NULL) {
1954             xmlErrMemory(ctxt);
1955             return(-1);
1956         }
1957     }
1958 
1959     if (ctxt->input_id >= INT_MAX) {
1960         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1961         return(-1);
1962     }
1963 
1964     ctxt->inputTab[ctxt->inputNr] = value;
1965     ctxt->input = value;
1966 
1967     if (ctxt->inputNr == 0) {
1968         xmlFree(ctxt->directory);
1969         ctxt->directory = directory;
1970     }
1971 
1972     /*
1973      * Internally, the input ID is only used to detect parameter entity
1974      * boundaries. But there are entity loaders in downstream code that
1975      * detect the main document by checking for "input_id == 1".
1976      */
1977     value->id = ctxt->input_id++;
1978 
1979     return(ctxt->inputNr++);
1980 }
1981 /**
1982  * inputPop:
1983  * @ctxt: an XML parser context
1984  *
1985  * Pops the top parser input from the input stack
1986  *
1987  * Returns the input just removed
1988  */
1989 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1990 inputPop(xmlParserCtxtPtr ctxt)
1991 {
1992     xmlParserInputPtr ret;
1993 
1994     if (ctxt == NULL)
1995         return(NULL);
1996     if (ctxt->inputNr <= 0)
1997         return (NULL);
1998     ctxt->inputNr--;
1999     if (ctxt->inputNr > 0)
2000         ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2001     else
2002         ctxt->input = NULL;
2003     ret = ctxt->inputTab[ctxt->inputNr];
2004     ctxt->inputTab[ctxt->inputNr] = NULL;
2005     return (ret);
2006 }
2007 /**
2008  * nodePush:
2009  * @ctxt:  an XML parser context
2010  * @value:  the element node
2011  *
2012  * DEPRECATED: Internal function, do not use.
2013  *
2014  * Pushes a new element node on top of the node stack
2015  *
2016  * Returns -1 in case of error, the index in the stack otherwise
2017  */
2018 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)2019 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2020 {
2021     int maxDepth;
2022 
2023     if (ctxt == NULL)
2024         return(0);
2025 
2026     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2027     if (ctxt->nodeNr > maxDepth) {
2028         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2029                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2030                 ctxt->nodeNr);
2031         xmlHaltParser(ctxt);
2032         return(-1);
2033     }
2034     if (ctxt->nodeNr >= ctxt->nodeMax) {
2035         xmlNodePtr *tmp;
2036 
2037 	tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2038                                       ctxt->nodeMax * 2 *
2039                                       sizeof(ctxt->nodeTab[0]));
2040         if (tmp == NULL) {
2041             xmlErrMemory(ctxt);
2042             return (-1);
2043         }
2044         ctxt->nodeTab = tmp;
2045 	ctxt->nodeMax *= 2;
2046     }
2047     ctxt->nodeTab[ctxt->nodeNr] = value;
2048     ctxt->node = value;
2049     return (ctxt->nodeNr++);
2050 }
2051 
2052 /**
2053  * nodePop:
2054  * @ctxt: an XML parser context
2055  *
2056  * DEPRECATED: Internal function, do not use.
2057  *
2058  * Pops the top element node from the node stack
2059  *
2060  * Returns the node just removed
2061  */
2062 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2063 nodePop(xmlParserCtxtPtr ctxt)
2064 {
2065     xmlNodePtr ret;
2066 
2067     if (ctxt == NULL) return(NULL);
2068     if (ctxt->nodeNr <= 0)
2069         return (NULL);
2070     ctxt->nodeNr--;
2071     if (ctxt->nodeNr > 0)
2072         ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2073     else
2074         ctxt->node = NULL;
2075     ret = ctxt->nodeTab[ctxt->nodeNr];
2076     ctxt->nodeTab[ctxt->nodeNr] = NULL;
2077     return (ret);
2078 }
2079 
2080 /**
2081  * nameNsPush:
2082  * @ctxt:  an XML parser context
2083  * @value:  the element name
2084  * @prefix:  the element prefix
2085  * @URI:  the element namespace name
2086  * @line:  the current line number for error messages
2087  * @nsNr:  the number of namespaces pushed on the namespace table
2088  *
2089  * Pushes a new element name/prefix/URL on top of the name stack
2090  *
2091  * Returns -1 in case of error, the index in the stack otherwise
2092  */
2093 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2094 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2095            const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2096 {
2097     xmlStartTag *tag;
2098 
2099     if (ctxt->nameNr >= ctxt->nameMax) {
2100         const xmlChar * *tmp;
2101         xmlStartTag *tmp2;
2102         ctxt->nameMax *= 2;
2103         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2104                                     ctxt->nameMax *
2105                                     sizeof(ctxt->nameTab[0]));
2106         if (tmp == NULL) {
2107 	    ctxt->nameMax /= 2;
2108 	    goto mem_error;
2109         }
2110 	ctxt->nameTab = tmp;
2111         tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2112                                     ctxt->nameMax *
2113                                     sizeof(ctxt->pushTab[0]));
2114         if (tmp2 == NULL) {
2115 	    ctxt->nameMax /= 2;
2116 	    goto mem_error;
2117         }
2118 	ctxt->pushTab = tmp2;
2119     } else if (ctxt->pushTab == NULL) {
2120         ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2121                                             sizeof(ctxt->pushTab[0]));
2122         if (ctxt->pushTab == NULL)
2123             goto mem_error;
2124     }
2125     ctxt->nameTab[ctxt->nameNr] = value;
2126     ctxt->name = value;
2127     tag = &ctxt->pushTab[ctxt->nameNr];
2128     tag->prefix = prefix;
2129     tag->URI = URI;
2130     tag->line = line;
2131     tag->nsNr = nsNr;
2132     return (ctxt->nameNr++);
2133 mem_error:
2134     xmlErrMemory(ctxt);
2135     return (-1);
2136 }
2137 #ifdef LIBXML_PUSH_ENABLED
2138 /**
2139  * nameNsPop:
2140  * @ctxt: an XML parser context
2141  *
2142  * Pops the top element/prefix/URI name from the name stack
2143  *
2144  * Returns the name just removed
2145  */
2146 static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)2147 nameNsPop(xmlParserCtxtPtr ctxt)
2148 {
2149     const xmlChar *ret;
2150 
2151     if (ctxt->nameNr <= 0)
2152         return (NULL);
2153     ctxt->nameNr--;
2154     if (ctxt->nameNr > 0)
2155         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2156     else
2157         ctxt->name = NULL;
2158     ret = ctxt->nameTab[ctxt->nameNr];
2159     ctxt->nameTab[ctxt->nameNr] = NULL;
2160     return (ret);
2161 }
2162 #endif /* LIBXML_PUSH_ENABLED */
2163 
2164 /**
2165  * namePush:
2166  * @ctxt:  an XML parser context
2167  * @value:  the element name
2168  *
2169  * DEPRECATED: Internal function, do not use.
2170  *
2171  * Pushes a new element name on top of the name stack
2172  *
2173  * Returns -1 in case of error, the index in the stack otherwise
2174  */
2175 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2176 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2177 {
2178     if (ctxt == NULL) return (-1);
2179 
2180     if (ctxt->nameNr >= ctxt->nameMax) {
2181         const xmlChar * *tmp;
2182         tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2183                                     ctxt->nameMax * 2 *
2184                                     sizeof(ctxt->nameTab[0]));
2185         if (tmp == NULL) {
2186 	    goto mem_error;
2187         }
2188 	ctxt->nameTab = tmp;
2189         ctxt->nameMax *= 2;
2190     }
2191     ctxt->nameTab[ctxt->nameNr] = value;
2192     ctxt->name = value;
2193     return (ctxt->nameNr++);
2194 mem_error:
2195     xmlErrMemory(ctxt);
2196     return (-1);
2197 }
2198 
2199 /**
2200  * namePop:
2201  * @ctxt: an XML parser context
2202  *
2203  * DEPRECATED: Internal function, do not use.
2204  *
2205  * Pops the top element name from the name stack
2206  *
2207  * Returns the name just removed
2208  */
2209 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2210 namePop(xmlParserCtxtPtr ctxt)
2211 {
2212     const xmlChar *ret;
2213 
2214     if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2215         return (NULL);
2216     ctxt->nameNr--;
2217     if (ctxt->nameNr > 0)
2218         ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2219     else
2220         ctxt->name = NULL;
2221     ret = ctxt->nameTab[ctxt->nameNr];
2222     ctxt->nameTab[ctxt->nameNr] = NULL;
2223     return (ret);
2224 }
2225 
spacePush(xmlParserCtxtPtr ctxt,int val)2226 static int spacePush(xmlParserCtxtPtr ctxt, int val) {
2227     if (ctxt->spaceNr >= ctxt->spaceMax) {
2228         int *tmp;
2229 
2230 	ctxt->spaceMax *= 2;
2231         tmp = (int *) xmlRealloc(ctxt->spaceTab,
2232 	                         ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2233         if (tmp == NULL) {
2234 	    xmlErrMemory(ctxt);
2235 	    ctxt->spaceMax /=2;
2236 	    return(-1);
2237 	}
2238 	ctxt->spaceTab = tmp;
2239     }
2240     ctxt->spaceTab[ctxt->spaceNr] = val;
2241     ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2242     return(ctxt->spaceNr++);
2243 }
2244 
spacePop(xmlParserCtxtPtr ctxt)2245 static int spacePop(xmlParserCtxtPtr ctxt) {
2246     int ret;
2247     if (ctxt->spaceNr <= 0) return(0);
2248     ctxt->spaceNr--;
2249     if (ctxt->spaceNr > 0)
2250 	ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2251     else
2252         ctxt->space = &ctxt->spaceTab[0];
2253     ret = ctxt->spaceTab[ctxt->spaceNr];
2254     ctxt->spaceTab[ctxt->spaceNr] = -1;
2255     return(ret);
2256 }
2257 
2258 /*
2259  * Macros for accessing the content. Those should be used only by the parser,
2260  * and not exported.
2261  *
2262  * Dirty macros, i.e. one often need to make assumption on the context to
2263  * use them
2264  *
2265  *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2266  *           To be used with extreme caution since operations consuming
2267  *           characters may move the input buffer to a different location !
2268  *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2269  *           This should be used internally by the parser
2270  *           only to compare to ASCII values otherwise it would break when
2271  *           running with UTF-8 encoding.
2272  *   RAW     same as CUR but in the input buffer, bypass any token
2273  *           extraction that may have been done
2274  *   NXT(n)  returns the n'th next xmlChar. Same as CUR is should be used only
2275  *           to compare on ASCII based substring.
2276  *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2277  *           strings without newlines within the parser.
2278  *   NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2279  *           defined char within the parser.
2280  * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2281  *
2282  *   NEXT    Skip to the next character, this does the proper decoding
2283  *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2284  *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2285  *   CUR_SCHAR  same but operate on a string instead of the context
2286  *   COPY_BUF  copy the current unicode char to the target buffer, increment
2287  *            the index
2288  *   GROW, SHRINK  handling of input buffers
2289  */
2290 
2291 #define RAW (*ctxt->input->cur)
2292 #define CUR (*ctxt->input->cur)
2293 #define NXT(val) ctxt->input->cur[(val)]
2294 #define CUR_PTR ctxt->input->cur
2295 #define BASE_PTR ctxt->input->base
2296 
2297 #define CMP4( s, c1, c2, c3, c4 ) \
2298   ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2299     ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2300 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2301   ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2302 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2303   ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2304 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2305   ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2306 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2307   ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2308 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2309   ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2310     ((unsigned char *) s)[ 8 ] == c9 )
2311 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2312   ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2313     ((unsigned char *) s)[ 9 ] == c10 )
2314 
2315 #define SKIP(val) do {							\
2316     ctxt->input->cur += (val),ctxt->input->col+=(val);			\
2317     if (*ctxt->input->cur == 0)						\
2318         xmlParserGrow(ctxt);						\
2319   } while (0)
2320 
2321 #define SKIPL(val) do {							\
2322     int skipl;								\
2323     for(skipl=0; skipl<val; skipl++) {					\
2324 	if (*(ctxt->input->cur) == '\n') {				\
2325 	ctxt->input->line++; ctxt->input->col = 1;			\
2326 	} else ctxt->input->col++;					\
2327 	ctxt->input->cur++;						\
2328     }									\
2329     if (*ctxt->input->cur == 0)						\
2330         xmlParserGrow(ctxt);						\
2331   } while (0)
2332 
2333 #define SHRINK \
2334     if ((!PARSER_PROGRESSIVE(ctxt)) && \
2335         (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2336 	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2337 	xmlParserShrink(ctxt);
2338 
2339 #define GROW \
2340     if ((!PARSER_PROGRESSIVE(ctxt)) && \
2341         (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2342 	xmlParserGrow(ctxt);
2343 
2344 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2345 
2346 #define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2347 
2348 #define NEXT xmlNextChar(ctxt)
2349 
2350 #define NEXT1 {								\
2351 	ctxt->input->col++;						\
2352 	ctxt->input->cur++;						\
2353 	if (*ctxt->input->cur == 0)					\
2354 	    xmlParserGrow(ctxt);						\
2355     }
2356 
2357 #define NEXTL(l) do {							\
2358     if (*(ctxt->input->cur) == '\n') {					\
2359 	ctxt->input->line++; ctxt->input->col = 1;			\
2360     } else ctxt->input->col++;						\
2361     ctxt->input->cur += l;				\
2362   } while (0)
2363 
2364 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2365 
2366 #define COPY_BUF(b, i, v)						\
2367     if (v < 0x80) b[i++] = v;						\
2368     else i += xmlCopyCharMultiByte(&b[i],v)
2369 
2370 static int
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt,int * len)2371 xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2372     int c = xmlCurrentChar(ctxt, len);
2373 
2374     if (c == XML_INVALID_CHAR)
2375         c = 0xFFFD; /* replacement character */
2376 
2377     return(c);
2378 }
2379 
2380 /**
2381  * xmlSkipBlankChars:
2382  * @ctxt:  the XML parser context
2383  *
2384  * DEPRECATED: Internal function, do not use.
2385  *
2386  * Skip whitespace in the input stream.
2387  *
2388  * Returns the number of space chars skipped
2389  */
2390 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2391 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2392     const xmlChar *cur;
2393     int res = 0;
2394 
2395     /*
2396      * It's Okay to use CUR/NEXT here since all the blanks are on
2397      * the ASCII range.
2398      */
2399     cur = ctxt->input->cur;
2400     while (IS_BLANK_CH(*cur)) {
2401         if (*cur == '\n') {
2402             ctxt->input->line++; ctxt->input->col = 1;
2403         } else {
2404             ctxt->input->col++;
2405         }
2406         cur++;
2407         if (res < INT_MAX)
2408             res++;
2409         if (*cur == 0) {
2410             ctxt->input->cur = cur;
2411             xmlParserGrow(ctxt);
2412             cur = ctxt->input->cur;
2413         }
2414     }
2415     ctxt->input->cur = cur;
2416 
2417     return(res);
2418 }
2419 
2420 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2421 xmlPopPE(xmlParserCtxtPtr ctxt) {
2422     unsigned long consumed;
2423     xmlEntityPtr ent;
2424 
2425     ent = ctxt->input->entity;
2426 
2427     ent->flags &= ~XML_ENT_EXPANDING;
2428 
2429     if ((ent->flags & XML_ENT_CHECKED) == 0) {
2430         int result;
2431 
2432         /*
2433          * Read the rest of the stream in case of errors. We want
2434          * to account for the whole entity size.
2435          */
2436         do {
2437             ctxt->input->cur = ctxt->input->end;
2438             xmlParserShrink(ctxt);
2439             result = xmlParserGrow(ctxt);
2440         } while (result > 0);
2441 
2442         consumed = ctxt->input->consumed;
2443         xmlSaturatedAddSizeT(&consumed,
2444                              ctxt->input->end - ctxt->input->base);
2445 
2446         xmlSaturatedAdd(&ent->expandedSize, consumed);
2447 
2448         /*
2449          * Add to sizeentities when parsing an external entity
2450          * for the first time.
2451          */
2452         if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2453             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2454         }
2455 
2456         ent->flags |= XML_ENT_CHECKED;
2457     }
2458 
2459     xmlPopInput(ctxt);
2460 
2461     xmlParserEntityCheck(ctxt, ent->expandedSize);
2462 }
2463 
2464 /**
2465  * xmlSkipBlankCharsPE:
2466  * @ctxt:  the XML parser context
2467  *
2468  * Skip whitespace in the input stream, also handling parameter
2469  * entities.
2470  *
2471  * Returns the number of space chars skipped
2472  */
2473 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2474 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2475     int res = 0;
2476     int inParam;
2477     int expandParam;
2478 
2479     inParam = PARSER_IN_PE(ctxt);
2480     expandParam = PARSER_EXTERNAL(ctxt);
2481 
2482     if (!inParam && !expandParam)
2483         return(xmlSkipBlankChars(ctxt));
2484 
2485     while (PARSER_STOPPED(ctxt) == 0) {
2486         if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2487             NEXT;
2488         } else if (CUR == '%') {
2489             if ((expandParam == 0) ||
2490                 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2491                 break;
2492 
2493             /*
2494              * Expand parameter entity. We continue to consume
2495              * whitespace at the start of the entity and possible
2496              * even consume the whole entity and pop it. We might
2497              * even pop multiple PEs in this loop.
2498              */
2499             xmlParsePEReference(ctxt);
2500 
2501             inParam = PARSER_IN_PE(ctxt);
2502             expandParam = PARSER_EXTERNAL(ctxt);
2503         } else if (CUR == 0) {
2504             if (inParam == 0)
2505                 break;
2506 
2507             xmlPopPE(ctxt);
2508 
2509             inParam = PARSER_IN_PE(ctxt);
2510             expandParam = PARSER_EXTERNAL(ctxt);
2511         } else {
2512             break;
2513         }
2514 
2515         /*
2516          * Also increase the counter when entering or exiting a PERef.
2517          * The spec says: "When a parameter-entity reference is recognized
2518          * in the DTD and included, its replacement text MUST be enlarged
2519          * by the attachment of one leading and one following space (#x20)
2520          * character."
2521          */
2522         if (res < INT_MAX)
2523             res++;
2524     }
2525 
2526     return(res);
2527 }
2528 
2529 /************************************************************************
2530  *									*
2531  *		Commodity functions to handle entities			*
2532  *									*
2533  ************************************************************************/
2534 
2535 /**
2536  * xmlPopInput:
2537  * @ctxt:  an XML parser context
2538  *
2539  * xmlPopInput: the current input pointed by ctxt->input came to an end
2540  *          pop it and return the next char.
2541  *
2542  * Returns the current xmlChar in the parser context
2543  */
2544 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2545 xmlPopInput(xmlParserCtxtPtr ctxt) {
2546     xmlParserInputPtr input;
2547 
2548     if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2549     input = inputPop(ctxt);
2550     xmlFreeInputStream(input);
2551     if (*ctxt->input->cur == 0)
2552         xmlParserGrow(ctxt);
2553     return(CUR);
2554 }
2555 
2556 /**
2557  * xmlPushInput:
2558  * @ctxt:  an XML parser context
2559  * @input:  an XML parser input fragment (entity, XML fragment ...).
2560  *
2561  * Push an input stream onto the stack.
2562  *
2563  * Returns -1 in case of error or the index in the input stack
2564  */
2565 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2566 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2567     int maxDepth;
2568     int ret;
2569 
2570     if ((ctxt == NULL) || (input == NULL))
2571         return(-1);
2572 
2573     maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2574     if (ctxt->inputNr > maxDepth) {
2575         xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2576                        "Maximum entity nesting depth exceeded");
2577         xmlHaltParser(ctxt);
2578 	return(-1);
2579     }
2580     ret = inputPush(ctxt, input);
2581     if (ret >= 0)
2582         GROW;
2583     return(ret);
2584 }
2585 
2586 /**
2587  * xmlParseCharRef:
2588  * @ctxt:  an XML parser context
2589  *
2590  * DEPRECATED: Internal function, don't use.
2591  *
2592  * Parse a numeric character reference. Always consumes '&'.
2593  *
2594  * [66] CharRef ::= '&#' [0-9]+ ';' |
2595  *                  '&#x' [0-9a-fA-F]+ ';'
2596  *
2597  * [ WFC: Legal Character ]
2598  * Characters referred to using character references must match the
2599  * production for Char.
2600  *
2601  * Returns the value parsed (as an int), 0 in case of error
2602  */
2603 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2604 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2605     int val = 0;
2606     int count = 0;
2607 
2608     /*
2609      * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2610      */
2611     if ((RAW == '&') && (NXT(1) == '#') &&
2612         (NXT(2) == 'x')) {
2613 	SKIP(3);
2614 	GROW;
2615 	while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2616 	    if (count++ > 20) {
2617 		count = 0;
2618 		GROW;
2619 	    }
2620 	    if ((RAW >= '0') && (RAW <= '9'))
2621 	        val = val * 16 + (CUR - '0');
2622 	    else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2623 	        val = val * 16 + (CUR - 'a') + 10;
2624 	    else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2625 	        val = val * 16 + (CUR - 'A') + 10;
2626 	    else {
2627 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2628 		val = 0;
2629 		break;
2630 	    }
2631 	    if (val > 0x110000)
2632 	        val = 0x110000;
2633 
2634 	    NEXT;
2635 	    count++;
2636 	}
2637 	if (RAW == ';') {
2638 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2639 	    ctxt->input->col++;
2640 	    ctxt->input->cur++;
2641 	}
2642     } else if  ((RAW == '&') && (NXT(1) == '#')) {
2643 	SKIP(2);
2644 	GROW;
2645 	while (RAW != ';') { /* loop blocked by count */
2646 	    if (count++ > 20) {
2647 		count = 0;
2648 		GROW;
2649 	    }
2650 	    if ((RAW >= '0') && (RAW <= '9'))
2651 	        val = val * 10 + (CUR - '0');
2652 	    else {
2653 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2654 		val = 0;
2655 		break;
2656 	    }
2657 	    if (val > 0x110000)
2658 	        val = 0x110000;
2659 
2660 	    NEXT;
2661 	    count++;
2662 	}
2663 	if (RAW == ';') {
2664 	    /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2665 	    ctxt->input->col++;
2666 	    ctxt->input->cur++;
2667 	}
2668     } else {
2669         if (RAW == '&')
2670             SKIP(1);
2671         xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2672     }
2673 
2674     /*
2675      * [ WFC: Legal Character ]
2676      * Characters referred to using character references must match the
2677      * production for Char.
2678      */
2679     if (val >= 0x110000) {
2680         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681                 "xmlParseCharRef: character reference out of bounds\n",
2682 	        val);
2683     } else if (IS_CHAR(val)) {
2684         return(val);
2685     } else {
2686         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2687                           "xmlParseCharRef: invalid xmlChar value %d\n",
2688 	                  val);
2689     }
2690     return(0);
2691 }
2692 
2693 /**
2694  * xmlParseStringCharRef:
2695  * @ctxt:  an XML parser context
2696  * @str:  a pointer to an index in the string
2697  *
2698  * parse Reference declarations, variant parsing from a string rather
2699  * than an an input flow.
2700  *
2701  * [66] CharRef ::= '&#' [0-9]+ ';' |
2702  *                  '&#x' [0-9a-fA-F]+ ';'
2703  *
2704  * [ WFC: Legal Character ]
2705  * Characters referred to using character references must match the
2706  * production for Char.
2707  *
2708  * Returns the value parsed (as an int), 0 in case of error, str will be
2709  *         updated to the current value of the index
2710  */
2711 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2712 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2713     const xmlChar *ptr;
2714     xmlChar cur;
2715     int val = 0;
2716 
2717     if ((str == NULL) || (*str == NULL)) return(0);
2718     ptr = *str;
2719     cur = *ptr;
2720     if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2721 	ptr += 3;
2722 	cur = *ptr;
2723 	while (cur != ';') { /* Non input consuming loop */
2724 	    if ((cur >= '0') && (cur <= '9'))
2725 	        val = val * 16 + (cur - '0');
2726 	    else if ((cur >= 'a') && (cur <= 'f'))
2727 	        val = val * 16 + (cur - 'a') + 10;
2728 	    else if ((cur >= 'A') && (cur <= 'F'))
2729 	        val = val * 16 + (cur - 'A') + 10;
2730 	    else {
2731 		xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2732 		val = 0;
2733 		break;
2734 	    }
2735 	    if (val > 0x110000)
2736 	        val = 0x110000;
2737 
2738 	    ptr++;
2739 	    cur = *ptr;
2740 	}
2741 	if (cur == ';')
2742 	    ptr++;
2743     } else if  ((cur == '&') && (ptr[1] == '#')){
2744 	ptr += 2;
2745 	cur = *ptr;
2746 	while (cur != ';') { /* Non input consuming loops */
2747 	    if ((cur >= '0') && (cur <= '9'))
2748 	        val = val * 10 + (cur - '0');
2749 	    else {
2750 		xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2751 		val = 0;
2752 		break;
2753 	    }
2754 	    if (val > 0x110000)
2755 	        val = 0x110000;
2756 
2757 	    ptr++;
2758 	    cur = *ptr;
2759 	}
2760 	if (cur == ';')
2761 	    ptr++;
2762     } else {
2763 	xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2764 	return(0);
2765     }
2766     *str = ptr;
2767 
2768     /*
2769      * [ WFC: Legal Character ]
2770      * Characters referred to using character references must match the
2771      * production for Char.
2772      */
2773     if (val >= 0x110000) {
2774         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775                 "xmlParseStringCharRef: character reference out of bounds\n",
2776                 val);
2777     } else if (IS_CHAR(val)) {
2778         return(val);
2779     } else {
2780         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2781 			  "xmlParseStringCharRef: invalid xmlChar value %d\n",
2782 			  val);
2783     }
2784     return(0);
2785 }
2786 
2787 /**
2788  * xmlParserHandlePEReference:
2789  * @ctxt:  the parser context
2790  *
2791  * DEPRECATED: Internal function, do not use.
2792  *
2793  * [69] PEReference ::= '%' Name ';'
2794  *
2795  * [ WFC: No Recursion ]
2796  * A parsed entity must not contain a recursive
2797  * reference to itself, either directly or indirectly.
2798  *
2799  * [ WFC: Entity Declared ]
2800  * In a document without any DTD, a document with only an internal DTD
2801  * subset which contains no parameter entity references, or a document
2802  * with "standalone='yes'", ...  ... The declaration of a parameter
2803  * entity must precede any reference to it...
2804  *
2805  * [ VC: Entity Declared ]
2806  * In a document with an external subset or external parameter entities
2807  * with "standalone='no'", ...  ... The declaration of a parameter entity
2808  * must precede any reference to it...
2809  *
2810  * [ WFC: In DTD ]
2811  * Parameter-entity references may only appear in the DTD.
2812  * NOTE: misleading but this is handled.
2813  *
2814  * A PEReference may have been detected in the current input stream
2815  * the handling is done accordingly to
2816  *      http://www.w3.org/TR/REC-xml#entproc
2817  * i.e.
2818  *   - Included in literal in entity values
2819  *   - Included as Parameter Entity reference within DTDs
2820  */
2821 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2822 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2823     xmlParsePEReference(ctxt);
2824 }
2825 
2826 /**
2827  * xmlStringLenDecodeEntities:
2828  * @ctxt:  the parser context
2829  * @str:  the input string
2830  * @len: the string length
2831  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2832  * @end:  an end marker xmlChar, 0 if none
2833  * @end2:  an end marker xmlChar, 0 if none
2834  * @end3:  an end marker xmlChar, 0 if none
2835  *
2836  * DEPRECATED: Internal function, don't use.
2837  *
2838  * Returns A newly allocated string with the substitution done. The caller
2839  *      must deallocate it !
2840  */
2841 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2842 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2843                            int what ATTRIBUTE_UNUSED,
2844                            xmlChar end, xmlChar end2, xmlChar end3) {
2845     if ((ctxt == NULL) || (str == NULL) || (len < 0))
2846         return(NULL);
2847 
2848     if ((str[len] != 0) ||
2849         (end != 0) || (end2 != 0) || (end3 != 0))
2850         return(NULL);
2851 
2852     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2853 }
2854 
2855 /**
2856  * xmlStringDecodeEntities:
2857  * @ctxt:  the parser context
2858  * @str:  the input string
2859  * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2860  * @end:  an end marker xmlChar, 0 if none
2861  * @end2:  an end marker xmlChar, 0 if none
2862  * @end3:  an end marker xmlChar, 0 if none
2863  *
2864  * DEPRECATED: Internal function, don't use.
2865  *
2866  * Returns A newly allocated string with the substitution done. The caller
2867  *      must deallocate it !
2868  */
2869 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2870 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2871                         int what ATTRIBUTE_UNUSED,
2872 		        xmlChar end, xmlChar  end2, xmlChar end3) {
2873     if ((ctxt == NULL) || (str == NULL))
2874         return(NULL);
2875 
2876     if ((end != 0) || (end2 != 0) || (end3 != 0))
2877         return(NULL);
2878 
2879     return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2880 }
2881 
2882 /************************************************************************
2883  *									*
2884  *		Commodity functions, cleanup needed ?			*
2885  *									*
2886  ************************************************************************/
2887 
2888 /**
2889  * areBlanks:
2890  * @ctxt:  an XML parser context
2891  * @str:  a xmlChar *
2892  * @len:  the size of @str
2893  * @blank_chars: we know the chars are blanks
2894  *
2895  * Is this a sequence of blank chars that one can ignore ?
2896  *
2897  * Returns 1 if ignorable 0 otherwise.
2898  */
2899 
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2900 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2901                      int blank_chars) {
2902     int i;
2903     xmlNodePtr lastChild;
2904 
2905     /*
2906      * Don't spend time trying to differentiate them, the same callback is
2907      * used !
2908      */
2909     if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2910 	return(0);
2911 
2912     /*
2913      * Check for xml:space value.
2914      */
2915     if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2916         (*(ctxt->space) == -2))
2917 	return(0);
2918 
2919     /*
2920      * Check that the string is made of blanks
2921      */
2922     if (blank_chars == 0) {
2923 	for (i = 0;i < len;i++)
2924 	    if (!(IS_BLANK_CH(str[i]))) return(0);
2925     }
2926 
2927     /*
2928      * Look if the element is mixed content in the DTD if available
2929      */
2930     if (ctxt->node == NULL) return(0);
2931     if (ctxt->myDoc != NULL) {
2932         xmlElementPtr elemDecl = NULL;
2933         xmlDocPtr doc = ctxt->myDoc;
2934         const xmlChar *prefix = NULL;
2935 
2936         if (ctxt->node->ns)
2937             prefix = ctxt->node->ns->prefix;
2938         if (doc->intSubset != NULL)
2939             elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2940                                       prefix);
2941         if ((elemDecl == NULL) && (doc->extSubset != NULL))
2942             elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2943                                       prefix);
2944         if (elemDecl != NULL) {
2945             if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2946                 return(1);
2947             if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2948                 (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2949                 return(0);
2950         }
2951     }
2952 
2953     /*
2954      * Otherwise, heuristic :-\
2955      */
2956     if ((RAW != '<') && (RAW != 0xD)) return(0);
2957     if ((ctxt->node->children == NULL) &&
2958 	(RAW == '<') && (NXT(1) == '/')) return(0);
2959 
2960     lastChild = xmlGetLastChild(ctxt->node);
2961     if (lastChild == NULL) {
2962         if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2963             (ctxt->node->content != NULL)) return(0);
2964     } else if (xmlNodeIsText(lastChild))
2965         return(0);
2966     else if ((ctxt->node->children != NULL) &&
2967              (xmlNodeIsText(ctxt->node->children)))
2968         return(0);
2969     return(1);
2970 }
2971 
2972 /************************************************************************
2973  *									*
2974  *		Extra stuff for namespace support			*
2975  *	Relates to http://www.w3.org/TR/WD-xml-names			*
2976  *									*
2977  ************************************************************************/
2978 
2979 /**
2980  * xmlSplitQName:
2981  * @ctxt:  an XML parser context
2982  * @name:  an XML parser context
2983  * @prefixOut:  a xmlChar **
2984  *
2985  * DEPRECATED: Don't use.
2986  *
2987  * parse an UTF8 encoded XML qualified name string
2988  *
2989  * [NS 5] QName ::= (Prefix ':')? LocalPart
2990  *
2991  * [NS 6] Prefix ::= NCName
2992  *
2993  * [NS 7] LocalPart ::= NCName
2994  *
2995  * Returns the local part, and prefix is updated
2996  *   to get the Prefix if any.
2997  */
2998 
2999 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)3000 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
3001     xmlChar buf[XML_MAX_NAMELEN + 5];
3002     xmlChar *buffer = NULL;
3003     int len = 0;
3004     int max = XML_MAX_NAMELEN;
3005     xmlChar *ret = NULL;
3006     xmlChar *prefix;
3007     const xmlChar *cur = name;
3008     int c;
3009 
3010     if (prefixOut == NULL) return(NULL);
3011     *prefixOut = NULL;
3012 
3013     if (cur == NULL) return(NULL);
3014 
3015     /* nasty but well=formed */
3016     if (cur[0] == ':')
3017 	return(xmlStrdup(name));
3018 
3019     c = *cur++;
3020     while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3021 	buf[len++] = c;
3022 	c = *cur++;
3023     }
3024     if (len >= max) {
3025 	/*
3026 	 * Okay someone managed to make a huge name, so he's ready to pay
3027 	 * for the processing speed.
3028 	 */
3029 	max = len * 2;
3030 
3031 	buffer = xmlMalloc(max);
3032 	if (buffer == NULL) {
3033 	    xmlErrMemory(ctxt);
3034 	    return(NULL);
3035 	}
3036 	memcpy(buffer, buf, len);
3037 	while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3038 	    if (len + 10 > max) {
3039 	        xmlChar *tmp;
3040 
3041 		max *= 2;
3042 		tmp = (xmlChar *) xmlRealloc(buffer, max);
3043 		if (tmp == NULL) {
3044 		    xmlFree(buffer);
3045 		    xmlErrMemory(ctxt);
3046 		    return(NULL);
3047 		}
3048 		buffer = tmp;
3049 	    }
3050 	    buffer[len++] = c;
3051 	    c = *cur++;
3052 	}
3053 	buffer[len] = 0;
3054     }
3055 
3056     if ((c == ':') && (*cur == 0)) {
3057         if (buffer != NULL)
3058 	    xmlFree(buffer);
3059 	return(xmlStrdup(name));
3060     }
3061 
3062     if (buffer == NULL) {
3063 	ret = xmlStrndup(buf, len);
3064         if (ret == NULL) {
3065 	    xmlErrMemory(ctxt);
3066 	    return(NULL);
3067         }
3068     } else {
3069 	ret = buffer;
3070 	buffer = NULL;
3071 	max = XML_MAX_NAMELEN;
3072     }
3073 
3074 
3075     if (c == ':') {
3076 	c = *cur;
3077         prefix = ret;
3078 	if (c == 0) {
3079 	    ret = xmlStrndup(BAD_CAST "", 0);
3080             if (ret == NULL) {
3081                 xmlFree(prefix);
3082                 return(NULL);
3083             }
3084             *prefixOut = prefix;
3085             return(ret);
3086 	}
3087 	len = 0;
3088 
3089 	/*
3090 	 * Check that the first character is proper to start
3091 	 * a new name
3092 	 */
3093 	if (!(((c >= 0x61) && (c <= 0x7A)) ||
3094 	      ((c >= 0x41) && (c <= 0x5A)) ||
3095 	      (c == '_') || (c == ':'))) {
3096 	    int l;
3097 	    int first = CUR_SCHAR(cur, l);
3098 
3099 	    if (!IS_LETTER(first) && (first != '_')) {
3100 		xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3101 			    "Name %s is not XML Namespace compliant\n",
3102 				  name);
3103 	    }
3104 	}
3105 	cur++;
3106 
3107 	while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3108 	    buf[len++] = c;
3109 	    c = *cur++;
3110 	}
3111 	if (len >= max) {
3112 	    /*
3113 	     * Okay someone managed to make a huge name, so he's ready to pay
3114 	     * for the processing speed.
3115 	     */
3116 	    max = len * 2;
3117 
3118 	    buffer = xmlMalloc(max);
3119 	    if (buffer == NULL) {
3120 	        xmlErrMemory(ctxt);
3121                 xmlFree(prefix);
3122 		return(NULL);
3123 	    }
3124 	    memcpy(buffer, buf, len);
3125 	    while (c != 0) { /* tested bigname2.xml */
3126 		if (len + 10 > max) {
3127 		    xmlChar *tmp;
3128 
3129 		    max *= 2;
3130 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3131 		    if (tmp == NULL) {
3132 			xmlErrMemory(ctxt);
3133                         xmlFree(prefix);
3134 			xmlFree(buffer);
3135 			return(NULL);
3136 		    }
3137 		    buffer = tmp;
3138 		}
3139 		buffer[len++] = c;
3140 		c = *cur++;
3141 	    }
3142 	    buffer[len] = 0;
3143 	}
3144 
3145 	if (buffer == NULL) {
3146 	    ret = xmlStrndup(buf, len);
3147             if (ret == NULL) {
3148                 xmlFree(prefix);
3149                 return(NULL);
3150             }
3151 	} else {
3152 	    ret = buffer;
3153 	}
3154 
3155         *prefixOut = prefix;
3156     }
3157 
3158     return(ret);
3159 }
3160 
3161 /************************************************************************
3162  *									*
3163  *			The parser itself				*
3164  *	Relates to http://www.w3.org/TR/REC-xml				*
3165  *									*
3166  ************************************************************************/
3167 
3168 /************************************************************************
3169  *									*
3170  *	Routines to parse Name, NCName and NmToken			*
3171  *									*
3172  ************************************************************************/
3173 
3174 /*
3175  * The two following functions are related to the change of accepted
3176  * characters for Name and NmToken in the Revision 5 of XML-1.0
3177  * They correspond to the modified production [4] and the new production [4a]
3178  * changes in that revision. Also note that the macros used for the
3179  * productions Letter, Digit, CombiningChar and Extender are not needed
3180  * anymore.
3181  * We still keep compatibility to pre-revision5 parsing semantic if the
3182  * new XML_PARSE_OLD10 option is given to the parser.
3183  */
3184 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3185 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3186     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3187         /*
3188 	 * Use the new checks of production [4] [4a] amd [5] of the
3189 	 * Update 5 of XML-1.0
3190 	 */
3191 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 	    (((c >= 'a') && (c <= 'z')) ||
3193 	     ((c >= 'A') && (c <= 'Z')) ||
3194 	     (c == '_') || (c == ':') ||
3195 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3196 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3197 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3198 	     ((c >= 0x370) && (c <= 0x37D)) ||
3199 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3200 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3201 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3202 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3207 	    return(1);
3208     } else {
3209         if (IS_LETTER(c) || (c == '_') || (c == ':'))
3210 	    return(1);
3211     }
3212     return(0);
3213 }
3214 
3215 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3216 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3217     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218         /*
3219 	 * Use the new checks of production [4] [4a] amd [5] of the
3220 	 * Update 5 of XML-1.0
3221 	 */
3222 	if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 	    (((c >= 'a') && (c <= 'z')) ||
3224 	     ((c >= 'A') && (c <= 'Z')) ||
3225 	     ((c >= '0') && (c <= '9')) || /* !start */
3226 	     (c == '_') || (c == ':') ||
3227 	     (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3228 	     ((c >= 0xC0) && (c <= 0xD6)) ||
3229 	     ((c >= 0xD8) && (c <= 0xF6)) ||
3230 	     ((c >= 0xF8) && (c <= 0x2FF)) ||
3231 	     ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3232 	     ((c >= 0x370) && (c <= 0x37D)) ||
3233 	     ((c >= 0x37F) && (c <= 0x1FFF)) ||
3234 	     ((c >= 0x200C) && (c <= 0x200D)) ||
3235 	     ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3236 	     ((c >= 0x2070) && (c <= 0x218F)) ||
3237 	     ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3238 	     ((c >= 0x3001) && (c <= 0xD7FF)) ||
3239 	     ((c >= 0xF900) && (c <= 0xFDCF)) ||
3240 	     ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3241 	     ((c >= 0x10000) && (c <= 0xEFFFF))))
3242 	     return(1);
3243     } else {
3244         if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3245             (c == '.') || (c == '-') ||
3246 	    (c == '_') || (c == ':') ||
3247 	    (IS_COMBINING(c)) ||
3248 	    (IS_EXTENDER(c)))
3249 	    return(1);
3250     }
3251     return(0);
3252 }
3253 
3254 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3255 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3256     const xmlChar *ret;
3257     int len = 0, l;
3258     int c;
3259     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3260                     XML_MAX_TEXT_LENGTH :
3261                     XML_MAX_NAME_LENGTH;
3262 
3263     /*
3264      * Handler for more complex cases
3265      */
3266     c = xmlCurrentChar(ctxt, &l);
3267     if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3268         /*
3269 	 * Use the new checks of production [4] [4a] amd [5] of the
3270 	 * Update 5 of XML-1.0
3271 	 */
3272 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3273 	    (!(((c >= 'a') && (c <= 'z')) ||
3274 	       ((c >= 'A') && (c <= 'Z')) ||
3275 	       (c == '_') || (c == ':') ||
3276 	       ((c >= 0xC0) && (c <= 0xD6)) ||
3277 	       ((c >= 0xD8) && (c <= 0xF6)) ||
3278 	       ((c >= 0xF8) && (c <= 0x2FF)) ||
3279 	       ((c >= 0x370) && (c <= 0x37D)) ||
3280 	       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3281 	       ((c >= 0x200C) && (c <= 0x200D)) ||
3282 	       ((c >= 0x2070) && (c <= 0x218F)) ||
3283 	       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284 	       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285 	       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286 	       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287 	       ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3288 	    return(NULL);
3289 	}
3290 	len += l;
3291 	NEXTL(l);
3292 	c = xmlCurrentChar(ctxt, &l);
3293 	while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3294 	       (((c >= 'a') && (c <= 'z')) ||
3295 	        ((c >= 'A') && (c <= 'Z')) ||
3296 	        ((c >= '0') && (c <= '9')) || /* !start */
3297 	        (c == '_') || (c == ':') ||
3298 	        (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3299 	        ((c >= 0xC0) && (c <= 0xD6)) ||
3300 	        ((c >= 0xD8) && (c <= 0xF6)) ||
3301 	        ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 	        ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3303 	        ((c >= 0x370) && (c <= 0x37D)) ||
3304 	        ((c >= 0x37F) && (c <= 0x1FFF)) ||
3305 	        ((c >= 0x200C) && (c <= 0x200D)) ||
3306 	        ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3307 	        ((c >= 0x2070) && (c <= 0x218F)) ||
3308 	        ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3309 	        ((c >= 0x3001) && (c <= 0xD7FF)) ||
3310 	        ((c >= 0xF900) && (c <= 0xFDCF)) ||
3311 	        ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3312 	        ((c >= 0x10000) && (c <= 0xEFFFF))
3313 		)) {
3314             if (len <= INT_MAX - l)
3315 	        len += l;
3316 	    NEXTL(l);
3317 	    c = xmlCurrentChar(ctxt, &l);
3318 	}
3319     } else {
3320 	if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3321 	    (!IS_LETTER(c) && (c != '_') &&
3322 	     (c != ':'))) {
3323 	    return(NULL);
3324 	}
3325 	len += l;
3326 	NEXTL(l);
3327 	c = xmlCurrentChar(ctxt, &l);
3328 
3329 	while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3330 	       ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3331 		(c == '.') || (c == '-') ||
3332 		(c == '_') || (c == ':') ||
3333 		(IS_COMBINING(c)) ||
3334 		(IS_EXTENDER(c)))) {
3335             if (len <= INT_MAX - l)
3336 	        len += l;
3337 	    NEXTL(l);
3338 	    c = xmlCurrentChar(ctxt, &l);
3339 	}
3340     }
3341     if (len > maxLength) {
3342         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3343         return(NULL);
3344     }
3345     if (ctxt->input->cur - ctxt->input->base < len) {
3346         /*
3347          * There were a couple of bugs where PERefs lead to to a change
3348          * of the buffer. Check the buffer size to avoid passing an invalid
3349          * pointer to xmlDictLookup.
3350          */
3351         xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3352                     "unexpected change of input buffer");
3353         return (NULL);
3354     }
3355     if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3356         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3357     else
3358         ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3359     if (ret == NULL)
3360         xmlErrMemory(ctxt);
3361     return(ret);
3362 }
3363 
3364 /**
3365  * xmlParseName:
3366  * @ctxt:  an XML parser context
3367  *
3368  * DEPRECATED: Internal function, don't use.
3369  *
3370  * parse an XML name.
3371  *
3372  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3373  *                  CombiningChar | Extender
3374  *
3375  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3376  *
3377  * [6] Names ::= Name (#x20 Name)*
3378  *
3379  * Returns the Name parsed or NULL
3380  */
3381 
3382 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3383 xmlParseName(xmlParserCtxtPtr ctxt) {
3384     const xmlChar *in;
3385     const xmlChar *ret;
3386     size_t count = 0;
3387     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3388                        XML_MAX_TEXT_LENGTH :
3389                        XML_MAX_NAME_LENGTH;
3390 
3391     GROW;
3392 
3393     /*
3394      * Accelerator for simple ASCII names
3395      */
3396     in = ctxt->input->cur;
3397     if (((*in >= 0x61) && (*in <= 0x7A)) ||
3398 	((*in >= 0x41) && (*in <= 0x5A)) ||
3399 	(*in == '_') || (*in == ':')) {
3400 	in++;
3401 	while (((*in >= 0x61) && (*in <= 0x7A)) ||
3402 	       ((*in >= 0x41) && (*in <= 0x5A)) ||
3403 	       ((*in >= 0x30) && (*in <= 0x39)) ||
3404 	       (*in == '_') || (*in == '-') ||
3405 	       (*in == ':') || (*in == '.'))
3406 	    in++;
3407 	if ((*in > 0) && (*in < 0x80)) {
3408 	    count = in - ctxt->input->cur;
3409             if (count > maxLength) {
3410                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3411                 return(NULL);
3412             }
3413 	    ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3414 	    ctxt->input->cur = in;
3415 	    ctxt->input->col += count;
3416 	    if (ret == NULL)
3417 	        xmlErrMemory(ctxt);
3418 	    return(ret);
3419 	}
3420     }
3421     /* accelerator for special cases */
3422     return(xmlParseNameComplex(ctxt));
3423 }
3424 
3425 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3426 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3427     xmlHashedString ret;
3428     int len = 0, l;
3429     int c;
3430     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3431                     XML_MAX_TEXT_LENGTH :
3432                     XML_MAX_NAME_LENGTH;
3433     size_t startPosition = 0;
3434 
3435     ret.name = NULL;
3436     ret.hashValue = 0;
3437 
3438     /*
3439      * Handler for more complex cases
3440      */
3441     startPosition = CUR_PTR - BASE_PTR;
3442     c = xmlCurrentChar(ctxt, &l);
3443     if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3444 	(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3445 	return(ret);
3446     }
3447 
3448     while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3449 	   (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3450         if (len <= INT_MAX - l)
3451 	    len += l;
3452 	NEXTL(l);
3453 	c = xmlCurrentChar(ctxt, &l);
3454     }
3455     if (len > maxLength) {
3456         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3457         return(ret);
3458     }
3459     ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3460     if (ret.name == NULL)
3461         xmlErrMemory(ctxt);
3462     return(ret);
3463 }
3464 
3465 /**
3466  * xmlParseNCName:
3467  * @ctxt:  an XML parser context
3468  * @len:  length of the string parsed
3469  *
3470  * parse an XML name.
3471  *
3472  * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3473  *                      CombiningChar | Extender
3474  *
3475  * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3476  *
3477  * Returns the Name parsed or NULL
3478  */
3479 
3480 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3481 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3482     const xmlChar *in, *e;
3483     xmlHashedString ret;
3484     size_t count = 0;
3485     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3486                        XML_MAX_TEXT_LENGTH :
3487                        XML_MAX_NAME_LENGTH;
3488 
3489     ret.name = NULL;
3490 
3491     /*
3492      * Accelerator for simple ASCII names
3493      */
3494     in = ctxt->input->cur;
3495     e = ctxt->input->end;
3496     if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3497 	 ((*in >= 0x41) && (*in <= 0x5A)) ||
3498 	 (*in == '_')) && (in < e)) {
3499 	in++;
3500 	while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3501 	        ((*in >= 0x41) && (*in <= 0x5A)) ||
3502 	        ((*in >= 0x30) && (*in <= 0x39)) ||
3503 	        (*in == '_') || (*in == '-') ||
3504 	        (*in == '.')) && (in < e))
3505 	    in++;
3506 	if (in >= e)
3507 	    goto complex;
3508 	if ((*in > 0) && (*in < 0x80)) {
3509 	    count = in - ctxt->input->cur;
3510             if (count > maxLength) {
3511                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3512                 return(ret);
3513             }
3514 	    ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3515 	    ctxt->input->cur = in;
3516 	    ctxt->input->col += count;
3517 	    if (ret.name == NULL) {
3518 	        xmlErrMemory(ctxt);
3519 	    }
3520 	    return(ret);
3521 	}
3522     }
3523 complex:
3524     return(xmlParseNCNameComplex(ctxt));
3525 }
3526 
3527 /**
3528  * xmlParseNameAndCompare:
3529  * @ctxt:  an XML parser context
3530  *
3531  * parse an XML name and compares for match
3532  * (specialized for endtag parsing)
3533  *
3534  * Returns NULL for an illegal name, (xmlChar*) 1 for success
3535  * and the name for mismatch
3536  */
3537 
3538 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3539 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3540     register const xmlChar *cmp = other;
3541     register const xmlChar *in;
3542     const xmlChar *ret;
3543 
3544     GROW;
3545 
3546     in = ctxt->input->cur;
3547     while (*in != 0 && *in == *cmp) {
3548 	++in;
3549 	++cmp;
3550     }
3551     if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3552 	/* success */
3553 	ctxt->input->col += in - ctxt->input->cur;
3554 	ctxt->input->cur = in;
3555 	return (const xmlChar*) 1;
3556     }
3557     /* failure (or end of input buffer), check with full function */
3558     ret = xmlParseName (ctxt);
3559     /* strings coming from the dictionary direct compare possible */
3560     if (ret == other) {
3561 	return (const xmlChar*) 1;
3562     }
3563     return ret;
3564 }
3565 
3566 /**
3567  * xmlParseStringName:
3568  * @ctxt:  an XML parser context
3569  * @str:  a pointer to the string pointer (IN/OUT)
3570  *
3571  * parse an XML name.
3572  *
3573  * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3574  *                  CombiningChar | Extender
3575  *
3576  * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3577  *
3578  * [6] Names ::= Name (#x20 Name)*
3579  *
3580  * Returns the Name parsed or NULL. The @str pointer
3581  * is updated to the current location in the string.
3582  */
3583 
3584 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3585 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3586     xmlChar buf[XML_MAX_NAMELEN + 5];
3587     xmlChar *ret;
3588     const xmlChar *cur = *str;
3589     int len = 0, l;
3590     int c;
3591     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3592                     XML_MAX_TEXT_LENGTH :
3593                     XML_MAX_NAME_LENGTH;
3594 
3595     c = CUR_SCHAR(cur, l);
3596     if (!xmlIsNameStartChar(ctxt, c)) {
3597 	return(NULL);
3598     }
3599 
3600     COPY_BUF(buf, len, c);
3601     cur += l;
3602     c = CUR_SCHAR(cur, l);
3603     while (xmlIsNameChar(ctxt, c)) {
3604 	COPY_BUF(buf, len, c);
3605 	cur += l;
3606 	c = CUR_SCHAR(cur, l);
3607 	if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3608 	    /*
3609 	     * Okay someone managed to make a huge name, so he's ready to pay
3610 	     * for the processing speed.
3611 	     */
3612 	    xmlChar *buffer;
3613 	    int max = len * 2;
3614 
3615 	    buffer = xmlMalloc(max);
3616 	    if (buffer == NULL) {
3617 	        xmlErrMemory(ctxt);
3618 		return(NULL);
3619 	    }
3620 	    memcpy(buffer, buf, len);
3621 	    while (xmlIsNameChar(ctxt, c)) {
3622 		if (len + 10 > max) {
3623 		    xmlChar *tmp;
3624 
3625 		    max *= 2;
3626 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3627 		    if (tmp == NULL) {
3628 			xmlErrMemory(ctxt);
3629 			xmlFree(buffer);
3630 			return(NULL);
3631 		    }
3632 		    buffer = tmp;
3633 		}
3634 		COPY_BUF(buffer, len, c);
3635 		cur += l;
3636 		c = CUR_SCHAR(cur, l);
3637                 if (len > maxLength) {
3638                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3639                     xmlFree(buffer);
3640                     return(NULL);
3641                 }
3642 	    }
3643 	    buffer[len] = 0;
3644 	    *str = cur;
3645 	    return(buffer);
3646 	}
3647     }
3648     if (len > maxLength) {
3649         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3650         return(NULL);
3651     }
3652     *str = cur;
3653     ret = xmlStrndup(buf, len);
3654     if (ret == NULL)
3655         xmlErrMemory(ctxt);
3656     return(ret);
3657 }
3658 
3659 /**
3660  * xmlParseNmtoken:
3661  * @ctxt:  an XML parser context
3662  *
3663  * DEPRECATED: Internal function, don't use.
3664  *
3665  * parse an XML Nmtoken.
3666  *
3667  * [7] Nmtoken ::= (NameChar)+
3668  *
3669  * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3670  *
3671  * Returns the Nmtoken parsed or NULL
3672  */
3673 
3674 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3675 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3676     xmlChar buf[XML_MAX_NAMELEN + 5];
3677     xmlChar *ret;
3678     int len = 0, l;
3679     int c;
3680     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3681                     XML_MAX_TEXT_LENGTH :
3682                     XML_MAX_NAME_LENGTH;
3683 
3684     c = xmlCurrentChar(ctxt, &l);
3685 
3686     while (xmlIsNameChar(ctxt, c)) {
3687 	COPY_BUF(buf, len, c);
3688 	NEXTL(l);
3689 	c = xmlCurrentChar(ctxt, &l);
3690 	if (len >= XML_MAX_NAMELEN) {
3691 	    /*
3692 	     * Okay someone managed to make a huge token, so he's ready to pay
3693 	     * for the processing speed.
3694 	     */
3695 	    xmlChar *buffer;
3696 	    int max = len * 2;
3697 
3698 	    buffer = xmlMalloc(max);
3699 	    if (buffer == NULL) {
3700 	        xmlErrMemory(ctxt);
3701 		return(NULL);
3702 	    }
3703 	    memcpy(buffer, buf, len);
3704 	    while (xmlIsNameChar(ctxt, c)) {
3705 		if (len + 10 > max) {
3706 		    xmlChar *tmp;
3707 
3708 		    max *= 2;
3709 		    tmp = (xmlChar *) xmlRealloc(buffer, max);
3710 		    if (tmp == NULL) {
3711 			xmlErrMemory(ctxt);
3712 			xmlFree(buffer);
3713 			return(NULL);
3714 		    }
3715 		    buffer = tmp;
3716 		}
3717 		COPY_BUF(buffer, len, c);
3718                 if (len > maxLength) {
3719                     xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3720                     xmlFree(buffer);
3721                     return(NULL);
3722                 }
3723 		NEXTL(l);
3724 		c = xmlCurrentChar(ctxt, &l);
3725 	    }
3726 	    buffer[len] = 0;
3727 	    return(buffer);
3728 	}
3729     }
3730     if (len == 0)
3731         return(NULL);
3732     if (len > maxLength) {
3733         xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3734         return(NULL);
3735     }
3736     ret = xmlStrndup(buf, len);
3737     if (ret == NULL)
3738         xmlErrMemory(ctxt);
3739     return(ret);
3740 }
3741 
3742 /**
3743  * xmlExpandPEsInEntityValue:
3744  * @ctxt:  parser context
3745  * @buf:  string buffer
3746  * @str:  entity value
3747  * @length:  size of entity value
3748  * @depth:  nesting depth
3749  *
3750  * Validate an entity value and expand parameter entities.
3751  */
3752 static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,int length,int depth)3753 xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3754                           const xmlChar *str, int length, int depth) {
3755     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3756     const xmlChar *end, *chunk;
3757     int c, l;
3758 
3759     if (str == NULL)
3760         return;
3761 
3762     depth += 1;
3763     if (depth > maxDepth) {
3764 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3765                        "Maximum entity nesting depth exceeded");
3766 	return;
3767     }
3768 
3769     end = str + length;
3770     chunk = str;
3771 
3772     while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3773         c = *str;
3774 
3775         if (c >= 0x80) {
3776             l = xmlUTF8MultibyteLen(ctxt, str,
3777                     "invalid character in entity value\n");
3778             if (l == 0) {
3779                 if (chunk < str)
3780                     xmlSBufAddString(buf, chunk, str - chunk);
3781                 xmlSBufAddReplChar(buf);
3782                 str += 1;
3783                 chunk = str;
3784             } else {
3785                 str += l;
3786             }
3787         } else if (c == '&') {
3788             if (str[1] == '#') {
3789                 if (chunk < str)
3790                     xmlSBufAddString(buf, chunk, str - chunk);
3791 
3792                 c = xmlParseStringCharRef(ctxt, &str);
3793                 if (c == 0)
3794                     return;
3795 
3796                 xmlSBufAddChar(buf, c);
3797 
3798                 chunk = str;
3799             } else {
3800                 xmlChar *name;
3801 
3802                 /*
3803                  * General entity references are checked for
3804                  * syntactic validity.
3805                  */
3806                 str++;
3807                 name = xmlParseStringName(ctxt, &str);
3808 
3809                 if ((name == NULL) || (*str++ != ';')) {
3810                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3811                             "EntityValue: '&' forbidden except for entities "
3812                             "references\n");
3813                     xmlFree(name);
3814                     return;
3815                 }
3816 
3817                 xmlFree(name);
3818             }
3819         } else if (c == '%') {
3820             xmlEntityPtr ent;
3821 
3822             if (chunk < str)
3823                 xmlSBufAddString(buf, chunk, str - chunk);
3824 
3825             ent = xmlParseStringPEReference(ctxt, &str);
3826             if (ent == NULL)
3827                 return;
3828 
3829             if (!PARSER_EXTERNAL(ctxt)) {
3830                 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3831                 return;
3832             }
3833 
3834             if (ent->content == NULL) {
3835                 /*
3836                  * Note: external parsed entities will not be loaded,
3837                  * it is not required for a non-validating parser to
3838                  * complete external PEReferences coming from the
3839                  * internal subset
3840                  */
3841                 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3842                     ((ctxt->replaceEntities) ||
3843                      (ctxt->validate))) {
3844                     xmlLoadEntityContent(ctxt, ent);
3845                 } else {
3846                     xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3847                                   "not validating will not read content for "
3848                                   "PE entity %s\n", ent->name, NULL);
3849                 }
3850             }
3851 
3852             /*
3853              * TODO: Skip if ent->content is still NULL.
3854              */
3855 
3856             if (xmlParserEntityCheck(ctxt, ent->length))
3857                 return;
3858 
3859             if (ent->flags & XML_ENT_EXPANDING) {
3860                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3861                 xmlHaltParser(ctxt);
3862                 return;
3863             }
3864 
3865             ent->flags |= XML_ENT_EXPANDING;
3866             xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3867                                       depth);
3868             ent->flags &= ~XML_ENT_EXPANDING;
3869 
3870             chunk = str;
3871         } else {
3872             /* Normal ASCII char */
3873             if (!IS_BYTE_CHAR(c)) {
3874                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3875                         "invalid character in entity value\n");
3876                 if (chunk < str)
3877                     xmlSBufAddString(buf, chunk, str - chunk);
3878                 xmlSBufAddReplChar(buf);
3879                 str += 1;
3880                 chunk = str;
3881             } else {
3882                 str += 1;
3883             }
3884         }
3885     }
3886 
3887     if (chunk < str)
3888         xmlSBufAddString(buf, chunk, str - chunk);
3889 }
3890 
3891 /**
3892  * xmlParseEntityValue:
3893  * @ctxt:  an XML parser context
3894  * @orig:  if non-NULL store a copy of the original entity value
3895  *
3896  * DEPRECATED: Internal function, don't use.
3897  *
3898  * parse a value for ENTITY declarations
3899  *
3900  * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3901  *	               "'" ([^%&'] | PEReference | Reference)* "'"
3902  *
3903  * Returns the EntityValue parsed with reference substituted or NULL
3904  */
3905 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3906 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3907     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3908                          XML_MAX_HUGE_LENGTH :
3909                          XML_MAX_TEXT_LENGTH;
3910     xmlSBuf buf;
3911     const xmlChar *start;
3912     int quote, length;
3913 
3914     xmlSBufInit(&buf, maxLength);
3915 
3916     GROW;
3917 
3918     quote = CUR;
3919     if ((quote != '"') && (quote != '\'')) {
3920 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3921 	return(NULL);
3922     }
3923     CUR_PTR++;
3924 
3925     length = 0;
3926 
3927     /*
3928      * Copy raw content of the entity into a buffer
3929      */
3930     while (1) {
3931         int c;
3932 
3933         if (PARSER_STOPPED(ctxt))
3934             goto error;
3935 
3936         if (CUR_PTR >= ctxt->input->end) {
3937             xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3938             goto error;
3939         }
3940 
3941         c = CUR;
3942 
3943         if (c == 0) {
3944             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3945                     "invalid character in entity value\n");
3946             goto error;
3947         }
3948         if (c == quote)
3949             break;
3950         NEXTL(1);
3951         length += 1;
3952 
3953         /*
3954          * TODO: Check growth threshold
3955          */
3956         if (ctxt->input->end - CUR_PTR < 10)
3957             GROW;
3958     }
3959 
3960     start = CUR_PTR - length;
3961 
3962     if (orig != NULL) {
3963         *orig = xmlStrndup(start, length);
3964         if (*orig == NULL)
3965             xmlErrMemory(ctxt);
3966     }
3967 
3968     xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3969 
3970     NEXTL(1);
3971 
3972     return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3973 
3974 error:
3975     xmlSBufCleanup(&buf, ctxt, "entity length too long");
3976     return(NULL);
3977 }
3978 
3979 /**
3980  * xmlCheckEntityInAttValue:
3981  * @ctxt:  parser context
3982  * @pent:  entity
3983  * @depth:  nesting depth
3984  *
3985  * Check an entity reference in an attribute value for validity
3986  * without expanding it.
3987  */
3988 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3989 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3990     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3991     const xmlChar *str;
3992     unsigned long expandedSize = pent->length;
3993     int c, flags;
3994 
3995     depth += 1;
3996     if (depth > maxDepth) {
3997 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3998                        "Maximum entity nesting depth exceeded");
3999 	return;
4000     }
4001 
4002     if (pent->flags & XML_ENT_EXPANDING) {
4003         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4004         xmlHaltParser(ctxt);
4005         return;
4006     }
4007 
4008     /*
4009      * If we're parsing a default attribute value in DTD content,
4010      * the entity might reference other entities which weren't
4011      * defined yet, so the check isn't reliable.
4012      */
4013     if (ctxt->inSubset == 0)
4014         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4015     else
4016         flags = XML_ENT_VALIDATED;
4017 
4018     str = pent->content;
4019     if (str == NULL)
4020         goto done;
4021 
4022     /*
4023      * Note that entity values are already validated. We only check
4024      * for illegal less-than signs and compute the expanded size
4025      * of the entity. No special handling for multi-byte characters
4026      * is needed.
4027      */
4028     while (!PARSER_STOPPED(ctxt)) {
4029         c = *str;
4030 
4031 	if (c != '&') {
4032             if (c == 0)
4033                 break;
4034 
4035             if (c == '<')
4036                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4037                         "'<' in entity '%s' is not allowed in attributes "
4038                         "values\n", pent->name);
4039 
4040             str += 1;
4041         } else if (str[1] == '#') {
4042             int val;
4043 
4044 	    val = xmlParseStringCharRef(ctxt, &str);
4045 	    if (val == 0) {
4046                 pent->content[0] = 0;
4047                 break;
4048             }
4049 	} else {
4050             xmlChar *name;
4051             xmlEntityPtr ent;
4052 
4053 	    name = xmlParseStringEntityRef(ctxt, &str);
4054 	    if (name == NULL) {
4055                 pent->content[0] = 0;
4056                 break;
4057             }
4058 
4059             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4060             xmlFree(name);
4061 
4062             if ((ent != NULL) &&
4063                 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4064                 if ((ent->flags & flags) != flags) {
4065                     pent->flags |= XML_ENT_EXPANDING;
4066                     xmlCheckEntityInAttValue(ctxt, ent, depth);
4067                     pent->flags &= ~XML_ENT_EXPANDING;
4068                 }
4069 
4070                 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4071                 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4072             }
4073         }
4074     }
4075 
4076 done:
4077     if (ctxt->inSubset == 0)
4078         pent->expandedSize = expandedSize;
4079 
4080     pent->flags |= flags;
4081 }
4082 
4083 /**
4084  * xmlExpandEntityInAttValue:
4085  * @ctxt:  parser context
4086  * @buf:  string buffer
4087  * @str:  entity or attribute value
4088  * @pent:  entity for entity value, NULL for attribute values
4089  * @normalize:  whether to collapse whitespace
4090  * @inSpace:  whitespace state
4091  * @depth:  nesting depth
4092  * @check:  whether to check for amplification
4093  *
4094  * Expand general entity references in an entity or attribute value.
4095  * Perform attribute value normalization.
4096  */
4097 static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,xmlEntityPtr pent,int normalize,int * inSpace,int depth,int check)4098 xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4099                           const xmlChar *str, xmlEntityPtr pent, int normalize,
4100                           int *inSpace, int depth, int check) {
4101     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4102     int c, chunkSize;
4103 
4104     if (str == NULL)
4105         return;
4106 
4107     depth += 1;
4108     if (depth > maxDepth) {
4109 	xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4110                        "Maximum entity nesting depth exceeded");
4111 	return;
4112     }
4113 
4114     if (pent != NULL) {
4115         if (pent->flags & XML_ENT_EXPANDING) {
4116             xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4117             xmlHaltParser(ctxt);
4118             return;
4119         }
4120 
4121         if (check) {
4122             if (xmlParserEntityCheck(ctxt, pent->length))
4123                 return;
4124         }
4125     }
4126 
4127     chunkSize = 0;
4128 
4129     /*
4130      * Note that entity values are already validated. No special
4131      * handling for multi-byte characters is needed.
4132      */
4133     while (!PARSER_STOPPED(ctxt)) {
4134         c = *str;
4135 
4136 	if (c != '&') {
4137             if (c == 0)
4138                 break;
4139 
4140             /*
4141              * If this function is called without an entity, it is used to
4142              * expand entities in an attribute content where less-than was
4143              * already unscaped and is allowed.
4144              */
4145             if ((pent != NULL) && (c == '<')) {
4146                 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4147                         "'<' in entity '%s' is not allowed in attributes "
4148                         "values\n", pent->name);
4149                 break;
4150             }
4151 
4152             if (c <= 0x20) {
4153                 if ((normalize) && (*inSpace)) {
4154                     /* Skip char */
4155                     if (chunkSize > 0) {
4156                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4157                         chunkSize = 0;
4158                     }
4159                 } else if (c < 0x20) {
4160                     if (chunkSize > 0) {
4161                         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4162                         chunkSize = 0;
4163                     }
4164 
4165                     xmlSBufAddCString(buf, " ", 1);
4166                 } else {
4167                     chunkSize += 1;
4168                 }
4169 
4170                 *inSpace = 1;
4171             } else {
4172                 chunkSize += 1;
4173                 *inSpace = 0;
4174             }
4175 
4176             str += 1;
4177         } else if (str[1] == '#') {
4178             int val;
4179 
4180             if (chunkSize > 0) {
4181                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4182                 chunkSize = 0;
4183             }
4184 
4185 	    val = xmlParseStringCharRef(ctxt, &str);
4186 	    if (val == 0) {
4187                 if (pent != NULL)
4188                     pent->content[0] = 0;
4189                 break;
4190             }
4191 
4192             if (val == ' ') {
4193                 if ((!normalize) || (!*inSpace))
4194                     xmlSBufAddCString(buf, " ", 1);
4195                 *inSpace = 1;
4196             } else {
4197                 xmlSBufAddChar(buf, val);
4198                 *inSpace = 0;
4199             }
4200 	} else {
4201             xmlChar *name;
4202             xmlEntityPtr ent;
4203 
4204             if (chunkSize > 0) {
4205                 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4206                 chunkSize = 0;
4207             }
4208 
4209 	    name = xmlParseStringEntityRef(ctxt, &str);
4210             if (name == NULL) {
4211                 if (pent != NULL)
4212                     pent->content[0] = 0;
4213                 break;
4214             }
4215 
4216             ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4217             xmlFree(name);
4218 
4219 	    if ((ent != NULL) &&
4220 		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4221 		if (ent->content == NULL) {
4222 		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4223 			    "predefined entity has no content\n");
4224                     break;
4225                 }
4226 
4227                 xmlSBufAddString(buf, ent->content, ent->length);
4228 
4229                 *inSpace = 0;
4230 	    } else if ((ent != NULL) && (ent->content != NULL)) {
4231                 if (pent != NULL)
4232                     pent->flags |= XML_ENT_EXPANDING;
4233 		xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4234                                           normalize, inSpace, depth, check);
4235                 if (pent != NULL)
4236                     pent->flags &= ~XML_ENT_EXPANDING;
4237 	    }
4238         }
4239     }
4240 
4241     if (chunkSize > 0)
4242         xmlSBufAddString(buf, str - chunkSize, chunkSize);
4243 }
4244 
4245 /**
4246  * xmlExpandEntitiesInAttValue:
4247  * @ctxt:  parser context
4248  * @str:  entity or attribute value
4249  * @normalize:  whether to collapse whitespace
4250  *
4251  * Expand general entity references in an entity or attribute value.
4252  * Perform attribute value normalization.
4253  *
4254  * Returns the expanded attribtue value.
4255  */
4256 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4257 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4258                             int normalize) {
4259     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4260                          XML_MAX_HUGE_LENGTH :
4261                          XML_MAX_TEXT_LENGTH;
4262     xmlSBuf buf;
4263     int inSpace = 1;
4264 
4265     xmlSBufInit(&buf, maxLength);
4266 
4267     xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4268                               ctxt->inputNr, /* check */ 0);
4269 
4270     if ((normalize) && (inSpace) && (buf.size > 0))
4271         buf.size--;
4272 
4273     return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4274 }
4275 
4276 /**
4277  * xmlParseAttValueInternal:
4278  * @ctxt:  an XML parser context
4279  * @len:  attribute len result
4280  * @alloc:  whether the attribute was reallocated as a new string
4281  * @normalize:  if 1 then further non-CDATA normalization must be done
4282  *
4283  * parse a value for an attribute.
4284  * NOTE: if no normalization is needed, the routine will return pointers
4285  *       directly from the data buffer.
4286  *
4287  * 3.3.3 Attribute-Value Normalization:
4288  * Before the value of an attribute is passed to the application or
4289  * checked for validity, the XML processor must normalize it as follows:
4290  * - a character reference is processed by appending the referenced
4291  *   character to the attribute value
4292  * - an entity reference is processed by recursively processing the
4293  *   replacement text of the entity
4294  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4295  *   appending #x20 to the normalized value, except that only a single
4296  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4297  *   parsed entity or the literal entity value of an internal parsed entity
4298  * - other characters are processed by appending them to the normalized value
4299  * If the declared value is not CDATA, then the XML processor must further
4300  * process the normalized attribute value by discarding any leading and
4301  * trailing space (#x20) characters, and by replacing sequences of space
4302  * (#x20) characters by a single space (#x20) character.
4303  * All attributes for which no declaration has been read should be treated
4304  * by a non-validating parser as if declared CDATA.
4305  *
4306  * Returns the AttValue parsed or NULL. The value has to be freed by the
4307  *     caller if it was copied, this can be detected by val[*len] == 0.
4308  */
4309 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize,int isNamespace)4310 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4311                          int normalize, int isNamespace) {
4312     unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4313                          XML_MAX_HUGE_LENGTH :
4314                          XML_MAX_TEXT_LENGTH;
4315     xmlSBuf buf;
4316     xmlChar *ret;
4317     int c, l, quote, flags, chunkSize;
4318     int inSpace = 1;
4319     int replaceEntities;
4320 
4321     /* Always expand namespace URIs */
4322     replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4323 
4324     xmlSBufInit(&buf, maxLength);
4325 
4326     GROW;
4327 
4328     quote = CUR;
4329     if ((quote != '"') && (quote != '\'')) {
4330 	xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4331 	return(NULL);
4332     }
4333     NEXTL(1);
4334 
4335     if (ctxt->inSubset == 0)
4336         flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4337     else
4338         flags = XML_ENT_VALIDATED;
4339 
4340     inSpace = 1;
4341     chunkSize = 0;
4342 
4343     while (1) {
4344         if (PARSER_STOPPED(ctxt))
4345             goto error;
4346 
4347         if (CUR_PTR >= ctxt->input->end) {
4348             xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4349                            "AttValue: ' expected\n");
4350             goto error;
4351         }
4352 
4353         /*
4354          * TODO: Check growth threshold
4355          */
4356         if (ctxt->input->end - CUR_PTR < 10)
4357             GROW;
4358 
4359         c = CUR;
4360 
4361         if (c >= 0x80) {
4362             l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4363                     "invalid character in attribute value\n");
4364             if (l == 0) {
4365                 if (chunkSize > 0) {
4366                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367                     chunkSize = 0;
4368                 }
4369                 xmlSBufAddReplChar(&buf);
4370                 NEXTL(1);
4371             } else {
4372                 chunkSize += l;
4373                 NEXTL(l);
4374             }
4375 
4376             inSpace = 0;
4377         } else if (c != '&') {
4378             if (c > 0x20) {
4379                 if (c == quote)
4380                     break;
4381 
4382                 if (c == '<')
4383                     xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4384 
4385                 chunkSize += 1;
4386                 inSpace = 0;
4387             } else if (!IS_BYTE_CHAR(c)) {
4388                 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4389                         "invalid character in attribute value\n");
4390                 if (chunkSize > 0) {
4391                     xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4392                     chunkSize = 0;
4393                 }
4394                 xmlSBufAddReplChar(&buf);
4395                 inSpace = 0;
4396             } else {
4397                 /* Whitespace */
4398                 if ((normalize) && (inSpace)) {
4399                     /* Skip char */
4400                     if (chunkSize > 0) {
4401                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4402                         chunkSize = 0;
4403                     }
4404                 } else if (c < 0x20) {
4405                     /* Convert to space */
4406                     if (chunkSize > 0) {
4407                         xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4408                         chunkSize = 0;
4409                     }
4410 
4411                     xmlSBufAddCString(&buf, " ", 1);
4412                 } else {
4413                     chunkSize += 1;
4414                 }
4415 
4416                 inSpace = 1;
4417 
4418                 if ((c == 0xD) && (NXT(1) == 0xA))
4419                     CUR_PTR++;
4420             }
4421 
4422             NEXTL(1);
4423         } else if (NXT(1) == '#') {
4424             int val;
4425 
4426             if (chunkSize > 0) {
4427                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4428                 chunkSize = 0;
4429             }
4430 
4431             val = xmlParseCharRef(ctxt);
4432             if (val == 0)
4433                 goto error;
4434 
4435             if ((val == '&') && (!replaceEntities)) {
4436                 /*
4437                  * The reparsing will be done in xmlNodeParseContent()
4438                  * called from SAX2.c
4439                  */
4440                 xmlSBufAddCString(&buf, "&#38;", 5);
4441                 inSpace = 0;
4442             } else if (val == ' ') {
4443                 if ((!normalize) || (!inSpace))
4444                     xmlSBufAddCString(&buf, " ", 1);
4445                 inSpace = 1;
4446             } else {
4447                 xmlSBufAddChar(&buf, val);
4448                 inSpace = 0;
4449             }
4450         } else {
4451             const xmlChar *name;
4452             xmlEntityPtr ent;
4453 
4454             if (chunkSize > 0) {
4455                 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4456                 chunkSize = 0;
4457             }
4458 
4459             name = xmlParseEntityRefInternal(ctxt);
4460             if (name == NULL) {
4461                 /*
4462                  * Probably a literal '&' which wasn't escaped.
4463                  * TODO: Handle gracefully in recovery mode.
4464                  */
4465                 continue;
4466             }
4467 
4468             ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4469             if (ent == NULL)
4470                 continue;
4471 
4472             if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4473                 if ((ent->content[0] == '&') && (!replaceEntities))
4474                     xmlSBufAddCString(&buf, "&#38;", 5);
4475                 else
4476                     xmlSBufAddString(&buf, ent->content, ent->length);
4477                 inSpace = 0;
4478             } else if (replaceEntities) {
4479                 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4480                                           normalize, &inSpace, ctxt->inputNr,
4481                                           /* check */ 1);
4482             } else {
4483                 if ((ent->flags & flags) != flags)
4484                     xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4485 
4486                 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4487                     ent->content[0] = 0;
4488                     goto error;
4489                 }
4490 
4491                 /*
4492                  * Just output the reference
4493                  */
4494                 xmlSBufAddCString(&buf, "&", 1);
4495                 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4496                 xmlSBufAddCString(&buf, ";", 1);
4497 
4498                 inSpace = 0;
4499             }
4500 	}
4501     }
4502 
4503     if ((buf.mem == NULL) && (alloc != NULL)) {
4504         ret = (xmlChar *) CUR_PTR - chunkSize;
4505 
4506         if (attlen != NULL)
4507             *attlen = chunkSize;
4508         if ((normalize) && (inSpace) && (chunkSize > 0))
4509             *attlen -= 1;
4510         *alloc = 0;
4511 
4512         /* Report potential error */
4513         xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4514     } else {
4515         if (chunkSize > 0)
4516             xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4517 
4518         if ((normalize) && (inSpace) && (buf.size > 0))
4519             buf.size--;
4520 
4521         ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4522 
4523         if (ret != NULL) {
4524             if (attlen != NULL)
4525                 *attlen = buf.size;
4526             if (alloc != NULL)
4527                 *alloc = 1;
4528         }
4529     }
4530 
4531     NEXTL(1);
4532 
4533     return(ret);
4534 
4535 error:
4536     xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4537     return(NULL);
4538 }
4539 
4540 /**
4541  * xmlParseAttValue:
4542  * @ctxt:  an XML parser context
4543  *
4544  * DEPRECATED: Internal function, don't use.
4545  *
4546  * parse a value for an attribute
4547  * Note: the parser won't do substitution of entities here, this
4548  * will be handled later in xmlStringGetNodeList
4549  *
4550  * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4551  *                   "'" ([^<&'] | Reference)* "'"
4552  *
4553  * 3.3.3 Attribute-Value Normalization:
4554  * Before the value of an attribute is passed to the application or
4555  * checked for validity, the XML processor must normalize it as follows:
4556  * - a character reference is processed by appending the referenced
4557  *   character to the attribute value
4558  * - an entity reference is processed by recursively processing the
4559  *   replacement text of the entity
4560  * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4561  *   appending #x20 to the normalized value, except that only a single
4562  *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4563  *   parsed entity or the literal entity value of an internal parsed entity
4564  * - other characters are processed by appending them to the normalized value
4565  * If the declared value is not CDATA, then the XML processor must further
4566  * process the normalized attribute value by discarding any leading and
4567  * trailing space (#x20) characters, and by replacing sequences of space
4568  * (#x20) characters by a single space (#x20) character.
4569  * All attributes for which no declaration has been read should be treated
4570  * by a non-validating parser as if declared CDATA.
4571  *
4572  * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4573  */
4574 
4575 
4576 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4577 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4578     if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4579     return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4580 }
4581 
4582 /**
4583  * xmlParseSystemLiteral:
4584  * @ctxt:  an XML parser context
4585  *
4586  * DEPRECATED: Internal function, don't use.
4587  *
4588  * parse an XML Literal
4589  *
4590  * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4591  *
4592  * Returns the SystemLiteral parsed or NULL
4593  */
4594 
4595 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4596 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4597     xmlChar *buf = NULL;
4598     int len = 0;
4599     int size = XML_PARSER_BUFFER_SIZE;
4600     int cur, l;
4601     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4602                     XML_MAX_TEXT_LENGTH :
4603                     XML_MAX_NAME_LENGTH;
4604     xmlChar stop;
4605 
4606     if (RAW == '"') {
4607         NEXT;
4608 	stop = '"';
4609     } else if (RAW == '\'') {
4610         NEXT;
4611 	stop = '\'';
4612     } else {
4613 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4614 	return(NULL);
4615     }
4616 
4617     buf = xmlMalloc(size);
4618     if (buf == NULL) {
4619         xmlErrMemory(ctxt);
4620 	return(NULL);
4621     }
4622     cur = xmlCurrentCharRecover(ctxt, &l);
4623     while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4624 	if (len + 5 >= size) {
4625 	    xmlChar *tmp;
4626 
4627 	    size *= 2;
4628 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4629 	    if (tmp == NULL) {
4630 	        xmlFree(buf);
4631 		xmlErrMemory(ctxt);
4632 		return(NULL);
4633 	    }
4634 	    buf = tmp;
4635 	}
4636 	COPY_BUF(buf, len, cur);
4637         if (len > maxLength) {
4638             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4639             xmlFree(buf);
4640             return(NULL);
4641         }
4642 	NEXTL(l);
4643 	cur = xmlCurrentCharRecover(ctxt, &l);
4644     }
4645     buf[len] = 0;
4646     if (!IS_CHAR(cur)) {
4647 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4648     } else {
4649 	NEXT;
4650     }
4651     return(buf);
4652 }
4653 
4654 /**
4655  * xmlParsePubidLiteral:
4656  * @ctxt:  an XML parser context
4657  *
4658  * DEPRECATED: Internal function, don't use.
4659  *
4660  * parse an XML public literal
4661  *
4662  * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4663  *
4664  * Returns the PubidLiteral parsed or NULL.
4665  */
4666 
4667 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4668 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4669     xmlChar *buf = NULL;
4670     int len = 0;
4671     int size = XML_PARSER_BUFFER_SIZE;
4672     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4673                     XML_MAX_TEXT_LENGTH :
4674                     XML_MAX_NAME_LENGTH;
4675     xmlChar cur;
4676     xmlChar stop;
4677 
4678     if (RAW == '"') {
4679         NEXT;
4680 	stop = '"';
4681     } else if (RAW == '\'') {
4682         NEXT;
4683 	stop = '\'';
4684     } else {
4685 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4686 	return(NULL);
4687     }
4688     buf = xmlMalloc(size);
4689     if (buf == NULL) {
4690 	xmlErrMemory(ctxt);
4691 	return(NULL);
4692     }
4693     cur = CUR;
4694     while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4695            (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4696 	if (len + 1 >= size) {
4697 	    xmlChar *tmp;
4698 
4699 	    size *= 2;
4700 	    tmp = (xmlChar *) xmlRealloc(buf, size);
4701 	    if (tmp == NULL) {
4702 		xmlErrMemory(ctxt);
4703 		xmlFree(buf);
4704 		return(NULL);
4705 	    }
4706 	    buf = tmp;
4707 	}
4708 	buf[len++] = cur;
4709         if (len > maxLength) {
4710             xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4711             xmlFree(buf);
4712             return(NULL);
4713         }
4714 	NEXT;
4715 	cur = CUR;
4716     }
4717     buf[len] = 0;
4718     if (cur != stop) {
4719 	xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4720     } else {
4721 	NEXTL(1);
4722     }
4723     return(buf);
4724 }
4725 
4726 static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4727 
4728 /*
4729  * used for the test in the inner loop of the char data testing
4730  */
4731 static const unsigned char test_char_data[256] = {
4732     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4733     0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4734     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4735     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4736     0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4737     0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4738     0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4739     0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4740     0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4741     0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4742     0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4743     0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4744     0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4745     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4746     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4747     0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4748     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4749     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4750     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4751     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4752     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4753     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4754     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4755     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4756     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4757     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4758     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4759     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4760     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4761     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4762     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4763     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4764 };
4765 
4766 /**
4767  * xmlParseCharDataInternal:
4768  * @ctxt:  an XML parser context
4769  * @partial:  buffer may contain partial UTF-8 sequences
4770  *
4771  * Parse character data. Always makes progress if the first char isn't
4772  * '<' or '&'.
4773  *
4774  * The right angle bracket (>) may be represented using the string "&gt;",
4775  * and must, for compatibility, be escaped using "&gt;" or a character
4776  * reference when it appears in the string "]]>" in content, when that
4777  * string is not marking the end of a CDATA section.
4778  *
4779  * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4780  */
4781 static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt,int partial)4782 xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4783     const xmlChar *in;
4784     int nbchar = 0;
4785     int line = ctxt->input->line;
4786     int col = ctxt->input->col;
4787     int ccol;
4788 
4789     GROW;
4790     /*
4791      * Accelerated common case where input don't need to be
4792      * modified before passing it to the handler.
4793      */
4794     in = ctxt->input->cur;
4795     do {
4796 get_more_space:
4797         while (*in == 0x20) { in++; ctxt->input->col++; }
4798         if (*in == 0xA) {
4799             do {
4800                 ctxt->input->line++; ctxt->input->col = 1;
4801                 in++;
4802             } while (*in == 0xA);
4803             goto get_more_space;
4804         }
4805         if (*in == '<') {
4806             nbchar = in - ctxt->input->cur;
4807             if (nbchar > 0) {
4808                 const xmlChar *tmp = ctxt->input->cur;
4809                 ctxt->input->cur = in;
4810 
4811                 if ((ctxt->sax != NULL) &&
4812                     (ctxt->disableSAX == 0) &&
4813                     (ctxt->sax->ignorableWhitespace !=
4814                      ctxt->sax->characters)) {
4815                     if (areBlanks(ctxt, tmp, nbchar, 1)) {
4816                         if (ctxt->sax->ignorableWhitespace != NULL)
4817                             ctxt->sax->ignorableWhitespace(ctxt->userData,
4818                                                    tmp, nbchar);
4819                     } else {
4820                         if (ctxt->sax->characters != NULL)
4821                             ctxt->sax->characters(ctxt->userData,
4822                                                   tmp, nbchar);
4823                         if (*ctxt->space == -1)
4824                             *ctxt->space = -2;
4825                     }
4826                 } else if ((ctxt->sax != NULL) &&
4827                            (ctxt->disableSAX == 0) &&
4828                            (ctxt->sax->characters != NULL)) {
4829                     ctxt->sax->characters(ctxt->userData,
4830                                           tmp, nbchar);
4831                 }
4832             }
4833             return;
4834         }
4835 
4836 get_more:
4837         ccol = ctxt->input->col;
4838         while (test_char_data[*in]) {
4839             in++;
4840             ccol++;
4841         }
4842         ctxt->input->col = ccol;
4843         if (*in == 0xA) {
4844             do {
4845                 ctxt->input->line++; ctxt->input->col = 1;
4846                 in++;
4847             } while (*in == 0xA);
4848             goto get_more;
4849         }
4850         if (*in == ']') {
4851             if ((in[1] == ']') && (in[2] == '>')) {
4852                 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4853                 ctxt->input->cur = in + 1;
4854                 return;
4855             }
4856             in++;
4857             ctxt->input->col++;
4858             goto get_more;
4859         }
4860         nbchar = in - ctxt->input->cur;
4861         if (nbchar > 0) {
4862             if ((ctxt->sax != NULL) &&
4863                 (ctxt->disableSAX == 0) &&
4864                 (ctxt->sax->ignorableWhitespace !=
4865                  ctxt->sax->characters) &&
4866                 (IS_BLANK_CH(*ctxt->input->cur))) {
4867                 const xmlChar *tmp = ctxt->input->cur;
4868                 ctxt->input->cur = in;
4869 
4870                 if (areBlanks(ctxt, tmp, nbchar, 0)) {
4871                     if (ctxt->sax->ignorableWhitespace != NULL)
4872                         ctxt->sax->ignorableWhitespace(ctxt->userData,
4873                                                        tmp, nbchar);
4874                 } else {
4875                     if (ctxt->sax->characters != NULL)
4876                         ctxt->sax->characters(ctxt->userData,
4877                                               tmp, nbchar);
4878                     if (*ctxt->space == -1)
4879                         *ctxt->space = -2;
4880                 }
4881                 line = ctxt->input->line;
4882                 col = ctxt->input->col;
4883             } else if ((ctxt->sax != NULL) &&
4884                        (ctxt->disableSAX == 0)) {
4885                 if (ctxt->sax->characters != NULL)
4886                     ctxt->sax->characters(ctxt->userData,
4887                                           ctxt->input->cur, nbchar);
4888                 line = ctxt->input->line;
4889                 col = ctxt->input->col;
4890             }
4891         }
4892         ctxt->input->cur = in;
4893         if (*in == 0xD) {
4894             in++;
4895             if (*in == 0xA) {
4896                 ctxt->input->cur = in;
4897                 in++;
4898                 ctxt->input->line++; ctxt->input->col = 1;
4899                 continue; /* while */
4900             }
4901             in--;
4902         }
4903         if (*in == '<') {
4904             return;
4905         }
4906         if (*in == '&') {
4907             return;
4908         }
4909         SHRINK;
4910         GROW;
4911         in = ctxt->input->cur;
4912     } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4913              (*in == 0x09) || (*in == 0x0a));
4914     ctxt->input->line = line;
4915     ctxt->input->col = col;
4916     xmlParseCharDataComplex(ctxt, partial);
4917 }
4918 
4919 /**
4920  * xmlParseCharDataComplex:
4921  * @ctxt:  an XML parser context
4922  * @cdata:  int indicating whether we are within a CDATA section
4923  *
4924  * Always makes progress if the first char isn't '<' or '&'.
4925  *
4926  * parse a CharData section.this is the fallback function
4927  * of xmlParseCharData() when the parsing requires handling
4928  * of non-ASCII characters.
4929  */
4930 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4931 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4932     xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4933     int nbchar = 0;
4934     int cur, l;
4935 
4936     cur = xmlCurrentCharRecover(ctxt, &l);
4937     while ((cur != '<') && /* checked */
4938            (cur != '&') &&
4939 	   (IS_CHAR(cur))) {
4940 	if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4941 	    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4942 	}
4943 	COPY_BUF(buf, nbchar, cur);
4944 	/* move current position before possible calling of ctxt->sax->characters */
4945 	NEXTL(l);
4946 	if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4947 	    buf[nbchar] = 0;
4948 
4949 	    /*
4950 	     * OK the segment is to be consumed as chars.
4951 	     */
4952 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4953 		if (areBlanks(ctxt, buf, nbchar, 0)) {
4954 		    if (ctxt->sax->ignorableWhitespace != NULL)
4955 			ctxt->sax->ignorableWhitespace(ctxt->userData,
4956 			                               buf, nbchar);
4957 		} else {
4958 		    if (ctxt->sax->characters != NULL)
4959 			ctxt->sax->characters(ctxt->userData, buf, nbchar);
4960 		    if ((ctxt->sax->characters !=
4961 		         ctxt->sax->ignorableWhitespace) &&
4962 			(*ctxt->space == -1))
4963 			*ctxt->space = -2;
4964 		}
4965 	    }
4966 	    nbchar = 0;
4967             SHRINK;
4968 	}
4969 	cur = xmlCurrentCharRecover(ctxt, &l);
4970     }
4971     if (nbchar != 0) {
4972         buf[nbchar] = 0;
4973 	/*
4974 	 * OK the segment is to be consumed as chars.
4975 	 */
4976 	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4977 	    if (areBlanks(ctxt, buf, nbchar, 0)) {
4978 		if (ctxt->sax->ignorableWhitespace != NULL)
4979 		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4980 	    } else {
4981 		if (ctxt->sax->characters != NULL)
4982 		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
4983 		if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4984 		    (*ctxt->space == -1))
4985 		    *ctxt->space = -2;
4986 	    }
4987 	}
4988     }
4989     /*
4990      * cur == 0 can mean
4991      *
4992      * - End of buffer.
4993      * - An actual 0 character.
4994      * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4995      */
4996     if (ctxt->input->cur < ctxt->input->end) {
4997         if ((cur == 0) && (CUR != 0)) {
4998             if (partial == 0) {
4999                 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5000                         "Incomplete UTF-8 sequence starting with %02X\n", CUR);
5001                 NEXTL(1);
5002             }
5003         } else if ((cur != '<') && (cur != '&')) {
5004             /* Generate the error and skip the offending character */
5005             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5006                               "PCDATA invalid Char value %d\n", cur);
5007             NEXTL(l);
5008         }
5009     }
5010 }
5011 
5012 /**
5013  * xmlParseCharData:
5014  * @ctxt:  an XML parser context
5015  * @cdata:  unused
5016  *
5017  * DEPRECATED: Internal function, don't use.
5018  */
5019 void
xmlParseCharData(xmlParserCtxtPtr ctxt,ATTRIBUTE_UNUSED int cdata)5020 xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
5021     xmlParseCharDataInternal(ctxt, 0);
5022 }
5023 
5024 /**
5025  * xmlParseExternalID:
5026  * @ctxt:  an XML parser context
5027  * @publicID:  a xmlChar** receiving PubidLiteral
5028  * @strict: indicate whether we should restrict parsing to only
5029  *          production [75], see NOTE below
5030  *
5031  * DEPRECATED: Internal function, don't use.
5032  *
5033  * Parse an External ID or a Public ID
5034  *
5035  * NOTE: Productions [75] and [83] interact badly since [75] can generate
5036  *       'PUBLIC' S PubidLiteral S SystemLiteral
5037  *
5038  * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5039  *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5040  *
5041  * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5042  *
5043  * Returns the function returns SystemLiteral and in the second
5044  *                case publicID receives PubidLiteral, is strict is off
5045  *                it is possible to return NULL and have publicID set.
5046  */
5047 
5048 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5049 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5050     xmlChar *URI = NULL;
5051 
5052     *publicID = NULL;
5053     if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5054         SKIP(6);
5055 	if (SKIP_BLANKS == 0) {
5056 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5057 	                   "Space required after 'SYSTEM'\n");
5058 	}
5059 	URI = xmlParseSystemLiteral(ctxt);
5060 	if (URI == NULL) {
5061 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5062         }
5063     } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5064         SKIP(6);
5065 	if (SKIP_BLANKS == 0) {
5066 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5067 		    "Space required after 'PUBLIC'\n");
5068 	}
5069 	*publicID = xmlParsePubidLiteral(ctxt);
5070 	if (*publicID == NULL) {
5071 	    xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5072 	}
5073 	if (strict) {
5074 	    /*
5075 	     * We don't handle [83] so "S SystemLiteral" is required.
5076 	     */
5077 	    if (SKIP_BLANKS == 0) {
5078 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5079 			"Space required after the Public Identifier\n");
5080 	    }
5081 	} else {
5082 	    /*
5083 	     * We handle [83] so we return immediately, if
5084 	     * "S SystemLiteral" is not detected. We skip blanks if no
5085              * system literal was found, but this is harmless since we must
5086              * be at the end of a NotationDecl.
5087 	     */
5088 	    if (SKIP_BLANKS == 0) return(NULL);
5089 	    if ((CUR != '\'') && (CUR != '"')) return(NULL);
5090 	}
5091 	URI = xmlParseSystemLiteral(ctxt);
5092 	if (URI == NULL) {
5093 	    xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5094         }
5095     }
5096     return(URI);
5097 }
5098 
5099 /**
5100  * xmlParseCommentComplex:
5101  * @ctxt:  an XML parser context
5102  * @buf:  the already parsed part of the buffer
5103  * @len:  number of bytes in the buffer
5104  * @size:  allocated size of the buffer
5105  *
5106  * Skip an XML (SGML) comment <!-- .... -->
5107  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5108  *  must not occur within comments. "
5109  * This is the slow routine in case the accelerator for ascii didn't work
5110  *
5111  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5112  */
5113 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5114 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5115                        size_t len, size_t size) {
5116     int q, ql;
5117     int r, rl;
5118     int cur, l;
5119     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5120                        XML_MAX_HUGE_LENGTH :
5121                        XML_MAX_TEXT_LENGTH;
5122 
5123     if (buf == NULL) {
5124         len = 0;
5125 	size = XML_PARSER_BUFFER_SIZE;
5126 	buf = xmlMalloc(size);
5127 	if (buf == NULL) {
5128 	    xmlErrMemory(ctxt);
5129 	    return;
5130 	}
5131     }
5132     q = xmlCurrentCharRecover(ctxt, &ql);
5133     if (q == 0)
5134         goto not_terminated;
5135     if (!IS_CHAR(q)) {
5136         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5137                           "xmlParseComment: invalid xmlChar value %d\n",
5138 	                  q);
5139 	xmlFree (buf);
5140 	return;
5141     }
5142     NEXTL(ql);
5143     r = xmlCurrentCharRecover(ctxt, &rl);
5144     if (r == 0)
5145         goto not_terminated;
5146     if (!IS_CHAR(r)) {
5147         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5148                           "xmlParseComment: invalid xmlChar value %d\n",
5149 	                  r);
5150 	xmlFree (buf);
5151 	return;
5152     }
5153     NEXTL(rl);
5154     cur = xmlCurrentCharRecover(ctxt, &l);
5155     if (cur == 0)
5156         goto not_terminated;
5157     while (IS_CHAR(cur) && /* checked */
5158            ((cur != '>') ||
5159 	    (r != '-') || (q != '-'))) {
5160 	if ((r == '-') && (q == '-')) {
5161 	    xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5162 	}
5163 	if (len + 5 >= size) {
5164 	    xmlChar *new_buf;
5165             size_t new_size;
5166 
5167 	    new_size = size * 2;
5168 	    new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5169 	    if (new_buf == NULL) {
5170 		xmlFree (buf);
5171 		xmlErrMemory(ctxt);
5172 		return;
5173 	    }
5174 	    buf = new_buf;
5175             size = new_size;
5176 	}
5177 	COPY_BUF(buf, len, q);
5178         if (len > maxLength) {
5179             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180                          "Comment too big found", NULL);
5181             xmlFree (buf);
5182             return;
5183         }
5184 
5185 	q = r;
5186 	ql = rl;
5187 	r = cur;
5188 	rl = l;
5189 
5190 	NEXTL(l);
5191 	cur = xmlCurrentCharRecover(ctxt, &l);
5192 
5193     }
5194     buf[len] = 0;
5195     if (cur == 0) {
5196 	xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5197 	                     "Comment not terminated \n<!--%.50s\n", buf);
5198     } else if (!IS_CHAR(cur)) {
5199         xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5200                           "xmlParseComment: invalid xmlChar value %d\n",
5201 	                  cur);
5202     } else {
5203         NEXT;
5204 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5205 	    (!ctxt->disableSAX))
5206 	    ctxt->sax->comment(ctxt->userData, buf);
5207     }
5208     xmlFree(buf);
5209     return;
5210 not_terminated:
5211     xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5212 			 "Comment not terminated\n", NULL);
5213     xmlFree(buf);
5214 }
5215 
5216 /**
5217  * xmlParseComment:
5218  * @ctxt:  an XML parser context
5219  *
5220  * DEPRECATED: Internal function, don't use.
5221  *
5222  * Parse an XML (SGML) comment. Always consumes '<!'.
5223  *
5224  *  The spec says that "For compatibility, the string "--" (double-hyphen)
5225  *  must not occur within comments. "
5226  *
5227  * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5228  */
5229 void
xmlParseComment(xmlParserCtxtPtr ctxt)5230 xmlParseComment(xmlParserCtxtPtr ctxt) {
5231     xmlChar *buf = NULL;
5232     size_t size = XML_PARSER_BUFFER_SIZE;
5233     size_t len = 0;
5234     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5235                        XML_MAX_HUGE_LENGTH :
5236                        XML_MAX_TEXT_LENGTH;
5237     const xmlChar *in;
5238     size_t nbchar = 0;
5239     int ccol;
5240 
5241     /*
5242      * Check that there is a comment right here.
5243      */
5244     if ((RAW != '<') || (NXT(1) != '!'))
5245         return;
5246     SKIP(2);
5247     if ((RAW != '-') || (NXT(1) != '-'))
5248         return;
5249     SKIP(2);
5250     GROW;
5251 
5252     /*
5253      * Accelerated common case where input don't need to be
5254      * modified before passing it to the handler.
5255      */
5256     in = ctxt->input->cur;
5257     do {
5258 	if (*in == 0xA) {
5259 	    do {
5260 		ctxt->input->line++; ctxt->input->col = 1;
5261 		in++;
5262 	    } while (*in == 0xA);
5263 	}
5264 get_more:
5265         ccol = ctxt->input->col;
5266 	while (((*in > '-') && (*in <= 0x7F)) ||
5267 	       ((*in >= 0x20) && (*in < '-')) ||
5268 	       (*in == 0x09)) {
5269 		    in++;
5270 		    ccol++;
5271 	}
5272 	ctxt->input->col = ccol;
5273 	if (*in == 0xA) {
5274 	    do {
5275 		ctxt->input->line++; ctxt->input->col = 1;
5276 		in++;
5277 	    } while (*in == 0xA);
5278 	    goto get_more;
5279 	}
5280 	nbchar = in - ctxt->input->cur;
5281 	/*
5282 	 * save current set of data
5283 	 */
5284 	if (nbchar > 0) {
5285             if (buf == NULL) {
5286                 if ((*in == '-') && (in[1] == '-'))
5287                     size = nbchar + 1;
5288                 else
5289                     size = XML_PARSER_BUFFER_SIZE + nbchar;
5290                 buf = xmlMalloc(size);
5291                 if (buf == NULL) {
5292                     xmlErrMemory(ctxt);
5293                     return;
5294                 }
5295                 len = 0;
5296             } else if (len + nbchar + 1 >= size) {
5297                 xmlChar *new_buf;
5298                 size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5299                 new_buf = (xmlChar *) xmlRealloc(buf, size);
5300                 if (new_buf == NULL) {
5301                     xmlFree (buf);
5302                     xmlErrMemory(ctxt);
5303                     return;
5304                 }
5305                 buf = new_buf;
5306             }
5307             memcpy(&buf[len], ctxt->input->cur, nbchar);
5308             len += nbchar;
5309             buf[len] = 0;
5310 	}
5311         if (len > maxLength) {
5312             xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5313                          "Comment too big found", NULL);
5314             xmlFree (buf);
5315             return;
5316         }
5317 	ctxt->input->cur = in;
5318 	if (*in == 0xA) {
5319 	    in++;
5320 	    ctxt->input->line++; ctxt->input->col = 1;
5321 	}
5322 	if (*in == 0xD) {
5323 	    in++;
5324 	    if (*in == 0xA) {
5325 		ctxt->input->cur = in;
5326 		in++;
5327 		ctxt->input->line++; ctxt->input->col = 1;
5328 		goto get_more;
5329 	    }
5330 	    in--;
5331 	}
5332 	SHRINK;
5333 	GROW;
5334 	in = ctxt->input->cur;
5335 	if (*in == '-') {
5336 	    if (in[1] == '-') {
5337 	        if (in[2] == '>') {
5338 		    SKIP(3);
5339 		    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5340 		        (!ctxt->disableSAX)) {
5341 			if (buf != NULL)
5342 			    ctxt->sax->comment(ctxt->userData, buf);
5343 			else
5344 			    ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5345 		    }
5346 		    if (buf != NULL)
5347 		        xmlFree(buf);
5348 		    return;
5349 		}
5350 		if (buf != NULL) {
5351 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5352 		                      "Double hyphen within comment: "
5353                                       "<!--%.50s\n",
5354 				      buf);
5355 		} else
5356 		    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5357 		                      "Double hyphen within comment\n", NULL);
5358 		in++;
5359 		ctxt->input->col++;
5360 	    }
5361 	    in++;
5362 	    ctxt->input->col++;
5363 	    goto get_more;
5364 	}
5365     } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5366     xmlParseCommentComplex(ctxt, buf, len, size);
5367 }
5368 
5369 
5370 /**
5371  * xmlParsePITarget:
5372  * @ctxt:  an XML parser context
5373  *
5374  * DEPRECATED: Internal function, don't use.
5375  *
5376  * parse the name of a PI
5377  *
5378  * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5379  *
5380  * Returns the PITarget name or NULL
5381  */
5382 
5383 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5384 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5385     const xmlChar *name;
5386 
5387     name = xmlParseName(ctxt);
5388     if ((name != NULL) &&
5389         ((name[0] == 'x') || (name[0] == 'X')) &&
5390         ((name[1] == 'm') || (name[1] == 'M')) &&
5391         ((name[2] == 'l') || (name[2] == 'L'))) {
5392 	int i;
5393 	if ((name[0] == 'x') && (name[1] == 'm') &&
5394 	    (name[2] == 'l') && (name[3] == 0)) {
5395 	    xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5396 		 "XML declaration allowed only at the start of the document\n");
5397 	    return(name);
5398 	} else if (name[3] == 0) {
5399 	    xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5400 	    return(name);
5401 	}
5402 	for (i = 0;;i++) {
5403 	    if (xmlW3CPIs[i] == NULL) break;
5404 	    if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5405 	        return(name);
5406 	}
5407 	xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5408 		      "xmlParsePITarget: invalid name prefix 'xml'\n",
5409 		      NULL, NULL);
5410     }
5411     if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5412 	xmlNsErr(ctxt, XML_NS_ERR_COLON,
5413 		 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5414     }
5415     return(name);
5416 }
5417 
5418 #ifdef LIBXML_CATALOG_ENABLED
5419 /**
5420  * xmlParseCatalogPI:
5421  * @ctxt:  an XML parser context
5422  * @catalog:  the PI value string
5423  *
5424  * parse an XML Catalog Processing Instruction.
5425  *
5426  * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5427  *
5428  * Occurs only if allowed by the user and if happening in the Misc
5429  * part of the document before any doctype information
5430  * This will add the given catalog to the parsing context in order
5431  * to be used if there is a resolution need further down in the document
5432  */
5433 
5434 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5435 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5436     xmlChar *URL = NULL;
5437     const xmlChar *tmp, *base;
5438     xmlChar marker;
5439 
5440     tmp = catalog;
5441     while (IS_BLANK_CH(*tmp)) tmp++;
5442     if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5443 	goto error;
5444     tmp += 7;
5445     while (IS_BLANK_CH(*tmp)) tmp++;
5446     if (*tmp != '=') {
5447 	return;
5448     }
5449     tmp++;
5450     while (IS_BLANK_CH(*tmp)) tmp++;
5451     marker = *tmp;
5452     if ((marker != '\'') && (marker != '"'))
5453 	goto error;
5454     tmp++;
5455     base = tmp;
5456     while ((*tmp != 0) && (*tmp != marker)) tmp++;
5457     if (*tmp == 0)
5458 	goto error;
5459     URL = xmlStrndup(base, tmp - base);
5460     tmp++;
5461     while (IS_BLANK_CH(*tmp)) tmp++;
5462     if (*tmp != 0)
5463 	goto error;
5464 
5465     if (URL != NULL) {
5466         /*
5467          * Unfortunately, the catalog API doesn't report OOM errors.
5468          * xmlGetLastError isn't very helpful since we don't know
5469          * where the last error came from. We'd have to reset it
5470          * before this call and restore it afterwards.
5471          */
5472 	ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5473 	xmlFree(URL);
5474     }
5475     return;
5476 
5477 error:
5478     xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5479 	          "Catalog PI syntax error: %s\n",
5480 		  catalog, NULL);
5481     if (URL != NULL)
5482 	xmlFree(URL);
5483 }
5484 #endif
5485 
5486 /**
5487  * xmlParsePI:
5488  * @ctxt:  an XML parser context
5489  *
5490  * DEPRECATED: Internal function, don't use.
5491  *
5492  * parse an XML Processing Instruction.
5493  *
5494  * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5495  *
5496  * The processing is transferred to SAX once parsed.
5497  */
5498 
5499 void
xmlParsePI(xmlParserCtxtPtr ctxt)5500 xmlParsePI(xmlParserCtxtPtr ctxt) {
5501     xmlChar *buf = NULL;
5502     size_t len = 0;
5503     size_t size = XML_PARSER_BUFFER_SIZE;
5504     size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5505                        XML_MAX_HUGE_LENGTH :
5506                        XML_MAX_TEXT_LENGTH;
5507     int cur, l;
5508     const xmlChar *target;
5509 
5510     if ((RAW == '<') && (NXT(1) == '?')) {
5511 	/*
5512 	 * this is a Processing Instruction.
5513 	 */
5514 	SKIP(2);
5515 
5516 	/*
5517 	 * Parse the target name and check for special support like
5518 	 * namespace.
5519 	 */
5520         target = xmlParsePITarget(ctxt);
5521 	if (target != NULL) {
5522 	    if ((RAW == '?') && (NXT(1) == '>')) {
5523 		SKIP(2);
5524 
5525 		/*
5526 		 * SAX: PI detected.
5527 		 */
5528 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5529 		    (ctxt->sax->processingInstruction != NULL))
5530 		    ctxt->sax->processingInstruction(ctxt->userData,
5531 		                                     target, NULL);
5532 		return;
5533 	    }
5534 	    buf = xmlMalloc(size);
5535 	    if (buf == NULL) {
5536 		xmlErrMemory(ctxt);
5537 		return;
5538 	    }
5539 	    if (SKIP_BLANKS == 0) {
5540 		xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5541 			  "ParsePI: PI %s space expected\n", target);
5542 	    }
5543 	    cur = xmlCurrentCharRecover(ctxt, &l);
5544 	    while (IS_CHAR(cur) && /* checked */
5545 		   ((cur != '?') || (NXT(1) != '>'))) {
5546 		if (len + 5 >= size) {
5547 		    xmlChar *tmp;
5548                     size_t new_size = size * 2;
5549 		    tmp = (xmlChar *) xmlRealloc(buf, new_size);
5550 		    if (tmp == NULL) {
5551 			xmlErrMemory(ctxt);
5552 			xmlFree(buf);
5553 			return;
5554 		    }
5555 		    buf = tmp;
5556                     size = new_size;
5557 		}
5558 		COPY_BUF(buf, len, cur);
5559                 if (len > maxLength) {
5560                     xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5561                                       "PI %s too big found", target);
5562                     xmlFree(buf);
5563                     return;
5564                 }
5565 		NEXTL(l);
5566 		cur = xmlCurrentCharRecover(ctxt, &l);
5567 	    }
5568 	    buf[len] = 0;
5569 	    if (cur != '?') {
5570 		xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5571 		      "ParsePI: PI %s never end ...\n", target);
5572 	    } else {
5573 		SKIP(2);
5574 
5575 #ifdef LIBXML_CATALOG_ENABLED
5576 		if ((ctxt->inSubset == 0) &&
5577 		    (xmlStrEqual(target, XML_CATALOG_PI))) {
5578 		    xmlCatalogAllow allow = xmlCatalogGetDefaults();
5579 
5580 		    if (((ctxt->options & XML_PARSE_NO_CATALOG_PI) == 0) &&
5581                         ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5582 			 (allow == XML_CATA_ALLOW_ALL)))
5583 			xmlParseCatalogPI(ctxt, buf);
5584 		}
5585 #endif
5586 
5587 		/*
5588 		 * SAX: PI detected.
5589 		 */
5590 		if ((ctxt->sax) && (!ctxt->disableSAX) &&
5591 		    (ctxt->sax->processingInstruction != NULL))
5592 		    ctxt->sax->processingInstruction(ctxt->userData,
5593 		                                     target, buf);
5594 	    }
5595 	    xmlFree(buf);
5596 	} else {
5597 	    xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5598 	}
5599     }
5600 }
5601 
5602 /**
5603  * xmlParseNotationDecl:
5604  * @ctxt:  an XML parser context
5605  *
5606  * DEPRECATED: Internal function, don't use.
5607  *
5608  * Parse a notation declaration. Always consumes '<!'.
5609  *
5610  * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5611  *
5612  * Hence there is actually 3 choices:
5613  *     'PUBLIC' S PubidLiteral
5614  *     'PUBLIC' S PubidLiteral S SystemLiteral
5615  * and 'SYSTEM' S SystemLiteral
5616  *
5617  * See the NOTE on xmlParseExternalID().
5618  */
5619 
5620 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5621 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5622     const xmlChar *name;
5623     xmlChar *Pubid;
5624     xmlChar *Systemid;
5625 
5626     if ((CUR != '<') || (NXT(1) != '!'))
5627         return;
5628     SKIP(2);
5629 
5630     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5631 	int inputid = ctxt->input->id;
5632 	SKIP(8);
5633 	if (SKIP_BLANKS_PE == 0) {
5634 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5635 			   "Space required after '<!NOTATION'\n");
5636 	    return;
5637 	}
5638 
5639         name = xmlParseName(ctxt);
5640 	if (name == NULL) {
5641 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5642 	    return;
5643 	}
5644 	if (xmlStrchr(name, ':') != NULL) {
5645 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5646 		     "colons are forbidden from notation names '%s'\n",
5647 		     name, NULL, NULL);
5648 	}
5649 	if (SKIP_BLANKS_PE == 0) {
5650 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5651 		     "Space required after the NOTATION name'\n");
5652 	    return;
5653 	}
5654 
5655 	/*
5656 	 * Parse the IDs.
5657 	 */
5658 	Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5659 	SKIP_BLANKS_PE;
5660 
5661 	if (RAW == '>') {
5662 	    if (inputid != ctxt->input->id) {
5663 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5664 	                       "Notation declaration doesn't start and stop"
5665                                " in the same entity\n");
5666 	    }
5667 	    NEXT;
5668 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5669 		(ctxt->sax->notationDecl != NULL))
5670 		ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5671 	} else {
5672 	    xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5673 	}
5674 	if (Systemid != NULL) xmlFree(Systemid);
5675 	if (Pubid != NULL) xmlFree(Pubid);
5676     }
5677 }
5678 
5679 /**
5680  * xmlParseEntityDecl:
5681  * @ctxt:  an XML parser context
5682  *
5683  * DEPRECATED: Internal function, don't use.
5684  *
5685  * Parse an entity declaration. Always consumes '<!'.
5686  *
5687  * [70] EntityDecl ::= GEDecl | PEDecl
5688  *
5689  * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5690  *
5691  * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5692  *
5693  * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5694  *
5695  * [74] PEDef ::= EntityValue | ExternalID
5696  *
5697  * [76] NDataDecl ::= S 'NDATA' S Name
5698  *
5699  * [ VC: Notation Declared ]
5700  * The Name must match the declared name of a notation.
5701  */
5702 
5703 void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt)5704 xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5705     const xmlChar *name = NULL;
5706     xmlChar *value = NULL;
5707     xmlChar *URI = NULL, *literal = NULL;
5708     const xmlChar *ndata = NULL;
5709     int isParameter = 0;
5710     xmlChar *orig = NULL;
5711 
5712     if ((CUR != '<') || (NXT(1) != '!'))
5713         return;
5714     SKIP(2);
5715 
5716     /* GROW; done in the caller */
5717     if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5718 	int inputid = ctxt->input->id;
5719 	SKIP(6);
5720 	if (SKIP_BLANKS_PE == 0) {
5721 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5722 			   "Space required after '<!ENTITY'\n");
5723 	}
5724 
5725 	if (RAW == '%') {
5726 	    NEXT;
5727 	    if (SKIP_BLANKS_PE == 0) {
5728 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5729 			       "Space required after '%%'\n");
5730 	    }
5731 	    isParameter = 1;
5732 	}
5733 
5734         name = xmlParseName(ctxt);
5735 	if (name == NULL) {
5736 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5737 	                   "xmlParseEntityDecl: no name\n");
5738             return;
5739 	}
5740 	if (xmlStrchr(name, ':') != NULL) {
5741 	    xmlNsErr(ctxt, XML_NS_ERR_COLON,
5742 		     "colons are forbidden from entities names '%s'\n",
5743 		     name, NULL, NULL);
5744 	}
5745 	if (SKIP_BLANKS_PE == 0) {
5746 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5747 			   "Space required after the entity name\n");
5748 	}
5749 
5750 	/*
5751 	 * handle the various case of definitions...
5752 	 */
5753 	if (isParameter) {
5754 	    if ((RAW == '"') || (RAW == '\'')) {
5755 	        value = xmlParseEntityValue(ctxt, &orig);
5756 		if (value) {
5757 		    if ((ctxt->sax != NULL) &&
5758 			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5759 			ctxt->sax->entityDecl(ctxt->userData, name,
5760 		                    XML_INTERNAL_PARAMETER_ENTITY,
5761 				    NULL, NULL, value);
5762 		}
5763 	    } else {
5764 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5765 		if ((URI == NULL) && (literal == NULL)) {
5766 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5767 		}
5768 		if (URI) {
5769                     if (xmlStrchr(URI, '#')) {
5770                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5771                     } else {
5772                         if ((ctxt->sax != NULL) &&
5773                             (!ctxt->disableSAX) &&
5774                             (ctxt->sax->entityDecl != NULL))
5775                             ctxt->sax->entityDecl(ctxt->userData, name,
5776                                         XML_EXTERNAL_PARAMETER_ENTITY,
5777                                         literal, URI, NULL);
5778                     }
5779 		}
5780 	    }
5781 	} else {
5782 	    if ((RAW == '"') || (RAW == '\'')) {
5783 	        value = xmlParseEntityValue(ctxt, &orig);
5784 		if ((ctxt->sax != NULL) &&
5785 		    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5786 		    ctxt->sax->entityDecl(ctxt->userData, name,
5787 				XML_INTERNAL_GENERAL_ENTITY,
5788 				NULL, NULL, value);
5789 		/*
5790 		 * For expat compatibility in SAX mode.
5791 		 */
5792 		if ((ctxt->myDoc == NULL) ||
5793 		    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5794 		    if (ctxt->myDoc == NULL) {
5795 			ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5796 			if (ctxt->myDoc == NULL) {
5797 			    xmlErrMemory(ctxt);
5798 			    goto done;
5799 			}
5800 			ctxt->myDoc->properties = XML_DOC_INTERNAL;
5801 		    }
5802 		    if (ctxt->myDoc->intSubset == NULL) {
5803 			ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5804 					    BAD_CAST "fake", NULL, NULL);
5805                         if (ctxt->myDoc->intSubset == NULL) {
5806                             xmlErrMemory(ctxt);
5807                             goto done;
5808                         }
5809                     }
5810 
5811 		    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5812 			              NULL, NULL, value);
5813 		}
5814 	    } else {
5815 	        URI = xmlParseExternalID(ctxt, &literal, 1);
5816 		if ((URI == NULL) && (literal == NULL)) {
5817 		    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5818 		}
5819 		if (URI) {
5820                     if (xmlStrchr(URI, '#')) {
5821                         xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5822                     }
5823 		}
5824 		if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
5825 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5826 				   "Space required before 'NDATA'\n");
5827 		}
5828 		if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5829 		    SKIP(5);
5830 		    if (SKIP_BLANKS_PE == 0) {
5831 			xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5832 				       "Space required after 'NDATA'\n");
5833 		    }
5834 		    ndata = xmlParseName(ctxt);
5835 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5836 		        (ctxt->sax->unparsedEntityDecl != NULL))
5837 			ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5838 				    literal, URI, ndata);
5839 		} else {
5840 		    if ((ctxt->sax != NULL) &&
5841 		        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5842 			ctxt->sax->entityDecl(ctxt->userData, name,
5843 				    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5844 				    literal, URI, NULL);
5845 		    /*
5846 		     * For expat compatibility in SAX mode.
5847 		     * assuming the entity replacement was asked for
5848 		     */
5849 		    if ((ctxt->replaceEntities != 0) &&
5850 			((ctxt->myDoc == NULL) ||
5851 			(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5852 			if (ctxt->myDoc == NULL) {
5853 			    ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5854 			    if (ctxt->myDoc == NULL) {
5855 			        xmlErrMemory(ctxt);
5856 				goto done;
5857 			    }
5858 			    ctxt->myDoc->properties = XML_DOC_INTERNAL;
5859 			}
5860 
5861 			if (ctxt->myDoc->intSubset == NULL) {
5862 			    ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5863 						BAD_CAST "fake", NULL, NULL);
5864                             if (ctxt->myDoc->intSubset == NULL) {
5865                                 xmlErrMemory(ctxt);
5866                                 goto done;
5867                             }
5868                         }
5869 			xmlSAX2EntityDecl(ctxt, name,
5870 				          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5871 				          literal, URI, NULL);
5872 		    }
5873 		}
5874 	    }
5875 	}
5876 	SKIP_BLANKS_PE;
5877 	if (RAW != '>') {
5878 	    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5879 	            "xmlParseEntityDecl: entity %s not terminated\n", name);
5880 	    xmlHaltParser(ctxt);
5881 	} else {
5882 	    if (inputid != ctxt->input->id) {
5883 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5884 	                       "Entity declaration doesn't start and stop in"
5885                                " the same entity\n");
5886 	    }
5887 	    NEXT;
5888 	}
5889 	if (orig != NULL) {
5890 	    /*
5891 	     * Ugly mechanism to save the raw entity value.
5892 	     */
5893 	    xmlEntityPtr cur = NULL;
5894 
5895 	    if (isParameter) {
5896 	        if ((ctxt->sax != NULL) &&
5897 		    (ctxt->sax->getParameterEntity != NULL))
5898 		    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5899 	    } else {
5900 	        if ((ctxt->sax != NULL) &&
5901 		    (ctxt->sax->getEntity != NULL))
5902 		    cur = ctxt->sax->getEntity(ctxt->userData, name);
5903 		if ((cur == NULL) && (ctxt->userData==ctxt)) {
5904 		    cur = xmlSAX2GetEntity(ctxt, name);
5905 		}
5906 	    }
5907             if ((cur != NULL) && (cur->orig == NULL)) {
5908 		cur->orig = orig;
5909                 orig = NULL;
5910 	    }
5911 	}
5912 
5913 done:
5914 	if (value != NULL) xmlFree(value);
5915 	if (URI != NULL) xmlFree(URI);
5916 	if (literal != NULL) xmlFree(literal);
5917         if (orig != NULL) xmlFree(orig);
5918     }
5919 }
5920 
5921 /**
5922  * xmlParseDefaultDecl:
5923  * @ctxt:  an XML parser context
5924  * @value:  Receive a possible fixed default value for the attribute
5925  *
5926  * DEPRECATED: Internal function, don't use.
5927  *
5928  * Parse an attribute default declaration
5929  *
5930  * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5931  *
5932  * [ VC: Required Attribute ]
5933  * if the default declaration is the keyword #REQUIRED, then the
5934  * attribute must be specified for all elements of the type in the
5935  * attribute-list declaration.
5936  *
5937  * [ VC: Attribute Default Legal ]
5938  * The declared default value must meet the lexical constraints of
5939  * the declared attribute type c.f. xmlValidateAttributeDecl()
5940  *
5941  * [ VC: Fixed Attribute Default ]
5942  * if an attribute has a default value declared with the #FIXED
5943  * keyword, instances of that attribute must match the default value.
5944  *
5945  * [ WFC: No < in Attribute Values ]
5946  * handled in xmlParseAttValue()
5947  *
5948  * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5949  *          or XML_ATTRIBUTE_FIXED.
5950  */
5951 
5952 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5953 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5954     int val;
5955     xmlChar *ret;
5956 
5957     *value = NULL;
5958     if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5959 	SKIP(9);
5960 	return(XML_ATTRIBUTE_REQUIRED);
5961     }
5962     if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5963 	SKIP(8);
5964 	return(XML_ATTRIBUTE_IMPLIED);
5965     }
5966     val = XML_ATTRIBUTE_NONE;
5967     if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5968 	SKIP(6);
5969 	val = XML_ATTRIBUTE_FIXED;
5970 	if (SKIP_BLANKS_PE == 0) {
5971 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5972 			   "Space required after '#FIXED'\n");
5973 	}
5974     }
5975     ret = xmlParseAttValue(ctxt);
5976     if (ret == NULL) {
5977 	xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5978 		       "Attribute default value declaration error\n");
5979     } else
5980         *value = ret;
5981     return(val);
5982 }
5983 
5984 /**
5985  * xmlParseNotationType:
5986  * @ctxt:  an XML parser context
5987  *
5988  * DEPRECATED: Internal function, don't use.
5989  *
5990  * parse an Notation attribute type.
5991  *
5992  * Note: the leading 'NOTATION' S part has already being parsed...
5993  *
5994  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5995  *
5996  * [ VC: Notation Attributes ]
5997  * Values of this type must match one of the notation names included
5998  * in the declaration; all notation names in the declaration must be declared.
5999  *
6000  * Returns: the notation attribute tree built while parsing
6001  */
6002 
6003 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)6004 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6005     const xmlChar *name;
6006     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6007 
6008     if (RAW != '(') {
6009 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6010 	return(NULL);
6011     }
6012     do {
6013         NEXT;
6014 	SKIP_BLANKS_PE;
6015         name = xmlParseName(ctxt);
6016 	if (name == NULL) {
6017 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6018 			   "Name expected in NOTATION declaration\n");
6019             xmlFreeEnumeration(ret);
6020 	    return(NULL);
6021 	}
6022 	tmp = ret;
6023 	while (tmp != NULL) {
6024 	    if (xmlStrEqual(name, tmp->name)) {
6025 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6026 	  "standalone: attribute notation value token %s duplicated\n",
6027 				 name, NULL);
6028 		if (!xmlDictOwns(ctxt->dict, name))
6029 		    xmlFree((xmlChar *) name);
6030 		break;
6031 	    }
6032 	    tmp = tmp->next;
6033 	}
6034 	if (tmp == NULL) {
6035 	    cur = xmlCreateEnumeration(name);
6036 	    if (cur == NULL) {
6037                 xmlErrMemory(ctxt);
6038                 xmlFreeEnumeration(ret);
6039                 return(NULL);
6040             }
6041 	    if (last == NULL) ret = last = cur;
6042 	    else {
6043 		last->next = cur;
6044 		last = cur;
6045 	    }
6046 	}
6047 	SKIP_BLANKS_PE;
6048     } while (RAW == '|');
6049     if (RAW != ')') {
6050 	xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6051         xmlFreeEnumeration(ret);
6052 	return(NULL);
6053     }
6054     NEXT;
6055     return(ret);
6056 }
6057 
6058 /**
6059  * xmlParseEnumerationType:
6060  * @ctxt:  an XML parser context
6061  *
6062  * DEPRECATED: Internal function, don't use.
6063  *
6064  * parse an Enumeration attribute type.
6065  *
6066  * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6067  *
6068  * [ VC: Enumeration ]
6069  * Values of this type must match one of the Nmtoken tokens in
6070  * the declaration
6071  *
6072  * Returns: the enumeration attribute tree built while parsing
6073  */
6074 
6075 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6076 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6077     xmlChar *name;
6078     xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6079 
6080     if (RAW != '(') {
6081 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6082 	return(NULL);
6083     }
6084     do {
6085         NEXT;
6086 	SKIP_BLANKS_PE;
6087         name = xmlParseNmtoken(ctxt);
6088 	if (name == NULL) {
6089 	    xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6090 	    return(ret);
6091 	}
6092 	tmp = ret;
6093 	while (tmp != NULL) {
6094 	    if (xmlStrEqual(name, tmp->name)) {
6095 		xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6096 	  "standalone: attribute enumeration value token %s duplicated\n",
6097 				 name, NULL);
6098 		if (!xmlDictOwns(ctxt->dict, name))
6099 		    xmlFree(name);
6100 		break;
6101 	    }
6102 	    tmp = tmp->next;
6103 	}
6104 	if (tmp == NULL) {
6105 	    cur = xmlCreateEnumeration(name);
6106 	    if (!xmlDictOwns(ctxt->dict, name))
6107 		xmlFree(name);
6108 	    if (cur == NULL) {
6109                 xmlErrMemory(ctxt);
6110                 xmlFreeEnumeration(ret);
6111                 return(NULL);
6112             }
6113 	    if (last == NULL) ret = last = cur;
6114 	    else {
6115 		last->next = cur;
6116 		last = cur;
6117 	    }
6118 	}
6119 	SKIP_BLANKS_PE;
6120     } while (RAW == '|');
6121     if (RAW != ')') {
6122 	xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6123 	return(ret);
6124     }
6125     NEXT;
6126     return(ret);
6127 }
6128 
6129 /**
6130  * xmlParseEnumeratedType:
6131  * @ctxt:  an XML parser context
6132  * @tree:  the enumeration tree built while parsing
6133  *
6134  * DEPRECATED: Internal function, don't use.
6135  *
6136  * parse an Enumerated attribute type.
6137  *
6138  * [57] EnumeratedType ::= NotationType | Enumeration
6139  *
6140  * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6141  *
6142  *
6143  * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6144  */
6145 
6146 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6147 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6148     if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6149 	SKIP(8);
6150 	if (SKIP_BLANKS_PE == 0) {
6151 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6152 			   "Space required after 'NOTATION'\n");
6153 	    return(0);
6154 	}
6155 	*tree = xmlParseNotationType(ctxt);
6156 	if (*tree == NULL) return(0);
6157 	return(XML_ATTRIBUTE_NOTATION);
6158     }
6159     *tree = xmlParseEnumerationType(ctxt);
6160     if (*tree == NULL) return(0);
6161     return(XML_ATTRIBUTE_ENUMERATION);
6162 }
6163 
6164 /**
6165  * xmlParseAttributeType:
6166  * @ctxt:  an XML parser context
6167  * @tree:  the enumeration tree built while parsing
6168  *
6169  * DEPRECATED: Internal function, don't use.
6170  *
6171  * parse the Attribute list def for an element
6172  *
6173  * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6174  *
6175  * [55] StringType ::= 'CDATA'
6176  *
6177  * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6178  *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6179  *
6180  * Validity constraints for attribute values syntax are checked in
6181  * xmlValidateAttributeValue()
6182  *
6183  * [ VC: ID ]
6184  * Values of type ID must match the Name production. A name must not
6185  * appear more than once in an XML document as a value of this type;
6186  * i.e., ID values must uniquely identify the elements which bear them.
6187  *
6188  * [ VC: One ID per Element Type ]
6189  * No element type may have more than one ID attribute specified.
6190  *
6191  * [ VC: ID Attribute Default ]
6192  * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6193  *
6194  * [ VC: IDREF ]
6195  * Values of type IDREF must match the Name production, and values
6196  * of type IDREFS must match Names; each IDREF Name must match the value
6197  * of an ID attribute on some element in the XML document; i.e. IDREF
6198  * values must match the value of some ID attribute.
6199  *
6200  * [ VC: Entity Name ]
6201  * Values of type ENTITY must match the Name production, values
6202  * of type ENTITIES must match Names; each Entity Name must match the
6203  * name of an unparsed entity declared in the DTD.
6204  *
6205  * [ VC: Name Token ]
6206  * Values of type NMTOKEN must match the Nmtoken production; values
6207  * of type NMTOKENS must match Nmtokens.
6208  *
6209  * Returns the attribute type
6210  */
6211 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6212 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6213     if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6214 	SKIP(5);
6215 	return(XML_ATTRIBUTE_CDATA);
6216      } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6217 	SKIP(6);
6218 	return(XML_ATTRIBUTE_IDREFS);
6219      } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6220 	SKIP(5);
6221 	return(XML_ATTRIBUTE_IDREF);
6222      } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6223         SKIP(2);
6224 	return(XML_ATTRIBUTE_ID);
6225      } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6226 	SKIP(6);
6227 	return(XML_ATTRIBUTE_ENTITY);
6228      } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6229 	SKIP(8);
6230 	return(XML_ATTRIBUTE_ENTITIES);
6231      } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6232 	SKIP(8);
6233 	return(XML_ATTRIBUTE_NMTOKENS);
6234      } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6235 	SKIP(7);
6236 	return(XML_ATTRIBUTE_NMTOKEN);
6237      }
6238      return(xmlParseEnumeratedType(ctxt, tree));
6239 }
6240 
6241 /**
6242  * xmlParseAttributeListDecl:
6243  * @ctxt:  an XML parser context
6244  *
6245  * DEPRECATED: Internal function, don't use.
6246  *
6247  * Parse an attribute list declaration for an element. Always consumes '<!'.
6248  *
6249  * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6250  *
6251  * [53] AttDef ::= S Name S AttType S DefaultDecl
6252  *
6253  */
6254 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6255 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6256     const xmlChar *elemName;
6257     const xmlChar *attrName;
6258     xmlEnumerationPtr tree;
6259 
6260     if ((CUR != '<') || (NXT(1) != '!'))
6261         return;
6262     SKIP(2);
6263 
6264     if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6265 	int inputid = ctxt->input->id;
6266 
6267 	SKIP(7);
6268 	if (SKIP_BLANKS_PE == 0) {
6269 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6270 		                 "Space required after '<!ATTLIST'\n");
6271 	}
6272         elemName = xmlParseName(ctxt);
6273 	if (elemName == NULL) {
6274 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6275 			   "ATTLIST: no name for Element\n");
6276 	    return;
6277 	}
6278 	SKIP_BLANKS_PE;
6279 	GROW;
6280 	while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6281 	    int type;
6282 	    int def;
6283 	    xmlChar *defaultValue = NULL;
6284 
6285 	    GROW;
6286             tree = NULL;
6287 	    attrName = xmlParseName(ctxt);
6288 	    if (attrName == NULL) {
6289 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6290 			       "ATTLIST: no name for Attribute\n");
6291 		break;
6292 	    }
6293 	    GROW;
6294 	    if (SKIP_BLANKS_PE == 0) {
6295 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6296 		        "Space required after the attribute name\n");
6297 		break;
6298 	    }
6299 
6300 	    type = xmlParseAttributeType(ctxt, &tree);
6301 	    if (type <= 0) {
6302 	        break;
6303 	    }
6304 
6305 	    GROW;
6306 	    if (SKIP_BLANKS_PE == 0) {
6307 		xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6308 			       "Space required after the attribute type\n");
6309 	        if (tree != NULL)
6310 		    xmlFreeEnumeration(tree);
6311 		break;
6312 	    }
6313 
6314 	    def = xmlParseDefaultDecl(ctxt, &defaultValue);
6315 	    if (def <= 0) {
6316                 if (defaultValue != NULL)
6317 		    xmlFree(defaultValue);
6318 	        if (tree != NULL)
6319 		    xmlFreeEnumeration(tree);
6320 	        break;
6321 	    }
6322 	    if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6323 	        xmlAttrNormalizeSpace(defaultValue, defaultValue);
6324 
6325 	    GROW;
6326             if (RAW != '>') {
6327 		if (SKIP_BLANKS_PE == 0) {
6328 		    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6329 			"Space required after the attribute default value\n");
6330 		    if (defaultValue != NULL)
6331 			xmlFree(defaultValue);
6332 		    if (tree != NULL)
6333 			xmlFreeEnumeration(tree);
6334 		    break;
6335 		}
6336 	    }
6337 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6338 		(ctxt->sax->attributeDecl != NULL))
6339 		ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6340 	                        type, def, defaultValue, tree);
6341 	    else if (tree != NULL)
6342 		xmlFreeEnumeration(tree);
6343 
6344 	    if ((ctxt->sax2) && (defaultValue != NULL) &&
6345 	        (def != XML_ATTRIBUTE_IMPLIED) &&
6346 		(def != XML_ATTRIBUTE_REQUIRED)) {
6347 		xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6348 	    }
6349 	    if (ctxt->sax2) {
6350 		xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6351 	    }
6352 	    if (defaultValue != NULL)
6353 	        xmlFree(defaultValue);
6354 	    GROW;
6355 	}
6356 	if (RAW == '>') {
6357 	    if (inputid != ctxt->input->id) {
6358 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6359                                "Attribute list declaration doesn't start and"
6360                                " stop in the same entity\n");
6361 	    }
6362 	    NEXT;
6363 	}
6364     }
6365 }
6366 
6367 /**
6368  * xmlParseElementMixedContentDecl:
6369  * @ctxt:  an XML parser context
6370  * @inputchk:  the input used for the current entity, needed for boundary checks
6371  *
6372  * DEPRECATED: Internal function, don't use.
6373  *
6374  * parse the declaration for a Mixed Element content
6375  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6376  *
6377  * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6378  *                '(' S? '#PCDATA' S? ')'
6379  *
6380  * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6381  *
6382  * [ VC: No Duplicate Types ]
6383  * The same name must not appear more than once in a single
6384  * mixed-content declaration.
6385  *
6386  * returns: the list of the xmlElementContentPtr describing the element choices
6387  */
6388 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6389 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6390     xmlElementContentPtr ret = NULL, cur = NULL, n;
6391     const xmlChar *elem = NULL;
6392 
6393     GROW;
6394     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6395 	SKIP(7);
6396 	SKIP_BLANKS_PE;
6397 	if (RAW == ')') {
6398 	    if (ctxt->input->id != inputchk) {
6399 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6400                                "Element content declaration doesn't start and"
6401                                " stop in the same entity\n");
6402 	    }
6403 	    NEXT;
6404 	    ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6405 	    if (ret == NULL)
6406                 goto mem_error;
6407 	    if (RAW == '*') {
6408 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6409 		NEXT;
6410 	    }
6411 	    return(ret);
6412 	}
6413 	if ((RAW == '(') || (RAW == '|')) {
6414 	    ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415 	    if (ret == NULL)
6416                 goto mem_error;
6417 	}
6418 	while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6419 	    NEXT;
6420             n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6421             if (n == NULL)
6422                 goto mem_error;
6423 	    if (elem == NULL) {
6424 		n->c1 = cur;
6425 		if (cur != NULL)
6426 		    cur->parent = n;
6427 		ret = cur = n;
6428 	    } else {
6429 	        cur->c2 = n;
6430 		n->parent = cur;
6431 		n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6432                 if (n->c1 == NULL)
6433                     goto mem_error;
6434 		n->c1->parent = n;
6435 		cur = n;
6436 	    }
6437 	    SKIP_BLANKS_PE;
6438 	    elem = xmlParseName(ctxt);
6439 	    if (elem == NULL) {
6440 		xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6441 			"xmlParseElementMixedContentDecl : Name expected\n");
6442 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6443 		return(NULL);
6444 	    }
6445 	    SKIP_BLANKS_PE;
6446 	    GROW;
6447 	}
6448 	if ((RAW == ')') && (NXT(1) == '*')) {
6449 	    if (elem != NULL) {
6450 		cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6451 		                               XML_ELEMENT_CONTENT_ELEMENT);
6452 		if (cur->c2 == NULL)
6453                     goto mem_error;
6454 		cur->c2->parent = cur;
6455             }
6456             if (ret != NULL)
6457                 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6458 	    if (ctxt->input->id != inputchk) {
6459 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6460                                "Element content declaration doesn't start and"
6461                                " stop in the same entity\n");
6462 	    }
6463 	    SKIP(2);
6464 	} else {
6465 	    xmlFreeDocElementContent(ctxt->myDoc, ret);
6466 	    xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6467 	    return(NULL);
6468 	}
6469 
6470     } else {
6471 	xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6472     }
6473     return(ret);
6474 
6475 mem_error:
6476     xmlErrMemory(ctxt);
6477     xmlFreeDocElementContent(ctxt->myDoc, ret);
6478     return(NULL);
6479 }
6480 
6481 /**
6482  * xmlParseElementChildrenContentDeclPriv:
6483  * @ctxt:  an XML parser context
6484  * @inputchk:  the input used for the current entity, needed for boundary checks
6485  * @depth: the level of recursion
6486  *
6487  * parse the declaration for a Mixed Element content
6488  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6489  *
6490  *
6491  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6492  *
6493  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6494  *
6495  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6496  *
6497  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6498  *
6499  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6500  * TODO Parameter-entity replacement text must be properly nested
6501  *	with parenthesized groups. That is to say, if either of the
6502  *	opening or closing parentheses in a choice, seq, or Mixed
6503  *	construct is contained in the replacement text for a parameter
6504  *	entity, both must be contained in the same replacement text. For
6505  *	interoperability, if a parameter-entity reference appears in a
6506  *	choice, seq, or Mixed construct, its replacement text should not
6507  *	be empty, and neither the first nor last non-blank character of
6508  *	the replacement text should be a connector (| or ,).
6509  *
6510  * Returns the tree of xmlElementContentPtr describing the element
6511  *          hierarchy.
6512  */
6513 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6514 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6515                                        int depth) {
6516     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6517     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6518     const xmlChar *elem;
6519     xmlChar type = 0;
6520 
6521     if (depth > maxDepth) {
6522         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6523                 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6524                 "use XML_PARSE_HUGE\n", depth);
6525 	return(NULL);
6526     }
6527     SKIP_BLANKS_PE;
6528     GROW;
6529     if (RAW == '(') {
6530 	int inputid = ctxt->input->id;
6531 
6532         /* Recurse on first child */
6533 	NEXT;
6534 	SKIP_BLANKS_PE;
6535         cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6536                                                            depth + 1);
6537         if (cur == NULL)
6538             return(NULL);
6539 	SKIP_BLANKS_PE;
6540 	GROW;
6541     } else {
6542 	elem = xmlParseName(ctxt);
6543 	if (elem == NULL) {
6544 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6545 	    return(NULL);
6546 	}
6547         cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6548 	if (cur == NULL) {
6549 	    xmlErrMemory(ctxt);
6550 	    return(NULL);
6551 	}
6552 	GROW;
6553 	if (RAW == '?') {
6554 	    cur->ocur = XML_ELEMENT_CONTENT_OPT;
6555 	    NEXT;
6556 	} else if (RAW == '*') {
6557 	    cur->ocur = XML_ELEMENT_CONTENT_MULT;
6558 	    NEXT;
6559 	} else if (RAW == '+') {
6560 	    cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6561 	    NEXT;
6562 	} else {
6563 	    cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6564 	}
6565 	GROW;
6566     }
6567     SKIP_BLANKS_PE;
6568     while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6569         /*
6570 	 * Each loop we parse one separator and one element.
6571 	 */
6572         if (RAW == ',') {
6573 	    if (type == 0) type = CUR;
6574 
6575 	    /*
6576 	     * Detect "Name | Name , Name" error
6577 	     */
6578 	    else if (type != CUR) {
6579 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6580 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6581 		                  type);
6582 		if ((last != NULL) && (last != ret))
6583 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6584 		if (ret != NULL)
6585 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6586 		return(NULL);
6587 	    }
6588 	    NEXT;
6589 
6590 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6591 	    if (op == NULL) {
6592                 xmlErrMemory(ctxt);
6593 		if ((last != NULL) && (last != ret))
6594 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6595 	        xmlFreeDocElementContent(ctxt->myDoc, ret);
6596 		return(NULL);
6597 	    }
6598 	    if (last == NULL) {
6599 		op->c1 = ret;
6600 		if (ret != NULL)
6601 		    ret->parent = op;
6602 		ret = cur = op;
6603 	    } else {
6604 	        cur->c2 = op;
6605 		if (op != NULL)
6606 		    op->parent = cur;
6607 		op->c1 = last;
6608 		if (last != NULL)
6609 		    last->parent = op;
6610 		cur =op;
6611 		last = NULL;
6612 	    }
6613 	} else if (RAW == '|') {
6614 	    if (type == 0) type = CUR;
6615 
6616 	    /*
6617 	     * Detect "Name , Name | Name" error
6618 	     */
6619 	    else if (type != CUR) {
6620 		xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6621 		    "xmlParseElementChildrenContentDecl : '%c' expected\n",
6622 				  type);
6623 		if ((last != NULL) && (last != ret))
6624 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6625 		if (ret != NULL)
6626 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6627 		return(NULL);
6628 	    }
6629 	    NEXT;
6630 
6631 	    op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6632 	    if (op == NULL) {
6633                 xmlErrMemory(ctxt);
6634 		if ((last != NULL) && (last != ret))
6635 		    xmlFreeDocElementContent(ctxt->myDoc, last);
6636 		if (ret != NULL)
6637 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6638 		return(NULL);
6639 	    }
6640 	    if (last == NULL) {
6641 		op->c1 = ret;
6642 		if (ret != NULL)
6643 		    ret->parent = op;
6644 		ret = cur = op;
6645 	    } else {
6646 	        cur->c2 = op;
6647 		if (op != NULL)
6648 		    op->parent = cur;
6649 		op->c1 = last;
6650 		if (last != NULL)
6651 		    last->parent = op;
6652 		cur =op;
6653 		last = NULL;
6654 	    }
6655 	} else {
6656 	    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6657 	    if ((last != NULL) && (last != ret))
6658 	        xmlFreeDocElementContent(ctxt->myDoc, last);
6659 	    if (ret != NULL)
6660 		xmlFreeDocElementContent(ctxt->myDoc, ret);
6661 	    return(NULL);
6662 	}
6663 	GROW;
6664 	SKIP_BLANKS_PE;
6665 	GROW;
6666 	if (RAW == '(') {
6667 	    int inputid = ctxt->input->id;
6668 	    /* Recurse on second child */
6669 	    NEXT;
6670 	    SKIP_BLANKS_PE;
6671 	    last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6672                                                           depth + 1);
6673             if (last == NULL) {
6674 		if (ret != NULL)
6675 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6676 		return(NULL);
6677             }
6678 	    SKIP_BLANKS_PE;
6679 	} else {
6680 	    elem = xmlParseName(ctxt);
6681 	    if (elem == NULL) {
6682 		xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6683 		if (ret != NULL)
6684 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6685 		return(NULL);
6686 	    }
6687 	    last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6688 	    if (last == NULL) {
6689                 xmlErrMemory(ctxt);
6690 		if (ret != NULL)
6691 		    xmlFreeDocElementContent(ctxt->myDoc, ret);
6692 		return(NULL);
6693 	    }
6694 	    if (RAW == '?') {
6695 		last->ocur = XML_ELEMENT_CONTENT_OPT;
6696 		NEXT;
6697 	    } else if (RAW == '*') {
6698 		last->ocur = XML_ELEMENT_CONTENT_MULT;
6699 		NEXT;
6700 	    } else if (RAW == '+') {
6701 		last->ocur = XML_ELEMENT_CONTENT_PLUS;
6702 		NEXT;
6703 	    } else {
6704 		last->ocur = XML_ELEMENT_CONTENT_ONCE;
6705 	    }
6706 	}
6707 	SKIP_BLANKS_PE;
6708 	GROW;
6709     }
6710     if ((cur != NULL) && (last != NULL)) {
6711         cur->c2 = last;
6712 	if (last != NULL)
6713 	    last->parent = cur;
6714     }
6715     if (ctxt->input->id != inputchk) {
6716 	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6717                        "Element content declaration doesn't start and stop in"
6718                        " the same entity\n");
6719     }
6720     NEXT;
6721     if (RAW == '?') {
6722 	if (ret != NULL) {
6723 	    if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6724 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6725 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6726 	    else
6727 	        ret->ocur = XML_ELEMENT_CONTENT_OPT;
6728 	}
6729 	NEXT;
6730     } else if (RAW == '*') {
6731 	if (ret != NULL) {
6732 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6733 	    cur = ret;
6734 	    /*
6735 	     * Some normalization:
6736 	     * (a | b* | c?)* == (a | b | c)*
6737 	     */
6738 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6739 		if ((cur->c1 != NULL) &&
6740 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6741 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6742 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6743 		if ((cur->c2 != NULL) &&
6744 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6745 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6746 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6747 		cur = cur->c2;
6748 	    }
6749 	}
6750 	NEXT;
6751     } else if (RAW == '+') {
6752 	if (ret != NULL) {
6753 	    int found = 0;
6754 
6755 	    if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6756 	        (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6757 	        ret->ocur = XML_ELEMENT_CONTENT_MULT;
6758 	    else
6759 	        ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6760 	    /*
6761 	     * Some normalization:
6762 	     * (a | b*)+ == (a | b)*
6763 	     * (a | b?)+ == (a | b)*
6764 	     */
6765 	    while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6766 		if ((cur->c1 != NULL) &&
6767 	            ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6768 		     (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6769 		    cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6770 		    found = 1;
6771 		}
6772 		if ((cur->c2 != NULL) &&
6773 	            ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6774 		     (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6775 		    cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6776 		    found = 1;
6777 		}
6778 		cur = cur->c2;
6779 	    }
6780 	    if (found)
6781 		ret->ocur = XML_ELEMENT_CONTENT_MULT;
6782 	}
6783 	NEXT;
6784     }
6785     return(ret);
6786 }
6787 
6788 /**
6789  * xmlParseElementChildrenContentDecl:
6790  * @ctxt:  an XML parser context
6791  * @inputchk:  the input used for the current entity, needed for boundary checks
6792  *
6793  * DEPRECATED: Internal function, don't use.
6794  *
6795  * parse the declaration for a Mixed Element content
6796  * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6797  *
6798  * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6799  *
6800  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6801  *
6802  * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6803  *
6804  * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6805  *
6806  * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6807  * TODO Parameter-entity replacement text must be properly nested
6808  *	with parenthesized groups. That is to say, if either of the
6809  *	opening or closing parentheses in a choice, seq, or Mixed
6810  *	construct is contained in the replacement text for a parameter
6811  *	entity, both must be contained in the same replacement text. For
6812  *	interoperability, if a parameter-entity reference appears in a
6813  *	choice, seq, or Mixed construct, its replacement text should not
6814  *	be empty, and neither the first nor last non-blank character of
6815  *	the replacement text should be a connector (| or ,).
6816  *
6817  * Returns the tree of xmlElementContentPtr describing the element
6818  *          hierarchy.
6819  */
6820 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6821 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6822     /* stub left for API/ABI compat */
6823     return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6824 }
6825 
6826 /**
6827  * xmlParseElementContentDecl:
6828  * @ctxt:  an XML parser context
6829  * @name:  the name of the element being defined.
6830  * @result:  the Element Content pointer will be stored here if any
6831  *
6832  * DEPRECATED: Internal function, don't use.
6833  *
6834  * parse the declaration for an Element content either Mixed or Children,
6835  * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6836  *
6837  * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6838  *
6839  * returns: the type of element content XML_ELEMENT_TYPE_xxx
6840  */
6841 
6842 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6843 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6844                            xmlElementContentPtr *result) {
6845 
6846     xmlElementContentPtr tree = NULL;
6847     int inputid = ctxt->input->id;
6848     int res;
6849 
6850     *result = NULL;
6851 
6852     if (RAW != '(') {
6853 	xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6854 		"xmlParseElementContentDecl : %s '(' expected\n", name);
6855 	return(-1);
6856     }
6857     NEXT;
6858     GROW;
6859     SKIP_BLANKS_PE;
6860     if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6861         tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6862 	res = XML_ELEMENT_TYPE_MIXED;
6863     } else {
6864         tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6865 	res = XML_ELEMENT_TYPE_ELEMENT;
6866     }
6867     SKIP_BLANKS_PE;
6868     *result = tree;
6869     return(res);
6870 }
6871 
6872 /**
6873  * xmlParseElementDecl:
6874  * @ctxt:  an XML parser context
6875  *
6876  * DEPRECATED: Internal function, don't use.
6877  *
6878  * Parse an element declaration. Always consumes '<!'.
6879  *
6880  * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6881  *
6882  * [ VC: Unique Element Type Declaration ]
6883  * No element type may be declared more than once
6884  *
6885  * Returns the type of the element, or -1 in case of error
6886  */
6887 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6888 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6889     const xmlChar *name;
6890     int ret = -1;
6891     xmlElementContentPtr content  = NULL;
6892 
6893     if ((CUR != '<') || (NXT(1) != '!'))
6894         return(ret);
6895     SKIP(2);
6896 
6897     /* GROW; done in the caller */
6898     if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6899 	int inputid = ctxt->input->id;
6900 
6901 	SKIP(7);
6902 	if (SKIP_BLANKS_PE == 0) {
6903 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6904 		           "Space required after 'ELEMENT'\n");
6905 	    return(-1);
6906 	}
6907         name = xmlParseName(ctxt);
6908 	if (name == NULL) {
6909 	    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6910 			   "xmlParseElementDecl: no name for Element\n");
6911 	    return(-1);
6912 	}
6913 	if (SKIP_BLANKS_PE == 0) {
6914 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6915 			   "Space required after the element name\n");
6916 	}
6917 	if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6918 	    SKIP(5);
6919 	    /*
6920 	     * Element must always be empty.
6921 	     */
6922 	    ret = XML_ELEMENT_TYPE_EMPTY;
6923 	} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6924 	           (NXT(2) == 'Y')) {
6925 	    SKIP(3);
6926 	    /*
6927 	     * Element is a generic container.
6928 	     */
6929 	    ret = XML_ELEMENT_TYPE_ANY;
6930 	} else if (RAW == '(') {
6931 	    ret = xmlParseElementContentDecl(ctxt, name, &content);
6932 	} else {
6933 	    /*
6934 	     * [ WFC: PEs in Internal Subset ] error handling.
6935 	     */
6936             xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6937                   "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6938 	    return(-1);
6939 	}
6940 
6941 	SKIP_BLANKS_PE;
6942 
6943 	if (RAW != '>') {
6944 	    xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6945 	    if (content != NULL) {
6946 		xmlFreeDocElementContent(ctxt->myDoc, content);
6947 	    }
6948 	} else {
6949 	    if (inputid != ctxt->input->id) {
6950 		xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6951                                "Element declaration doesn't start and stop in"
6952                                " the same entity\n");
6953 	    }
6954 
6955 	    NEXT;
6956 	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6957 		(ctxt->sax->elementDecl != NULL)) {
6958 		if (content != NULL)
6959 		    content->parent = NULL;
6960 	        ctxt->sax->elementDecl(ctxt->userData, name, ret,
6961 		                       content);
6962 		if ((content != NULL) && (content->parent == NULL)) {
6963 		    /*
6964 		     * this is a trick: if xmlAddElementDecl is called,
6965 		     * instead of copying the full tree it is plugged directly
6966 		     * if called from the parser. Avoid duplicating the
6967 		     * interfaces or change the API/ABI
6968 		     */
6969 		    xmlFreeDocElementContent(ctxt->myDoc, content);
6970 		}
6971 	    } else if (content != NULL) {
6972 		xmlFreeDocElementContent(ctxt->myDoc, content);
6973 	    }
6974 	}
6975     }
6976     return(ret);
6977 }
6978 
6979 /**
6980  * xmlParseConditionalSections
6981  * @ctxt:  an XML parser context
6982  *
6983  * Parse a conditional section. Always consumes '<!['.
6984  *
6985  * [61] conditionalSect ::= includeSect | ignoreSect
6986  * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6987  * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6988  * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6989  * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6990  */
6991 
6992 static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt)6993 xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6994     int *inputIds = NULL;
6995     size_t inputIdsSize = 0;
6996     size_t depth = 0;
6997 
6998     while (PARSER_STOPPED(ctxt) == 0) {
6999         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7000             int id = ctxt->input->id;
7001 
7002             SKIP(3);
7003             SKIP_BLANKS_PE;
7004 
7005             if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
7006                 SKIP(7);
7007                 SKIP_BLANKS_PE;
7008                 if (RAW != '[') {
7009                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7010                     xmlHaltParser(ctxt);
7011                     goto error;
7012                 }
7013                 if (ctxt->input->id != id) {
7014                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7015                                    "All markup of the conditional section is"
7016                                    " not in the same entity\n");
7017                 }
7018                 NEXT;
7019 
7020                 if (inputIdsSize <= depth) {
7021                     int *tmp;
7022 
7023                     inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
7024                     tmp = (int *) xmlRealloc(inputIds,
7025                             inputIdsSize * sizeof(int));
7026                     if (tmp == NULL) {
7027                         xmlErrMemory(ctxt);
7028                         goto error;
7029                     }
7030                     inputIds = tmp;
7031                 }
7032                 inputIds[depth] = id;
7033                 depth++;
7034             } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
7035                 size_t ignoreDepth = 0;
7036 
7037                 SKIP(6);
7038                 SKIP_BLANKS_PE;
7039                 if (RAW != '[') {
7040                     xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
7041                     xmlHaltParser(ctxt);
7042                     goto error;
7043                 }
7044                 if (ctxt->input->id != id) {
7045                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7046                                    "All markup of the conditional section is"
7047                                    " not in the same entity\n");
7048                 }
7049                 NEXT;
7050 
7051                 while (PARSER_STOPPED(ctxt) == 0) {
7052                     if (RAW == 0) {
7053                         xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
7054                         goto error;
7055                     }
7056                     if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7057                         SKIP(3);
7058                         ignoreDepth++;
7059                         /* Check for integer overflow */
7060                         if (ignoreDepth == 0) {
7061                             xmlErrMemory(ctxt);
7062                             goto error;
7063                         }
7064                     } else if ((RAW == ']') && (NXT(1) == ']') &&
7065                                (NXT(2) == '>')) {
7066                         SKIP(3);
7067                         if (ignoreDepth == 0)
7068                             break;
7069                         ignoreDepth--;
7070                     } else {
7071                         NEXT;
7072                     }
7073                 }
7074 
7075                 if (ctxt->input->id != id) {
7076                     xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7077                                    "All markup of the conditional section is"
7078                                    " not in the same entity\n");
7079                 }
7080             } else {
7081                 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
7082                 xmlHaltParser(ctxt);
7083                 goto error;
7084             }
7085         } else if ((depth > 0) &&
7086                    (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
7087             depth--;
7088             if (ctxt->input->id != inputIds[depth]) {
7089                 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
7090                                "All markup of the conditional section is not"
7091                                " in the same entity\n");
7092             }
7093             SKIP(3);
7094         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7095             xmlParseMarkupDecl(ctxt);
7096         } else {
7097             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7098             xmlHaltParser(ctxt);
7099             goto error;
7100         }
7101 
7102         if (depth == 0)
7103             break;
7104 
7105         SKIP_BLANKS_PE;
7106         SHRINK;
7107         GROW;
7108     }
7109 
7110 error:
7111     xmlFree(inputIds);
7112 }
7113 
7114 /**
7115  * xmlParseMarkupDecl:
7116  * @ctxt:  an XML parser context
7117  *
7118  * DEPRECATED: Internal function, don't use.
7119  *
7120  * Parse markup declarations. Always consumes '<!' or '<?'.
7121  *
7122  * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7123  *                     NotationDecl | PI | Comment
7124  *
7125  * [ VC: Proper Declaration/PE Nesting ]
7126  * Parameter-entity replacement text must be properly nested with
7127  * markup declarations. That is to say, if either the first character
7128  * or the last character of a markup declaration (markupdecl above) is
7129  * contained in the replacement text for a parameter-entity reference,
7130  * both must be contained in the same replacement text.
7131  *
7132  * [ WFC: PEs in Internal Subset ]
7133  * In the internal DTD subset, parameter-entity references can occur
7134  * only where markup declarations can occur, not within markup declarations.
7135  * (This does not apply to references that occur in external parameter
7136  * entities or to the external subset.)
7137  */
7138 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7139 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7140     GROW;
7141     if (CUR == '<') {
7142         if (NXT(1) == '!') {
7143 	    switch (NXT(2)) {
7144 	        case 'E':
7145 		    if (NXT(3) == 'L')
7146 			xmlParseElementDecl(ctxt);
7147 		    else if (NXT(3) == 'N')
7148 			xmlParseEntityDecl(ctxt);
7149                     else
7150                         SKIP(2);
7151 		    break;
7152 	        case 'A':
7153 		    xmlParseAttributeListDecl(ctxt);
7154 		    break;
7155 	        case 'N':
7156 		    xmlParseNotationDecl(ctxt);
7157 		    break;
7158 	        case '-':
7159 		    xmlParseComment(ctxt);
7160 		    break;
7161 		default:
7162 		    /* there is an error but it will be detected later */
7163                     SKIP(2);
7164 		    break;
7165 	    }
7166 	} else if (NXT(1) == '?') {
7167 	    xmlParsePI(ctxt);
7168 	}
7169     }
7170 }
7171 
7172 /**
7173  * xmlParseTextDecl:
7174  * @ctxt:  an XML parser context
7175  *
7176  * DEPRECATED: Internal function, don't use.
7177  *
7178  * parse an XML declaration header for external entities
7179  *
7180  * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7181  */
7182 
7183 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7184 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7185     xmlChar *version;
7186 
7187     /*
7188      * We know that '<?xml' is here.
7189      */
7190     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7191 	SKIP(5);
7192     } else {
7193 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7194 	return;
7195     }
7196 
7197     if (SKIP_BLANKS == 0) {
7198 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7199 		       "Space needed after '<?xml'\n");
7200     }
7201 
7202     /*
7203      * We may have the VersionInfo here.
7204      */
7205     version = xmlParseVersionInfo(ctxt);
7206     if (version == NULL) {
7207 	version = xmlCharStrdup(XML_DEFAULT_VERSION);
7208         if (version == NULL) {
7209             xmlErrMemory(ctxt);
7210             return;
7211         }
7212     } else {
7213 	if (SKIP_BLANKS == 0) {
7214 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7215 		           "Space needed here\n");
7216 	}
7217     }
7218     ctxt->input->version = version;
7219 
7220     /*
7221      * We must have the encoding declaration
7222      */
7223     xmlParseEncodingDecl(ctxt);
7224 
7225     SKIP_BLANKS;
7226     if ((RAW == '?') && (NXT(1) == '>')) {
7227         SKIP(2);
7228     } else if (RAW == '>') {
7229         /* Deprecated old WD ... */
7230 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7231 	NEXT;
7232     } else {
7233         int c;
7234 
7235 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7236         while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7237             NEXT;
7238             if (c == '>')
7239                 break;
7240         }
7241     }
7242 }
7243 
7244 /**
7245  * xmlParseExternalSubset:
7246  * @ctxt:  an XML parser context
7247  * @ExternalID: the external identifier
7248  * @SystemID: the system identifier (or URL)
7249  *
7250  * parse Markup declarations from an external subset
7251  *
7252  * [30] extSubset ::= textDecl? extSubsetDecl
7253  *
7254  * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7255  */
7256 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7257 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7258                        const xmlChar *SystemID) {
7259     int oldInputNr;
7260 
7261     xmlCtxtInitializeLate(ctxt);
7262 
7263     xmlDetectEncoding(ctxt);
7264 
7265     if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7266 	xmlParseTextDecl(ctxt);
7267     }
7268     if (ctxt->myDoc == NULL) {
7269         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7270 	if (ctxt->myDoc == NULL) {
7271 	    xmlErrMemory(ctxt);
7272 	    return;
7273 	}
7274 	ctxt->myDoc->properties = XML_DOC_INTERNAL;
7275     }
7276     if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7277         (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7278         xmlErrMemory(ctxt);
7279     }
7280 
7281     ctxt->inSubset = 2;
7282     oldInputNr = ctxt->inputNr;
7283 
7284     SKIP_BLANKS_PE;
7285     while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7286            (!PARSER_STOPPED(ctxt))) {
7287 	GROW;
7288         if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7289             xmlParseConditionalSections(ctxt);
7290         } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7291             xmlParseMarkupDecl(ctxt);
7292         } else {
7293             xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7294             xmlHaltParser(ctxt);
7295             return;
7296         }
7297         SKIP_BLANKS_PE;
7298         SHRINK;
7299     }
7300 
7301     while (ctxt->inputNr > oldInputNr)
7302         xmlPopPE(ctxt);
7303 
7304     if (RAW != 0) {
7305 	xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7306     }
7307 }
7308 
7309 /**
7310  * xmlParseReference:
7311  * @ctxt:  an XML parser context
7312  *
7313  * DEPRECATED: Internal function, don't use.
7314  *
7315  * parse and handle entity references in content, depending on the SAX
7316  * interface, this may end-up in a call to character() if this is a
7317  * CharRef, a predefined entity, if there is no reference() callback.
7318  * or if the parser was asked to switch to that mode.
7319  *
7320  * Always consumes '&'.
7321  *
7322  * [67] Reference ::= EntityRef | CharRef
7323  */
7324 void
xmlParseReference(xmlParserCtxtPtr ctxt)7325 xmlParseReference(xmlParserCtxtPtr ctxt) {
7326     xmlEntityPtr ent = NULL;
7327     const xmlChar *name;
7328     xmlChar *val;
7329 
7330     if (RAW != '&')
7331         return;
7332 
7333     /*
7334      * Simple case of a CharRef
7335      */
7336     if (NXT(1) == '#') {
7337 	int i = 0;
7338 	xmlChar out[16];
7339 	int value = xmlParseCharRef(ctxt);
7340 
7341 	if (value == 0)
7342 	    return;
7343 
7344         /*
7345          * Just encode the value in UTF-8
7346          */
7347         COPY_BUF(out, i, value);
7348         out[i] = 0;
7349         if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7350             (!ctxt->disableSAX))
7351             ctxt->sax->characters(ctxt->userData, out, i);
7352 	return;
7353     }
7354 
7355     /*
7356      * We are seeing an entity reference
7357      */
7358     name = xmlParseEntityRefInternal(ctxt);
7359     if (name == NULL)
7360         return;
7361     ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0);
7362     if (ent == NULL) {
7363         /*
7364          * Create a reference for undeclared entities.
7365          */
7366         if ((ctxt->replaceEntities == 0) &&
7367             (ctxt->sax != NULL) &&
7368             (ctxt->disableSAX == 0) &&
7369             (ctxt->sax->reference != NULL)) {
7370             ctxt->sax->reference(ctxt->userData, name);
7371         }
7372         return;
7373     }
7374     if (!ctxt->wellFormed)
7375 	return;
7376 
7377     /* special case of predefined entities */
7378     if ((ent->name == NULL) ||
7379         (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7380 	val = ent->content;
7381 	if (val == NULL) return;
7382 	/*
7383 	 * inline the entity.
7384 	 */
7385 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7386 	    (!ctxt->disableSAX))
7387 	    ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7388 	return;
7389     }
7390 
7391     /*
7392      * Some users try to parse entities on their own and used to set
7393      * the renamed "checked" member. Fix the flags to cover this
7394      * case.
7395      */
7396     if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7397         ent->flags |= XML_ENT_PARSED;
7398 
7399     /*
7400      * The first reference to the entity trigger a parsing phase
7401      * where the ent->children is filled with the result from
7402      * the parsing.
7403      * Note: external parsed entities will not be loaded, it is not
7404      * required for a non-validating parser, unless the parsing option
7405      * of validating, or substituting entities were given. Doing so is
7406      * far more secure as the parser will only process data coming from
7407      * the document entity by default.
7408      *
7409      * FIXME: This doesn't work correctly since entities can be
7410      * expanded with different namespace declarations in scope.
7411      * For example:
7412      *
7413      * <!DOCTYPE doc [
7414      *   <!ENTITY ent "<ns:elem/>">
7415      * ]>
7416      * <doc>
7417      *   <decl1 xmlns:ns="urn:ns1">
7418      *     &ent;
7419      *   </decl1>
7420      *   <decl2 xmlns:ns="urn:ns2">
7421      *     &ent;
7422      *   </decl2>
7423      * </doc>
7424      *
7425      * Proposed fix:
7426      *
7427      * - Ignore current namespace declarations when parsing the
7428      *   entity. If a prefix can't be resolved, don't report an error
7429      *   but mark it as unresolved.
7430      * - Try to resolve these prefixes when expanding the entity.
7431      *   This will require a specialized version of xmlStaticCopyNode
7432      *   which can also make use of the namespace hash table to avoid
7433      *   quadratic behavior.
7434      *
7435      * Alternatively, we could simply reparse the entity on each
7436      * expansion like we already do with custom SAX callbacks.
7437      * External entity content should be cached in this case.
7438      */
7439     if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7440         (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
7441          ((ctxt->replaceEntities) ||
7442           (ctxt->validate)))) {
7443         if ((ent->flags & XML_ENT_PARSED) == 0) {
7444             xmlCtxtParseEntity(ctxt, ent);
7445         } else if (ent->children == NULL) {
7446             /*
7447              * Probably running in SAX mode and the callbacks don't
7448              * build the entity content. Parse the entity again.
7449              *
7450              * This will also be triggered in normal tree builder mode
7451              * if an entity happens to be empty, causing unnecessary
7452              * reloads. It's hard to come up with a reliable check in
7453              * which mode we're running.
7454              */
7455             xmlCtxtParseEntity(ctxt, ent);
7456         }
7457     }
7458 
7459     /*
7460      * We also check for amplification if entities aren't substituted.
7461      * They might be expanded later.
7462      */
7463     if (xmlParserEntityCheck(ctxt, ent->expandedSize))
7464         return;
7465 
7466     if ((ctxt->sax == NULL) || (ctxt->disableSAX))
7467         return;
7468 
7469     if (ctxt->replaceEntities == 0) {
7470 	/*
7471 	 * Create a reference
7472 	 */
7473         if (ctxt->sax->reference != NULL)
7474 	    ctxt->sax->reference(ctxt->userData, ent->name);
7475     } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
7476         xmlNodePtr copy, cur;
7477 
7478         /*
7479          * Seems we are generating the DOM content, copy the tree
7480 	 */
7481         cur = ent->children;
7482 
7483         /*
7484          * Handle first text node with SAX to coalesce text efficiently
7485          */
7486         if ((cur->type == XML_TEXT_NODE) ||
7487             (cur->type == XML_CDATA_SECTION_NODE)) {
7488             int len = xmlStrlen(cur->content);
7489 
7490             if ((cur->type == XML_TEXT_NODE) ||
7491                 (ctxt->sax->cdataBlock == NULL)) {
7492                 if (ctxt->sax->characters != NULL)
7493                     ctxt->sax->characters(ctxt, cur->content, len);
7494             } else {
7495                 if (ctxt->sax->cdataBlock != NULL)
7496                     ctxt->sax->cdataBlock(ctxt, cur->content, len);
7497             }
7498 
7499             cur = cur->next;
7500         }
7501 
7502         while (cur != NULL) {
7503             xmlNodePtr last;
7504 
7505             /*
7506              * Handle last text node with SAX to coalesce text efficiently
7507              */
7508             if ((cur->next == NULL) &&
7509                 ((cur->type == XML_TEXT_NODE) ||
7510                  (cur->type == XML_CDATA_SECTION_NODE))) {
7511                 int len = xmlStrlen(cur->content);
7512 
7513                 if ((cur->type == XML_TEXT_NODE) ||
7514                     (ctxt->sax->cdataBlock == NULL)) {
7515                     if (ctxt->sax->characters != NULL)
7516                         ctxt->sax->characters(ctxt, cur->content, len);
7517                 } else {
7518                     if (ctxt->sax->cdataBlock != NULL)
7519                         ctxt->sax->cdataBlock(ctxt, cur->content, len);
7520                 }
7521 
7522                 break;
7523             }
7524 
7525             /*
7526              * Reset coalesce buffer stats only for non-text nodes.
7527              */
7528             ctxt->nodemem = 0;
7529             ctxt->nodelen = 0;
7530 
7531             copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7532 
7533             if (copy == NULL) {
7534                 xmlErrMemory(ctxt);
7535                 break;
7536             }
7537 
7538             if (ctxt->parseMode == XML_PARSE_READER) {
7539                 /* Needed for reader */
7540                 copy->extra = cur->extra;
7541                 /* Maybe needed for reader */
7542                 copy->_private = cur->_private;
7543             }
7544 
7545             copy->parent = ctxt->node;
7546             last = ctxt->node->last;
7547             if (last == NULL) {
7548                 ctxt->node->children = copy;
7549             } else {
7550                 last->next = copy;
7551                 copy->prev = last;
7552             }
7553             ctxt->node->last = copy;
7554 
7555             cur = cur->next;
7556         }
7557     }
7558 }
7559 
7560 static void
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt,const xmlChar * name)7561 xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7562     /*
7563      * [ WFC: Entity Declared ]
7564      * In a document without any DTD, a document with only an
7565      * internal DTD subset which contains no parameter entity
7566      * references, or a document with "standalone='yes'", the
7567      * Name given in the entity reference must match that in an
7568      * entity declaration, except that well-formed documents
7569      * need not declare any of the following entities: amp, lt,
7570      * gt, apos, quot.
7571      * The declaration of a parameter entity must precede any
7572      * reference to it.
7573      * Similarly, the declaration of a general entity must
7574      * precede any reference to it which appears in a default
7575      * value in an attribute-list declaration. Note that if
7576      * entities are declared in the external subset or in
7577      * external parameter entities, a non-validating processor
7578      * is not obligated to read and process their declarations;
7579      * for such documents, the rule that an entity must be
7580      * declared is a well-formedness constraint only if
7581      * standalone='yes'.
7582      */
7583     if ((ctxt->standalone == 1) ||
7584         ((ctxt->hasExternalSubset == 0) &&
7585          (ctxt->hasPErefs == 0))) {
7586         xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7587                           "Entity '%s' not defined\n", name);
7588     } else if (ctxt->validate) {
7589         /*
7590          * [ VC: Entity Declared ]
7591          * In a document with an external subset or external
7592          * parameter entities with "standalone='no'", ...
7593          * ... The declaration of a parameter entity must
7594          * precede any reference to it...
7595          */
7596         xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7597                          "Entity '%s' not defined\n", name, NULL);
7598     } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7599                ((ctxt->replaceEntities) &&
7600                 ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7601         /*
7602          * Also raise a non-fatal error
7603          *
7604          * - if the external subset is loaded and all entity declarations
7605          *   should be available, or
7606          * - entity substition was requested without restricting
7607          *   external entity access.
7608          */
7609         xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7610                      "Entity '%s' not defined\n", name);
7611     } else {
7612         xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7613                       "Entity '%s' not defined\n", name, NULL);
7614     }
7615 
7616     ctxt->valid = 0;
7617 }
7618 
7619 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7620 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7621     xmlEntityPtr ent = NULL;
7622 
7623     /*
7624      * Predefined entities override any extra definition
7625      */
7626     if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7627         ent = xmlGetPredefinedEntity(name);
7628         if (ent != NULL)
7629             return(ent);
7630     }
7631 
7632     /*
7633      * Ask first SAX for entity resolution, otherwise try the
7634      * entities which may have stored in the parser context.
7635      */
7636     if (ctxt->sax != NULL) {
7637 	if (ctxt->sax->getEntity != NULL)
7638 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
7639 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7640 	    (ctxt->options & XML_PARSE_OLDSAX))
7641 	    ent = xmlGetPredefinedEntity(name);
7642 	if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7643 	    (ctxt->userData==ctxt)) {
7644 	    ent = xmlSAX2GetEntity(ctxt, name);
7645 	}
7646     }
7647 
7648     if (ent == NULL) {
7649         xmlHandleUndeclaredEntity(ctxt, name);
7650     }
7651 
7652     /*
7653      * [ WFC: Parsed Entity ]
7654      * An entity reference must not contain the name of an
7655      * unparsed entity
7656      */
7657     else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7658 	xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7659 		 "Entity reference to unparsed entity %s\n", name);
7660         ent = NULL;
7661     }
7662 
7663     /*
7664      * [ WFC: No External Entity References ]
7665      * Attribute values cannot contain direct or indirect
7666      * entity references to external entities.
7667      */
7668     else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7669         if (inAttr) {
7670             xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7671                  "Attribute references external entity '%s'\n", name);
7672             ent = NULL;
7673         }
7674     }
7675 
7676     return(ent);
7677 }
7678 
7679 /**
7680  * xmlParseEntityRefInternal:
7681  * @ctxt:  an XML parser context
7682  * @inAttr:  whether we are in an attribute value
7683  *
7684  * Parse an entity reference. Always consumes '&'.
7685  *
7686  * [68] EntityRef ::= '&' Name ';'
7687  *
7688  * Returns the name, or NULL in case of error.
7689  */
7690 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7691 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7692     const xmlChar *name;
7693 
7694     GROW;
7695 
7696     if (RAW != '&')
7697         return(NULL);
7698     NEXT;
7699     name = xmlParseName(ctxt);
7700     if (name == NULL) {
7701 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7702 		       "xmlParseEntityRef: no name\n");
7703         return(NULL);
7704     }
7705     if (RAW != ';') {
7706 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7707 	return(NULL);
7708     }
7709     NEXT;
7710 
7711     return(name);
7712 }
7713 
7714 /**
7715  * xmlParseEntityRef:
7716  * @ctxt:  an XML parser context
7717  *
7718  * DEPRECATED: Internal function, don't use.
7719  *
7720  * Returns the xmlEntityPtr if found, or NULL otherwise.
7721  */
7722 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7723 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7724     const xmlChar *name;
7725 
7726     if (ctxt == NULL)
7727         return(NULL);
7728 
7729     name = xmlParseEntityRefInternal(ctxt);
7730     if (name == NULL)
7731         return(NULL);
7732 
7733     return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7734 }
7735 
7736 /**
7737  * xmlParseStringEntityRef:
7738  * @ctxt:  an XML parser context
7739  * @str:  a pointer to an index in the string
7740  *
7741  * parse ENTITY references declarations, but this version parses it from
7742  * a string value.
7743  *
7744  * [68] EntityRef ::= '&' Name ';'
7745  *
7746  * [ WFC: Entity Declared ]
7747  * In a document without any DTD, a document with only an internal DTD
7748  * subset which contains no parameter entity references, or a document
7749  * with "standalone='yes'", the Name given in the entity reference
7750  * must match that in an entity declaration, except that well-formed
7751  * documents need not declare any of the following entities: amp, lt,
7752  * gt, apos, quot.  The declaration of a parameter entity must precede
7753  * any reference to it.  Similarly, the declaration of a general entity
7754  * must precede any reference to it which appears in a default value in an
7755  * attribute-list declaration. Note that if entities are declared in the
7756  * external subset or in external parameter entities, a non-validating
7757  * processor is not obligated to read and process their declarations;
7758  * for such documents, the rule that an entity must be declared is a
7759  * well-formedness constraint only if standalone='yes'.
7760  *
7761  * [ WFC: Parsed Entity ]
7762  * An entity reference must not contain the name of an unparsed entity
7763  *
7764  * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7765  * is updated to the current location in the string.
7766  */
7767 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7768 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7769     xmlChar *name;
7770     const xmlChar *ptr;
7771     xmlChar cur;
7772 
7773     if ((str == NULL) || (*str == NULL))
7774         return(NULL);
7775     ptr = *str;
7776     cur = *ptr;
7777     if (cur != '&')
7778 	return(NULL);
7779 
7780     ptr++;
7781     name = xmlParseStringName(ctxt, &ptr);
7782     if (name == NULL) {
7783 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7784 		       "xmlParseStringEntityRef: no name\n");
7785 	*str = ptr;
7786 	return(NULL);
7787     }
7788     if (*ptr != ';') {
7789 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7790         xmlFree(name);
7791 	*str = ptr;
7792 	return(NULL);
7793     }
7794     ptr++;
7795 
7796     *str = ptr;
7797     return(name);
7798 }
7799 
7800 /**
7801  * xmlParsePEReference:
7802  * @ctxt:  an XML parser context
7803  *
7804  * DEPRECATED: Internal function, don't use.
7805  *
7806  * Parse a parameter entity reference. Always consumes '%'.
7807  *
7808  * The entity content is handled directly by pushing it's content as
7809  * a new input stream.
7810  *
7811  * [69] PEReference ::= '%' Name ';'
7812  *
7813  * [ WFC: No Recursion ]
7814  * A parsed entity must not contain a recursive
7815  * reference to itself, either directly or indirectly.
7816  *
7817  * [ WFC: Entity Declared ]
7818  * In a document without any DTD, a document with only an internal DTD
7819  * subset which contains no parameter entity references, or a document
7820  * with "standalone='yes'", ...  ... The declaration of a parameter
7821  * entity must precede any reference to it...
7822  *
7823  * [ VC: Entity Declared ]
7824  * In a document with an external subset or external parameter entities
7825  * with "standalone='no'", ...  ... The declaration of a parameter entity
7826  * must precede any reference to it...
7827  *
7828  * [ WFC: In DTD ]
7829  * Parameter-entity references may only appear in the DTD.
7830  * NOTE: misleading but this is handled.
7831  */
7832 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7833 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7834 {
7835     const xmlChar *name;
7836     xmlEntityPtr entity = NULL;
7837     xmlParserInputPtr input;
7838 
7839     if (RAW != '%')
7840         return;
7841     NEXT;
7842     name = xmlParseName(ctxt);
7843     if (name == NULL) {
7844 	xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7845 	return;
7846     }
7847     if (RAW != ';') {
7848 	xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7849         return;
7850     }
7851 
7852     NEXT;
7853 
7854     /* Must be set before xmlHandleUndeclaredEntity */
7855     ctxt->hasPErefs = 1;
7856 
7857     /*
7858      * Request the entity from SAX
7859      */
7860     if ((ctxt->sax != NULL) &&
7861 	(ctxt->sax->getParameterEntity != NULL))
7862 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7863 
7864     if (entity == NULL) {
7865         xmlHandleUndeclaredEntity(ctxt, name);
7866     } else {
7867 	/*
7868 	 * Internal checking in case the entity quest barfed
7869 	 */
7870 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7871 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7872 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7873 		  "Internal: %%%s; is not a parameter entity\n",
7874 			  name, NULL);
7875 	} else {
7876 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7877                 ((ctxt->options & XML_PARSE_NO_XXE) ||
7878 		 ((ctxt->loadsubset == 0) &&
7879 		  (ctxt->replaceEntities == 0) &&
7880 		  (ctxt->validate == 0))))
7881 		return;
7882 
7883             if (entity->flags & XML_ENT_EXPANDING) {
7884                 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7885                 xmlHaltParser(ctxt);
7886                 return;
7887             }
7888 
7889 	    input = xmlNewEntityInputStream(ctxt, entity);
7890 	    if (xmlPushInput(ctxt, input) < 0) {
7891                 xmlFreeInputStream(input);
7892 		return;
7893             }
7894 
7895             entity->flags |= XML_ENT_EXPANDING;
7896 
7897 	    if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7898                 xmlDetectEncoding(ctxt);
7899 
7900                 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7901                     (IS_BLANK_CH(NXT(5)))) {
7902                     xmlParseTextDecl(ctxt);
7903                 }
7904             }
7905 	}
7906     }
7907 }
7908 
7909 /**
7910  * xmlLoadEntityContent:
7911  * @ctxt:  an XML parser context
7912  * @entity: an unloaded system entity
7913  *
7914  * Load the content of an entity.
7915  *
7916  * Returns 0 in case of success and -1 in case of failure
7917  */
7918 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7919 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7920     xmlParserInputPtr oldinput, input = NULL;
7921     xmlParserInputPtr *oldinputTab;
7922     const xmlChar *oldencoding;
7923     xmlChar *content = NULL;
7924     xmlResourceType rtype;
7925     size_t length, i;
7926     int oldinputNr, oldinputMax;
7927     int ret = -1;
7928     int res;
7929 
7930     if ((ctxt == NULL) || (entity == NULL) ||
7931         ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7932 	 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7933 	(entity->content != NULL)) {
7934 	xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7935 	            "xmlLoadEntityContent parameter error");
7936         return(-1);
7937     }
7938 
7939     if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7940         rtype = XML_RESOURCE_PARAMETER_ENTITY;
7941     else
7942         rtype = XML_RESOURCE_GENERAL_ENTITY;
7943 
7944     input = xmlLoadResource(ctxt, (char *) entity->URI,
7945                             (char *) entity->ExternalID, rtype);
7946     if (input == NULL)
7947         return(-1);
7948 
7949     oldinput = ctxt->input;
7950     oldinputNr = ctxt->inputNr;
7951     oldinputMax = ctxt->inputMax;
7952     oldinputTab = ctxt->inputTab;
7953     oldencoding = ctxt->encoding;
7954 
7955     ctxt->input = NULL;
7956     ctxt->inputNr = 0;
7957     ctxt->inputMax = 1;
7958     ctxt->encoding = NULL;
7959     ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7960     if (ctxt->inputTab == NULL) {
7961         xmlErrMemory(ctxt);
7962         xmlFreeInputStream(input);
7963         goto error;
7964     }
7965 
7966     xmlBufResetInput(input->buf->buffer, input);
7967 
7968     if (inputPush(ctxt, input) < 0) {
7969         xmlFreeInputStream(input);
7970         goto error;
7971     }
7972 
7973     xmlDetectEncoding(ctxt);
7974 
7975     /*
7976      * Parse a possible text declaration first
7977      */
7978     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7979 	xmlParseTextDecl(ctxt);
7980         /*
7981          * An XML-1.0 document can't reference an entity not XML-1.0
7982          */
7983         if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7984             (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7985             xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7986                            "Version mismatch between document and entity\n");
7987         }
7988     }
7989 
7990     length = input->cur - input->base;
7991     xmlBufShrink(input->buf->buffer, length);
7992     xmlSaturatedAdd(&ctxt->sizeentities, length);
7993 
7994     while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7995         ;
7996 
7997     xmlBufResetInput(input->buf->buffer, input);
7998 
7999     if (res < 0) {
8000         xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8001         goto error;
8002     }
8003 
8004     length = xmlBufUse(input->buf->buffer);
8005     if (length > INT_MAX) {
8006         xmlErrMemory(ctxt);
8007         goto error;
8008     }
8009 
8010     content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8011     if (content == NULL) {
8012         xmlErrMemory(ctxt);
8013         goto error;
8014     }
8015 
8016     for (i = 0; i < length; ) {
8017         int clen = length - i;
8018         int c = xmlGetUTF8Char(content + i, &clen);
8019 
8020         if ((c < 0) || (!IS_CHAR(c))) {
8021             xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8022                               "xmlLoadEntityContent: invalid char value %d\n",
8023                               content[i]);
8024             goto error;
8025         }
8026         i += clen;
8027     }
8028 
8029     xmlSaturatedAdd(&ctxt->sizeentities, length);
8030     entity->content = content;
8031     entity->length = length;
8032     content = NULL;
8033     ret = 0;
8034 
8035 error:
8036     while (ctxt->inputNr > 0)
8037         xmlFreeInputStream(inputPop(ctxt));
8038     xmlFree(ctxt->inputTab);
8039     xmlFree((xmlChar *) ctxt->encoding);
8040 
8041     ctxt->input = oldinput;
8042     ctxt->inputNr = oldinputNr;
8043     ctxt->inputMax = oldinputMax;
8044     ctxt->inputTab = oldinputTab;
8045     ctxt->encoding = oldencoding;
8046 
8047     xmlFree(content);
8048 
8049     return(ret);
8050 }
8051 
8052 /**
8053  * xmlParseStringPEReference:
8054  * @ctxt:  an XML parser context
8055  * @str:  a pointer to an index in the string
8056  *
8057  * parse PEReference declarations
8058  *
8059  * [69] PEReference ::= '%' Name ';'
8060  *
8061  * [ WFC: No Recursion ]
8062  * A parsed entity must not contain a recursive
8063  * reference to itself, either directly or indirectly.
8064  *
8065  * [ WFC: Entity Declared ]
8066  * In a document without any DTD, a document with only an internal DTD
8067  * subset which contains no parameter entity references, or a document
8068  * with "standalone='yes'", ...  ... The declaration of a parameter
8069  * entity must precede any reference to it...
8070  *
8071  * [ VC: Entity Declared ]
8072  * In a document with an external subset or external parameter entities
8073  * with "standalone='no'", ...  ... The declaration of a parameter entity
8074  * must precede any reference to it...
8075  *
8076  * [ WFC: In DTD ]
8077  * Parameter-entity references may only appear in the DTD.
8078  * NOTE: misleading but this is handled.
8079  *
8080  * Returns the string of the entity content.
8081  *         str is updated to the current value of the index
8082  */
8083 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8084 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8085     const xmlChar *ptr;
8086     xmlChar cur;
8087     xmlChar *name;
8088     xmlEntityPtr entity = NULL;
8089 
8090     if ((str == NULL) || (*str == NULL)) return(NULL);
8091     ptr = *str;
8092     cur = *ptr;
8093     if (cur != '%')
8094         return(NULL);
8095     ptr++;
8096     name = xmlParseStringName(ctxt, &ptr);
8097     if (name == NULL) {
8098 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8099 		       "xmlParseStringPEReference: no name\n");
8100 	*str = ptr;
8101 	return(NULL);
8102     }
8103     cur = *ptr;
8104     if (cur != ';') {
8105 	xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8106 	xmlFree(name);
8107 	*str = ptr;
8108 	return(NULL);
8109     }
8110     ptr++;
8111 
8112     /* Must be set before xmlHandleUndeclaredEntity */
8113     ctxt->hasPErefs = 1;
8114 
8115     /*
8116      * Request the entity from SAX
8117      */
8118     if ((ctxt->sax != NULL) &&
8119 	(ctxt->sax->getParameterEntity != NULL))
8120 	entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8121 
8122     if (entity == NULL) {
8123         xmlHandleUndeclaredEntity(ctxt, name);
8124     } else {
8125 	/*
8126 	 * Internal checking in case the entity quest barfed
8127 	 */
8128 	if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8129 	    (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8130 	    xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8131 			  "%%%s; is not a parameter entity\n",
8132 			  name, NULL);
8133 	}
8134     }
8135 
8136     xmlFree(name);
8137     *str = ptr;
8138     return(entity);
8139 }
8140 
8141 /**
8142  * xmlParseDocTypeDecl:
8143  * @ctxt:  an XML parser context
8144  *
8145  * DEPRECATED: Internal function, don't use.
8146  *
8147  * parse a DOCTYPE declaration
8148  *
8149  * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8150  *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8151  *
8152  * [ VC: Root Element Type ]
8153  * The Name in the document type declaration must match the element
8154  * type of the root element.
8155  */
8156 
8157 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8158 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8159     const xmlChar *name = NULL;
8160     xmlChar *ExternalID = NULL;
8161     xmlChar *URI = NULL;
8162 
8163     /*
8164      * We know that '<!DOCTYPE' has been detected.
8165      */
8166     SKIP(9);
8167 
8168     SKIP_BLANKS;
8169 
8170     /*
8171      * Parse the DOCTYPE name.
8172      */
8173     name = xmlParseName(ctxt);
8174     if (name == NULL) {
8175 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8176 		       "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8177     }
8178     ctxt->intSubName = name;
8179 
8180     SKIP_BLANKS;
8181 
8182     /*
8183      * Check for SystemID and ExternalID
8184      */
8185     URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8186 
8187     if ((URI != NULL) || (ExternalID != NULL)) {
8188         ctxt->hasExternalSubset = 1;
8189     }
8190     ctxt->extSubURI = URI;
8191     ctxt->extSubSystem = ExternalID;
8192 
8193     SKIP_BLANKS;
8194 
8195     /*
8196      * Create and update the internal subset.
8197      */
8198     if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8199 	(!ctxt->disableSAX))
8200 	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8201 
8202     /*
8203      * Is there any internal subset declarations ?
8204      * they are handled separately in xmlParseInternalSubset()
8205      */
8206     if (RAW == '[')
8207 	return;
8208 
8209     /*
8210      * We should be at the end of the DOCTYPE declaration.
8211      */
8212     if (RAW != '>') {
8213 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8214     }
8215     NEXT;
8216 }
8217 
8218 /**
8219  * xmlParseInternalSubset:
8220  * @ctxt:  an XML parser context
8221  *
8222  * parse the internal subset declaration
8223  *
8224  * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8225  */
8226 
8227 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8228 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8229     /*
8230      * Is there any DTD definition ?
8231      */
8232     if (RAW == '[') {
8233         int oldInputNr = ctxt->inputNr;
8234 
8235         NEXT;
8236 	/*
8237 	 * Parse the succession of Markup declarations and
8238 	 * PEReferences.
8239 	 * Subsequence (markupdecl | PEReference | S)*
8240 	 */
8241 	SKIP_BLANKS;
8242 	while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8243                (PARSER_STOPPED(ctxt) == 0)) {
8244 
8245             /*
8246              * Conditional sections are allowed from external entities included
8247              * by PE References in the internal subset.
8248              */
8249             if ((PARSER_EXTERNAL(ctxt)) &&
8250                 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8251                 xmlParseConditionalSections(ctxt);
8252             } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8253 	        xmlParseMarkupDecl(ctxt);
8254             } else if (RAW == '%') {
8255 	        xmlParsePEReference(ctxt);
8256             } else {
8257 		xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8258                 break;
8259             }
8260 	    SKIP_BLANKS_PE;
8261             SHRINK;
8262             GROW;
8263 	}
8264 
8265         while (ctxt->inputNr > oldInputNr)
8266             xmlPopPE(ctxt);
8267 
8268 	if (RAW == ']') {
8269 	    NEXT;
8270 	    SKIP_BLANKS;
8271 	}
8272     }
8273 
8274     /*
8275      * We should be at the end of the DOCTYPE declaration.
8276      */
8277     if ((ctxt->wellFormed) && (RAW != '>')) {
8278 	xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8279 	return;
8280     }
8281     NEXT;
8282 }
8283 
8284 #ifdef LIBXML_SAX1_ENABLED
8285 /**
8286  * xmlParseAttribute:
8287  * @ctxt:  an XML parser context
8288  * @value:  a xmlChar ** used to store the value of the attribute
8289  *
8290  * DEPRECATED: Internal function, don't use.
8291  *
8292  * parse an attribute
8293  *
8294  * [41] Attribute ::= Name Eq AttValue
8295  *
8296  * [ WFC: No External Entity References ]
8297  * Attribute values cannot contain direct or indirect entity references
8298  * to external entities.
8299  *
8300  * [ WFC: No < in Attribute Values ]
8301  * The replacement text of any entity referred to directly or indirectly in
8302  * an attribute value (other than "&lt;") must not contain a <.
8303  *
8304  * [ VC: Attribute Value Type ]
8305  * The attribute must have been declared; the value must be of the type
8306  * declared for it.
8307  *
8308  * [25] Eq ::= S? '=' S?
8309  *
8310  * With namespace:
8311  *
8312  * [NS 11] Attribute ::= QName Eq AttValue
8313  *
8314  * Also the case QName == xmlns:??? is handled independently as a namespace
8315  * definition.
8316  *
8317  * Returns the attribute name, and the value in *value.
8318  */
8319 
8320 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8321 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8322     const xmlChar *name;
8323     xmlChar *val;
8324 
8325     *value = NULL;
8326     GROW;
8327     name = xmlParseName(ctxt);
8328     if (name == NULL) {
8329 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8330 	               "error parsing attribute name\n");
8331         return(NULL);
8332     }
8333 
8334     /*
8335      * read the value
8336      */
8337     SKIP_BLANKS;
8338     if (RAW == '=') {
8339         NEXT;
8340 	SKIP_BLANKS;
8341 	val = xmlParseAttValue(ctxt);
8342     } else {
8343 	xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8344 	       "Specification mandates value for attribute %s\n", name);
8345 	return(name);
8346     }
8347 
8348     /*
8349      * Check that xml:lang conforms to the specification
8350      * No more registered as an error, just generate a warning now
8351      * since this was deprecated in XML second edition
8352      */
8353     if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8354 	if (!xmlCheckLanguageID(val)) {
8355 	    xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8356 		          "Malformed value for xml:lang : %s\n",
8357 			  val, NULL);
8358 	}
8359     }
8360 
8361     /*
8362      * Check that xml:space conforms to the specification
8363      */
8364     if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8365 	if (xmlStrEqual(val, BAD_CAST "default"))
8366 	    *(ctxt->space) = 0;
8367 	else if (xmlStrEqual(val, BAD_CAST "preserve"))
8368 	    *(ctxt->space) = 1;
8369 	else {
8370 		xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8371 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8372                                  val, NULL);
8373 	}
8374     }
8375 
8376     *value = val;
8377     return(name);
8378 }
8379 
8380 /**
8381  * xmlParseStartTag:
8382  * @ctxt:  an XML parser context
8383  *
8384  * DEPRECATED: Internal function, don't use.
8385  *
8386  * Parse a start tag. Always consumes '<'.
8387  *
8388  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8389  *
8390  * [ WFC: Unique Att Spec ]
8391  * No attribute name may appear more than once in the same start-tag or
8392  * empty-element tag.
8393  *
8394  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8395  *
8396  * [ WFC: Unique Att Spec ]
8397  * No attribute name may appear more than once in the same start-tag or
8398  * empty-element tag.
8399  *
8400  * With namespace:
8401  *
8402  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8403  *
8404  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8405  *
8406  * Returns the element name parsed
8407  */
8408 
8409 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8410 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8411     const xmlChar *name;
8412     const xmlChar *attname;
8413     xmlChar *attvalue;
8414     const xmlChar **atts = ctxt->atts;
8415     int nbatts = 0;
8416     int maxatts = ctxt->maxatts;
8417     int i;
8418 
8419     if (RAW != '<') return(NULL);
8420     NEXT1;
8421 
8422     name = xmlParseName(ctxt);
8423     if (name == NULL) {
8424 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8425 	     "xmlParseStartTag: invalid element name\n");
8426         return(NULL);
8427     }
8428 
8429     /*
8430      * Now parse the attributes, it ends up with the ending
8431      *
8432      * (S Attribute)* S?
8433      */
8434     SKIP_BLANKS;
8435     GROW;
8436 
8437     while (((RAW != '>') &&
8438 	   ((RAW != '/') || (NXT(1) != '>')) &&
8439 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8440 	attname = xmlParseAttribute(ctxt, &attvalue);
8441         if (attname == NULL)
8442 	    break;
8443         if (attvalue != NULL) {
8444 	    /*
8445 	     * [ WFC: Unique Att Spec ]
8446 	     * No attribute name may appear more than once in the same
8447 	     * start-tag or empty-element tag.
8448 	     */
8449 	    for (i = 0; i < nbatts;i += 2) {
8450 	        if (xmlStrEqual(atts[i], attname)) {
8451 		    xmlErrAttributeDup(ctxt, NULL, attname);
8452 		    xmlFree(attvalue);
8453 		    goto failed;
8454 		}
8455 	    }
8456 	    /*
8457 	     * Add the pair to atts
8458 	     */
8459 	    if (atts == NULL) {
8460 	        maxatts = 22; /* allow for 10 attrs by default */
8461 	        atts = (const xmlChar **)
8462 		       xmlMalloc(maxatts * sizeof(xmlChar *));
8463 		if (atts == NULL) {
8464 		    xmlErrMemory(ctxt);
8465 		    if (attvalue != NULL)
8466 			xmlFree(attvalue);
8467 		    goto failed;
8468 		}
8469 		ctxt->atts = atts;
8470 		ctxt->maxatts = maxatts;
8471 	    } else if (nbatts + 4 > maxatts) {
8472 	        const xmlChar **n;
8473 
8474 	        maxatts *= 2;
8475 	        n = (const xmlChar **) xmlRealloc((void *) atts,
8476 					     maxatts * sizeof(const xmlChar *));
8477 		if (n == NULL) {
8478 		    xmlErrMemory(ctxt);
8479 		    if (attvalue != NULL)
8480 			xmlFree(attvalue);
8481 		    goto failed;
8482 		}
8483 		atts = n;
8484 		ctxt->atts = atts;
8485 		ctxt->maxatts = maxatts;
8486 	    }
8487 	    atts[nbatts++] = attname;
8488 	    atts[nbatts++] = attvalue;
8489 	    atts[nbatts] = NULL;
8490 	    atts[nbatts + 1] = NULL;
8491 	} else {
8492 	    if (attvalue != NULL)
8493 		xmlFree(attvalue);
8494 	}
8495 
8496 failed:
8497 
8498 	GROW
8499 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8500 	    break;
8501 	if (SKIP_BLANKS == 0) {
8502 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8503 			   "attributes construct error\n");
8504 	}
8505 	SHRINK;
8506         GROW;
8507     }
8508 
8509     /*
8510      * SAX: Start of Element !
8511      */
8512     if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8513 	(!ctxt->disableSAX)) {
8514 	if (nbatts > 0)
8515 	    ctxt->sax->startElement(ctxt->userData, name, atts);
8516 	else
8517 	    ctxt->sax->startElement(ctxt->userData, name, NULL);
8518     }
8519 
8520     if (atts != NULL) {
8521         /* Free only the content strings */
8522         for (i = 1;i < nbatts;i+=2)
8523 	    if (atts[i] != NULL)
8524 	       xmlFree((xmlChar *) atts[i]);
8525     }
8526     return(name);
8527 }
8528 
8529 /**
8530  * xmlParseEndTag1:
8531  * @ctxt:  an XML parser context
8532  * @line:  line of the start tag
8533  * @nsNr:  number of namespaces on the start tag
8534  *
8535  * Parse an end tag. Always consumes '</'.
8536  *
8537  * [42] ETag ::= '</' Name S? '>'
8538  *
8539  * With namespace
8540  *
8541  * [NS 9] ETag ::= '</' QName S? '>'
8542  */
8543 
8544 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8545 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8546     const xmlChar *name;
8547 
8548     GROW;
8549     if ((RAW != '<') || (NXT(1) != '/')) {
8550 	xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8551 		       "xmlParseEndTag: '</' not found\n");
8552 	return;
8553     }
8554     SKIP(2);
8555 
8556     name = xmlParseNameAndCompare(ctxt,ctxt->name);
8557 
8558     /*
8559      * We should definitely be at the ending "S? '>'" part
8560      */
8561     GROW;
8562     SKIP_BLANKS;
8563     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8564 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8565     } else
8566 	NEXT1;
8567 
8568     /*
8569      * [ WFC: Element Type Match ]
8570      * The Name in an element's end-tag must match the element type in the
8571      * start-tag.
8572      *
8573      */
8574     if (name != (xmlChar*)1) {
8575         if (name == NULL) name = BAD_CAST "unparsable";
8576         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8577 		     "Opening and ending tag mismatch: %s line %d and %s\n",
8578 		                ctxt->name, line, name);
8579     }
8580 
8581     /*
8582      * SAX: End of Tag
8583      */
8584     if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8585 	(!ctxt->disableSAX))
8586         ctxt->sax->endElement(ctxt->userData, ctxt->name);
8587 
8588     namePop(ctxt);
8589     spacePop(ctxt);
8590 }
8591 
8592 /**
8593  * xmlParseEndTag:
8594  * @ctxt:  an XML parser context
8595  *
8596  * DEPRECATED: Internal function, don't use.
8597  *
8598  * parse an end of tag
8599  *
8600  * [42] ETag ::= '</' Name S? '>'
8601  *
8602  * With namespace
8603  *
8604  * [NS 9] ETag ::= '</' QName S? '>'
8605  */
8606 
8607 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8608 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8609     xmlParseEndTag1(ctxt, 0);
8610 }
8611 #endif /* LIBXML_SAX1_ENABLED */
8612 
8613 /************************************************************************
8614  *									*
8615  *		      SAX 2 specific operations				*
8616  *									*
8617  ************************************************************************/
8618 
8619 /**
8620  * xmlParseQNameHashed:
8621  * @ctxt:  an XML parser context
8622  * @prefix:  pointer to store the prefix part
8623  *
8624  * parse an XML Namespace QName
8625  *
8626  * [6]  QName  ::= (Prefix ':')? LocalPart
8627  * [7]  Prefix  ::= NCName
8628  * [8]  LocalPart  ::= NCName
8629  *
8630  * Returns the Name parsed or NULL
8631  */
8632 
8633 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8634 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8635     xmlHashedString l, p;
8636     int start, isNCName = 0;
8637 
8638     l.name = NULL;
8639     p.name = NULL;
8640 
8641     GROW;
8642     start = CUR_PTR - BASE_PTR;
8643 
8644     l = xmlParseNCName(ctxt);
8645     if (l.name != NULL) {
8646         isNCName = 1;
8647         if (CUR == ':') {
8648             NEXT;
8649             p = l;
8650             l = xmlParseNCName(ctxt);
8651         }
8652     }
8653     if ((l.name == NULL) || (CUR == ':')) {
8654         xmlChar *tmp;
8655 
8656         l.name = NULL;
8657         p.name = NULL;
8658         if ((isNCName == 0) && (CUR != ':'))
8659             return(l);
8660         tmp = xmlParseNmtoken(ctxt);
8661         if (tmp != NULL)
8662             xmlFree(tmp);
8663         l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8664                                 CUR_PTR - (BASE_PTR + start));
8665         if (l.name == NULL) {
8666             xmlErrMemory(ctxt);
8667             return(l);
8668         }
8669         xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8670                  "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8671     }
8672 
8673     *prefix = p;
8674     return(l);
8675 }
8676 
8677 /**
8678  * xmlParseQName:
8679  * @ctxt:  an XML parser context
8680  * @prefix:  pointer to store the prefix part
8681  *
8682  * parse an XML Namespace QName
8683  *
8684  * [6]  QName  ::= (Prefix ':')? LocalPart
8685  * [7]  Prefix  ::= NCName
8686  * [8]  LocalPart  ::= NCName
8687  *
8688  * Returns the Name parsed or NULL
8689  */
8690 
8691 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8692 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8693     xmlHashedString n, p;
8694 
8695     n = xmlParseQNameHashed(ctxt, &p);
8696     if (n.name == NULL)
8697         return(NULL);
8698     *prefix = p.name;
8699     return(n.name);
8700 }
8701 
8702 /**
8703  * xmlParseQNameAndCompare:
8704  * @ctxt:  an XML parser context
8705  * @name:  the localname
8706  * @prefix:  the prefix, if any.
8707  *
8708  * parse an XML name and compares for match
8709  * (specialized for endtag parsing)
8710  *
8711  * Returns NULL for an illegal name, (xmlChar*) 1 for success
8712  * and the name for mismatch
8713  */
8714 
8715 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8716 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8717                         xmlChar const *prefix) {
8718     const xmlChar *cmp;
8719     const xmlChar *in;
8720     const xmlChar *ret;
8721     const xmlChar *prefix2;
8722 
8723     if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8724 
8725     GROW;
8726     in = ctxt->input->cur;
8727 
8728     cmp = prefix;
8729     while (*in != 0 && *in == *cmp) {
8730 	++in;
8731 	++cmp;
8732     }
8733     if ((*cmp == 0) && (*in == ':')) {
8734         in++;
8735 	cmp = name;
8736 	while (*in != 0 && *in == *cmp) {
8737 	    ++in;
8738 	    ++cmp;
8739 	}
8740 	if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8741 	    /* success */
8742             ctxt->input->col += in - ctxt->input->cur;
8743 	    ctxt->input->cur = in;
8744 	    return((const xmlChar*) 1);
8745 	}
8746     }
8747     /*
8748      * all strings coms from the dictionary, equality can be done directly
8749      */
8750     ret = xmlParseQName (ctxt, &prefix2);
8751     if (ret == NULL)
8752         return(NULL);
8753     if ((ret == name) && (prefix == prefix2))
8754 	return((const xmlChar*) 1);
8755     return ret;
8756 }
8757 
8758 /**
8759  * xmlParseAttribute2:
8760  * @ctxt:  an XML parser context
8761  * @pref:  the element prefix
8762  * @elem:  the element name
8763  * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8764  * @value:  a xmlChar ** used to store the value of the attribute
8765  * @len:  an int * to save the length of the attribute
8766  * @alloc:  an int * to indicate if the attribute was allocated
8767  *
8768  * parse an attribute in the new SAX2 framework.
8769  *
8770  * Returns the attribute name, and the value in *value, .
8771  */
8772 
8773 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8774 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8775                    const xmlChar * pref, const xmlChar * elem,
8776                    xmlHashedString * hprefix, xmlChar ** value,
8777                    int *len, int *alloc)
8778 {
8779     xmlHashedString hname;
8780     const xmlChar *prefix, *name;
8781     xmlChar *val = NULL, *internal_val = NULL;
8782     int normalize = 0;
8783     int isNamespace;
8784 
8785     *value = NULL;
8786     GROW;
8787     hname = xmlParseQNameHashed(ctxt, hprefix);
8788     if (hname.name == NULL) {
8789         xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8790                        "error parsing attribute name\n");
8791         return(hname);
8792     }
8793     name = hname.name;
8794     if (hprefix->name != NULL)
8795         prefix = hprefix->name;
8796     else
8797         prefix = NULL;
8798 
8799     /*
8800      * get the type if needed
8801      */
8802     if (ctxt->attsSpecial != NULL) {
8803         int type;
8804 
8805         type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8806                                                  pref, elem,
8807                                                  prefix, name);
8808         if (type != 0)
8809             normalize = 1;
8810     }
8811 
8812     /*
8813      * read the value
8814      */
8815     SKIP_BLANKS;
8816     if (RAW == '=') {
8817         NEXT;
8818         SKIP_BLANKS;
8819         isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8820                        (prefix == ctxt->str_xmlns));
8821         val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8822                                        isNamespace);
8823         if (val == NULL)
8824             goto error;
8825     } else {
8826         xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827                           "Specification mandates value for attribute %s\n",
8828                           name);
8829         goto error;
8830     }
8831 
8832     if (prefix == ctxt->str_xml) {
8833         /*
8834          * Check that xml:lang conforms to the specification
8835          * No more registered as an error, just generate a warning now
8836          * since this was deprecated in XML second edition
8837          */
8838         if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839             internal_val = xmlStrndup(val, *len);
8840             if (internal_val == NULL)
8841                 goto mem_error;
8842             if (!xmlCheckLanguageID(internal_val)) {
8843                 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844                               "Malformed value for xml:lang : %s\n",
8845                               internal_val, NULL);
8846             }
8847         }
8848 
8849         /*
8850          * Check that xml:space conforms to the specification
8851          */
8852         if (xmlStrEqual(name, BAD_CAST "space")) {
8853             internal_val = xmlStrndup(val, *len);
8854             if (internal_val == NULL)
8855                 goto mem_error;
8856             if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857                 *(ctxt->space) = 0;
8858             else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859                 *(ctxt->space) = 1;
8860             else {
8861                 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862                               "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863                               internal_val, NULL);
8864             }
8865         }
8866         if (internal_val) {
8867             xmlFree(internal_val);
8868         }
8869     }
8870 
8871     *value = val;
8872     return (hname);
8873 
8874 mem_error:
8875     xmlErrMemory(ctxt);
8876 error:
8877     if ((val != NULL) && (*alloc != 0))
8878         xmlFree(val);
8879     return(hname);
8880 }
8881 
8882 /**
8883  * xmlAttrHashInsert:
8884  * @ctxt: parser context
8885  * @size: size of the hash table
8886  * @name: attribute name
8887  * @uri: namespace uri
8888  * @hashValue: combined hash value of name and uri
8889  * @aindex: attribute index (this is a multiple of 5)
8890  *
8891  * Inserts a new attribute into the hash table.
8892  *
8893  * Returns INT_MAX if no existing attribute was found, the attribute
8894  * index if an attribute was found, -1 if a memory allocation failed.
8895  */
8896 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8897 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898                   const xmlChar *uri, unsigned hashValue, int aindex) {
8899     xmlAttrHashBucket *table = ctxt->attrHash;
8900     xmlAttrHashBucket *bucket;
8901     unsigned hindex;
8902 
8903     hindex = hashValue & (size - 1);
8904     bucket = &table[hindex];
8905 
8906     while (bucket->index >= 0) {
8907         const xmlChar **atts = &ctxt->atts[bucket->index];
8908 
8909         if (name == atts[0]) {
8910             int nsIndex = (int) (ptrdiff_t) atts[2];
8911 
8912             if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913                 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8914                 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915                 return(bucket->index);
8916         }
8917 
8918         hindex++;
8919         bucket++;
8920         if (hindex >= size) {
8921             hindex = 0;
8922             bucket = table;
8923         }
8924     }
8925 
8926     bucket->index = aindex;
8927 
8928     return(INT_MAX);
8929 }
8930 
8931 /**
8932  * xmlParseStartTag2:
8933  * @ctxt:  an XML parser context
8934  *
8935  * Parse a start tag. Always consumes '<'.
8936  *
8937  * This routine is called when running SAX2 parsing
8938  *
8939  * [40] STag ::= '<' Name (S Attribute)* S? '>'
8940  *
8941  * [ WFC: Unique Att Spec ]
8942  * No attribute name may appear more than once in the same start-tag or
8943  * empty-element tag.
8944  *
8945  * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8946  *
8947  * [ WFC: Unique Att Spec ]
8948  * No attribute name may appear more than once in the same start-tag or
8949  * empty-element tag.
8950  *
8951  * With namespace:
8952  *
8953  * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8954  *
8955  * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8956  *
8957  * Returns the element name parsed
8958  */
8959 
8960 static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt,const xmlChar ** pref,const xmlChar ** URI,int * nbNsPtr)8961 xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
8962                   const xmlChar **URI, int *nbNsPtr) {
8963     xmlHashedString hlocalname;
8964     xmlHashedString hprefix;
8965     xmlHashedString hattname;
8966     xmlHashedString haprefix;
8967     const xmlChar *localname;
8968     const xmlChar *prefix;
8969     const xmlChar *attname;
8970     const xmlChar *aprefix;
8971     const xmlChar *uri;
8972     xmlChar *attvalue = NULL;
8973     const xmlChar **atts = ctxt->atts;
8974     unsigned attrHashSize = 0;
8975     int maxatts = ctxt->maxatts;
8976     int nratts, nbatts, nbdef;
8977     int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
8978     int alloc = 0;
8979 
8980     if (RAW != '<') return(NULL);
8981     NEXT1;
8982 
8983     nbatts = 0;
8984     nratts = 0;
8985     nbdef = 0;
8986     nbNs = 0;
8987     nbTotalDef = 0;
8988     attval = 0;
8989 
8990     if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
8991         xmlErrMemory(ctxt);
8992         return(NULL);
8993     }
8994 
8995     hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
8996     if (hlocalname.name == NULL) {
8997 	xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8998 		       "StartTag: invalid element name\n");
8999         return(NULL);
9000     }
9001     localname = hlocalname.name;
9002     prefix = hprefix.name;
9003 
9004     /*
9005      * Now parse the attributes, it ends up with the ending
9006      *
9007      * (S Attribute)* S?
9008      */
9009     SKIP_BLANKS;
9010     GROW;
9011 
9012     /*
9013      * The ctxt->atts array will be ultimately passed to the SAX callback
9014      * containing five xmlChar pointers for each attribute:
9015      *
9016      * [0] attribute name
9017      * [1] attribute prefix
9018      * [2] namespace URI
9019      * [3] attribute value
9020      * [4] end of attribute value
9021      *
9022      * To save memory, we reuse this array temporarily and store integers
9023      * in these pointer variables.
9024      *
9025      * [0] attribute name
9026      * [1] attribute prefix
9027      * [2] hash value of attribute prefix, and later namespace index
9028      * [3] for non-allocated values: ptrdiff_t offset into input buffer
9029      * [4] for non-allocated values: ptrdiff_t offset into input buffer
9030      *
9031      * The ctxt->attallocs array contains an additional unsigned int for
9032      * each attribute, containing the hash value of the attribute name
9033      * and the alloc flag in bit 31.
9034      */
9035 
9036     while (((RAW != '>') &&
9037 	   ((RAW != '/') || (NXT(1) != '>')) &&
9038 	   (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
9039 	int len = -1;
9040 
9041 	hattname = xmlParseAttribute2(ctxt, prefix, localname,
9042                                           &haprefix, &attvalue, &len,
9043                                           &alloc);
9044         if (hattname.name == NULL)
9045 	    break;
9046         if (attvalue == NULL)
9047             goto next_attr;
9048         attname = hattname.name;
9049         aprefix = haprefix.name;
9050 	if (len < 0) len = xmlStrlen(attvalue);
9051 
9052         if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9053             xmlHashedString huri;
9054             xmlURIPtr parsedUri;
9055 
9056             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9057             uri = huri.name;
9058             if (uri == NULL) {
9059                 xmlErrMemory(ctxt);
9060                 goto next_attr;
9061             }
9062             if (*uri != 0) {
9063                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9064                     xmlErrMemory(ctxt);
9065                     goto next_attr;
9066                 }
9067                 if (parsedUri == NULL) {
9068                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9069                              "xmlns: '%s' is not a valid URI\n",
9070                                        uri, NULL, NULL);
9071                 } else {
9072                     if (parsedUri->scheme == NULL) {
9073                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9074                                   "xmlns: URI %s is not absolute\n",
9075                                   uri, NULL, NULL);
9076                     }
9077                     xmlFreeURI(parsedUri);
9078                 }
9079                 if (uri == ctxt->str_xml_ns) {
9080                     if (attname != ctxt->str_xml) {
9081                         xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9082                      "xml namespace URI cannot be the default namespace\n",
9083                                  NULL, NULL, NULL);
9084                     }
9085                     goto next_attr;
9086                 }
9087                 if ((len == 29) &&
9088                     (xmlStrEqual(uri,
9089                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9090                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9091                          "reuse of the xmlns namespace name is forbidden\n",
9092                              NULL, NULL, NULL);
9093                     goto next_attr;
9094                 }
9095             }
9096 
9097             if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9098                 nbNs++;
9099         } else if (aprefix == ctxt->str_xmlns) {
9100             xmlHashedString huri;
9101             xmlURIPtr parsedUri;
9102 
9103             huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9104             uri = huri.name;
9105             if (uri == NULL) {
9106                 xmlErrMemory(ctxt);
9107                 goto next_attr;
9108             }
9109 
9110             if (attname == ctxt->str_xml) {
9111                 if (uri != ctxt->str_xml_ns) {
9112                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9113                              "xml namespace prefix mapped to wrong URI\n",
9114                              NULL, NULL, NULL);
9115                 }
9116                 /*
9117                  * Do not keep a namespace definition node
9118                  */
9119                 goto next_attr;
9120             }
9121             if (uri == ctxt->str_xml_ns) {
9122                 if (attname != ctxt->str_xml) {
9123                     xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9124                              "xml namespace URI mapped to wrong prefix\n",
9125                              NULL, NULL, NULL);
9126                 }
9127                 goto next_attr;
9128             }
9129             if (attname == ctxt->str_xmlns) {
9130                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9131                          "redefinition of the xmlns prefix is forbidden\n",
9132                          NULL, NULL, NULL);
9133                 goto next_attr;
9134             }
9135             if ((len == 29) &&
9136                 (xmlStrEqual(uri,
9137                              BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9138                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9139                          "reuse of the xmlns namespace name is forbidden\n",
9140                          NULL, NULL, NULL);
9141                 goto next_attr;
9142             }
9143             if ((uri == NULL) || (uri[0] == 0)) {
9144                 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9145                          "xmlns:%s: Empty XML namespace is not allowed\n",
9146                               attname, NULL, NULL);
9147                 goto next_attr;
9148             } else {
9149                 if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
9150                     xmlErrMemory(ctxt);
9151                     goto next_attr;
9152                 }
9153                 if (parsedUri == NULL) {
9154                     xmlNsErr(ctxt, XML_WAR_NS_URI,
9155                          "xmlns:%s: '%s' is not a valid URI\n",
9156                                        attname, uri, NULL);
9157                 } else {
9158                     if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9159                         xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9160                                   "xmlns:%s: URI %s is not absolute\n",
9161                                   attname, uri, NULL);
9162                     }
9163                     xmlFreeURI(parsedUri);
9164                 }
9165             }
9166 
9167             if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9168                 nbNs++;
9169         } else {
9170             /*
9171              * Populate attributes array, see above for repurposing
9172              * of xmlChar pointers.
9173              */
9174             if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9175                 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9176                     goto next_attr;
9177                 }
9178                 maxatts = ctxt->maxatts;
9179                 atts = ctxt->atts;
9180             }
9181             ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9182                                         ((unsigned) alloc << 31);
9183             atts[nbatts++] = attname;
9184             atts[nbatts++] = aprefix;
9185             atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9186             if (alloc) {
9187                 atts[nbatts++] = attvalue;
9188                 attvalue += len;
9189                 atts[nbatts++] = attvalue;
9190             } else {
9191                 /*
9192                  * attvalue points into the input buffer which can be
9193                  * reallocated. Store differences to input->base instead.
9194                  * The pointers will be reconstructed later.
9195                  */
9196                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9197                 attvalue += len;
9198                 atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9199             }
9200             /*
9201              * tag if some deallocation is needed
9202              */
9203             if (alloc != 0) attval = 1;
9204             attvalue = NULL; /* moved into atts */
9205         }
9206 
9207 next_attr:
9208         if ((attvalue != NULL) && (alloc != 0)) {
9209             xmlFree(attvalue);
9210             attvalue = NULL;
9211         }
9212 
9213 	GROW
9214 	if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9215 	    break;
9216 	if (SKIP_BLANKS == 0) {
9217 	    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9218 			   "attributes construct error\n");
9219 	    break;
9220 	}
9221         GROW;
9222     }
9223 
9224     /*
9225      * Namespaces from default attributes
9226      */
9227     if (ctxt->attsDefault != NULL) {
9228         xmlDefAttrsPtr defaults;
9229 
9230 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9231 	if (defaults != NULL) {
9232 	    for (i = 0; i < defaults->nbAttrs; i++) {
9233                 xmlDefAttr *attr = &defaults->attrs[i];
9234 
9235 	        attname = attr->name.name;
9236 		aprefix = attr->prefix.name;
9237 
9238 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9239                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9240 
9241                     if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9242                         nbNs++;
9243 		} else if (aprefix == ctxt->str_xmlns) {
9244                     xmlParserEntityCheck(ctxt, attr->expandedSize);
9245 
9246                     if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9247                                       NULL, 1) > 0)
9248                         nbNs++;
9249 		} else {
9250                     nbTotalDef += 1;
9251                 }
9252 	    }
9253 	}
9254     }
9255 
9256     /*
9257      * Resolve attribute namespaces
9258      */
9259     for (i = 0; i < nbatts; i += 5) {
9260         attname = atts[i];
9261         aprefix = atts[i+1];
9262 
9263         /*
9264 	* The default namespace does not apply to attribute names.
9265 	*/
9266 	if (aprefix == NULL) {
9267             nsIndex = NS_INDEX_EMPTY;
9268         } else if (aprefix == ctxt->str_xml) {
9269             nsIndex = NS_INDEX_XML;
9270         } else {
9271             haprefix.name = aprefix;
9272             haprefix.hashValue = (size_t) atts[i+2];
9273             nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9274 
9275 	    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
9276                 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9277 		    "Namespace prefix %s for %s on %s is not defined\n",
9278 		    aprefix, attname, localname);
9279                 nsIndex = NS_INDEX_EMPTY;
9280             }
9281         }
9282 
9283         atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9284     }
9285 
9286     /*
9287      * Maximum number of attributes including default attributes.
9288      */
9289     maxAtts = nratts + nbTotalDef;
9290 
9291     /*
9292      * Verify that attribute names are unique.
9293      */
9294     if (maxAtts > 1) {
9295         attrHashSize = 4;
9296         while (attrHashSize / 2 < (unsigned) maxAtts)
9297             attrHashSize *= 2;
9298 
9299         if (attrHashSize > ctxt->attrHashMax) {
9300             xmlAttrHashBucket *tmp;
9301 
9302             tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9303             if (tmp == NULL) {
9304                 xmlErrMemory(ctxt);
9305                 goto done;
9306             }
9307 
9308             ctxt->attrHash = tmp;
9309             ctxt->attrHashMax = attrHashSize;
9310         }
9311 
9312         memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9313 
9314         for (i = 0, j = 0; j < nratts; i += 5, j++) {
9315             const xmlChar *nsuri;
9316             unsigned hashValue, nameHashValue, uriHashValue;
9317             int res;
9318 
9319             attname = atts[i];
9320             aprefix = atts[i+1];
9321             nsIndex = (ptrdiff_t) atts[i+2];
9322             /* Hash values always have bit 31 set, see dict.c */
9323             nameHashValue = ctxt->attallocs[j] | 0x80000000;
9324 
9325             if (nsIndex == NS_INDEX_EMPTY) {
9326                 /*
9327                  * Prefix with empty namespace means an undeclared
9328                  * prefix which was already reported above.
9329                  */
9330                 if (aprefix != NULL)
9331                     continue;
9332                 nsuri = NULL;
9333                 uriHashValue = URI_HASH_EMPTY;
9334             } else if (nsIndex == NS_INDEX_XML) {
9335                 nsuri = ctxt->str_xml_ns;
9336                 uriHashValue = URI_HASH_XML;
9337             } else {
9338                 nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9339                 uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9340             }
9341 
9342             hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9343             res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9344                                     hashValue, i);
9345             if (res < 0)
9346                 continue;
9347 
9348             /*
9349              * [ WFC: Unique Att Spec ]
9350              * No attribute name may appear more than once in the same
9351              * start-tag or empty-element tag.
9352              * As extended by the Namespace in XML REC.
9353              */
9354             if (res < INT_MAX) {
9355                 if (aprefix == atts[res+1]) {
9356                     xmlErrAttributeDup(ctxt, aprefix, attname);
9357                 } else {
9358                     xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9359                              "Namespaced Attribute %s in '%s' redefined\n",
9360                              attname, nsuri, NULL);
9361                 }
9362             }
9363         }
9364     }
9365 
9366     /*
9367      * Default attributes
9368      */
9369     if (ctxt->attsDefault != NULL) {
9370         xmlDefAttrsPtr defaults;
9371 
9372 	defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9373 	if (defaults != NULL) {
9374 	    for (i = 0; i < defaults->nbAttrs; i++) {
9375                 xmlDefAttr *attr = &defaults->attrs[i];
9376                 const xmlChar *nsuri = NULL;
9377                 unsigned hashValue, uriHashValue = 0;
9378                 int res;
9379 
9380 	        attname = attr->name.name;
9381 		aprefix = attr->prefix.name;
9382 
9383 		if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9384                     continue;
9385 		if (aprefix == ctxt->str_xmlns)
9386                     continue;
9387 
9388                 if (aprefix == NULL) {
9389                     nsIndex = NS_INDEX_EMPTY;
9390                     nsuri = NULL;
9391                     uriHashValue = URI_HASH_EMPTY;
9392                 } else if (aprefix == ctxt->str_xml) {
9393                     nsIndex = NS_INDEX_XML;
9394                     nsuri = ctxt->str_xml_ns;
9395                     uriHashValue = URI_HASH_XML;
9396                 } else {
9397                     nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9398                     if ((nsIndex == INT_MAX) ||
9399                         (nsIndex < ctxt->nsdb->minNsIndex)) {
9400                         xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9401                                  "Namespace prefix %s for %s on %s is not "
9402                                  "defined\n",
9403                                  aprefix, attname, localname);
9404                         nsIndex = NS_INDEX_EMPTY;
9405                         nsuri = NULL;
9406                         uriHashValue = URI_HASH_EMPTY;
9407                     } else {
9408                         nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9409                         uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9410                     }
9411                 }
9412 
9413                 /*
9414                  * Check whether the attribute exists
9415                  */
9416                 if (maxAtts > 1) {
9417                     hashValue = xmlDictCombineHash(attr->name.hashValue,
9418                                                    uriHashValue);
9419                     res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9420                                             hashValue, nbatts);
9421                     if (res < 0)
9422                         continue;
9423                     if (res < INT_MAX) {
9424                         if (aprefix == atts[res+1])
9425                             continue;
9426                         xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9427                                  "Namespaced Attribute %s in '%s' redefined\n",
9428                                  attname, nsuri, NULL);
9429                     }
9430                 }
9431 
9432                 xmlParserEntityCheck(ctxt, attr->expandedSize);
9433 
9434                 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9435                     if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9436                         localname = NULL;
9437                         goto done;
9438                     }
9439                     maxatts = ctxt->maxatts;
9440                     atts = ctxt->atts;
9441                 }
9442 
9443                 atts[nbatts++] = attname;
9444                 atts[nbatts++] = aprefix;
9445                 atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
9446                 atts[nbatts++] = attr->value.name;
9447                 atts[nbatts++] = attr->valueEnd;
9448                 if ((ctxt->standalone == 1) && (attr->external != 0)) {
9449                     xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9450                             "standalone: attribute %s on %s defaulted "
9451                             "from external subset\n",
9452                             attname, localname);
9453                 }
9454                 nbdef++;
9455 	    }
9456 	}
9457     }
9458 
9459     /*
9460      * Reconstruct attribute pointers
9461      */
9462     for (i = 0, j = 0; i < nbatts; i += 5, j++) {
9463         /* namespace URI */
9464         nsIndex = (ptrdiff_t) atts[i+2];
9465         if (nsIndex == INT_MAX)
9466             atts[i+2] = NULL;
9467         else if (nsIndex == INT_MAX - 1)
9468             atts[i+2] = ctxt->str_xml_ns;
9469         else
9470             atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
9471 
9472         if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
9473             atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
9474             atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
9475         }
9476     }
9477 
9478     uri = xmlParserNsLookupUri(ctxt, &hprefix);
9479     if ((prefix != NULL) && (uri == NULL)) {
9480 	xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9481 	         "Namespace prefix %s on %s is not defined\n",
9482 		 prefix, localname, NULL);
9483     }
9484     *pref = prefix;
9485     *URI = uri;
9486 
9487     /*
9488      * SAX callback
9489      */
9490     if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9491 	(!ctxt->disableSAX)) {
9492 	if (nbNs > 0)
9493 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9494                           nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
9495 			  nbatts / 5, nbdef, atts);
9496 	else
9497 	    ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
9498                           0, NULL, nbatts / 5, nbdef, atts);
9499     }
9500 
9501 done:
9502     /*
9503      * Free allocated attribute values
9504      */
9505     if (attval != 0) {
9506 	for (i = 0, j = 0; j < nratts; i += 5, j++)
9507 	    if (ctxt->attallocs[j] & 0x80000000)
9508 	        xmlFree((xmlChar *) atts[i+3]);
9509     }
9510 
9511     *nbNsPtr = nbNs;
9512     return(localname);
9513 }
9514 
9515 /**
9516  * xmlParseEndTag2:
9517  * @ctxt:  an XML parser context
9518  * @line:  line of the start tag
9519  * @nsNr:  number of namespaces on the start tag
9520  *
9521  * Parse an end tag. Always consumes '</'.
9522  *
9523  * [42] ETag ::= '</' Name S? '>'
9524  *
9525  * With namespace
9526  *
9527  * [NS 9] ETag ::= '</' QName S? '>'
9528  */
9529 
9530 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9531 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532     const xmlChar *name;
9533 
9534     GROW;
9535     if ((RAW != '<') || (NXT(1) != '/')) {
9536 	xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537 	return;
9538     }
9539     SKIP(2);
9540 
9541     if (tag->prefix == NULL)
9542         name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543     else
9544         name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545 
9546     /*
9547      * We should definitely be at the ending "S? '>'" part
9548      */
9549     GROW;
9550     SKIP_BLANKS;
9551     if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552 	xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553     } else
9554 	NEXT1;
9555 
9556     /*
9557      * [ WFC: Element Type Match ]
9558      * The Name in an element's end-tag must match the element type in the
9559      * start-tag.
9560      *
9561      */
9562     if (name != (xmlChar*)1) {
9563         if (name == NULL) name = BAD_CAST "unparsable";
9564         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565 		     "Opening and ending tag mismatch: %s line %d and %s\n",
9566 		                ctxt->name, tag->line, name);
9567     }
9568 
9569     /*
9570      * SAX: End of Tag
9571      */
9572     if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573 	(!ctxt->disableSAX))
9574 	ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575                                 tag->URI);
9576 
9577     spacePop(ctxt);
9578     if (tag->nsNr != 0)
9579 	xmlParserNsPop(ctxt, tag->nsNr);
9580 }
9581 
9582 /**
9583  * xmlParseCDSect:
9584  * @ctxt:  an XML parser context
9585  *
9586  * DEPRECATED: Internal function, don't use.
9587  *
9588  * Parse escaped pure raw content. Always consumes '<!['.
9589  *
9590  * [18] CDSect ::= CDStart CData CDEnd
9591  *
9592  * [19] CDStart ::= '<![CDATA['
9593  *
9594  * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595  *
9596  * [21] CDEnd ::= ']]>'
9597  */
9598 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9599 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600     xmlChar *buf = NULL;
9601     int len = 0;
9602     int size = XML_PARSER_BUFFER_SIZE;
9603     int r, rl;
9604     int	s, sl;
9605     int cur, l;
9606     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607                     XML_MAX_HUGE_LENGTH :
9608                     XML_MAX_TEXT_LENGTH;
9609 
9610     if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611         return;
9612     SKIP(3);
9613 
9614     if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615         return;
9616     SKIP(6);
9617 
9618     r = xmlCurrentCharRecover(ctxt, &rl);
9619     if (!IS_CHAR(r)) {
9620 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621         goto out;
9622     }
9623     NEXTL(rl);
9624     s = xmlCurrentCharRecover(ctxt, &sl);
9625     if (!IS_CHAR(s)) {
9626 	xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627         goto out;
9628     }
9629     NEXTL(sl);
9630     cur = xmlCurrentCharRecover(ctxt, &l);
9631     buf = xmlMalloc(size);
9632     if (buf == NULL) {
9633 	xmlErrMemory(ctxt);
9634         goto out;
9635     }
9636     while (IS_CHAR(cur) &&
9637            ((r != ']') || (s != ']') || (cur != '>'))) {
9638 	if (len + 5 >= size) {
9639 	    xmlChar *tmp;
9640 
9641 	    tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642 	    if (tmp == NULL) {
9643 		xmlErrMemory(ctxt);
9644                 goto out;
9645 	    }
9646 	    buf = tmp;
9647 	    size *= 2;
9648 	}
9649 	COPY_BUF(buf, len, r);
9650         if (len > maxLength) {
9651             xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652                            "CData section too big found\n");
9653             goto out;
9654         }
9655 	r = s;
9656 	rl = sl;
9657 	s = cur;
9658 	sl = l;
9659 	NEXTL(l);
9660 	cur = xmlCurrentCharRecover(ctxt, &l);
9661     }
9662     buf[len] = 0;
9663     if (cur != '>') {
9664 	xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665 	                     "CData section not finished\n%.50s\n", buf);
9666         goto out;
9667     }
9668     NEXTL(l);
9669 
9670     /*
9671      * OK the buffer is to be consumed as cdata.
9672      */
9673     if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674 	if (ctxt->sax->cdataBlock != NULL)
9675 	    ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676 	else if (ctxt->sax->characters != NULL)
9677 	    ctxt->sax->characters(ctxt->userData, buf, len);
9678     }
9679 
9680 out:
9681     xmlFree(buf);
9682 }
9683 
9684 /**
9685  * xmlParseContentInternal:
9686  * @ctxt:  an XML parser context
9687  *
9688  * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689  * unexpected EOF to the caller.
9690  */
9691 
9692 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9693 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694     int oldNameNr = ctxt->nameNr;
9695     int oldSpaceNr = ctxt->spaceNr;
9696     int oldNodeNr = ctxt->nodeNr;
9697 
9698     GROW;
9699     while ((ctxt->input->cur < ctxt->input->end) &&
9700 	   (PARSER_STOPPED(ctxt) == 0)) {
9701 	const xmlChar *cur = ctxt->input->cur;
9702 
9703 	/*
9704 	 * First case : a Processing Instruction.
9705 	 */
9706 	if ((*cur == '<') && (cur[1] == '?')) {
9707 	    xmlParsePI(ctxt);
9708 	}
9709 
9710 	/*
9711 	 * Second case : a CDSection
9712 	 */
9713 	/* 2.6.0 test was *cur not RAW */
9714 	else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715 	    xmlParseCDSect(ctxt);
9716 	}
9717 
9718 	/*
9719 	 * Third case :  a comment
9720 	 */
9721 	else if ((*cur == '<') && (NXT(1) == '!') &&
9722 		 (NXT(2) == '-') && (NXT(3) == '-')) {
9723 	    xmlParseComment(ctxt);
9724 	}
9725 
9726 	/*
9727 	 * Fourth case :  a sub-element.
9728 	 */
9729 	else if (*cur == '<') {
9730             if (NXT(1) == '/') {
9731                 if (ctxt->nameNr <= oldNameNr)
9732                     break;
9733 	        xmlParseElementEnd(ctxt);
9734             } else {
9735 	        xmlParseElementStart(ctxt);
9736             }
9737 	}
9738 
9739 	/*
9740 	 * Fifth case : a reference. If if has not been resolved,
9741 	 *    parsing returns it's Name, create the node
9742 	 */
9743 
9744 	else if (*cur == '&') {
9745 	    xmlParseReference(ctxt);
9746 	}
9747 
9748 	/*
9749 	 * Last case, text. Note that References are handled directly.
9750 	 */
9751 	else {
9752 	    xmlParseCharDataInternal(ctxt, 0);
9753 	}
9754 
9755 	SHRINK;
9756 	GROW;
9757     }
9758 
9759     if ((ctxt->nameNr > oldNameNr) &&
9760         (ctxt->input->cur >= ctxt->input->end) &&
9761         (ctxt->wellFormed)) {
9762         const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763         int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765                 "Premature end of data in tag %s line %d\n",
9766                 name, line, NULL);
9767     }
9768 
9769     /*
9770      * Clean up in error case
9771      */
9772 
9773     while (ctxt->nodeNr > oldNodeNr)
9774         nodePop(ctxt);
9775 
9776     while (ctxt->nameNr > oldNameNr) {
9777         xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778 
9779         if (tag->nsNr != 0)
9780             xmlParserNsPop(ctxt, tag->nsNr);
9781 
9782         namePop(ctxt);
9783     }
9784 
9785     while (ctxt->spaceNr > oldSpaceNr)
9786         spacePop(ctxt);
9787 }
9788 
9789 /**
9790  * xmlParseContent:
9791  * @ctxt:  an XML parser context
9792  *
9793  * Parse XML element content. This is useful if you're only interested
9794  * in custom SAX callbacks. If you want a node list, use
9795  * xmlCtxtParseContent.
9796  */
9797 void
xmlParseContent(xmlParserCtxtPtr ctxt)9798 xmlParseContent(xmlParserCtxtPtr ctxt) {
9799     if ((ctxt == NULL) || (ctxt->input == NULL))
9800         return;
9801 
9802     xmlCtxtInitializeLate(ctxt);
9803 
9804     xmlParseContentInternal(ctxt);
9805 
9806     if (ctxt->input->cur < ctxt->input->end)
9807 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808 }
9809 
9810 /**
9811  * xmlParseElement:
9812  * @ctxt:  an XML parser context
9813  *
9814  * DEPRECATED: Internal function, don't use.
9815  *
9816  * parse an XML element
9817  *
9818  * [39] element ::= EmptyElemTag | STag content ETag
9819  *
9820  * [ WFC: Element Type Match ]
9821  * The Name in an element's end-tag must match the element type in the
9822  * start-tag.
9823  *
9824  */
9825 
9826 void
xmlParseElement(xmlParserCtxtPtr ctxt)9827 xmlParseElement(xmlParserCtxtPtr ctxt) {
9828     if (xmlParseElementStart(ctxt) != 0)
9829         return;
9830 
9831     xmlParseContentInternal(ctxt);
9832 
9833     if (ctxt->input->cur >= ctxt->input->end) {
9834         if (ctxt->wellFormed) {
9835             const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836             int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837             xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838                     "Premature end of data in tag %s line %d\n",
9839                     name, line, NULL);
9840         }
9841         return;
9842     }
9843 
9844     xmlParseElementEnd(ctxt);
9845 }
9846 
9847 /**
9848  * xmlParseElementStart:
9849  * @ctxt:  an XML parser context
9850  *
9851  * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852  * opening tag was parsed, 1 if an empty element was parsed.
9853  *
9854  * Always consumes '<'.
9855  */
9856 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9857 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858     int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859     const xmlChar *name;
9860     const xmlChar *prefix = NULL;
9861     const xmlChar *URI = NULL;
9862     xmlParserNodeInfo node_info;
9863     int line;
9864     xmlNodePtr cur;
9865     int nbNs = 0;
9866 
9867     if (ctxt->nameNr > maxDepth) {
9868         xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870                 ctxt->nameNr);
9871 	xmlHaltParser(ctxt);
9872 	return(-1);
9873     }
9874 
9875     /* Capture start position */
9876     if (ctxt->record_info) {
9877         node_info.begin_pos = ctxt->input->consumed +
9878                           (CUR_PTR - ctxt->input->base);
9879 	node_info.begin_line = ctxt->input->line;
9880     }
9881 
9882     if (ctxt->spaceNr == 0)
9883 	spacePush(ctxt, -1);
9884     else if (*ctxt->space == -2)
9885 	spacePush(ctxt, -1);
9886     else
9887 	spacePush(ctxt, *ctxt->space);
9888 
9889     line = ctxt->input->line;
9890 #ifdef LIBXML_SAX1_ENABLED
9891     if (ctxt->sax2)
9892 #endif /* LIBXML_SAX1_ENABLED */
9893         name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894 #ifdef LIBXML_SAX1_ENABLED
9895     else
9896 	name = xmlParseStartTag(ctxt);
9897 #endif /* LIBXML_SAX1_ENABLED */
9898     if (name == NULL) {
9899 	spacePop(ctxt);
9900         return(-1);
9901     }
9902     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903     cur = ctxt->node;
9904 
9905 #ifdef LIBXML_VALID_ENABLED
9906     /*
9907      * [ VC: Root Element Type ]
9908      * The Name in the document type declaration must match the element
9909      * type of the root element.
9910      */
9911     if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912         ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913         ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914 #endif /* LIBXML_VALID_ENABLED */
9915 
9916     /*
9917      * Check for an Empty Element.
9918      */
9919     if ((RAW == '/') && (NXT(1) == '>')) {
9920         SKIP(2);
9921 	if (ctxt->sax2) {
9922 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923 		(!ctxt->disableSAX))
9924 		ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925 #ifdef LIBXML_SAX1_ENABLED
9926 	} else {
9927 	    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928 		(!ctxt->disableSAX))
9929 		ctxt->sax->endElement(ctxt->userData, name);
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 	}
9932 	namePop(ctxt);
9933 	spacePop(ctxt);
9934 	if (nbNs > 0)
9935 	    xmlParserNsPop(ctxt, nbNs);
9936 	if (cur != NULL && ctxt->record_info) {
9937             node_info.node = cur;
9938             node_info.end_pos = ctxt->input->consumed +
9939                                 (CUR_PTR - ctxt->input->base);
9940             node_info.end_line = ctxt->input->line;
9941             xmlParserAddNodeInfo(ctxt, &node_info);
9942 	}
9943 	return(1);
9944     }
9945     if (RAW == '>') {
9946         NEXT1;
9947         if (cur != NULL && ctxt->record_info) {
9948             node_info.node = cur;
9949             node_info.end_pos = 0;
9950             node_info.end_line = 0;
9951             xmlParserAddNodeInfo(ctxt, &node_info);
9952         }
9953     } else {
9954         xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955 		     "Couldn't find end of Start Tag %s line %d\n",
9956 		                name, line, NULL);
9957 
9958 	/*
9959 	 * end of parsing of this node.
9960 	 */
9961 	nodePop(ctxt);
9962 	namePop(ctxt);
9963 	spacePop(ctxt);
9964 	if (nbNs > 0)
9965 	    xmlParserNsPop(ctxt, nbNs);
9966 	return(-1);
9967     }
9968 
9969     return(0);
9970 }
9971 
9972 /**
9973  * xmlParseElementEnd:
9974  * @ctxt:  an XML parser context
9975  *
9976  * Parse the end of an XML element. Always consumes '</'.
9977  */
9978 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9979 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980     xmlNodePtr cur = ctxt->node;
9981 
9982     if (ctxt->nameNr <= 0) {
9983         if ((RAW == '<') && (NXT(1) == '/'))
9984             SKIP(2);
9985         return;
9986     }
9987 
9988     /*
9989      * parse the end of tag: '</' should be here.
9990      */
9991     if (ctxt->sax2) {
9992 	xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993 	namePop(ctxt);
9994     }
9995 #ifdef LIBXML_SAX1_ENABLED
9996     else
9997 	xmlParseEndTag1(ctxt, 0);
9998 #endif /* LIBXML_SAX1_ENABLED */
9999 
10000     /*
10001      * Capture end position
10002      */
10003     if (cur != NULL && ctxt->record_info) {
10004         xmlParserNodeInfoPtr node_info;
10005 
10006         node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007         if (node_info != NULL) {
10008             node_info->end_pos = ctxt->input->consumed +
10009                                  (CUR_PTR - ctxt->input->base);
10010             node_info->end_line = ctxt->input->line;
10011         }
10012     }
10013 }
10014 
10015 /**
10016  * xmlParseVersionNum:
10017  * @ctxt:  an XML parser context
10018  *
10019  * DEPRECATED: Internal function, don't use.
10020  *
10021  * parse the XML version value.
10022  *
10023  * [26] VersionNum ::= '1.' [0-9]+
10024  *
10025  * In practice allow [0-9].[0-9]+ at that level
10026  *
10027  * Returns the string giving the XML version number, or NULL
10028  */
10029 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10030 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031     xmlChar *buf = NULL;
10032     int len = 0;
10033     int size = 10;
10034     xmlChar cur;
10035 
10036     buf = xmlMalloc(size);
10037     if (buf == NULL) {
10038 	xmlErrMemory(ctxt);
10039 	return(NULL);
10040     }
10041     cur = CUR;
10042     if (!((cur >= '0') && (cur <= '9'))) {
10043 	xmlFree(buf);
10044 	return(NULL);
10045     }
10046     buf[len++] = cur;
10047     NEXT;
10048     cur=CUR;
10049     if (cur != '.') {
10050 	xmlFree(buf);
10051 	return(NULL);
10052     }
10053     buf[len++] = cur;
10054     NEXT;
10055     cur=CUR;
10056     while ((cur >= '0') && (cur <= '9')) {
10057 	if (len + 1 >= size) {
10058 	    xmlChar *tmp;
10059 
10060 	    size *= 2;
10061 	    tmp = (xmlChar *) xmlRealloc(buf, size);
10062 	    if (tmp == NULL) {
10063 	        xmlFree(buf);
10064 		xmlErrMemory(ctxt);
10065 		return(NULL);
10066 	    }
10067 	    buf = tmp;
10068 	}
10069 	buf[len++] = cur;
10070 	NEXT;
10071 	cur=CUR;
10072     }
10073     buf[len] = 0;
10074     return(buf);
10075 }
10076 
10077 /**
10078  * xmlParseVersionInfo:
10079  * @ctxt:  an XML parser context
10080  *
10081  * DEPRECATED: Internal function, don't use.
10082  *
10083  * parse the XML version.
10084  *
10085  * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086  *
10087  * [25] Eq ::= S? '=' S?
10088  *
10089  * Returns the version string, e.g. "1.0"
10090  */
10091 
10092 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10093 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094     xmlChar *version = NULL;
10095 
10096     if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097 	SKIP(7);
10098 	SKIP_BLANKS;
10099 	if (RAW != '=') {
10100 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101 	    return(NULL);
10102         }
10103 	NEXT;
10104 	SKIP_BLANKS;
10105 	if (RAW == '"') {
10106 	    NEXT;
10107 	    version = xmlParseVersionNum(ctxt);
10108 	    if (RAW != '"') {
10109 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110 	    } else
10111 	        NEXT;
10112 	} else if (RAW == '\''){
10113 	    NEXT;
10114 	    version = xmlParseVersionNum(ctxt);
10115 	    if (RAW != '\'') {
10116 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117 	    } else
10118 	        NEXT;
10119 	} else {
10120 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121 	}
10122     }
10123     return(version);
10124 }
10125 
10126 /**
10127  * xmlParseEncName:
10128  * @ctxt:  an XML parser context
10129  *
10130  * DEPRECATED: Internal function, don't use.
10131  *
10132  * parse the XML encoding name
10133  *
10134  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135  *
10136  * Returns the encoding name value or NULL
10137  */
10138 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10139 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140     xmlChar *buf = NULL;
10141     int len = 0;
10142     int size = 10;
10143     int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144                     XML_MAX_TEXT_LENGTH :
10145                     XML_MAX_NAME_LENGTH;
10146     xmlChar cur;
10147 
10148     cur = CUR;
10149     if (((cur >= 'a') && (cur <= 'z')) ||
10150         ((cur >= 'A') && (cur <= 'Z'))) {
10151 	buf = xmlMalloc(size);
10152 	if (buf == NULL) {
10153 	    xmlErrMemory(ctxt);
10154 	    return(NULL);
10155 	}
10156 
10157 	buf[len++] = cur;
10158 	NEXT;
10159 	cur = CUR;
10160 	while (((cur >= 'a') && (cur <= 'z')) ||
10161 	       ((cur >= 'A') && (cur <= 'Z')) ||
10162 	       ((cur >= '0') && (cur <= '9')) ||
10163 	       (cur == '.') || (cur == '_') ||
10164 	       (cur == '-')) {
10165 	    if (len + 1 >= size) {
10166 	        xmlChar *tmp;
10167 
10168 		size *= 2;
10169 		tmp = (xmlChar *) xmlRealloc(buf, size);
10170 		if (tmp == NULL) {
10171 		    xmlErrMemory(ctxt);
10172 		    xmlFree(buf);
10173 		    return(NULL);
10174 		}
10175 		buf = tmp;
10176 	    }
10177 	    buf[len++] = cur;
10178             if (len > maxLength) {
10179                 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180                 xmlFree(buf);
10181                 return(NULL);
10182             }
10183 	    NEXT;
10184 	    cur = CUR;
10185         }
10186 	buf[len] = 0;
10187     } else {
10188 	xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189     }
10190     return(buf);
10191 }
10192 
10193 /**
10194  * xmlParseEncodingDecl:
10195  * @ctxt:  an XML parser context
10196  *
10197  * DEPRECATED: Internal function, don't use.
10198  *
10199  * parse the XML encoding declaration
10200  *
10201  * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10202  *
10203  * this setups the conversion filters.
10204  *
10205  * Returns the encoding value or NULL
10206  */
10207 
10208 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10209 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210     xmlChar *encoding = NULL;
10211 
10212     SKIP_BLANKS;
10213     if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214         return(NULL);
10215 
10216     SKIP(8);
10217     SKIP_BLANKS;
10218     if (RAW != '=') {
10219         xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220         return(NULL);
10221     }
10222     NEXT;
10223     SKIP_BLANKS;
10224     if (RAW == '"') {
10225         NEXT;
10226         encoding = xmlParseEncName(ctxt);
10227         if (RAW != '"') {
10228             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229             xmlFree((xmlChar *) encoding);
10230             return(NULL);
10231         } else
10232             NEXT;
10233     } else if (RAW == '\''){
10234         NEXT;
10235         encoding = xmlParseEncName(ctxt);
10236         if (RAW != '\'') {
10237             xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238             xmlFree((xmlChar *) encoding);
10239             return(NULL);
10240         } else
10241             NEXT;
10242     } else {
10243         xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244     }
10245 
10246     if (encoding == NULL)
10247         return(NULL);
10248 
10249     xmlSetDeclaredEncoding(ctxt, encoding);
10250 
10251     return(ctxt->encoding);
10252 }
10253 
10254 /**
10255  * xmlParseSDDecl:
10256  * @ctxt:  an XML parser context
10257  *
10258  * DEPRECATED: Internal function, don't use.
10259  *
10260  * parse the XML standalone declaration
10261  *
10262  * [32] SDDecl ::= S 'standalone' Eq
10263  *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264  *
10265  * [ VC: Standalone Document Declaration ]
10266  * TODO The standalone document declaration must have the value "no"
10267  * if any external markup declarations contain declarations of:
10268  *  - attributes with default values, if elements to which these
10269  *    attributes apply appear in the document without specifications
10270  *    of values for these attributes, or
10271  *  - entities (other than amp, lt, gt, apos, quot), if references
10272  *    to those entities appear in the document, or
10273  *  - attributes with values subject to normalization, where the
10274  *    attribute appears in the document with a value which will change
10275  *    as a result of normalization, or
10276  *  - element types with element content, if white space occurs directly
10277  *    within any instance of those types.
10278  *
10279  * Returns:
10280  *   1 if standalone="yes"
10281  *   0 if standalone="no"
10282  *  -2 if standalone attribute is missing or invalid
10283  *	  (A standalone value of -2 means that the XML declaration was found,
10284  *	   but no value was specified for the standalone attribute).
10285  */
10286 
10287 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10288 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289     int standalone = -2;
10290 
10291     SKIP_BLANKS;
10292     if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293 	SKIP(10);
10294         SKIP_BLANKS;
10295 	if (RAW != '=') {
10296 	    xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297 	    return(standalone);
10298         }
10299 	NEXT;
10300 	SKIP_BLANKS;
10301         if (RAW == '\''){
10302 	    NEXT;
10303 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10304 	        standalone = 0;
10305                 SKIP(2);
10306 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307 	               (NXT(2) == 's')) {
10308 	        standalone = 1;
10309 		SKIP(3);
10310             } else {
10311 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312 	    }
10313 	    if (RAW != '\'') {
10314 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315 	    } else
10316 	        NEXT;
10317 	} else if (RAW == '"'){
10318 	    NEXT;
10319 	    if ((RAW == 'n') && (NXT(1) == 'o')) {
10320 	        standalone = 0;
10321 		SKIP(2);
10322 	    } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323 	               (NXT(2) == 's')) {
10324 	        standalone = 1;
10325                 SKIP(3);
10326             } else {
10327 		xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328 	    }
10329 	    if (RAW != '"') {
10330 		xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331 	    } else
10332 	        NEXT;
10333 	} else {
10334 	    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335         }
10336     }
10337     return(standalone);
10338 }
10339 
10340 /**
10341  * xmlParseXMLDecl:
10342  * @ctxt:  an XML parser context
10343  *
10344  * DEPRECATED: Internal function, don't use.
10345  *
10346  * parse an XML declaration header
10347  *
10348  * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349  */
10350 
10351 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10352 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353     xmlChar *version;
10354 
10355     /*
10356      * This value for standalone indicates that the document has an
10357      * XML declaration but it does not have a standalone attribute.
10358      * It will be overwritten later if a standalone attribute is found.
10359      */
10360 
10361     ctxt->standalone = -2;
10362 
10363     /*
10364      * We know that '<?xml' is here.
10365      */
10366     SKIP(5);
10367 
10368     if (!IS_BLANK_CH(RAW)) {
10369 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370 	               "Blank needed after '<?xml'\n");
10371     }
10372     SKIP_BLANKS;
10373 
10374     /*
10375      * We must have the VersionInfo here.
10376      */
10377     version = xmlParseVersionInfo(ctxt);
10378     if (version == NULL) {
10379 	xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380     } else {
10381 	if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382 	    /*
10383 	     * Changed here for XML-1.0 5th edition
10384 	     */
10385 	    if (ctxt->options & XML_PARSE_OLD10) {
10386 		xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387 			          "Unsupported version '%s'\n",
10388 			          version);
10389 	    } else {
10390 	        if ((version[0] == '1') && ((version[1] == '.'))) {
10391 		    xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392 		                  "Unsupported version '%s'\n",
10393 				  version, NULL);
10394 		} else {
10395 		    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396 				      "Unsupported version '%s'\n",
10397 				      version);
10398 		}
10399 	    }
10400 	}
10401 	if (ctxt->version != NULL)
10402 	    xmlFree((void *) ctxt->version);
10403 	ctxt->version = version;
10404     }
10405 
10406     /*
10407      * We may have the encoding declaration
10408      */
10409     if (!IS_BLANK_CH(RAW)) {
10410         if ((RAW == '?') && (NXT(1) == '>')) {
10411 	    SKIP(2);
10412 	    return;
10413 	}
10414 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415     }
10416     xmlParseEncodingDecl(ctxt);
10417 
10418     /*
10419      * We may have the standalone status.
10420      */
10421     if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422         if ((RAW == '?') && (NXT(1) == '>')) {
10423 	    SKIP(2);
10424 	    return;
10425 	}
10426 	xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427     }
10428 
10429     /*
10430      * We can grow the input buffer freely at that point
10431      */
10432     GROW;
10433 
10434     SKIP_BLANKS;
10435     ctxt->standalone = xmlParseSDDecl(ctxt);
10436 
10437     SKIP_BLANKS;
10438     if ((RAW == '?') && (NXT(1) == '>')) {
10439         SKIP(2);
10440     } else if (RAW == '>') {
10441         /* Deprecated old WD ... */
10442 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443 	NEXT;
10444     } else {
10445         int c;
10446 
10447 	xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448         while ((PARSER_STOPPED(ctxt) == 0) &&
10449                ((c = CUR) != 0)) {
10450             NEXT;
10451             if (c == '>')
10452                 break;
10453         }
10454     }
10455 }
10456 
10457 /**
10458  * xmlCtxtGetVersion:
10459  * ctxt:  parser context
10460  *
10461  * Available since 2.14.0.
10462  *
10463  * Returns the version from the XML declaration.
10464  */
10465 const xmlChar *
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt)10466 xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10467     if (ctxt == NULL)
10468         return(NULL);
10469 
10470     return(ctxt->version);
10471 }
10472 
10473 /**
10474  * xmlCtxtGetStandalone:
10475  * ctxt:  parser context
10476  *
10477  * Available since 2.14.0.
10478  *
10479  * Returns the value from the standalone document declaration.
10480  */
10481 int
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt)10482 xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10483     if (ctxt == NULL)
10484         return(0);
10485 
10486     return(ctxt->standalone);
10487 }
10488 
10489 /**
10490  * xmlParseMisc:
10491  * @ctxt:  an XML parser context
10492  *
10493  * DEPRECATED: Internal function, don't use.
10494  *
10495  * parse an XML Misc* optional field.
10496  *
10497  * [27] Misc ::= Comment | PI |  S
10498  */
10499 
10500 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10501 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10502     while (PARSER_STOPPED(ctxt) == 0) {
10503         SKIP_BLANKS;
10504         GROW;
10505         if ((RAW == '<') && (NXT(1) == '?')) {
10506 	    xmlParsePI(ctxt);
10507         } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10508 	    xmlParseComment(ctxt);
10509         } else {
10510             break;
10511         }
10512     }
10513 }
10514 
10515 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10516 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10517     xmlDocPtr doc;
10518 
10519     /*
10520      * SAX: end of the document processing.
10521      */
10522     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10523         ctxt->sax->endDocument(ctxt->userData);
10524 
10525     doc = ctxt->myDoc;
10526     if (doc != NULL) {
10527         if (ctxt->wellFormed) {
10528             doc->properties |= XML_DOC_WELLFORMED;
10529             if (ctxt->valid)
10530                 doc->properties |= XML_DOC_DTDVALID;
10531             if (ctxt->nsWellFormed)
10532                 doc->properties |= XML_DOC_NSVALID;
10533         }
10534 
10535         if (ctxt->options & XML_PARSE_OLD10)
10536             doc->properties |= XML_DOC_OLD10;
10537 
10538         /*
10539          * Remove locally kept entity definitions if the tree was not built
10540          */
10541 	if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10542             xmlFreeDoc(doc);
10543             ctxt->myDoc = NULL;
10544         }
10545     }
10546 }
10547 
10548 /**
10549  * xmlParseDocument:
10550  * @ctxt:  an XML parser context
10551  *
10552  * Parse an XML document and invoke the SAX handlers. This is useful
10553  * if you're only interested in custom SAX callbacks. If you want a
10554  * document tree, use xmlCtxtParseDocument.
10555  *
10556  * Returns 0, -1 in case of error.
10557  */
10558 
10559 int
xmlParseDocument(xmlParserCtxtPtr ctxt)10560 xmlParseDocument(xmlParserCtxtPtr ctxt) {
10561     if ((ctxt == NULL) || (ctxt->input == NULL))
10562         return(-1);
10563 
10564     GROW;
10565 
10566     /*
10567      * SAX: detecting the level.
10568      */
10569     xmlCtxtInitializeLate(ctxt);
10570 
10571     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10572         ctxt->sax->setDocumentLocator(ctxt->userData,
10573                 (xmlSAXLocator *) &xmlDefaultSAXLocator);
10574     }
10575 
10576     xmlDetectEncoding(ctxt);
10577 
10578     if (CUR == 0) {
10579 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10580 	return(-1);
10581     }
10582 
10583     GROW;
10584     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10585 
10586 	/*
10587 	 * Note that we will switch encoding on the fly.
10588 	 */
10589 	xmlParseXMLDecl(ctxt);
10590 	SKIP_BLANKS;
10591     } else {
10592 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10593         if (ctxt->version == NULL) {
10594             xmlErrMemory(ctxt);
10595             return(-1);
10596         }
10597     }
10598     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10599         ctxt->sax->startDocument(ctxt->userData);
10600     if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10601         (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10602 	ctxt->myDoc->compression = ctxt->input->buf->compressed;
10603     }
10604 
10605     /*
10606      * The Misc part of the Prolog
10607      */
10608     xmlParseMisc(ctxt);
10609 
10610     /*
10611      * Then possibly doc type declaration(s) and more Misc
10612      * (doctypedecl Misc*)?
10613      */
10614     GROW;
10615     if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10616 
10617 	ctxt->inSubset = 1;
10618 	xmlParseDocTypeDecl(ctxt);
10619 	if (RAW == '[') {
10620 	    xmlParseInternalSubset(ctxt);
10621 	}
10622 
10623 	/*
10624 	 * Create and update the external subset.
10625 	 */
10626 	ctxt->inSubset = 2;
10627 	if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10628 	    (!ctxt->disableSAX))
10629 	    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10630 	                              ctxt->extSubSystem, ctxt->extSubURI);
10631 	ctxt->inSubset = 0;
10632 
10633         xmlCleanSpecialAttr(ctxt);
10634 
10635 	xmlParseMisc(ctxt);
10636     }
10637 
10638     /*
10639      * Time to start parsing the tree itself
10640      */
10641     GROW;
10642     if (RAW != '<') {
10643         if (ctxt->wellFormed)
10644             xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10645                            "Start tag expected, '<' not found\n");
10646     } else {
10647 	xmlParseElement(ctxt);
10648 
10649 	/*
10650 	 * The Misc part at the end
10651 	 */
10652 	xmlParseMisc(ctxt);
10653 
10654         if (ctxt->input->cur < ctxt->input->end) {
10655             if (ctxt->wellFormed)
10656 	        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10657         } else if ((ctxt->input->buf != NULL) &&
10658                    (ctxt->input->buf->encoder != NULL) &&
10659                    (ctxt->input->buf->error == 0) &&
10660                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
10661             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
10662                            "Truncated multi-byte sequence at EOF\n");
10663         }
10664     }
10665 
10666     ctxt->instate = XML_PARSER_EOF;
10667     xmlFinishDocument(ctxt);
10668 
10669     if (! ctxt->wellFormed) {
10670 	ctxt->valid = 0;
10671 	return(-1);
10672     }
10673 
10674     return(0);
10675 }
10676 
10677 /**
10678  * xmlParseExtParsedEnt:
10679  * @ctxt:  an XML parser context
10680  *
10681  * parse a general parsed entity
10682  * An external general parsed entity is well-formed if it matches the
10683  * production labeled extParsedEnt.
10684  *
10685  * [78] extParsedEnt ::= TextDecl? content
10686  *
10687  * Returns 0, -1 in case of error. the parser context is augmented
10688  *                as a result of the parsing.
10689  */
10690 
10691 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10692 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10693     if ((ctxt == NULL) || (ctxt->input == NULL))
10694         return(-1);
10695 
10696     xmlCtxtInitializeLate(ctxt);
10697 
10698     if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10699         ctxt->sax->setDocumentLocator(ctxt->userData,
10700                 (xmlSAXLocator *) &xmlDefaultSAXLocator);
10701     }
10702 
10703     xmlDetectEncoding(ctxt);
10704 
10705     if (CUR == 0) {
10706 	xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10707     }
10708 
10709     /*
10710      * Check for the XMLDecl in the Prolog.
10711      */
10712     GROW;
10713     if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10714 
10715 	/*
10716 	 * Note that we will switch encoding on the fly.
10717 	 */
10718 	xmlParseXMLDecl(ctxt);
10719 	SKIP_BLANKS;
10720     } else {
10721 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10722     }
10723     if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10724         ctxt->sax->startDocument(ctxt->userData);
10725 
10726     /*
10727      * Doing validity checking on chunk doesn't make sense
10728      */
10729     ctxt->options &= ~XML_PARSE_DTDVALID;
10730     ctxt->validate = 0;
10731     ctxt->depth = 0;
10732 
10733     xmlParseContentInternal(ctxt);
10734 
10735     if (ctxt->input->cur < ctxt->input->end)
10736 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10737 
10738     /*
10739      * SAX: end of the document processing.
10740      */
10741     if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10742         ctxt->sax->endDocument(ctxt->userData);
10743 
10744     if (! ctxt->wellFormed) return(-1);
10745     return(0);
10746 }
10747 
10748 #ifdef LIBXML_PUSH_ENABLED
10749 /************************************************************************
10750  *									*
10751  *		Progressive parsing interfaces				*
10752  *									*
10753  ************************************************************************/
10754 
10755 /**
10756  * xmlParseLookupChar:
10757  * @ctxt:  an XML parser context
10758  * @c:  character
10759  *
10760  * Check whether the input buffer contains a character.
10761  */
10762 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10763 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10764     const xmlChar *cur;
10765 
10766     if (ctxt->checkIndex == 0) {
10767         cur = ctxt->input->cur + 1;
10768     } else {
10769         cur = ctxt->input->cur + ctxt->checkIndex;
10770     }
10771 
10772     if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10773         size_t index = ctxt->input->end - ctxt->input->cur;
10774 
10775         if (index > LONG_MAX) {
10776             ctxt->checkIndex = 0;
10777             return(1);
10778         }
10779         ctxt->checkIndex = index;
10780         return(0);
10781     } else {
10782         ctxt->checkIndex = 0;
10783         return(1);
10784     }
10785 }
10786 
10787 /**
10788  * xmlParseLookupString:
10789  * @ctxt:  an XML parser context
10790  * @startDelta: delta to apply at the start
10791  * @str:  string
10792  * @strLen:  length of string
10793  *
10794  * Check whether the input buffer contains a string.
10795  */
10796 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10797 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10798                      const char *str, size_t strLen) {
10799     const xmlChar *cur, *term;
10800 
10801     if (ctxt->checkIndex == 0) {
10802         cur = ctxt->input->cur + startDelta;
10803     } else {
10804         cur = ctxt->input->cur + ctxt->checkIndex;
10805     }
10806 
10807     term = BAD_CAST strstr((const char *) cur, str);
10808     if (term == NULL) {
10809         const xmlChar *end = ctxt->input->end;
10810         size_t index;
10811 
10812         /* Rescan (strLen - 1) characters. */
10813         if ((size_t) (end - cur) < strLen)
10814             end = cur;
10815         else
10816             end -= strLen - 1;
10817         index = end - ctxt->input->cur;
10818         if (index > LONG_MAX) {
10819             ctxt->checkIndex = 0;
10820             return(ctxt->input->end - strLen);
10821         }
10822         ctxt->checkIndex = index;
10823     } else {
10824         ctxt->checkIndex = 0;
10825     }
10826 
10827     return(term);
10828 }
10829 
10830 /**
10831  * xmlParseLookupCharData:
10832  * @ctxt:  an XML parser context
10833  *
10834  * Check whether the input buffer contains terminated char data.
10835  */
10836 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10837 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10838     const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10839     const xmlChar *end = ctxt->input->end;
10840     size_t index;
10841 
10842     while (cur < end) {
10843         if ((*cur == '<') || (*cur == '&')) {
10844             ctxt->checkIndex = 0;
10845             return(1);
10846         }
10847         cur++;
10848     }
10849 
10850     index = cur - ctxt->input->cur;
10851     if (index > LONG_MAX) {
10852         ctxt->checkIndex = 0;
10853         return(1);
10854     }
10855     ctxt->checkIndex = index;
10856     return(0);
10857 }
10858 
10859 /**
10860  * xmlParseLookupGt:
10861  * @ctxt:  an XML parser context
10862  *
10863  * Check whether there's enough data in the input buffer to finish parsing
10864  * a start tag. This has to take quotes into account.
10865  */
10866 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10867 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10868     const xmlChar *cur;
10869     const xmlChar *end = ctxt->input->end;
10870     int state = ctxt->endCheckState;
10871     size_t index;
10872 
10873     if (ctxt->checkIndex == 0)
10874         cur = ctxt->input->cur + 1;
10875     else
10876         cur = ctxt->input->cur + ctxt->checkIndex;
10877 
10878     while (cur < end) {
10879         if (state) {
10880             if (*cur == state)
10881                 state = 0;
10882         } else if (*cur == '\'' || *cur == '"') {
10883             state = *cur;
10884         } else if (*cur == '>') {
10885             ctxt->checkIndex = 0;
10886             ctxt->endCheckState = 0;
10887             return(1);
10888         }
10889         cur++;
10890     }
10891 
10892     index = cur - ctxt->input->cur;
10893     if (index > LONG_MAX) {
10894         ctxt->checkIndex = 0;
10895         ctxt->endCheckState = 0;
10896         return(1);
10897     }
10898     ctxt->checkIndex = index;
10899     ctxt->endCheckState = state;
10900     return(0);
10901 }
10902 
10903 /**
10904  * xmlParseLookupInternalSubset:
10905  * @ctxt:  an XML parser context
10906  *
10907  * Check whether there's enough data in the input buffer to finish parsing
10908  * the internal subset.
10909  */
10910 static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt)10911 xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10912     /*
10913      * Sorry, but progressive parsing of the internal subset is not
10914      * supported. We first check that the full content of the internal
10915      * subset is available and parsing is launched only at that point.
10916      * Internal subset ends with "']' S? '>'" in an unescaped section and
10917      * not in a ']]>' sequence which are conditional sections.
10918      */
10919     const xmlChar *cur, *start;
10920     const xmlChar *end = ctxt->input->end;
10921     int state = ctxt->endCheckState;
10922     size_t index;
10923 
10924     if (ctxt->checkIndex == 0) {
10925         cur = ctxt->input->cur + 1;
10926     } else {
10927         cur = ctxt->input->cur + ctxt->checkIndex;
10928     }
10929     start = cur;
10930 
10931     while (cur < end) {
10932         if (state == '-') {
10933             if ((*cur == '-') &&
10934                 (cur[1] == '-') &&
10935                 (cur[2] == '>')) {
10936                 state = 0;
10937                 cur += 3;
10938                 start = cur;
10939                 continue;
10940             }
10941         }
10942         else if (state == ']') {
10943             if (*cur == '>') {
10944                 ctxt->checkIndex = 0;
10945                 ctxt->endCheckState = 0;
10946                 return(1);
10947             }
10948             if (IS_BLANK_CH(*cur)) {
10949                 state = ' ';
10950             } else if (*cur != ']') {
10951                 state = 0;
10952                 start = cur;
10953                 continue;
10954             }
10955         }
10956         else if (state == ' ') {
10957             if (*cur == '>') {
10958                 ctxt->checkIndex = 0;
10959                 ctxt->endCheckState = 0;
10960                 return(1);
10961             }
10962             if (!IS_BLANK_CH(*cur)) {
10963                 state = 0;
10964                 start = cur;
10965                 continue;
10966             }
10967         }
10968         else if (state != 0) {
10969             if (*cur == state) {
10970                 state = 0;
10971                 start = cur + 1;
10972             }
10973         }
10974         else if (*cur == '<') {
10975             if ((cur[1] == '!') &&
10976                 (cur[2] == '-') &&
10977                 (cur[3] == '-')) {
10978                 state = '-';
10979                 cur += 4;
10980                 /* Don't treat <!--> as comment */
10981                 start = cur;
10982                 continue;
10983             }
10984         }
10985         else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10986             state = *cur;
10987         }
10988 
10989         cur++;
10990     }
10991 
10992     /*
10993      * Rescan the three last characters to detect "<!--" and "-->"
10994      * split across chunks.
10995      */
10996     if ((state == 0) || (state == '-')) {
10997         if (cur - start < 3)
10998             cur = start;
10999         else
11000             cur -= 3;
11001     }
11002     index = cur - ctxt->input->cur;
11003     if (index > LONG_MAX) {
11004         ctxt->checkIndex = 0;
11005         ctxt->endCheckState = 0;
11006         return(1);
11007     }
11008     ctxt->checkIndex = index;
11009     ctxt->endCheckState = state;
11010     return(0);
11011 }
11012 
11013 /**
11014  * xmlParseTryOrFinish:
11015  * @ctxt:  an XML parser context
11016  * @terminate:  last chunk indicator
11017  *
11018  * Try to progress on parsing
11019  *
11020  * Returns zero if no parsing was possible
11021  */
11022 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11023 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11024     int ret = 0;
11025     size_t avail;
11026     xmlChar cur, next;
11027 
11028     if (ctxt->input == NULL)
11029         return(0);
11030 
11031     if ((ctxt->input != NULL) &&
11032         (ctxt->input->cur - ctxt->input->base > 4096)) {
11033         xmlParserShrink(ctxt);
11034     }
11035 
11036     while (ctxt->disableSAX == 0) {
11037         avail = ctxt->input->end - ctxt->input->cur;
11038         if (avail < 1)
11039 	    goto done;
11040         switch (ctxt->instate) {
11041             case XML_PARSER_EOF:
11042 	        /*
11043 		 * Document parsing is done !
11044 		 */
11045 	        goto done;
11046             case XML_PARSER_START:
11047                 /*
11048                  * Very first chars read from the document flow.
11049                  */
11050                 if ((!terminate) && (avail < 4))
11051                     goto done;
11052 
11053                 /*
11054                  * We need more bytes to detect EBCDIC code pages.
11055                  * See xmlDetectEBCDIC.
11056                  */
11057                 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11058                     (!terminate) && (avail < 200))
11059                     goto done;
11060 
11061                 xmlDetectEncoding(ctxt);
11062                 ctxt->instate = XML_PARSER_XML_DECL;
11063 		break;
11064 
11065             case XML_PARSER_XML_DECL:
11066 		if ((!terminate) && (avail < 2))
11067 		    goto done;
11068 		cur = ctxt->input->cur[0];
11069 		next = ctxt->input->cur[1];
11070 	        if ((cur == '<') && (next == '?')) {
11071 		    /* PI or XML decl */
11072 		    if ((!terminate) &&
11073                         (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11074 			goto done;
11075 		    if ((ctxt->input->cur[2] == 'x') &&
11076 			(ctxt->input->cur[3] == 'm') &&
11077 			(ctxt->input->cur[4] == 'l') &&
11078 			(IS_BLANK_CH(ctxt->input->cur[5]))) {
11079 			ret += 5;
11080 			xmlParseXMLDecl(ctxt);
11081 		    } else {
11082 			ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11083                         if (ctxt->version == NULL) {
11084                             xmlErrMemory(ctxt);
11085                             break;
11086                         }
11087 		    }
11088 		} else {
11089 		    ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11090 		    if (ctxt->version == NULL) {
11091 		        xmlErrMemory(ctxt);
11092 			break;
11093 		    }
11094 		}
11095                 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11096                     ctxt->sax->setDocumentLocator(ctxt->userData,
11097                             (xmlSAXLocator *) &xmlDefaultSAXLocator);
11098                 }
11099                 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11100                     (!ctxt->disableSAX))
11101                     ctxt->sax->startDocument(ctxt->userData);
11102                 ctxt->instate = XML_PARSER_MISC;
11103 		break;
11104             case XML_PARSER_START_TAG: {
11105 	        const xmlChar *name;
11106 		const xmlChar *prefix = NULL;
11107 		const xmlChar *URI = NULL;
11108                 int line = ctxt->input->line;
11109 		int nbNs = 0;
11110 
11111 		if ((!terminate) && (avail < 2))
11112 		    goto done;
11113 		cur = ctxt->input->cur[0];
11114 	        if (cur != '<') {
11115 		    xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11116                                    "Start tag expected, '<' not found");
11117                     ctxt->instate = XML_PARSER_EOF;
11118                     xmlFinishDocument(ctxt);
11119 		    goto done;
11120 		}
11121 		if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11122                     goto done;
11123 		if (ctxt->spaceNr == 0)
11124 		    spacePush(ctxt, -1);
11125 		else if (*ctxt->space == -2)
11126 		    spacePush(ctxt, -1);
11127 		else
11128 		    spacePush(ctxt, *ctxt->space);
11129 #ifdef LIBXML_SAX1_ENABLED
11130 		if (ctxt->sax2)
11131 #endif /* LIBXML_SAX1_ENABLED */
11132 		    name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11133 #ifdef LIBXML_SAX1_ENABLED
11134 		else
11135 		    name = xmlParseStartTag(ctxt);
11136 #endif /* LIBXML_SAX1_ENABLED */
11137 		if (name == NULL) {
11138 		    spacePop(ctxt);
11139                     ctxt->instate = XML_PARSER_EOF;
11140                     xmlFinishDocument(ctxt);
11141 		    goto done;
11142 		}
11143 #ifdef LIBXML_VALID_ENABLED
11144 		/*
11145 		 * [ VC: Root Element Type ]
11146 		 * The Name in the document type declaration must match
11147 		 * the element type of the root element.
11148 		 */
11149 		if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11150 		    ctxt->node && (ctxt->node == ctxt->myDoc->children))
11151 		    ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11152 #endif /* LIBXML_VALID_ENABLED */
11153 
11154 		/*
11155 		 * Check for an Empty Element.
11156 		 */
11157 		if ((RAW == '/') && (NXT(1) == '>')) {
11158 		    SKIP(2);
11159 
11160 		    if (ctxt->sax2) {
11161 			if ((ctxt->sax != NULL) &&
11162 			    (ctxt->sax->endElementNs != NULL) &&
11163 			    (!ctxt->disableSAX))
11164 			    ctxt->sax->endElementNs(ctxt->userData, name,
11165 			                            prefix, URI);
11166 			if (nbNs > 0)
11167 			    xmlParserNsPop(ctxt, nbNs);
11168 #ifdef LIBXML_SAX1_ENABLED
11169 		    } else {
11170 			if ((ctxt->sax != NULL) &&
11171 			    (ctxt->sax->endElement != NULL) &&
11172 			    (!ctxt->disableSAX))
11173 			    ctxt->sax->endElement(ctxt->userData, name);
11174 #endif /* LIBXML_SAX1_ENABLED */
11175 		    }
11176 		    spacePop(ctxt);
11177 		} else if (RAW == '>') {
11178 		    NEXT;
11179                     nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11180 		} else {
11181 		    xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11182 					 "Couldn't find end of Start Tag %s\n",
11183 					 name);
11184 		    nodePop(ctxt);
11185 		    spacePop(ctxt);
11186                     if (nbNs > 0)
11187                         xmlParserNsPop(ctxt, nbNs);
11188 		}
11189 
11190                 if (ctxt->nameNr == 0)
11191                     ctxt->instate = XML_PARSER_EPILOG;
11192                 else
11193                     ctxt->instate = XML_PARSER_CONTENT;
11194                 break;
11195 	    }
11196             case XML_PARSER_CONTENT: {
11197 		cur = ctxt->input->cur[0];
11198 
11199 		if (cur == '<') {
11200                     if ((!terminate) && (avail < 2))
11201                         goto done;
11202 		    next = ctxt->input->cur[1];
11203 
11204                     if (next == '/') {
11205                         ctxt->instate = XML_PARSER_END_TAG;
11206                         break;
11207                     } else if (next == '?') {
11208                         if ((!terminate) &&
11209                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11210                             goto done;
11211                         xmlParsePI(ctxt);
11212                         ctxt->instate = XML_PARSER_CONTENT;
11213                         break;
11214                     } else if (next == '!') {
11215                         if ((!terminate) && (avail < 3))
11216                             goto done;
11217                         next = ctxt->input->cur[2];
11218 
11219                         if (next == '-') {
11220                             if ((!terminate) && (avail < 4))
11221                                 goto done;
11222                             if (ctxt->input->cur[3] == '-') {
11223                                 if ((!terminate) &&
11224                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11225                                     goto done;
11226                                 xmlParseComment(ctxt);
11227                                 ctxt->instate = XML_PARSER_CONTENT;
11228                                 break;
11229                             }
11230                         } else if (next == '[') {
11231                             if ((!terminate) && (avail < 9))
11232                                 goto done;
11233                             if ((ctxt->input->cur[2] == '[') &&
11234                                 (ctxt->input->cur[3] == 'C') &&
11235                                 (ctxt->input->cur[4] == 'D') &&
11236                                 (ctxt->input->cur[5] == 'A') &&
11237                                 (ctxt->input->cur[6] == 'T') &&
11238                                 (ctxt->input->cur[7] == 'A') &&
11239                                 (ctxt->input->cur[8] == '[')) {
11240                                 if ((!terminate) &&
11241                                     (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11242                                     goto done;
11243                                 ctxt->instate = XML_PARSER_CDATA_SECTION;
11244                                 xmlParseCDSect(ctxt);
11245                                 ctxt->instate = XML_PARSER_CONTENT;
11246                                 break;
11247                             }
11248                         }
11249                     }
11250 		} else if (cur == '&') {
11251 		    if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11252 			goto done;
11253 		    xmlParseReference(ctxt);
11254                     break;
11255 		} else {
11256 		    /* TODO Avoid the extra copy, handle directly !!! */
11257 		    /*
11258 		     * Goal of the following test is:
11259 		     *  - minimize calls to the SAX 'character' callback
11260 		     *    when they are mergeable
11261 		     *  - handle an problem for isBlank when we only parse
11262 		     *    a sequence of blank chars and the next one is
11263 		     *    not available to check against '<' presence.
11264 		     *  - tries to homogenize the differences in SAX
11265 		     *    callbacks between the push and pull versions
11266 		     *    of the parser.
11267 		     */
11268 		    if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11269 			if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11270 			    goto done;
11271                     }
11272                     ctxt->checkIndex = 0;
11273 		    xmlParseCharDataInternal(ctxt, !terminate);
11274                     break;
11275 		}
11276 
11277                 ctxt->instate = XML_PARSER_START_TAG;
11278 		break;
11279 	    }
11280             case XML_PARSER_END_TAG:
11281 		if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11282 		    goto done;
11283 		if (ctxt->sax2) {
11284 	            xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11285 		    nameNsPop(ctxt);
11286 		}
11287 #ifdef LIBXML_SAX1_ENABLED
11288 		  else
11289 		    xmlParseEndTag1(ctxt, 0);
11290 #endif /* LIBXML_SAX1_ENABLED */
11291 		if (ctxt->nameNr == 0) {
11292 		    ctxt->instate = XML_PARSER_EPILOG;
11293 		} else {
11294 		    ctxt->instate = XML_PARSER_CONTENT;
11295 		}
11296 		break;
11297             case XML_PARSER_MISC:
11298             case XML_PARSER_PROLOG:
11299             case XML_PARSER_EPILOG:
11300 		SKIP_BLANKS;
11301                 avail = ctxt->input->end - ctxt->input->cur;
11302 		if (avail < 1)
11303 		    goto done;
11304 		if (ctxt->input->cur[0] == '<') {
11305                     if ((!terminate) && (avail < 2))
11306                         goto done;
11307                     next = ctxt->input->cur[1];
11308                     if (next == '?') {
11309                         if ((!terminate) &&
11310                             (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11311                             goto done;
11312                         xmlParsePI(ctxt);
11313                         break;
11314                     } else if (next == '!') {
11315                         if ((!terminate) && (avail < 3))
11316                             goto done;
11317 
11318                         if (ctxt->input->cur[2] == '-') {
11319                             if ((!terminate) && (avail < 4))
11320                                 goto done;
11321                             if (ctxt->input->cur[3] == '-') {
11322                                 if ((!terminate) &&
11323                                     (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11324                                     goto done;
11325                                 xmlParseComment(ctxt);
11326                                 break;
11327                             }
11328                         } else if (ctxt->instate == XML_PARSER_MISC) {
11329                             if ((!terminate) && (avail < 9))
11330                                 goto done;
11331                             if ((ctxt->input->cur[2] == 'D') &&
11332                                 (ctxt->input->cur[3] == 'O') &&
11333                                 (ctxt->input->cur[4] == 'C') &&
11334                                 (ctxt->input->cur[5] == 'T') &&
11335                                 (ctxt->input->cur[6] == 'Y') &&
11336                                 (ctxt->input->cur[7] == 'P') &&
11337                                 (ctxt->input->cur[8] == 'E')) {
11338                                 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11339                                     goto done;
11340                                 ctxt->inSubset = 1;
11341                                 xmlParseDocTypeDecl(ctxt);
11342                                 if (RAW == '[') {
11343                                     ctxt->instate = XML_PARSER_DTD;
11344                                 } else {
11345                                     /*
11346                                      * Create and update the external subset.
11347                                      */
11348                                     ctxt->inSubset = 2;
11349                                     if ((ctxt->sax != NULL) &&
11350                                         (!ctxt->disableSAX) &&
11351                                         (ctxt->sax->externalSubset != NULL))
11352                                         ctxt->sax->externalSubset(
11353                                                 ctxt->userData,
11354                                                 ctxt->intSubName,
11355                                                 ctxt->extSubSystem,
11356                                                 ctxt->extSubURI);
11357                                     ctxt->inSubset = 0;
11358                                     xmlCleanSpecialAttr(ctxt);
11359                                     ctxt->instate = XML_PARSER_PROLOG;
11360                                 }
11361                                 break;
11362                             }
11363                         }
11364                     }
11365                 }
11366 
11367                 if (ctxt->instate == XML_PARSER_EPILOG) {
11368                     if (ctxt->errNo == XML_ERR_OK)
11369                         xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11370 		    ctxt->instate = XML_PARSER_EOF;
11371                     xmlFinishDocument(ctxt);
11372                 } else {
11373 		    ctxt->instate = XML_PARSER_START_TAG;
11374 		}
11375 		break;
11376             case XML_PARSER_DTD: {
11377                 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11378                     goto done;
11379 		xmlParseInternalSubset(ctxt);
11380 		ctxt->inSubset = 2;
11381 		if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11382 		    (ctxt->sax->externalSubset != NULL))
11383 		    ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11384 			    ctxt->extSubSystem, ctxt->extSubURI);
11385 		ctxt->inSubset = 0;
11386 		xmlCleanSpecialAttr(ctxt);
11387 		ctxt->instate = XML_PARSER_PROLOG;
11388                 break;
11389 	    }
11390             default:
11391                 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11392 			"PP: internal error\n");
11393 		ctxt->instate = XML_PARSER_EOF;
11394 		break;
11395 	}
11396     }
11397 done:
11398     return(ret);
11399 }
11400 
11401 /**
11402  * xmlParseChunk:
11403  * @ctxt:  an XML parser context
11404  * @chunk:  chunk of memory
11405  * @size:  size of chunk in bytes
11406  * @terminate:  last chunk indicator
11407  *
11408  * Parse a chunk of memory in push parser mode.
11409  *
11410  * Assumes that the parser context was initialized with
11411  * xmlCreatePushParserCtxt.
11412  *
11413  * The last chunk, which will often be empty, must be marked with
11414  * the @terminate flag. With the default SAX callbacks, the resulting
11415  * document will be available in ctxt->myDoc. This pointer will not
11416  * be freed by the library.
11417  *
11418  * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11419  * The push parser doesn't support recovery mode.
11420  *
11421  * Returns an xmlParserErrors code (0 on success).
11422  */
11423 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11424 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11425               int terminate) {
11426     size_t curBase;
11427     size_t maxLength;
11428     size_t pos;
11429     int end_in_lf = 0;
11430     int res;
11431 
11432     if ((ctxt == NULL) || (size < 0))
11433         return(XML_ERR_ARGUMENT);
11434     if ((chunk == NULL) && (size > 0))
11435         return(XML_ERR_ARGUMENT);
11436     if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11437         return(XML_ERR_ARGUMENT);
11438     if (ctxt->disableSAX != 0)
11439         return(ctxt->errNo);
11440 
11441     ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11442     if (ctxt->instate == XML_PARSER_START)
11443         xmlCtxtInitializeLate(ctxt);
11444     if ((size > 0) && (chunk != NULL) && (!terminate) &&
11445         (chunk[size - 1] == '\r')) {
11446 	end_in_lf = 1;
11447 	size--;
11448     }
11449 
11450     /*
11451      * Also push an empty chunk to make sure that the raw buffer
11452      * will be flushed if there is an encoder.
11453      */
11454     pos = ctxt->input->cur - ctxt->input->base;
11455     res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11456     xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11457     if (res < 0) {
11458         xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11459         xmlHaltParser(ctxt);
11460         return(ctxt->errNo);
11461     }
11462 
11463     xmlParseTryOrFinish(ctxt, terminate);
11464 
11465     curBase = ctxt->input->cur - ctxt->input->base;
11466     maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11467                 XML_MAX_HUGE_LENGTH :
11468                 XML_MAX_LOOKUP_LIMIT;
11469     if (curBase > maxLength) {
11470         xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11471                     "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11472         xmlHaltParser(ctxt);
11473     }
11474 
11475     if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11476         return(ctxt->errNo);
11477 
11478     if (end_in_lf == 1) {
11479 	pos = ctxt->input->cur - ctxt->input->base;
11480 	res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11481 	xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11482         if (res < 0) {
11483             xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11484             xmlHaltParser(ctxt);
11485             return(ctxt->errNo);
11486         }
11487     }
11488     if (terminate) {
11489 	/*
11490 	 * Check for termination
11491 	 */
11492         if ((ctxt->instate != XML_PARSER_EOF) &&
11493             (ctxt->instate != XML_PARSER_EPILOG)) {
11494             if (ctxt->nameNr > 0) {
11495                 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11496                 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11497                 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11498                         "Premature end of data in tag %s line %d\n",
11499                         name, line, NULL);
11500             } else if (ctxt->instate == XML_PARSER_START) {
11501                 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11502             } else {
11503                 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11504                                "Start tag expected, '<' not found\n");
11505             }
11506         } else if ((ctxt->input->buf->encoder != NULL) &&
11507                    (ctxt->input->buf->error == 0) &&
11508                    (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11509             xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11510                            "Truncated multi-byte sequence at EOF\n");
11511         }
11512 	if (ctxt->instate != XML_PARSER_EOF) {
11513             ctxt->instate = XML_PARSER_EOF;
11514             xmlFinishDocument(ctxt);
11515 	}
11516     }
11517     if (ctxt->wellFormed == 0)
11518 	return((xmlParserErrors) ctxt->errNo);
11519     else
11520         return(0);
11521 }
11522 
11523 /************************************************************************
11524  *									*
11525  *		I/O front end functions to the parser			*
11526  *									*
11527  ************************************************************************/
11528 
11529 /**
11530  * xmlCreatePushParserCtxt:
11531  * @sax:  a SAX handler (optional)
11532  * @user_data:  user data for SAX callbacks (optional)
11533  * @chunk:  initial chunk (optional, deprecated)
11534  * @size:  size of initial chunk in bytes
11535  * @filename:  file name or URI (optional)
11536  *
11537  * Create a parser context for using the XML parser in push mode.
11538  * See xmlParseChunk.
11539  *
11540  * Passing an initial chunk is useless and deprecated.
11541  *
11542  * @filename is used as base URI to fetch external entities and for
11543  * error reports.
11544  *
11545  * Returns the new parser context or NULL if a memory allocation
11546  * failed.
11547  */
11548 
11549 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11550 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11551                         const char *chunk, int size, const char *filename) {
11552     xmlParserCtxtPtr ctxt;
11553     xmlParserInputPtr input;
11554 
11555     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11556     if (ctxt == NULL)
11557 	return(NULL);
11558 
11559     ctxt->options &= ~XML_PARSE_NODICT;
11560     ctxt->dictNames = 1;
11561 
11562     input = xmlNewPushInput(filename, chunk, size);
11563     if (input == NULL) {
11564 	xmlFreeParserCtxt(ctxt);
11565 	return(NULL);
11566     }
11567     if (inputPush(ctxt, input) < 0) {
11568         xmlFreeInputStream(input);
11569         xmlFreeParserCtxt(ctxt);
11570         return(NULL);
11571     }
11572 
11573     return(ctxt);
11574 }
11575 #endif /* LIBXML_PUSH_ENABLED */
11576 
11577 /**
11578  * xmlStopParser:
11579  * @ctxt:  an XML parser context
11580  *
11581  * Blocks further parser processing
11582  */
11583 void
xmlStopParser(xmlParserCtxtPtr ctxt)11584 xmlStopParser(xmlParserCtxtPtr ctxt) {
11585     if (ctxt == NULL)
11586         return;
11587     xmlHaltParser(ctxt);
11588     if (ctxt->errNo != XML_ERR_NO_MEMORY)
11589         ctxt->errNo = XML_ERR_USER_STOP;
11590 }
11591 
11592 /**
11593  * xmlCreateIOParserCtxt:
11594  * @sax:  a SAX handler (optional)
11595  * @user_data:  user data for SAX callbacks (optional)
11596  * @ioread:  an I/O read function
11597  * @ioclose:  an I/O close function (optional)
11598  * @ioctx:  an I/O handler
11599  * @enc:  the charset encoding if known (deprecated)
11600  *
11601  * Create a parser context for using the XML parser with an existing
11602  * I/O stream
11603  *
11604  * Returns the new parser context or NULL
11605  */
11606 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11607 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11608                       xmlInputReadCallback ioread,
11609                       xmlInputCloseCallback ioclose,
11610                       void *ioctx, xmlCharEncoding enc) {
11611     xmlParserCtxtPtr ctxt;
11612     xmlParserInputPtr input;
11613     const char *encoding;
11614 
11615     ctxt = xmlNewSAXParserCtxt(sax, user_data);
11616     if (ctxt == NULL)
11617 	return(NULL);
11618 
11619     encoding = xmlGetCharEncodingName(enc);
11620     input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11621                                   encoding, 0);
11622     if (input == NULL) {
11623 	xmlFreeParserCtxt(ctxt);
11624         return (NULL);
11625     }
11626     if (inputPush(ctxt, input) < 0) {
11627         xmlFreeInputStream(input);
11628         xmlFreeParserCtxt(ctxt);
11629         return(NULL);
11630     }
11631 
11632     return(ctxt);
11633 }
11634 
11635 #ifdef LIBXML_VALID_ENABLED
11636 /************************************************************************
11637  *									*
11638  *		Front ends when parsing a DTD				*
11639  *									*
11640  ************************************************************************/
11641 
11642 /**
11643  * xmlIOParseDTD:
11644  * @sax:  the SAX handler block or NULL
11645  * @input:  an Input Buffer
11646  * @enc:  the charset encoding if known
11647  *
11648  * Load and parse a DTD
11649  *
11650  * Returns the resulting xmlDtdPtr or NULL in case of error.
11651  * @input will be freed by the function in any case.
11652  */
11653 
11654 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11655 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11656 	      xmlCharEncoding enc) {
11657     xmlDtdPtr ret = NULL;
11658     xmlParserCtxtPtr ctxt;
11659     xmlParserInputPtr pinput = NULL;
11660 
11661     if (input == NULL)
11662 	return(NULL);
11663 
11664     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11665     if (ctxt == NULL) {
11666         xmlFreeParserInputBuffer(input);
11667 	return(NULL);
11668     }
11669 
11670     /*
11671      * generate a parser input from the I/O handler
11672      */
11673 
11674     pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11675     if (pinput == NULL) {
11676         xmlFreeParserInputBuffer(input);
11677 	xmlFreeParserCtxt(ctxt);
11678 	return(NULL);
11679     }
11680 
11681     /*
11682      * plug some encoding conversion routines here.
11683      */
11684     if (xmlPushInput(ctxt, pinput) < 0) {
11685         xmlFreeInputStream(pinput);
11686 	xmlFreeParserCtxt(ctxt);
11687 	return(NULL);
11688     }
11689     if (enc != XML_CHAR_ENCODING_NONE) {
11690         xmlSwitchEncoding(ctxt, enc);
11691     }
11692 
11693     /*
11694      * let's parse that entity knowing it's an external subset.
11695      */
11696     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11697     if (ctxt->myDoc == NULL) {
11698 	xmlErrMemory(ctxt);
11699 	return(NULL);
11700     }
11701     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11702     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11703 	                               BAD_CAST "none", BAD_CAST "none");
11704 
11705     xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11706 
11707     if (ctxt->myDoc != NULL) {
11708 	if (ctxt->wellFormed) {
11709 	    ret = ctxt->myDoc->extSubset;
11710 	    ctxt->myDoc->extSubset = NULL;
11711 	    if (ret != NULL) {
11712 		xmlNodePtr tmp;
11713 
11714 		ret->doc = NULL;
11715 		tmp = ret->children;
11716 		while (tmp != NULL) {
11717 		    tmp->doc = NULL;
11718 		    tmp = tmp->next;
11719 		}
11720 	    }
11721 	} else {
11722 	    ret = NULL;
11723 	}
11724         xmlFreeDoc(ctxt->myDoc);
11725         ctxt->myDoc = NULL;
11726     }
11727     xmlFreeParserCtxt(ctxt);
11728 
11729     return(ret);
11730 }
11731 
11732 /**
11733  * xmlSAXParseDTD:
11734  * @sax:  the SAX handler block
11735  * @ExternalID:  a NAME* containing the External ID of the DTD
11736  * @SystemID:  a NAME* containing the URL to the DTD
11737  *
11738  * DEPRECATED: Don't use.
11739  *
11740  * Load and parse an external subset.
11741  *
11742  * Returns the resulting xmlDtdPtr or NULL in case of error.
11743  */
11744 
11745 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11746 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11747                           const xmlChar *SystemID) {
11748     xmlDtdPtr ret = NULL;
11749     xmlParserCtxtPtr ctxt;
11750     xmlParserInputPtr input = NULL;
11751     xmlChar* systemIdCanonic;
11752 
11753     if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11754 
11755     ctxt = xmlNewSAXParserCtxt(sax, NULL);
11756     if (ctxt == NULL) {
11757 	return(NULL);
11758     }
11759 
11760     /*
11761      * Canonicalise the system ID
11762      */
11763     systemIdCanonic = xmlCanonicPath(SystemID);
11764     if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11765 	xmlFreeParserCtxt(ctxt);
11766 	return(NULL);
11767     }
11768 
11769     /*
11770      * Ask the Entity resolver to load the damn thing
11771      */
11772 
11773     if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11774 	input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11775 	                                 systemIdCanonic);
11776     if (input == NULL) {
11777 	xmlFreeParserCtxt(ctxt);
11778 	if (systemIdCanonic != NULL)
11779 	    xmlFree(systemIdCanonic);
11780 	return(NULL);
11781     }
11782 
11783     /*
11784      * plug some encoding conversion routines here.
11785      */
11786     if (xmlPushInput(ctxt, input) < 0) {
11787         xmlFreeInputStream(input);
11788 	xmlFreeParserCtxt(ctxt);
11789 	if (systemIdCanonic != NULL)
11790 	    xmlFree(systemIdCanonic);
11791 	return(NULL);
11792     }
11793 
11794     xmlDetectEncoding(ctxt);
11795 
11796     if (input->filename == NULL)
11797 	input->filename = (char *) systemIdCanonic;
11798     else
11799 	xmlFree(systemIdCanonic);
11800 
11801     /*
11802      * let's parse that entity knowing it's an external subset.
11803      */
11804     ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11805     if (ctxt->myDoc == NULL) {
11806 	xmlErrMemory(ctxt);
11807 	xmlFreeParserCtxt(ctxt);
11808 	return(NULL);
11809     }
11810     ctxt->myDoc->properties = XML_DOC_INTERNAL;
11811     ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11812 	                               ExternalID, SystemID);
11813     if (ctxt->myDoc->extSubset == NULL) {
11814         xmlFreeDoc(ctxt->myDoc);
11815         xmlFreeParserCtxt(ctxt);
11816         return(NULL);
11817     }
11818     xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11819 
11820     if (ctxt->myDoc != NULL) {
11821 	if (ctxt->wellFormed) {
11822 	    ret = ctxt->myDoc->extSubset;
11823 	    ctxt->myDoc->extSubset = NULL;
11824 	    if (ret != NULL) {
11825 		xmlNodePtr tmp;
11826 
11827 		ret->doc = NULL;
11828 		tmp = ret->children;
11829 		while (tmp != NULL) {
11830 		    tmp->doc = NULL;
11831 		    tmp = tmp->next;
11832 		}
11833 	    }
11834 	} else {
11835 	    ret = NULL;
11836 	}
11837         xmlFreeDoc(ctxt->myDoc);
11838         ctxt->myDoc = NULL;
11839     }
11840     xmlFreeParserCtxt(ctxt);
11841 
11842     return(ret);
11843 }
11844 
11845 
11846 /**
11847  * xmlParseDTD:
11848  * @ExternalID:  a NAME* containing the External ID of the DTD
11849  * @SystemID:  a NAME* containing the URL to the DTD
11850  *
11851  * Load and parse an external subset.
11852  *
11853  * Returns the resulting xmlDtdPtr or NULL in case of error.
11854  */
11855 
11856 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11857 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11858     return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11859 }
11860 #endif /* LIBXML_VALID_ENABLED */
11861 
11862 /************************************************************************
11863  *									*
11864  *		Front ends when parsing an Entity			*
11865  *									*
11866  ************************************************************************/
11867 
11868 static xmlNodePtr
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,int hasTextDecl,int buildTree)11869 xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
11870                             int hasTextDecl, int buildTree) {
11871     xmlNodePtr root = NULL;
11872     xmlNodePtr list = NULL;
11873     xmlChar *rootName = BAD_CAST "#root";
11874     int result;
11875 
11876     if (buildTree) {
11877         root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
11878         if (root == NULL) {
11879             xmlErrMemory(ctxt);
11880             goto error;
11881         }
11882     }
11883 
11884     if (xmlPushInput(ctxt, input) < 0)
11885         goto error;
11886 
11887     nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
11888     spacePush(ctxt, -1);
11889 
11890     if (buildTree)
11891         nodePush(ctxt, root);
11892 
11893     if (hasTextDecl) {
11894         xmlDetectEncoding(ctxt);
11895 
11896         /*
11897          * Parse a possible text declaration first
11898          */
11899         if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
11900             (IS_BLANK_CH(NXT(5)))) {
11901             xmlParseTextDecl(ctxt);
11902             /*
11903              * An XML-1.0 document can't reference an entity not XML-1.0
11904              */
11905             if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
11906                 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
11907                 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
11908                                "Version mismatch between document and "
11909                                "entity\n");
11910             }
11911         }
11912     }
11913 
11914     xmlParseContentInternal(ctxt);
11915 
11916     if (ctxt->input->cur < ctxt->input->end)
11917 	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11918 
11919     if ((ctxt->wellFormed) ||
11920         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
11921         if (root != NULL) {
11922             xmlNodePtr cur;
11923 
11924             /*
11925              * Unlink newly created node list.
11926              */
11927             list = root->children;
11928             root->children = NULL;
11929             root->last = NULL;
11930             for (cur = list; cur != NULL; cur = cur->next)
11931                 cur->parent = NULL;
11932         }
11933     }
11934 
11935     /*
11936      * Read the rest of the stream in case of errors. We want
11937      * to account for the whole entity size.
11938      */
11939     do {
11940         ctxt->input->cur = ctxt->input->end;
11941         xmlParserShrink(ctxt);
11942         result = xmlParserGrow(ctxt);
11943     } while (result > 0);
11944 
11945     if (buildTree)
11946         nodePop(ctxt);
11947 
11948     namePop(ctxt);
11949     spacePop(ctxt);
11950 
11951     /* xmlPopInput would free the stream */
11952     inputPop(ctxt);
11953 
11954 error:
11955     xmlFreeNode(root);
11956 
11957     return(list);
11958 }
11959 
11960 static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt,xmlEntityPtr ent)11961 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
11962     xmlParserInputPtr input;
11963     xmlNodePtr list;
11964     unsigned long consumed;
11965     int isExternal;
11966     int buildTree;
11967     int oldMinNsIndex;
11968     int oldNodelen, oldNodemem;
11969 
11970     isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
11971     buildTree = (ctxt->node != NULL);
11972 
11973     /*
11974      * Recursion check
11975      */
11976     if (ent->flags & XML_ENT_EXPANDING) {
11977         xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
11978         xmlHaltParser(ctxt);
11979         goto error;
11980     }
11981 
11982     /*
11983      * Load entity
11984      */
11985     input = xmlNewEntityInputStream(ctxt, ent);
11986     if (input == NULL)
11987         goto error;
11988 
11989     /*
11990      * When building a tree, we need to limit the scope of namespace
11991      * declarations, so that entities don't reference xmlNs structs
11992      * from the parent of a reference.
11993      */
11994     oldMinNsIndex = ctxt->nsdb->minNsIndex;
11995     if (buildTree)
11996         ctxt->nsdb->minNsIndex = ctxt->nsNr;
11997 
11998     oldNodelen = ctxt->nodelen;
11999     oldNodemem = ctxt->nodemem;
12000     ctxt->nodelen = 0;
12001     ctxt->nodemem = 0;
12002 
12003     /*
12004      * Parse content
12005      *
12006      * This initiates a recursive call chain:
12007      *
12008      * - xmlCtxtParseContentInternal
12009      * - xmlParseContentInternal
12010      * - xmlParseReference
12011      * - xmlCtxtParseEntity
12012      *
12013      * The nesting depth is limited by the maximum number of inputs,
12014      * see xmlPushInput.
12015      *
12016      * It's possible to make this non-recursive (minNsIndex must be
12017      * stored in the input struct) at the expense of code readability.
12018      */
12019 
12020     ent->flags |= XML_ENT_EXPANDING;
12021 
12022     list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);
12023 
12024     ent->flags &= ~XML_ENT_EXPANDING;
12025 
12026     ctxt->nsdb->minNsIndex = oldMinNsIndex;
12027     ctxt->nodelen = oldNodelen;
12028     ctxt->nodemem = oldNodemem;
12029 
12030     /*
12031      * Entity size accounting
12032      */
12033     consumed = input->consumed;
12034     xmlSaturatedAddSizeT(&consumed, input->end - input->base);
12035 
12036     if ((ent->flags & XML_ENT_CHECKED) == 0)
12037         xmlSaturatedAdd(&ent->expandedSize, consumed);
12038 
12039     if ((ent->flags & XML_ENT_PARSED) == 0) {
12040         if (isExternal)
12041             xmlSaturatedAdd(&ctxt->sizeentities, consumed);
12042 
12043         ent->children = list;
12044 
12045         while (list != NULL) {
12046             list->parent = (xmlNodePtr) ent;
12047             if (list->next == NULL)
12048                 ent->last = list;
12049             list = list->next;
12050         }
12051     } else {
12052         xmlFreeNodeList(list);
12053     }
12054 
12055     xmlFreeInputStream(input);
12056 
12057 error:
12058     ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
12059 }
12060 
12061 /**
12062  * xmlParseCtxtExternalEntity:
12063  * @ctxt:  the existing parsing context
12064  * @URL:  the URL for the entity to load
12065  * @ID:  the System ID for the entity to load
12066  * @listOut:  the return value for the set of parsed nodes
12067  *
12068  * Parse an external general entity within an existing parsing context
12069  * An external general parsed entity is well-formed if it matches the
12070  * production labeled extParsedEnt.
12071  *
12072  * [78] extParsedEnt ::= TextDecl? content
12073  *
12074  * Returns 0 if the entity is well formed, -1 in case of args problem and
12075  *    the parser error code otherwise
12076  */
12077 
12078 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12079 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12080                            const xmlChar *ID, xmlNodePtr *listOut) {
12081     xmlParserInputPtr input;
12082     xmlNodePtr list;
12083 
12084     if (listOut != NULL)
12085         *listOut = NULL;
12086 
12087     if (ctxt == NULL)
12088         return(XML_ERR_ARGUMENT);
12089 
12090     input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12091                             XML_RESOURCE_GENERAL_ENTITY);
12092     if (input == NULL)
12093         return(ctxt->errNo);
12094 
12095     xmlCtxtInitializeLate(ctxt);
12096 
12097     list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12098     if (listOut != NULL)
12099         *listOut = list;
12100     else
12101         xmlFreeNodeList(list);
12102 
12103     xmlFreeInputStream(input);
12104     return(ctxt->errNo);
12105 }
12106 
12107 #ifdef LIBXML_SAX1_ENABLED
12108 /**
12109  * xmlParseExternalEntity:
12110  * @doc:  the document the chunk pertains to
12111  * @sax:  the SAX handler block (possibly NULL)
12112  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12113  * @depth:  Used for loop detection, use 0
12114  * @URL:  the URL for the entity to load
12115  * @ID:  the System ID for the entity to load
12116  * @list:  the return value for the set of parsed nodes
12117  *
12118  * DEPRECATED: Use xmlParseCtxtExternalEntity.
12119  *
12120  * Parse an external general entity
12121  * An external general parsed entity is well-formed if it matches the
12122  * production labeled extParsedEnt.
12123  *
12124  * [78] extParsedEnt ::= TextDecl? content
12125  *
12126  * Returns 0 if the entity is well formed, -1 in case of args problem and
12127  *    the parser error code otherwise
12128  */
12129 
12130 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12131 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12132 	  int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12133     xmlParserCtxtPtr ctxt;
12134     int ret;
12135 
12136     if (list != NULL)
12137         *list = NULL;
12138 
12139     if (doc == NULL)
12140         return(XML_ERR_ARGUMENT);
12141 
12142     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12143     if (ctxt == NULL)
12144         return(XML_ERR_NO_MEMORY);
12145 
12146     ctxt->depth = depth;
12147     ctxt->myDoc = doc;
12148     ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12149 
12150     xmlFreeParserCtxt(ctxt);
12151     return(ret);
12152 }
12153 
12154 /**
12155  * xmlParseBalancedChunkMemory:
12156  * @doc:  the document the chunk pertains to (must not be NULL)
12157  * @sax:  the SAX handler block (possibly NULL)
12158  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12159  * @depth:  Used for loop detection, use 0
12160  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12161  * @lst:  the return value for the set of parsed nodes
12162  *
12163  * Parse a well-balanced chunk of an XML document
12164  * called by the parser
12165  * The allowed sequence for the Well Balanced Chunk is the one defined by
12166  * the content production in the XML grammar:
12167  *
12168  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12169  *
12170  * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12171  *    the parser error code otherwise
12172  */
12173 
12174 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12175 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12176      void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12177     return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12178                                                 depth, string, lst, 0 );
12179 }
12180 #endif /* LIBXML_SAX1_ENABLED */
12181 
12182 /**
12183  * xmlCtxtParseContent:
12184  * @ctxt:  parser context
12185  * @input:  parser input
12186  * @node:  target node or document
12187  * @hasTextDecl:  whether to parse text declaration
12188  *
12189  * Parse a well-balanced chunk of XML matching the 'content' production.
12190  *
12191  * Namespaces in scope of @node and entities of @node's document are
12192  * recognized. When validating, the DTD of @node's document is used.
12193  *
12194  * Always consumes @input even in error case.
12195  *
12196  * Available since 2.14.0.
12197  *
12198  * Returns a node list or NULL in case of error.
12199  */
12200 xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlNodePtr node,int hasTextDecl)12201 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12202                     xmlNodePtr node, int hasTextDecl) {
12203     xmlDocPtr doc;
12204     xmlNodePtr cur, list = NULL;
12205     int nsnr = 0;
12206     xmlDictPtr oldDict;
12207     int oldOptions, oldDictNames, oldLoadSubset;
12208 
12209     if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12210         xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12211         goto exit;
12212     }
12213 
12214     doc = node->doc;
12215     if (doc == NULL) {
12216         xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12217         goto exit;
12218     }
12219 
12220     switch (node->type) {
12221         case XML_ELEMENT_NODE:
12222         case XML_DOCUMENT_NODE:
12223         case XML_HTML_DOCUMENT_NODE:
12224             break;
12225 
12226         case XML_ATTRIBUTE_NODE:
12227         case XML_TEXT_NODE:
12228         case XML_CDATA_SECTION_NODE:
12229         case XML_ENTITY_REF_NODE:
12230         case XML_PI_NODE:
12231         case XML_COMMENT_NODE:
12232             for (cur = node->parent; cur != NULL; cur = node->parent) {
12233                 if ((cur->type == XML_ELEMENT_NODE) ||
12234                     (cur->type == XML_DOCUMENT_NODE) ||
12235                     (cur->type == XML_HTML_DOCUMENT_NODE)) {
12236                     node = cur;
12237                     break;
12238                 }
12239             }
12240             break;
12241 
12242         default:
12243             xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12244             goto exit;
12245     }
12246 
12247 #ifdef LIBXML_HTML_ENABLED
12248     if (ctxt->html)
12249         htmlCtxtReset(ctxt);
12250     else
12251 #endif
12252         xmlCtxtReset(ctxt);
12253 
12254     oldDict = ctxt->dict;
12255     oldOptions = ctxt->options;
12256     oldDictNames = ctxt->dictNames;
12257     oldLoadSubset = ctxt->loadsubset;
12258 
12259     /*
12260      * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12261      */
12262     if (doc->dict != NULL) {
12263         ctxt->dict = doc->dict;
12264     } else {
12265         ctxt->options |= XML_PARSE_NODICT;
12266         ctxt->dictNames = 0;
12267     }
12268 
12269     /*
12270      * Disable IDs
12271      */
12272     ctxt->loadsubset |= XML_SKIP_IDS;
12273 
12274     ctxt->myDoc = doc;
12275 
12276 #ifdef LIBXML_HTML_ENABLED
12277     if (ctxt->html) {
12278         /*
12279          * When parsing in context, it makes no sense to add implied
12280          * elements like html/body/etc...
12281          */
12282         ctxt->options |= HTML_PARSE_NOIMPLIED;
12283 
12284         list = htmlCtxtParseContentInternal(ctxt, input);
12285     } else
12286 #endif
12287     {
12288         xmlCtxtInitializeLate(ctxt);
12289 
12290         /*
12291          * This hack lowers the error level of undeclared entities
12292          * from XML_ERR_FATAL (well-formedness error) to XML_ERR_ERROR
12293          * or XML_ERR_WARNING.
12294          */
12295         ctxt->hasExternalSubset = 1;
12296 
12297         /*
12298          * initialize the SAX2 namespaces stack
12299          */
12300         cur = node;
12301         while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12302             xmlNsPtr ns = cur->nsDef;
12303             xmlHashedString hprefix, huri;
12304 
12305             while (ns != NULL) {
12306                 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12307                 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12308                 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12309                     nsnr++;
12310                 ns = ns->next;
12311             }
12312             cur = cur->parent;
12313         }
12314 
12315         list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12316 
12317         if (nsnr > 0)
12318             xmlParserNsPop(ctxt, nsnr);
12319     }
12320 
12321     ctxt->dict = oldDict;
12322     ctxt->options = oldOptions;
12323     ctxt->dictNames = oldDictNames;
12324     ctxt->loadsubset = oldLoadSubset;
12325     ctxt->myDoc = NULL;
12326     ctxt->node = NULL;
12327 
12328 exit:
12329     xmlFreeInputStream(input);
12330     return(list);
12331 }
12332 
12333 /**
12334  * xmlParseInNodeContext:
12335  * @node:  the context node
12336  * @data:  the input string
12337  * @datalen:  the input string length in bytes
12338  * @options:  a combination of xmlParserOption
12339  * @listOut:  the return value for the set of parsed nodes
12340  *
12341  * Parse a well-balanced chunk of an XML document
12342  * within the context (DTD, namespaces, etc ...) of the given node.
12343  *
12344  * The allowed sequence for the data is a Well Balanced Chunk defined by
12345  * the content production in the XML grammar:
12346  *
12347  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12348  *
12349  * This function assumes the encoding of @node's document which is
12350  * typically not what you want. A better alternative is
12351  * xmlCtxtParseContent.
12352  *
12353  * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12354  * error code otherwise
12355  */
12356 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * listOut)12357 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12358                       int options, xmlNodePtr *listOut) {
12359     xmlParserCtxtPtr ctxt;
12360     xmlParserInputPtr input;
12361     xmlDocPtr doc;
12362     xmlNodePtr list;
12363     xmlParserErrors ret;
12364 
12365     if (listOut == NULL)
12366         return(XML_ERR_INTERNAL_ERROR);
12367     *listOut = NULL;
12368 
12369     if ((node == NULL) || (data == NULL) || (datalen < 0))
12370         return(XML_ERR_INTERNAL_ERROR);
12371 
12372     doc = node->doc;
12373     if (doc == NULL)
12374         return(XML_ERR_INTERNAL_ERROR);
12375 
12376 #ifdef LIBXML_HTML_ENABLED
12377     if (doc->type == XML_HTML_DOCUMENT_NODE) {
12378         ctxt = htmlNewParserCtxt();
12379     }
12380     else
12381 #endif
12382         ctxt = xmlNewParserCtxt();
12383 
12384     if (ctxt == NULL)
12385         return(XML_ERR_NO_MEMORY);
12386 
12387     input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12388                                       (const char *) doc->encoding,
12389                                       XML_INPUT_BUF_STATIC);
12390     if (input == NULL) {
12391         xmlFreeParserCtxt(ctxt);
12392         return(XML_ERR_NO_MEMORY);
12393     }
12394 
12395     xmlCtxtUseOptions(ctxt, options);
12396 
12397     list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12398 
12399     if (list == NULL) {
12400         ret = ctxt->errNo;
12401         if (ret == XML_ERR_ARGUMENT)
12402             ret = XML_ERR_INTERNAL_ERROR;
12403     } else {
12404         ret = XML_ERR_OK;
12405         *listOut = list;
12406     }
12407 
12408     xmlFreeParserCtxt(ctxt);
12409 
12410     return(ret);
12411 }
12412 
12413 #ifdef LIBXML_SAX1_ENABLED
12414 /**
12415  * xmlParseBalancedChunkMemoryRecover:
12416  * @doc:  the document the chunk pertains to (must not be NULL)
12417  * @sax:  the SAX handler block (possibly NULL)
12418  * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12419  * @depth:  Used for loop detection, use 0
12420  * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12421  * @listOut:  the return value for the set of parsed nodes
12422  * @recover: return nodes even if the data is broken (use 0)
12423  *
12424  * Parse a well-balanced chunk of an XML document
12425  *
12426  * The allowed sequence for the Well Balanced Chunk is the one defined by
12427  * the content production in the XML grammar:
12428  *
12429  * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430  *
12431  * Returns 0 if the chunk is well balanced, or thehe parser error code
12432  * otherwise.
12433  *
12434  * In case recover is set to 1, the nodelist will not be empty even if
12435  * the parsed chunk is not well balanced, assuming the parsing succeeded to
12436  * some extent.
12437  */
12438 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12439 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12440      void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12441      int recover) {
12442     xmlParserCtxtPtr ctxt;
12443     xmlParserInputPtr input;
12444     xmlNodePtr list;
12445     int ret;
12446 
12447     if (listOut != NULL)
12448         *listOut = NULL;
12449 
12450     if (string == NULL)
12451         return(XML_ERR_ARGUMENT);
12452 
12453     ctxt = xmlNewSAXParserCtxt(sax, user_data);
12454     if (ctxt == NULL)
12455         return(XML_ERR_NO_MEMORY);
12456 
12457     xmlCtxtInitializeLate(ctxt);
12458 
12459     ctxt->depth = depth;
12460     ctxt->myDoc = doc;
12461     if (recover) {
12462         ctxt->options |= XML_PARSE_RECOVER;
12463         ctxt->recovery = 1;
12464     }
12465 
12466     input = xmlNewStringInputStream(ctxt, string);
12467     if (input == NULL) {
12468         ret = ctxt->errNo;
12469         goto error;
12470     }
12471 
12472     list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12473     if (listOut != NULL)
12474         *listOut = list;
12475     else
12476         xmlFreeNodeList(list);
12477 
12478     if (!ctxt->wellFormed)
12479         ret = ctxt->errNo;
12480     else
12481         ret = XML_ERR_OK;
12482 
12483 error:
12484     xmlFreeInputStream(input);
12485     xmlFreeParserCtxt(ctxt);
12486     return(ret);
12487 }
12488 
12489 /**
12490  * xmlSAXParseEntity:
12491  * @sax:  the SAX handler block
12492  * @filename:  the filename
12493  *
12494  * DEPRECATED: Don't use.
12495  *
12496  * parse an XML external entity out of context and build a tree.
12497  * It use the given SAX function block to handle the parsing callback.
12498  * If sax is NULL, fallback to the default DOM tree building routines.
12499  *
12500  * [78] extParsedEnt ::= TextDecl? content
12501  *
12502  * This correspond to a "Well Balanced" chunk
12503  *
12504  * Returns the resulting document tree
12505  */
12506 
12507 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12508 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12509     xmlDocPtr ret;
12510     xmlParserCtxtPtr ctxt;
12511 
12512     ctxt = xmlCreateFileParserCtxt(filename);
12513     if (ctxt == NULL) {
12514 	return(NULL);
12515     }
12516     if (sax != NULL) {
12517         if (sax->initialized == XML_SAX2_MAGIC) {
12518             *ctxt->sax = *sax;
12519         } else {
12520             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12521             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12522         }
12523         ctxt->userData = NULL;
12524     }
12525 
12526     xmlParseExtParsedEnt(ctxt);
12527 
12528     if (ctxt->wellFormed) {
12529 	ret = ctxt->myDoc;
12530     } else {
12531         ret = NULL;
12532         xmlFreeDoc(ctxt->myDoc);
12533     }
12534 
12535     xmlFreeParserCtxt(ctxt);
12536 
12537     return(ret);
12538 }
12539 
12540 /**
12541  * xmlParseEntity:
12542  * @filename:  the filename
12543  *
12544  * parse an XML external entity out of context and build a tree.
12545  *
12546  * [78] extParsedEnt ::= TextDecl? content
12547  *
12548  * This correspond to a "Well Balanced" chunk
12549  *
12550  * Returns the resulting document tree
12551  */
12552 
12553 xmlDocPtr
xmlParseEntity(const char * filename)12554 xmlParseEntity(const char *filename) {
12555     return(xmlSAXParseEntity(NULL, filename));
12556 }
12557 #endif /* LIBXML_SAX1_ENABLED */
12558 
12559 /**
12560  * xmlCreateEntityParserCtxt:
12561  * @URL:  the entity URL
12562  * @ID:  the entity PUBLIC ID
12563  * @base:  a possible base for the target URI
12564  *
12565  * DEPRECATED: Don't use.
12566  *
12567  * Create a parser context for an external entity
12568  * Automatic support for ZLIB/Compress compressed document is provided
12569  * by default if found at compile-time.
12570  *
12571  * Returns the new parser context or NULL
12572  */
12573 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12574 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12575 	                  const xmlChar *base) {
12576     xmlParserCtxtPtr ctxt;
12577     xmlParserInputPtr input;
12578     xmlChar *uri = NULL;
12579 
12580     ctxt = xmlNewParserCtxt();
12581     if (ctxt == NULL)
12582 	return(NULL);
12583 
12584     if (base != NULL) {
12585         if (xmlBuildURISafe(URL, base, &uri) < 0)
12586             goto error;
12587         if (uri != NULL)
12588             URL = uri;
12589     }
12590 
12591     input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12592                             XML_RESOURCE_UNKNOWN);
12593     if (input == NULL)
12594         goto error;
12595 
12596     if (inputPush(ctxt, input) < 0) {
12597         xmlFreeInputStream(input);
12598         goto error;
12599     }
12600 
12601     xmlFree(uri);
12602     return(ctxt);
12603 
12604 error:
12605     xmlFree(uri);
12606     xmlFreeParserCtxt(ctxt);
12607     return(NULL);
12608 }
12609 
12610 /************************************************************************
12611  *									*
12612  *		Front ends when parsing from a file			*
12613  *									*
12614  ************************************************************************/
12615 
12616 /**
12617  * xmlCreateURLParserCtxt:
12618  * @filename:  the filename or URL
12619  * @options:  a combination of xmlParserOption
12620  *
12621  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12622  *
12623  * Create a parser context for a file or URL content.
12624  * Automatic support for ZLIB/Compress compressed document is provided
12625  * by default if found at compile-time and for file accesses
12626  *
12627  * Returns the new parser context or NULL
12628  */
12629 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12630 xmlCreateURLParserCtxt(const char *filename, int options)
12631 {
12632     xmlParserCtxtPtr ctxt;
12633     xmlParserInputPtr input;
12634 
12635     ctxt = xmlNewParserCtxt();
12636     if (ctxt == NULL)
12637 	return(NULL);
12638 
12639     xmlCtxtUseOptions(ctxt, options);
12640     ctxt->linenumbers = 1;
12641 
12642     input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12643     if (input == NULL) {
12644 	xmlFreeParserCtxt(ctxt);
12645 	return(NULL);
12646     }
12647     if (inputPush(ctxt, input) < 0) {
12648         xmlFreeInputStream(input);
12649         xmlFreeParserCtxt(ctxt);
12650         return(NULL);
12651     }
12652 
12653     return(ctxt);
12654 }
12655 
12656 /**
12657  * xmlCreateFileParserCtxt:
12658  * @filename:  the filename
12659  *
12660  * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12661  *
12662  * Create a parser context for a file content.
12663  * Automatic support for ZLIB/Compress compressed document is provided
12664  * by default if found at compile-time.
12665  *
12666  * Returns the new parser context or NULL
12667  */
12668 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12669 xmlCreateFileParserCtxt(const char *filename)
12670 {
12671     return(xmlCreateURLParserCtxt(filename, 0));
12672 }
12673 
12674 #ifdef LIBXML_SAX1_ENABLED
12675 /**
12676  * xmlSAXParseFileWithData:
12677  * @sax:  the SAX handler block
12678  * @filename:  the filename
12679  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12680  *             documents
12681  * @data:  the userdata
12682  *
12683  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12684  *
12685  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12686  * compressed document is provided by default if found at compile-time.
12687  * It use the given SAX function block to handle the parsing callback.
12688  * If sax is NULL, fallback to the default DOM tree building routines.
12689  *
12690  * User data (void *) is stored within the parser context in the
12691  * context's _private member, so it is available nearly everywhere in libxml
12692  *
12693  * Returns the resulting document tree
12694  */
12695 
12696 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12697 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12698                         int recovery, void *data) {
12699     xmlDocPtr ret;
12700     xmlParserCtxtPtr ctxt;
12701     xmlParserInputPtr input;
12702 
12703     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12704     if (ctxt == NULL)
12705 	return(NULL);
12706 
12707     if (data != NULL)
12708 	ctxt->_private = data;
12709 
12710     if (recovery) {
12711         ctxt->options |= XML_PARSE_RECOVER;
12712         ctxt->recovery = 1;
12713     }
12714 
12715     if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12716         input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12717     else
12718         input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12719 
12720     ret = xmlCtxtParseDocument(ctxt, input);
12721 
12722     xmlFreeParserCtxt(ctxt);
12723     return(ret);
12724 }
12725 
12726 /**
12727  * xmlSAXParseFile:
12728  * @sax:  the SAX handler block
12729  * @filename:  the filename
12730  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12731  *             documents
12732  *
12733  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12734  *
12735  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12736  * compressed document is provided by default if found at compile-time.
12737  * It use the given SAX function block to handle the parsing callback.
12738  * If sax is NULL, fallback to the default DOM tree building routines.
12739  *
12740  * Returns the resulting document tree
12741  */
12742 
12743 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12744 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12745                           int recovery) {
12746     return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12747 }
12748 
12749 /**
12750  * xmlRecoverDoc:
12751  * @cur:  a pointer to an array of xmlChar
12752  *
12753  * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12754  *
12755  * parse an XML in-memory document and build a tree.
12756  * In the case the document is not Well Formed, a attempt to build a
12757  * tree is tried anyway
12758  *
12759  * Returns the resulting document tree or NULL in case of failure
12760  */
12761 
12762 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12763 xmlRecoverDoc(const xmlChar *cur) {
12764     return(xmlSAXParseDoc(NULL, cur, 1));
12765 }
12766 
12767 /**
12768  * xmlParseFile:
12769  * @filename:  the filename
12770  *
12771  * DEPRECATED: Use xmlReadFile.
12772  *
12773  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12774  * compressed document is provided by default if found at compile-time.
12775  *
12776  * Returns the resulting document tree if the file was wellformed,
12777  * NULL otherwise.
12778  */
12779 
12780 xmlDocPtr
xmlParseFile(const char * filename)12781 xmlParseFile(const char *filename) {
12782     return(xmlSAXParseFile(NULL, filename, 0));
12783 }
12784 
12785 /**
12786  * xmlRecoverFile:
12787  * @filename:  the filename
12788  *
12789  * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12790  *
12791  * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12792  * compressed document is provided by default if found at compile-time.
12793  * In the case the document is not Well Formed, it attempts to build
12794  * a tree anyway
12795  *
12796  * Returns the resulting document tree or NULL in case of failure
12797  */
12798 
12799 xmlDocPtr
xmlRecoverFile(const char * filename)12800 xmlRecoverFile(const char *filename) {
12801     return(xmlSAXParseFile(NULL, filename, 1));
12802 }
12803 
12804 
12805 /**
12806  * xmlSetupParserForBuffer:
12807  * @ctxt:  an XML parser context
12808  * @buffer:  a xmlChar * buffer
12809  * @filename:  a file name
12810  *
12811  * DEPRECATED: Don't use.
12812  *
12813  * Setup the parser context to parse a new buffer; Clears any prior
12814  * contents from the parser context. The buffer parameter must not be
12815  * NULL, but the filename parameter can be
12816  */
12817 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12818 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12819                              const char* filename)
12820 {
12821     xmlParserInputPtr input;
12822 
12823     if ((ctxt == NULL) || (buffer == NULL))
12824         return;
12825 
12826     xmlClearParserCtxt(ctxt);
12827 
12828     input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12829                                       NULL, 0);
12830     if (input == NULL)
12831         return;
12832     if (inputPush(ctxt, input) < 0)
12833         xmlFreeInputStream(input);
12834 }
12835 
12836 /**
12837  * xmlSAXUserParseFile:
12838  * @sax:  a SAX handler
12839  * @user_data:  The user data returned on SAX callbacks
12840  * @filename:  a file name
12841  *
12842  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12843  *
12844  * parse an XML file and call the given SAX handler routines.
12845  * Automatic support for ZLIB/Compress compressed document is provided
12846  *
12847  * Returns 0 in case of success or a error number otherwise
12848  */
12849 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12850 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12851                     const char *filename) {
12852     int ret = 0;
12853     xmlParserCtxtPtr ctxt;
12854 
12855     ctxt = xmlCreateFileParserCtxt(filename);
12856     if (ctxt == NULL) return -1;
12857     if (sax != NULL) {
12858         if (sax->initialized == XML_SAX2_MAGIC) {
12859             *ctxt->sax = *sax;
12860         } else {
12861             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12862             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12863         }
12864 	ctxt->userData = user_data;
12865     }
12866 
12867     xmlParseDocument(ctxt);
12868 
12869     if (ctxt->wellFormed)
12870 	ret = 0;
12871     else {
12872         if (ctxt->errNo != 0)
12873 	    ret = ctxt->errNo;
12874 	else
12875 	    ret = -1;
12876     }
12877     if (ctxt->myDoc != NULL) {
12878         xmlFreeDoc(ctxt->myDoc);
12879 	ctxt->myDoc = NULL;
12880     }
12881     xmlFreeParserCtxt(ctxt);
12882 
12883     return ret;
12884 }
12885 #endif /* LIBXML_SAX1_ENABLED */
12886 
12887 /************************************************************************
12888  *									*
12889  *		Front ends when parsing from memory			*
12890  *									*
12891  ************************************************************************/
12892 
12893 /**
12894  * xmlCreateMemoryParserCtxt:
12895  * @buffer:  a pointer to a char array
12896  * @size:  the size of the array
12897  *
12898  * Create a parser context for an XML in-memory document. The input buffer
12899  * must not contain a terminating null byte.
12900  *
12901  * Returns the new parser context or NULL
12902  */
12903 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12904 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12905     xmlParserCtxtPtr ctxt;
12906     xmlParserInputPtr input;
12907 
12908     if (size < 0)
12909 	return(NULL);
12910 
12911     ctxt = xmlNewParserCtxt();
12912     if (ctxt == NULL)
12913 	return(NULL);
12914 
12915     input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12916     if (input == NULL) {
12917 	xmlFreeParserCtxt(ctxt);
12918 	return(NULL);
12919     }
12920     if (inputPush(ctxt, input) < 0) {
12921         xmlFreeInputStream(input);
12922         xmlFreeParserCtxt(ctxt);
12923         return(NULL);
12924     }
12925 
12926     return(ctxt);
12927 }
12928 
12929 #ifdef LIBXML_SAX1_ENABLED
12930 /**
12931  * xmlSAXParseMemoryWithData:
12932  * @sax:  the SAX handler block
12933  * @buffer:  an pointer to a char array
12934  * @size:  the size of the array
12935  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
12936  *             documents
12937  * @data:  the userdata
12938  *
12939  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12940  *
12941  * parse an XML in-memory block and use the given SAX function block
12942  * to handle the parsing callback. If sax is NULL, fallback to the default
12943  * DOM tree building routines.
12944  *
12945  * User data (void *) is stored within the parser context in the
12946  * context's _private member, so it is available nearly everywhere in libxml
12947  *
12948  * Returns the resulting document tree
12949  */
12950 
12951 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)12952 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12953                           int size, int recovery, void *data) {
12954     xmlDocPtr ret;
12955     xmlParserCtxtPtr ctxt;
12956     xmlParserInputPtr input;
12957 
12958     if (size < 0)
12959         return(NULL);
12960 
12961     ctxt = xmlNewSAXParserCtxt(sax, NULL);
12962     if (ctxt == NULL)
12963         return(NULL);
12964 
12965     if (data != NULL)
12966 	ctxt->_private=data;
12967 
12968     if (recovery) {
12969         ctxt->options |= XML_PARSE_RECOVER;
12970         ctxt->recovery = 1;
12971     }
12972 
12973     input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12974                                       XML_INPUT_BUF_STATIC);
12975 
12976     ret = xmlCtxtParseDocument(ctxt, input);
12977 
12978     xmlFreeParserCtxt(ctxt);
12979     return(ret);
12980 }
12981 
12982 /**
12983  * xmlSAXParseMemory:
12984  * @sax:  the SAX handler block
12985  * @buffer:  an pointer to a char array
12986  * @size:  the size of the array
12987  * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
12988  *             documents
12989  *
12990  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12991  *
12992  * parse an XML in-memory block and use the given SAX function block
12993  * to handle the parsing callback. If sax is NULL, fallback to the default
12994  * DOM tree building routines.
12995  *
12996  * Returns the resulting document tree
12997  */
12998 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)12999 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13000 	          int size, int recovery) {
13001     return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13002 }
13003 
13004 /**
13005  * xmlParseMemory:
13006  * @buffer:  an pointer to a char array
13007  * @size:  the size of the array
13008  *
13009  * DEPRECATED: Use xmlReadMemory.
13010  *
13011  * parse an XML in-memory block and build a tree.
13012  *
13013  * Returns the resulting document tree
13014  */
13015 
xmlParseMemory(const char * buffer,int size)13016 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13017    return(xmlSAXParseMemory(NULL, buffer, size, 0));
13018 }
13019 
13020 /**
13021  * xmlRecoverMemory:
13022  * @buffer:  an pointer to a char array
13023  * @size:  the size of the array
13024  *
13025  * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13026  *
13027  * parse an XML in-memory block and build a tree.
13028  * In the case the document is not Well Formed, an attempt to
13029  * build a tree is tried anyway
13030  *
13031  * Returns the resulting document tree or NULL in case of error
13032  */
13033 
xmlRecoverMemory(const char * buffer,int size)13034 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13035    return(xmlSAXParseMemory(NULL, buffer, size, 1));
13036 }
13037 
13038 /**
13039  * xmlSAXUserParseMemory:
13040  * @sax:  a SAX handler
13041  * @user_data:  The user data returned on SAX callbacks
13042  * @buffer:  an in-memory XML document input
13043  * @size:  the length of the XML document in bytes
13044  *
13045  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13046  *
13047  * parse an XML in-memory buffer and call the given SAX handler routines.
13048  *
13049  * Returns 0 in case of success or a error number otherwise
13050  */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13051 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13052 			  const char *buffer, int size) {
13053     int ret = 0;
13054     xmlParserCtxtPtr ctxt;
13055 
13056     ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13057     if (ctxt == NULL) return -1;
13058     if (sax != NULL) {
13059         if (sax->initialized == XML_SAX2_MAGIC) {
13060             *ctxt->sax = *sax;
13061         } else {
13062             memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13063             memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13064         }
13065 	ctxt->userData = user_data;
13066     }
13067 
13068     xmlParseDocument(ctxt);
13069 
13070     if (ctxt->wellFormed)
13071 	ret = 0;
13072     else {
13073         if (ctxt->errNo != 0)
13074 	    ret = ctxt->errNo;
13075 	else
13076 	    ret = -1;
13077     }
13078     if (ctxt->myDoc != NULL) {
13079         xmlFreeDoc(ctxt->myDoc);
13080 	ctxt->myDoc = NULL;
13081     }
13082     xmlFreeParserCtxt(ctxt);
13083 
13084     return ret;
13085 }
13086 #endif /* LIBXML_SAX1_ENABLED */
13087 
13088 /**
13089  * xmlCreateDocParserCtxt:
13090  * @str:  a pointer to an array of xmlChar
13091  *
13092  * Creates a parser context for an XML in-memory document.
13093  *
13094  * Returns the new parser context or NULL
13095  */
13096 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13097 xmlCreateDocParserCtxt(const xmlChar *str) {
13098     xmlParserCtxtPtr ctxt;
13099     xmlParserInputPtr input;
13100 
13101     ctxt = xmlNewParserCtxt();
13102     if (ctxt == NULL)
13103 	return(NULL);
13104 
13105     input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13106     if (input == NULL) {
13107 	xmlFreeParserCtxt(ctxt);
13108 	return(NULL);
13109     }
13110     if (inputPush(ctxt, input) < 0) {
13111         xmlFreeInputStream(input);
13112         xmlFreeParserCtxt(ctxt);
13113         return(NULL);
13114     }
13115 
13116     return(ctxt);
13117 }
13118 
13119 #ifdef LIBXML_SAX1_ENABLED
13120 /**
13121  * xmlSAXParseDoc:
13122  * @sax:  the SAX handler block
13123  * @cur:  a pointer to an array of xmlChar
13124  * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13125  *             documents
13126  *
13127  * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13128  *
13129  * parse an XML in-memory document and build a tree.
13130  * It use the given SAX function block to handle the parsing callback.
13131  * If sax is NULL, fallback to the default DOM tree building routines.
13132  *
13133  * Returns the resulting document tree
13134  */
13135 
13136 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13137 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13138     xmlDocPtr ret;
13139     xmlParserCtxtPtr ctxt;
13140     xmlSAXHandlerPtr oldsax = NULL;
13141 
13142     if (cur == NULL) return(NULL);
13143 
13144 
13145     ctxt = xmlCreateDocParserCtxt(cur);
13146     if (ctxt == NULL) return(NULL);
13147     if (sax != NULL) {
13148         oldsax = ctxt->sax;
13149         ctxt->sax = sax;
13150         ctxt->userData = NULL;
13151     }
13152 
13153     xmlParseDocument(ctxt);
13154     if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13155     else {
13156        ret = NULL;
13157        xmlFreeDoc(ctxt->myDoc);
13158        ctxt->myDoc = NULL;
13159     }
13160     if (sax != NULL)
13161 	ctxt->sax = oldsax;
13162     xmlFreeParserCtxt(ctxt);
13163 
13164     return(ret);
13165 }
13166 
13167 /**
13168  * xmlParseDoc:
13169  * @cur:  a pointer to an array of xmlChar
13170  *
13171  * DEPRECATED: Use xmlReadDoc.
13172  *
13173  * parse an XML in-memory document and build a tree.
13174  *
13175  * Returns the resulting document tree
13176  */
13177 
13178 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13179 xmlParseDoc(const xmlChar *cur) {
13180     return(xmlSAXParseDoc(NULL, cur, 0));
13181 }
13182 #endif /* LIBXML_SAX1_ENABLED */
13183 
13184 /************************************************************************
13185  *									*
13186  *	New set (2.6.0) of simpler and more flexible APIs		*
13187  *									*
13188  ************************************************************************/
13189 
13190 /**
13191  * DICT_FREE:
13192  * @str:  a string
13193  *
13194  * Free a string if it is not owned by the "dict" dictionary in the
13195  * current scope
13196  */
13197 #define DICT_FREE(str)						\
13198 	if ((str) && ((!dict) ||				\
13199 	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
13200 	    xmlFree((char *)(str));
13201 
13202 /**
13203  * xmlCtxtReset:
13204  * @ctxt: an XML parser context
13205  *
13206  * Reset a parser context
13207  */
13208 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13209 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13210 {
13211     xmlParserInputPtr input;
13212     xmlDictPtr dict;
13213 
13214     if (ctxt == NULL)
13215         return;
13216 
13217     dict = ctxt->dict;
13218 
13219     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13220         xmlFreeInputStream(input);
13221     }
13222     ctxt->inputNr = 0;
13223     ctxt->input = NULL;
13224 
13225     ctxt->spaceNr = 0;
13226     if (ctxt->spaceTab != NULL) {
13227 	ctxt->spaceTab[0] = -1;
13228 	ctxt->space = &ctxt->spaceTab[0];
13229     } else {
13230         ctxt->space = NULL;
13231     }
13232 
13233 
13234     ctxt->nodeNr = 0;
13235     ctxt->node = NULL;
13236 
13237     ctxt->nameNr = 0;
13238     ctxt->name = NULL;
13239 
13240     ctxt->nsNr = 0;
13241     xmlParserNsReset(ctxt->nsdb);
13242 
13243     DICT_FREE(ctxt->version);
13244     ctxt->version = NULL;
13245     DICT_FREE(ctxt->encoding);
13246     ctxt->encoding = NULL;
13247     DICT_FREE(ctxt->extSubURI);
13248     ctxt->extSubURI = NULL;
13249     DICT_FREE(ctxt->extSubSystem);
13250     ctxt->extSubSystem = NULL;
13251 
13252     if (ctxt->directory != NULL) {
13253         xmlFree(ctxt->directory);
13254         ctxt->directory = NULL;
13255     }
13256 
13257     if (ctxt->myDoc != NULL)
13258         xmlFreeDoc(ctxt->myDoc);
13259     ctxt->myDoc = NULL;
13260 
13261     ctxt->standalone = -1;
13262     ctxt->hasExternalSubset = 0;
13263     ctxt->hasPErefs = 0;
13264     ctxt->html = 0;
13265     ctxt->instate = XML_PARSER_START;
13266 
13267     ctxt->wellFormed = 1;
13268     ctxt->nsWellFormed = 1;
13269     ctxt->disableSAX = 0;
13270     ctxt->valid = 1;
13271     ctxt->record_info = 0;
13272     ctxt->checkIndex = 0;
13273     ctxt->endCheckState = 0;
13274     ctxt->inSubset = 0;
13275     ctxt->errNo = XML_ERR_OK;
13276     ctxt->depth = 0;
13277     ctxt->catalogs = NULL;
13278     ctxt->sizeentities = 0;
13279     ctxt->sizeentcopy = 0;
13280     xmlInitNodeInfoSeq(&ctxt->node_seq);
13281 
13282     if (ctxt->attsDefault != NULL) {
13283         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13284         ctxt->attsDefault = NULL;
13285     }
13286     if (ctxt->attsSpecial != NULL) {
13287         xmlHashFree(ctxt->attsSpecial, NULL);
13288         ctxt->attsSpecial = NULL;
13289     }
13290 
13291 #ifdef LIBXML_CATALOG_ENABLED
13292     if (ctxt->catalogs != NULL)
13293 	xmlCatalogFreeLocal(ctxt->catalogs);
13294 #endif
13295     ctxt->nbErrors = 0;
13296     ctxt->nbWarnings = 0;
13297     if (ctxt->lastError.code != XML_ERR_OK)
13298         xmlResetError(&ctxt->lastError);
13299 }
13300 
13301 /**
13302  * xmlCtxtResetPush:
13303  * @ctxt: an XML parser context
13304  * @chunk:  a pointer to an array of chars
13305  * @size:  number of chars in the array
13306  * @filename:  an optional file name or URI
13307  * @encoding:  the document encoding, or NULL
13308  *
13309  * Reset a push parser context
13310  *
13311  * Returns 0 in case of success and 1 in case of error
13312  */
13313 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13314 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13315                  int size, const char *filename, const char *encoding)
13316 {
13317     xmlParserInputPtr input;
13318 
13319     if (ctxt == NULL)
13320         return(1);
13321 
13322     xmlCtxtReset(ctxt);
13323 
13324     input = xmlNewPushInput(filename, chunk, size);
13325     if (input == NULL)
13326         return(1);
13327 
13328     if (inputPush(ctxt, input) < 0) {
13329         xmlFreeInputStream(input);
13330         return(1);
13331     }
13332 
13333     if (encoding != NULL)
13334         xmlSwitchEncodingName(ctxt, encoding);
13335 
13336     return(0);
13337 }
13338 
13339 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13340 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13341 {
13342     int allMask;
13343 
13344     if (ctxt == NULL)
13345         return(-1);
13346 
13347     /*
13348      * XInclude options aren't handled by the parser.
13349      *
13350      * XML_PARSE_XINCLUDE
13351      * XML_PARSE_NOXINCNODE
13352      * XML_PARSE_NOBASEFIX
13353      */
13354     allMask = XML_PARSE_RECOVER |
13355               XML_PARSE_NOENT |
13356               XML_PARSE_DTDLOAD |
13357               XML_PARSE_DTDATTR |
13358               XML_PARSE_DTDVALID |
13359               XML_PARSE_NOERROR |
13360               XML_PARSE_NOWARNING |
13361               XML_PARSE_PEDANTIC |
13362               XML_PARSE_NOBLANKS |
13363 #ifdef LIBXML_SAX1_ENABLED
13364               XML_PARSE_SAX1 |
13365 #endif
13366               XML_PARSE_NONET |
13367               XML_PARSE_NODICT |
13368               XML_PARSE_NSCLEAN |
13369               XML_PARSE_NOCDATA |
13370               XML_PARSE_COMPACT |
13371               XML_PARSE_OLD10 |
13372               XML_PARSE_HUGE |
13373               XML_PARSE_OLDSAX |
13374               XML_PARSE_IGNORE_ENC |
13375               XML_PARSE_BIG_LINES |
13376               XML_PARSE_NO_XXE |
13377               XML_PARSE_NO_UNZIP |
13378               XML_PARSE_NO_SYS_CATALOG |
13379               XML_PARSE_NO_CATALOG_PI;
13380 
13381     ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13382 
13383     /*
13384      * For some options, struct members are historically the source
13385      * of truth. The values are initalized from global variables and
13386      * old code could also modify them directly. Several older API
13387      * functions that don't take an options argument rely on these
13388      * deprecated mechanisms.
13389      *
13390      * Once public access to struct members and the globals are
13391      * disabled, we can use the options bitmask as source of
13392      * truth, making all these struct members obsolete.
13393      *
13394      * The XML_DETECT_IDS flags is misnamed. It simply enables
13395      * loading of the external subset.
13396      */
13397     ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13398     ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13399     ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13400     ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13401     ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13402     ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13403     ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13404     ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13405 
13406     /*
13407      * Changing SAX callbacks is a bad idea. This should be fixed.
13408      */
13409     if (options & XML_PARSE_NOBLANKS) {
13410         ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13411     }
13412     if (options & XML_PARSE_NOCDATA) {
13413         ctxt->sax->cdataBlock = NULL;
13414     }
13415     if (options & XML_PARSE_HUGE) {
13416         if (ctxt->dict != NULL)
13417             xmlDictSetLimit(ctxt->dict, 0);
13418     }
13419 
13420     ctxt->linenumbers = 1;
13421 
13422     return(options & ~allMask);
13423 }
13424 
13425 /**
13426  * xmlCtxtSetOptions:
13427  * @ctxt: an XML parser context
13428  * @options:  a bitmask of xmlParserOption values
13429  *
13430  * Applies the options to the parser context. Unset options are
13431  * cleared.
13432  *
13433  * Available since 2.13.0. With older versions, you can use
13434  * xmlCtxtUseOptions.
13435  *
13436  * XML_PARSE_RECOVER
13437  *
13438  * Enable "recovery" mode which allows non-wellformed documents.
13439  * How this mode behaves exactly is unspecified and may change
13440  * without further notice. Use of this feature is DISCOURAGED.
13441  *
13442  * XML_PARSE_NOENT
13443  *
13444  * Despite the confusing name, this option enables substitution
13445  * of entities. The resulting tree won't contain any entity
13446  * reference nodes.
13447  *
13448  * This option also enables loading of external entities (both
13449  * general and parameter entities) which is dangerous. If you
13450  * process untrusted data, it's recommended to set the
13451  * XML_PARSE_NO_XXE option to disable loading of external
13452  * entities.
13453  *
13454  * XML_PARSE_DTDLOAD
13455  *
13456  * Enables loading of an external DTD and the loading and
13457  * substitution of external parameter entities. Has no effect
13458  * if XML_PARSE_NO_XXE is set.
13459  *
13460  * XML_PARSE_DTDATTR
13461  *
13462  * Adds default attributes from the DTD to the result document.
13463  *
13464  * Implies XML_PARSE_DTDLOAD, but loading of external content
13465  * can be disabled with XML_PARSE_NO_XXE.
13466  *
13467  * XML_PARSE_DTDVALID
13468  *
13469  * This option enables DTD validation which requires to load
13470  * external DTDs and external entities (both general and
13471  * parameter entities) unless XML_PARSE_NO_XXE was set.
13472  *
13473  * XML_PARSE_NO_XXE
13474  *
13475  * Disables loading of external DTDs or entities.
13476  *
13477  * Available since 2.13.0.
13478  *
13479  * XML_PARSE_NOERROR
13480  *
13481  * Disable error and warning reports to the error handlers.
13482  * Errors are still accessible with xmlCtxtGetLastError.
13483  *
13484  * XML_PARSE_NOWARNING
13485  *
13486  * Disable warning reports.
13487  *
13488  * XML_PARSE_PEDANTIC
13489  *
13490  * Enable some pedantic warnings.
13491  *
13492  * XML_PARSE_NOBLANKS
13493  *
13494  * Remove some text nodes containing only whitespace from the
13495  * result document. Which nodes are removed depends on DTD
13496  * element declarations or a conservative heuristic. The
13497  * reindenting feature of the serialization code relies on this
13498  * option to be set when parsing. Use of this option is
13499  * DISCOURAGED.
13500  *
13501  * XML_PARSE_SAX1
13502  *
13503  * Always invoke the deprecated SAX1 startElement and endElement
13504  * handlers. This option is DEPRECATED.
13505  *
13506  * XML_PARSE_NONET
13507  *
13508  * Disable network access with the builtin HTTP client.
13509  *
13510  * XML_PARSE_NODICT
13511  *
13512  * Create a document without interned strings, making all
13513  * strings separate memory allocations.
13514  *
13515  * XML_PARSE_NSCLEAN
13516  *
13517  * Remove redundant namespace declarations from the result
13518  * document.
13519  *
13520  * XML_PARSE_NOCDATA
13521  *
13522  * Output normal text nodes instead of CDATA nodes.
13523  *
13524  * XML_PARSE_COMPACT
13525  *
13526  * Store small strings directly in the node struct to save
13527  * memory.
13528  *
13529  * XML_PARSE_OLD10
13530  *
13531  * Use old Name productions from before XML 1.0 Fifth Edition.
13532  * This options is DEPRECATED.
13533  *
13534  * XML_PARSE_HUGE
13535  *
13536  * Relax some internal limits.
13537  *
13538  * Maximum size of text nodes, tags, comments, processing instructions,
13539  * CDATA sections, entity values
13540  *
13541  * normal: 10M
13542  * huge:    1B
13543  *
13544  * Maximum size of names, system literals, pubid literals
13545  *
13546  * normal: 50K
13547  * huge:   10M
13548  *
13549  * Maximum nesting depth of elements
13550  *
13551  * normal:  256
13552  * huge:   2048
13553  *
13554  * Maximum nesting depth of entities
13555  *
13556  * normal: 20
13557  * huge:   40
13558  *
13559  * XML_PARSE_OLDSAX
13560  *
13561  * Enable an unspecified legacy mode for SAX parsers. This
13562  * option is DEPRECATED.
13563  *
13564  * XML_PARSE_IGNORE_ENC
13565  *
13566  * Ignore the encoding in the XML declaration. This option is
13567  * mostly unneeded these days. The only effect is to enforce
13568  * UTF-8 decoding of ASCII-like data.
13569  *
13570  * XML_PARSE_BIG_LINES
13571  *
13572  * Enable reporting of line numbers larger than 65535.
13573  *
13574  * XML_PARSE_NO_UNZIP
13575  *
13576  * Disables input decompression. Setting this option is recommended
13577  * to avoid zip bombs.
13578  *
13579  * Available since 2.14.0.
13580  *
13581  * XML_PARSE_NO_SYS_CATALOG
13582  *
13583  * Disables the global system XML catalog.
13584  *
13585  * Available since 2.14.0.
13586  *
13587  * XML_PARSE_NO_CATALOG_PI
13588  *
13589  * Ignore XML catalog processing instructions.
13590  *
13591  * Available since 2.14.0.
13592  *
13593  * Returns 0 in case of success, the set of unknown or unimplemented options
13594  *         in case of error.
13595  */
13596 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13597 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13598 {
13599 #ifdef LIBXML_HTML_ENABLED
13600     if ((ctxt != NULL) && (ctxt->html))
13601         return(htmlCtxtSetOptions(ctxt, options));
13602 #endif
13603 
13604     return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13605 }
13606 
13607 /**
13608  * xmlCtxtGetOptions:
13609  * @ctxt: an XML parser context
13610  *
13611  * Get the current options of the parser context.
13612  *
13613  * Available since 2.14.0.
13614  *
13615  * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13616  */
13617 int
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)13618 xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13619 {
13620     if (ctxt == NULL)
13621         return(-1);
13622 
13623     return(ctxt->options);
13624 }
13625 
13626 /**
13627  * xmlCtxtUseOptions:
13628  * @ctxt: an XML parser context
13629  * @options:  a combination of xmlParserOption
13630  *
13631  * DEPRECATED: Use xmlCtxtSetOptions.
13632  *
13633  * Applies the options to the parser context. The following options
13634  * are never cleared and can only be enabled:
13635  *
13636  * XML_PARSE_NOERROR
13637  * XML_PARSE_NOWARNING
13638  * XML_PARSE_NONET
13639  * XML_PARSE_NSCLEAN
13640  * XML_PARSE_NOCDATA
13641  * XML_PARSE_COMPACT
13642  * XML_PARSE_OLD10
13643  * XML_PARSE_HUGE
13644  * XML_PARSE_OLDSAX
13645  * XML_PARSE_IGNORE_ENC
13646  * XML_PARSE_BIG_LINES
13647  *
13648  * Returns 0 in case of success, the set of unknown or unimplemented options
13649  *         in case of error.
13650  */
13651 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13652 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13653 {
13654     int keepMask;
13655 
13656 #ifdef LIBXML_HTML_ENABLED
13657     if ((ctxt != NULL) && (ctxt->html))
13658         return(htmlCtxtUseOptions(ctxt, options));
13659 #endif
13660 
13661     /*
13662      * For historic reasons, some options can only be enabled.
13663      */
13664     keepMask = XML_PARSE_NOERROR |
13665                XML_PARSE_NOWARNING |
13666                XML_PARSE_NONET |
13667                XML_PARSE_NSCLEAN |
13668                XML_PARSE_NOCDATA |
13669                XML_PARSE_COMPACT |
13670                XML_PARSE_OLD10 |
13671                XML_PARSE_HUGE |
13672                XML_PARSE_OLDSAX |
13673                XML_PARSE_IGNORE_ENC |
13674                XML_PARSE_BIG_LINES;
13675 
13676     return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13677 }
13678 
13679 /**
13680  * xmlCtxtSetMaxAmplification:
13681  * @ctxt: an XML parser context
13682  * @maxAmpl:  maximum amplification factor
13683  *
13684  * To protect against exponential entity expansion ("billion laughs"), the
13685  * size of serialized output is (roughly) limited to the input size
13686  * multiplied by this factor. The default value is 5.
13687  *
13688  * When working with documents making heavy use of entity expansion, it can
13689  * be necessary to increase the value. For security reasons, this should only
13690  * be considered when processing trusted input.
13691  */
13692 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13693 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13694 {
13695     ctxt->maxAmpl = maxAmpl;
13696 }
13697 
13698 /**
13699  * xmlCtxtParseDocument:
13700  * @ctxt:  an XML parser context
13701  * @input:  parser input
13702  *
13703  * Parse an XML document and return the resulting document tree.
13704  * Takes ownership of the input object.
13705  *
13706  * Available since 2.13.0.
13707  *
13708  * Returns the resulting document tree or NULL
13709  */
13710 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13711 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13712 {
13713     xmlDocPtr ret = NULL;
13714 
13715     if ((ctxt == NULL) || (input == NULL))
13716         return(NULL);
13717 
13718     /* assert(ctxt->inputNr == 0); */
13719     while (ctxt->inputNr > 0)
13720         xmlFreeInputStream(inputPop(ctxt));
13721 
13722     if (inputPush(ctxt, input) < 0) {
13723         xmlFreeInputStream(input);
13724         return(NULL);
13725     }
13726 
13727     xmlParseDocument(ctxt);
13728 
13729     if ((ctxt->wellFormed) ||
13730         ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13731         ret = ctxt->myDoc;
13732     } else {
13733         if (ctxt->errNo == XML_ERR_OK)
13734             xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13735 
13736         ret = NULL;
13737 	xmlFreeDoc(ctxt->myDoc);
13738     }
13739     ctxt->myDoc = NULL;
13740 
13741     /* assert(ctxt->inputNr == 1); */
13742     while (ctxt->inputNr > 0)
13743         xmlFreeInputStream(inputPop(ctxt));
13744 
13745     return(ret);
13746 }
13747 
13748 /**
13749  * xmlReadDoc:
13750  * @cur:  a pointer to a zero terminated string
13751  * @URL:  base URL (optional)
13752  * @encoding:  the document encoding (optional)
13753  * @options:  a combination of xmlParserOption
13754  *
13755  * Convenience function to parse an XML document from a
13756  * zero-terminated string.
13757  *
13758  * See xmlCtxtReadDoc for details.
13759  *
13760  * Returns the resulting document tree
13761  */
13762 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13763 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13764            int options)
13765 {
13766     xmlParserCtxtPtr ctxt;
13767     xmlParserInputPtr input;
13768     xmlDocPtr doc;
13769 
13770     ctxt = xmlNewParserCtxt();
13771     if (ctxt == NULL)
13772         return(NULL);
13773 
13774     xmlCtxtUseOptions(ctxt, options);
13775 
13776     input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13777                                       XML_INPUT_BUF_STATIC);
13778 
13779     doc = xmlCtxtParseDocument(ctxt, input);
13780 
13781     xmlFreeParserCtxt(ctxt);
13782     return(doc);
13783 }
13784 
13785 /**
13786  * xmlReadFile:
13787  * @filename:  a file or URL
13788  * @encoding:  the document encoding (optional)
13789  * @options:  a combination of xmlParserOption
13790  *
13791  * Convenience function to parse an XML file from the filesystem,
13792  * the network or a global user-define resource loader.
13793  *
13794  * See xmlCtxtReadFile for details.
13795  *
13796  * Returns the resulting document tree
13797  */
13798 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13799 xmlReadFile(const char *filename, const char *encoding, int options)
13800 {
13801     xmlParserCtxtPtr ctxt;
13802     xmlParserInputPtr input;
13803     xmlDocPtr doc;
13804 
13805     ctxt = xmlNewParserCtxt();
13806     if (ctxt == NULL)
13807         return(NULL);
13808 
13809     xmlCtxtUseOptions(ctxt, options);
13810 
13811     /*
13812      * Backward compatibility for users of command line utilities like
13813      * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13814      * should be removed at some point.
13815      */
13816     if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13817         input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13818                                       encoding, 0);
13819     else
13820         input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13821 
13822     doc = xmlCtxtParseDocument(ctxt, input);
13823 
13824     xmlFreeParserCtxt(ctxt);
13825     return(doc);
13826 }
13827 
13828 /**
13829  * xmlReadMemory:
13830  * @buffer:  a pointer to a char array
13831  * @size:  the size of the array
13832  * @url:  base URL (optional)
13833  * @encoding:  the document encoding (optional)
13834  * @options:  a combination of xmlParserOption
13835  *
13836  * Parse an XML in-memory document and build a tree. The input buffer must
13837  * not contain a terminating null byte.
13838  *
13839  * See xmlCtxtReadMemory for details.
13840  *
13841  * Returns the resulting document tree
13842  */
13843 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13844 xmlReadMemory(const char *buffer, int size, const char *url,
13845               const char *encoding, int options)
13846 {
13847     xmlParserCtxtPtr ctxt;
13848     xmlParserInputPtr input;
13849     xmlDocPtr doc;
13850 
13851     if (size < 0)
13852 	return(NULL);
13853 
13854     ctxt = xmlNewParserCtxt();
13855     if (ctxt == NULL)
13856         return(NULL);
13857 
13858     xmlCtxtUseOptions(ctxt, options);
13859 
13860     input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13861                                       XML_INPUT_BUF_STATIC);
13862 
13863     doc = xmlCtxtParseDocument(ctxt, input);
13864 
13865     xmlFreeParserCtxt(ctxt);
13866     return(doc);
13867 }
13868 
13869 /**
13870  * xmlReadFd:
13871  * @fd:  an open file descriptor
13872  * @URL:  base URL (optional)
13873  * @encoding:  the document encoding (optional)
13874  * @options:  a combination of xmlParserOption
13875  *
13876  * Parse an XML from a file descriptor and build a tree.
13877  *
13878  * See xmlCtxtReadFd for details.
13879  *
13880  * NOTE that the file descriptor will not be closed when the
13881  * context is freed or reset.
13882  *
13883  * Returns the resulting document tree
13884  */
13885 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13886 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13887 {
13888     xmlParserCtxtPtr ctxt;
13889     xmlParserInputPtr input;
13890     xmlDocPtr doc;
13891 
13892     ctxt = xmlNewParserCtxt();
13893     if (ctxt == NULL)
13894         return(NULL);
13895 
13896     xmlCtxtUseOptions(ctxt, options);
13897 
13898     input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13899 
13900     doc = xmlCtxtParseDocument(ctxt, input);
13901 
13902     xmlFreeParserCtxt(ctxt);
13903     return(doc);
13904 }
13905 
13906 /**
13907  * xmlReadIO:
13908  * @ioread:  an I/O read function
13909  * @ioclose:  an I/O close function (optional)
13910  * @ioctx:  an I/O handler
13911  * @URL:  base URL (optional)
13912  * @encoding:  the document encoding (optional)
13913  * @options:  a combination of xmlParserOption
13914  *
13915  * Parse an XML document from I/O functions and context and build a tree.
13916  *
13917  * See xmlCtxtReadIO for details.
13918  *
13919  * Returns the resulting document tree
13920  */
13921 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13922 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13923           void *ioctx, const char *URL, const char *encoding, int options)
13924 {
13925     xmlParserCtxtPtr ctxt;
13926     xmlParserInputPtr input;
13927     xmlDocPtr doc;
13928 
13929     ctxt = xmlNewParserCtxt();
13930     if (ctxt == NULL)
13931         return(NULL);
13932 
13933     xmlCtxtUseOptions(ctxt, options);
13934 
13935     input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13936                                   encoding, 0);
13937 
13938     doc = xmlCtxtParseDocument(ctxt, input);
13939 
13940     xmlFreeParserCtxt(ctxt);
13941     return(doc);
13942 }
13943 
13944 /**
13945  * xmlCtxtReadDoc:
13946  * @ctxt:  an XML parser context
13947  * @str:  a pointer to a zero terminated string
13948  * @URL:  base URL (optional)
13949  * @encoding:  the document encoding (optional)
13950  * @options:  a combination of xmlParserOption
13951  *
13952  * Parse an XML in-memory document and build a tree.
13953  *
13954  * @URL is used as base to resolve external entities and for error
13955  * reporting.
13956  *
13957  * See xmlCtxtUseOptions for details.
13958  *
13959  * Returns the resulting document tree
13960  */
13961 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13962 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13963                const char *URL, const char *encoding, int options)
13964 {
13965     xmlParserInputPtr input;
13966 
13967     if (ctxt == NULL)
13968         return(NULL);
13969 
13970     xmlCtxtReset(ctxt);
13971     xmlCtxtUseOptions(ctxt, options);
13972 
13973     input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13974                                       XML_INPUT_BUF_STATIC);
13975 
13976     return(xmlCtxtParseDocument(ctxt, input));
13977 }
13978 
13979 /**
13980  * xmlCtxtReadFile:
13981  * @ctxt:  an XML parser context
13982  * @filename:  a file or URL
13983  * @encoding:  the document encoding (optional)
13984  * @options:  a combination of xmlParserOption
13985  *
13986  * Parse an XML file from the filesystem, the network or a user-defined
13987  * resource loader.
13988  *
13989  * Returns the resulting document tree
13990  */
13991 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13992 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13993                 const char *encoding, int options)
13994 {
13995     xmlParserInputPtr input;
13996 
13997     if (ctxt == NULL)
13998         return(NULL);
13999 
14000     xmlCtxtReset(ctxt);
14001     xmlCtxtUseOptions(ctxt, options);
14002 
14003     input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14004 
14005     return(xmlCtxtParseDocument(ctxt, input));
14006 }
14007 
14008 /**
14009  * xmlCtxtReadMemory:
14010  * @ctxt:  an XML parser context
14011  * @buffer:  a pointer to a char array
14012  * @size:  the size of the array
14013  * @URL:  base URL (optional)
14014  * @encoding:  the document encoding (optional)
14015  * @options:  a combination of xmlParserOption
14016  *
14017  * Parse an XML in-memory document and build a tree. The input buffer must
14018  * not contain a terminating null byte.
14019  *
14020  * @URL is used as base to resolve external entities and for error
14021  * reporting.
14022  *
14023  * See xmlCtxtUseOptions for details.
14024  *
14025  * Returns the resulting document tree
14026  */
14027 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14028 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14029                   const char *URL, const char *encoding, int options)
14030 {
14031     xmlParserInputPtr input;
14032 
14033     if ((ctxt == NULL) || (size < 0))
14034         return(NULL);
14035 
14036     xmlCtxtReset(ctxt);
14037     xmlCtxtUseOptions(ctxt, options);
14038 
14039     input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14040                                       XML_INPUT_BUF_STATIC);
14041 
14042     return(xmlCtxtParseDocument(ctxt, input));
14043 }
14044 
14045 /**
14046  * xmlCtxtReadFd:
14047  * @ctxt:  an XML parser context
14048  * @fd:  an open file descriptor
14049  * @URL:  base URL (optional)
14050  * @encoding:  the document encoding (optional)
14051  * @options:  a combination of xmlParserOption
14052  *
14053  * Parse an XML document from a file descriptor and build a tree.
14054  *
14055  * NOTE that the file descriptor will not be closed when the
14056  * context is freed or reset.
14057  *
14058  * @URL is used as base to resolve external entities and for error
14059  * reporting.
14060  *
14061  * See xmlCtxtUseOptions for details.
14062  *
14063  * Returns the resulting document tree
14064  */
14065 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14066 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14067               const char *URL, const char *encoding, int options)
14068 {
14069     xmlParserInputPtr input;
14070 
14071     if (ctxt == NULL)
14072         return(NULL);
14073 
14074     xmlCtxtReset(ctxt);
14075     xmlCtxtUseOptions(ctxt, options);
14076 
14077     input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14078 
14079     return(xmlCtxtParseDocument(ctxt, input));
14080 }
14081 
14082 /**
14083  * xmlCtxtReadIO:
14084  * @ctxt:  an XML parser context
14085  * @ioread:  an I/O read function
14086  * @ioclose:  an I/O close function
14087  * @ioctx:  an I/O handler
14088  * @URL:  the base URL to use for the document
14089  * @encoding:  the document encoding, or NULL
14090  * @options:  a combination of xmlParserOption
14091  *
14092  * parse an XML document from I/O functions and source and build a tree.
14093  * This reuses the existing @ctxt parser context
14094  *
14095  * @URL is used as base to resolve external entities and for error
14096  * reporting.
14097  *
14098  * See xmlCtxtUseOptions for details.
14099  *
14100  * Returns the resulting document tree
14101  */
14102 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14103 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14104               xmlInputCloseCallback ioclose, void *ioctx,
14105 	      const char *URL,
14106               const char *encoding, int options)
14107 {
14108     xmlParserInputPtr input;
14109 
14110     if (ctxt == NULL)
14111         return(NULL);
14112 
14113     xmlCtxtReset(ctxt);
14114     xmlCtxtUseOptions(ctxt, options);
14115 
14116     input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14117                                   encoding, 0);
14118 
14119     return(xmlCtxtParseDocument(ctxt, input));
14120 }
14121 
14122