1 /*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relevant
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX2.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * [email protected]
31 */
32
33 /* To avoid EBCDIC trouble when parsing on zOS */
34 #if defined(__MVS__)
35 #pragma convert("ISO8859-1")
36 #endif
37
38 #define IN_LIBXML
39 #include "libxml.h"
40
/* Directory separator character: backslash when _WIN32 is defined, slash otherwise. */
#ifdef _WIN32
  #define XML_DIR_SEP '\\'
#else
  #define XML_DIR_SEP '/'
#endif
46
47 #include <stdlib.h>
48 #include <limits.h>
49 #include <string.h>
50 #include <stdarg.h>
51 #include <stddef.h>
52 #include <ctype.h>
53 #include <stdlib.h>
54 #include <libxml/parser.h>
55 #include <libxml/xmlmemory.h>
56 #include <libxml/tree.h>
57 #include <libxml/parserInternals.h>
58 #include <libxml/valid.h>
59 #include <libxml/entities.h>
60 #include <libxml/xmlerror.h>
61 #include <libxml/encoding.h>
62 #include <libxml/xmlIO.h>
63 #include <libxml/uri.h>
64 #include <libxml/SAX2.h>
65 #include <libxml/HTMLparser.h>
66 #ifdef LIBXML_CATALOG_ENABLED
67 #include <libxml/catalog.h>
68 #endif
69
70 #include "private/buf.h"
71 #include "private/dict.h"
72 #include "private/entities.h"
73 #include "private/error.h"
74 #include "private/html.h"
75 #include "private/io.h"
76 #include "private/parser.h"
77
/*
 * Reserved namespace index values and URI hash constants.
 * NOTE(review): presumably sentinel indices for "no namespace" and the
 * predefined "xml" namespace, with matching precomputed hash values;
 * confirm against the namespace lookup code.
 */
#define NS_INDEX_EMPTY  INT_MAX
#define NS_INDEX_XML    (INT_MAX - 1)
#define URI_HASH_EMPTY  0xD943A04E
#define URI_HASH_XML    0xF0451F02

/* Fallback for platforms whose headers do not define STDIN_FILENO. */
#if !defined(STDIN_FILENO)
  #define STDIN_FILENO 0
#endif
86
87 struct _xmlStartTag {
88 const xmlChar *prefix;
89 const xmlChar *URI;
90 int line;
91 int nsNr;
92 };
93
/*
 * Additional per-namespace bookkeeping.
 * NOTE(review): the role of each field is not visible in this part of the
 * file; the names suggest hash values of prefix and URI, the id of the
 * declaring element and a previous index.
 */
typedef struct {
    void *saxData;
    unsigned int prefixHashValue;
    unsigned int uriHashValue;
    unsigned int elementId;
    int oldIndex;
} xmlParserNsExtra;
101
/*
 * One slot of the namespace hash table: a hash value plus an index.
 * NOTE(review): what the index refers to is not shown here.
 */
typedef struct {
    unsigned int hashValue;
    int index;
} xmlParserNsBucket;
106
107 struct _xmlParserNsData {
108 xmlParserNsExtra *extra;
109
110 unsigned hashSize;
111 unsigned hashElems;
112 xmlParserNsBucket *hash;
113
114 unsigned elementId;
115 int defaultNsIndex;
116 int minNsIndex;
117 };
118
119 static int
120 xmlParseElementStart(xmlParserCtxtPtr ctxt);
121
122 static void
123 xmlParseElementEnd(xmlParserCtxtPtr ctxt);
124
125 static xmlEntityPtr
126 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
127
128 static const xmlChar *
129 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
130
131 /************************************************************************
132 * *
133 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
134 * *
135 ************************************************************************/
136
/* NOTE(review): the users of these two thresholds are not visible here. */
#define XML_PARSER_BIG_ENTITY   1000
#define XML_PARSER_LOT_ENTITY   5000

/*
 * Constants protecting against abusive entity expansion
 * ("billion laughs").
 */

/*
 * Amount of entity expansion that is always permitted, regardless of the
 * amplification factor.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * Fixed cost charged for every entity reference. This is a crude model
 * of processing time and guards, for example, against exponential
 * expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST 20
156
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the nesting depth of the XML documents we are
 * willing to process. This is a safety boundary rather than a limitation
 * of the parser itself. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
const unsigned int xmlParserMaxDepth = 256;
166
167
168
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data the parser
 * expects to have received. It is not a hard limit but an optimization
 * when reading strings like Names. It is not strictly needed as long as
 * the input's available characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
183
184 /**
185 * xmlParserVersion:
186 *
187 * Constant string describing the internal version of the library
188 */
189 const char *const
190 xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
191
/*
 * NULL-terminated list of "xml"-prefixed processing instruction targets
 * allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
201
202
203 /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
204 static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
205 const xmlChar **str);
206
207 static void
208 xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
209
210 static int
211 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
212
213 /************************************************************************
214 * *
215 * Some factorized error routines *
216 * *
217 ************************************************************************/
218
219 static void
xmlErrMemory(xmlParserCtxtPtr ctxt)220 xmlErrMemory(xmlParserCtxtPtr ctxt) {
221 xmlCtxtErrMemory(ctxt);
222 }
223
224 /**
225 * xmlErrAttributeDup:
226 * @ctxt: an XML parser context
227 * @prefix: the attribute prefix
228 * @localname: the attribute localname
229 *
230 * Handle a redefinition of attribute error
231 */
232 static void
xmlErrAttributeDup(xmlParserCtxtPtr ctxt,const xmlChar * prefix,const xmlChar * localname)233 xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
234 const xmlChar * localname)
235 {
236 if (prefix == NULL)
237 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
238 XML_ERR_FATAL, localname, NULL, NULL, 0,
239 "Attribute %s redefined\n", localname);
240 else
241 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
242 XML_ERR_FATAL, prefix, localname, NULL, 0,
243 "Attribute %s:%s redefined\n", prefix, localname);
244 }
245
246 /**
247 * xmlFatalErrMsg:
248 * @ctxt: an XML parser context
249 * @error: the error number
250 * @msg: the error message
251 *
252 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
253 */
254 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg)255 xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
256 const char *msg)
257 {
258 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
259 NULL, NULL, NULL, 0, "%s", msg);
260 }
261
262 /**
263 * xmlWarningMsg:
264 * @ctxt: an XML parser context
265 * @error: the error number
266 * @msg: the error message
267 * @str1: extra data
268 * @str2: extra data
269 *
270 * Handle a warning.
271 */
272 void LIBXML_ATTR_FORMAT(3,0)
xmlWarningMsg(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)273 xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
274 const char *msg, const xmlChar *str1, const xmlChar *str2)
275 {
276 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
277 str1, str2, NULL, 0, msg, str1, str2);
278 }
279
280 /**
281 * xmlValidityError:
282 * @ctxt: an XML parser context
283 * @error: the error number
284 * @msg: the error message
285 * @str1: extra data
286 *
287 * Handle a validity error.
288 */
289 static void LIBXML_ATTR_FORMAT(3,0)
xmlValidityError(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,const xmlChar * str2)290 xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
291 const char *msg, const xmlChar *str1, const xmlChar *str2)
292 {
293 ctxt->valid = 0;
294
295 xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
296 str1, str2, NULL, 0, msg, str1, str2);
297 }
298
299 /**
300 * xmlFatalErrMsgInt:
301 * @ctxt: an XML parser context
302 * @error: the error number
303 * @msg: the error message
304 * @val: an integer value
305 *
306 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
307 */
308 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,int val)309 xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
310 const char *msg, int val)
311 {
312 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
313 NULL, NULL, NULL, val, msg, val);
314 }
315
316 /**
317 * xmlFatalErrMsgStrIntStr:
318 * @ctxt: an XML parser context
319 * @error: the error number
320 * @msg: the error message
321 * @str1: an string info
322 * @val: an integer value
323 * @str2: an string info
324 *
325 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
326 */
327 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * str1,int val,const xmlChar * str2)328 xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
329 const char *msg, const xmlChar *str1, int val,
330 const xmlChar *str2)
331 {
332 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
333 str1, str2, NULL, val, msg, str1, val, str2);
334 }
335
336 /**
337 * xmlFatalErrMsgStr:
338 * @ctxt: an XML parser context
339 * @error: the error number
340 * @msg: the error message
341 * @val: a string value
342 *
343 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
344 */
345 static void LIBXML_ATTR_FORMAT(3,0)
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)346 xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
347 const char *msg, const xmlChar * val)
348 {
349 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
350 val, NULL, NULL, 0, msg, val);
351 }
352
353 /**
354 * xmlErrMsgStr:
355 * @ctxt: an XML parser context
356 * @error: the error number
357 * @msg: the error message
358 * @val: a string value
359 *
360 * Handle a non fatal parser error
361 */
362 static void LIBXML_ATTR_FORMAT(3,0)
xmlErrMsgStr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * val)363 xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
364 const char *msg, const xmlChar * val)
365 {
366 xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
367 val, NULL, NULL, 0, msg, val);
368 }
369
370 /**
371 * xmlNsErr:
372 * @ctxt: an XML parser context
373 * @error: the error number
374 * @msg: the message
375 * @info1: extra information string
376 * @info2: extra information string
377 *
378 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
379 */
380 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsErr(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)381 xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
382 const char *msg,
383 const xmlChar * info1, const xmlChar * info2,
384 const xmlChar * info3)
385 {
386 ctxt->nsWellFormed = 0;
387
388 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
389 info1, info2, info3, 0, msg, info1, info2, info3);
390 }
391
392 /**
393 * xmlNsWarn
394 * @ctxt: an XML parser context
395 * @error: the error number
396 * @msg: the message
397 * @info1: extra information string
398 * @info2: extra information string
399 *
400 * Handle a namespace warning error
401 */
402 static void LIBXML_ATTR_FORMAT(3,0)
xmlNsWarn(xmlParserCtxtPtr ctxt,xmlParserErrors error,const char * msg,const xmlChar * info1,const xmlChar * info2,const xmlChar * info3)403 xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
404 const char *msg,
405 const xmlChar * info1, const xmlChar * info2,
406 const xmlChar * info3)
407 {
408 xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
409 info1, info2, info3, 0, msg, info1, info2, info3);
410 }
411
/*
 * Add val to *dst, clamping the result at ULONG_MAX instead of wrapping
 * around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : *dst + val;
}
419
/*
 * Add a size_t quantity to *dst, clamping the result at ULONG_MAX.
 *
 * val is taken as size_t so that pointer differences are not silently
 * truncated on platforms where size_t is wider than unsigned long
 * (e.g. 64-bit Windows). The comparison is carried out in the wider of
 * the two types, so any val that does not fit saturates *dst.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
427
428 /**
429 * xmlParserEntityCheck:
430 * @ctxt: parser context
431 * @extra: sum of unexpanded entity sizes
432 *
433 * Check for non-linear entity expansion behaviour.
434 *
435 * In some cases like xmlExpandEntityInAttValue, this function is called
436 * for each, possibly nested entity and its unexpanded content length.
437 *
438 * In other cases like xmlParseReference, it's only called for each
439 * top-level entity with its unexpanded content length plus the sum of
440 * the unexpanded content lengths (plus fixed cost) of all nested
441 * entities.
442 *
443 * Summing the unexpanded lengths also adds the length of the reference.
444 * This is by design. Taking the length of the entity name into account
445 * discourages attacks that try to waste CPU time with abusively long
446 * entity names. See test/recurse/lol6.xml for example. Each call also
447 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
448 * short entities.
449 *
450 * Returns 1 on error, 0 on success.
451 */
452 static int
xmlParserEntityCheck(xmlParserCtxtPtr ctxt,unsigned long extra)453 xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
454 {
455 unsigned long consumed;
456 unsigned long *expandedSize;
457 xmlParserInputPtr input = ctxt->input;
458 xmlEntityPtr entity = input->entity;
459
460 if ((entity) && (entity->flags & XML_ENT_CHECKED))
461 return(0);
462
463 /*
464 * Compute total consumed bytes so far, including input streams of
465 * external entities.
466 */
467 consumed = input->consumed;
468 xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
469 xmlSaturatedAdd(&consumed, ctxt->sizeentities);
470
471 if (entity)
472 expandedSize = &entity->expandedSize;
473 else
474 expandedSize = &ctxt->sizeentcopy;
475
476 /*
477 * Add extra cost and some fixed cost.
478 */
479 xmlSaturatedAdd(expandedSize, extra);
480 xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
481
482 /*
483 * It's important to always use saturation arithmetic when tracking
484 * entity sizes to make the size checks reliable. If "sizeentcopy"
485 * overflows, we have to abort.
486 */
487 if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
488 ((*expandedSize >= ULONG_MAX) ||
489 (*expandedSize / ctxt->maxAmpl > consumed))) {
490 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
491 "Maximum entity amplification factor exceeded, see "
492 "xmlCtxtSetMaxAmplification.\n");
493 xmlHaltParser(ctxt);
494 return(1);
495 }
496
497 return(0);
498 }
499
500 /************************************************************************
501 * *
502 * Library wide options *
503 * *
504 ************************************************************************/
505
506 /**
507 * xmlHasFeature:
508 * @feature: the feature to be examined
509 *
510 * Examines if the library has been compiled with a given feature.
511 *
512 * Returns a non-zero value if the feature exist, otherwise zero.
513 * Returns zero (0) if the feature does not exist or an unknown
514 * unknown feature is requested, non-zero otherwise.
515 */
516 int
xmlHasFeature(xmlFeature feature)517 xmlHasFeature(xmlFeature feature)
518 {
519 switch (feature) {
520 case XML_WITH_THREAD:
521 #ifdef LIBXML_THREAD_ENABLED
522 return(1);
523 #else
524 return(0);
525 #endif
526 case XML_WITH_TREE:
527 return(1);
528 case XML_WITH_OUTPUT:
529 #ifdef LIBXML_OUTPUT_ENABLED
530 return(1);
531 #else
532 return(0);
533 #endif
534 case XML_WITH_PUSH:
535 #ifdef LIBXML_PUSH_ENABLED
536 return(1);
537 #else
538 return(0);
539 #endif
540 case XML_WITH_READER:
541 #ifdef LIBXML_READER_ENABLED
542 return(1);
543 #else
544 return(0);
545 #endif
546 case XML_WITH_PATTERN:
547 #ifdef LIBXML_PATTERN_ENABLED
548 return(1);
549 #else
550 return(0);
551 #endif
552 case XML_WITH_WRITER:
553 #ifdef LIBXML_WRITER_ENABLED
554 return(1);
555 #else
556 return(0);
557 #endif
558 case XML_WITH_SAX1:
559 #ifdef LIBXML_SAX1_ENABLED
560 return(1);
561 #else
562 return(0);
563 #endif
564 case XML_WITH_HTTP:
565 #ifdef LIBXML_HTTP_ENABLED
566 return(1);
567 #else
568 return(0);
569 #endif
570 case XML_WITH_VALID:
571 #ifdef LIBXML_VALID_ENABLED
572 return(1);
573 #else
574 return(0);
575 #endif
576 case XML_WITH_HTML:
577 #ifdef LIBXML_HTML_ENABLED
578 return(1);
579 #else
580 return(0);
581 #endif
582 case XML_WITH_LEGACY:
583 #ifdef LIBXML_LEGACY_ENABLED
584 return(1);
585 #else
586 return(0);
587 #endif
588 case XML_WITH_C14N:
589 #ifdef LIBXML_C14N_ENABLED
590 return(1);
591 #else
592 return(0);
593 #endif
594 case XML_WITH_CATALOG:
595 #ifdef LIBXML_CATALOG_ENABLED
596 return(1);
597 #else
598 return(0);
599 #endif
600 case XML_WITH_XPATH:
601 #ifdef LIBXML_XPATH_ENABLED
602 return(1);
603 #else
604 return(0);
605 #endif
606 case XML_WITH_XPTR:
607 #ifdef LIBXML_XPTR_ENABLED
608 return(1);
609 #else
610 return(0);
611 #endif
612 case XML_WITH_XINCLUDE:
613 #ifdef LIBXML_XINCLUDE_ENABLED
614 return(1);
615 #else
616 return(0);
617 #endif
618 case XML_WITH_ICONV:
619 #ifdef LIBXML_ICONV_ENABLED
620 return(1);
621 #else
622 return(0);
623 #endif
624 case XML_WITH_ISO8859X:
625 #ifdef LIBXML_ISO8859X_ENABLED
626 return(1);
627 #else
628 return(0);
629 #endif
630 case XML_WITH_UNICODE:
631 #ifdef LIBXML_UNICODE_ENABLED
632 return(1);
633 #else
634 return(0);
635 #endif
636 case XML_WITH_REGEXP:
637 #ifdef LIBXML_REGEXP_ENABLED
638 return(1);
639 #else
640 return(0);
641 #endif
642 case XML_WITH_AUTOMATA:
643 #ifdef LIBXML_REGEXP_ENABLED
644 return(1);
645 #else
646 return(0);
647 #endif
648 case XML_WITH_EXPR:
649 #ifdef LIBXML_EXPR_ENABLED
650 return(1);
651 #else
652 return(0);
653 #endif
654 case XML_WITH_SCHEMAS:
655 #ifdef LIBXML_SCHEMAS_ENABLED
656 return(1);
657 #else
658 return(0);
659 #endif
660 case XML_WITH_SCHEMATRON:
661 #ifdef LIBXML_SCHEMATRON_ENABLED
662 return(1);
663 #else
664 return(0);
665 #endif
666 case XML_WITH_MODULES:
667 #ifdef LIBXML_MODULES_ENABLED
668 return(1);
669 #else
670 return(0);
671 #endif
672 case XML_WITH_DEBUG:
673 #ifdef LIBXML_DEBUG_ENABLED
674 return(1);
675 #else
676 return(0);
677 #endif
678 case XML_WITH_DEBUG_MEM:
679 return(0);
680 case XML_WITH_ZLIB:
681 #ifdef LIBXML_ZLIB_ENABLED
682 return(1);
683 #else
684 return(0);
685 #endif
686 case XML_WITH_LZMA:
687 #ifdef LIBXML_LZMA_ENABLED
688 return(1);
689 #else
690 return(0);
691 #endif
692 case XML_WITH_ICU:
693 #ifdef LIBXML_ICU_ENABLED
694 return(1);
695 #else
696 return(0);
697 #endif
698 default:
699 break;
700 }
701 return(0);
702 }
703
704 /************************************************************************
705 * *
706 * Simple string buffer *
707 * *
708 ************************************************************************/
709
710 typedef struct {
711 xmlChar *mem;
712 unsigned size;
713 unsigned cap; /* size < cap */
714 unsigned max; /* size <= max */
715 xmlParserErrors code;
716 } xmlSBuf;
717
718 static void
xmlSBufInit(xmlSBuf * buf,unsigned max)719 xmlSBufInit(xmlSBuf *buf, unsigned max) {
720 buf->mem = NULL;
721 buf->size = 0;
722 buf->cap = 0;
723 buf->max = max;
724 buf->code = XML_ERR_OK;
725 }
726
727 static int
xmlSBufGrow(xmlSBuf * buf,unsigned len)728 xmlSBufGrow(xmlSBuf *buf, unsigned len) {
729 xmlChar *mem;
730 unsigned cap;
731
732 if (len >= UINT_MAX / 2 - buf->size) {
733 if (buf->code == XML_ERR_OK)
734 buf->code = XML_ERR_RESOURCE_LIMIT;
735 return(-1);
736 }
737
738 cap = (buf->size + len) * 2;
739 if (cap < 240)
740 cap = 240;
741
742 mem = xmlRealloc(buf->mem, cap);
743 if (mem == NULL) {
744 buf->code = XML_ERR_NO_MEMORY;
745 return(-1);
746 }
747
748 buf->mem = mem;
749 buf->cap = cap;
750
751 return(0);
752 }
753
754 static void
xmlSBufAddString(xmlSBuf * buf,const xmlChar * str,unsigned len)755 xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
756 if (buf->max - buf->size < len) {
757 if (buf->code == XML_ERR_OK)
758 buf->code = XML_ERR_RESOURCE_LIMIT;
759 return;
760 }
761
762 if (buf->cap - buf->size <= len) {
763 if (xmlSBufGrow(buf, len) < 0)
764 return;
765 }
766
767 if (len > 0)
768 memcpy(buf->mem + buf->size, str, len);
769 buf->size += len;
770 }
771
772 static void
xmlSBufAddCString(xmlSBuf * buf,const char * str,unsigned len)773 xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
774 xmlSBufAddString(buf, (const xmlChar *) str, len);
775 }
776
777 static void
xmlSBufAddChar(xmlSBuf * buf,int c)778 xmlSBufAddChar(xmlSBuf *buf, int c) {
779 xmlChar *end;
780
781 if (buf->max - buf->size < 4) {
782 if (buf->code == XML_ERR_OK)
783 buf->code = XML_ERR_RESOURCE_LIMIT;
784 return;
785 }
786
787 if (buf->cap - buf->size <= 4) {
788 if (xmlSBufGrow(buf, 4) < 0)
789 return;
790 }
791
792 end = buf->mem + buf->size;
793
794 if (c < 0x80) {
795 *end = (xmlChar) c;
796 buf->size += 1;
797 } else {
798 buf->size += xmlCopyCharMultiByte(end, c);
799 }
800 }
801
802 static void
xmlSBufAddReplChar(xmlSBuf * buf)803 xmlSBufAddReplChar(xmlSBuf *buf) {
804 xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
805 }
806
807 static void
xmlSBufReportError(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)808 xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
809 if (buf->code == XML_ERR_NO_MEMORY)
810 xmlCtxtErrMemory(ctxt);
811 else
812 xmlFatalErr(ctxt, buf->code, errMsg);
813 }
814
815 static xmlChar *
xmlSBufFinish(xmlSBuf * buf,int * sizeOut,xmlParserCtxtPtr ctxt,const char * errMsg)816 xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
817 const char *errMsg) {
818 if (buf->mem == NULL) {
819 buf->mem = xmlMalloc(1);
820 if (buf->mem == NULL) {
821 buf->code = XML_ERR_NO_MEMORY;
822 } else {
823 buf->mem[0] = 0;
824 }
825 } else {
826 buf->mem[buf->size] = 0;
827 }
828
829 if (buf->code == XML_ERR_OK) {
830 if (sizeOut != NULL)
831 *sizeOut = buf->size;
832 return(buf->mem);
833 }
834
835 xmlSBufReportError(buf, ctxt, errMsg);
836
837 xmlFree(buf->mem);
838
839 if (sizeOut != NULL)
840 *sizeOut = 0;
841 return(NULL);
842 }
843
844 static void
xmlSBufCleanup(xmlSBuf * buf,xmlParserCtxtPtr ctxt,const char * errMsg)845 xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
846 if (buf->code != XML_ERR_OK)
847 xmlSBufReportError(buf, ctxt, errMsg);
848
849 xmlFree(buf->mem);
850 }
851
852 static int
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * errMsg)853 xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
854 const char *errMsg) {
855 int c = str[0];
856 int c1 = str[1];
857
858 if ((c1 & 0xC0) != 0x80)
859 goto encoding_error;
860
861 if (c < 0xE0) {
862 /* 2-byte sequence */
863 if (c < 0xC2)
864 goto encoding_error;
865
866 return(2);
867 } else {
868 int c2 = str[2];
869
870 if ((c2 & 0xC0) != 0x80)
871 goto encoding_error;
872
873 if (c < 0xF0) {
874 /* 3-byte sequence */
875 if (c == 0xE0) {
876 /* overlong */
877 if (c1 < 0xA0)
878 goto encoding_error;
879 } else if (c == 0xED) {
880 /* surrogate */
881 if (c1 >= 0xA0)
882 goto encoding_error;
883 } else if (c == 0xEF) {
884 /* U+FFFE and U+FFFF are invalid Chars */
885 if ((c1 == 0xBF) && (c2 >= 0xBE))
886 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
887 }
888
889 return(3);
890 } else {
891 /* 4-byte sequence */
892 if ((str[3] & 0xC0) != 0x80)
893 goto encoding_error;
894 if (c == 0xF0) {
895 /* overlong */
896 if (c1 < 0x90)
897 goto encoding_error;
898 } else if (c >= 0xF4) {
899 /* greater than 0x10FFFF */
900 if ((c > 0xF4) || (c1 >= 0x90))
901 goto encoding_error;
902 }
903
904 return(4);
905 }
906 }
907
908 encoding_error:
909 /* Only report the first error */
910 if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
911 xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
912 ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
913 }
914
915 return(0);
916 }
917
918 /************************************************************************
919 * *
920 * SAX2 defaulted attributes handling *
921 * *
922 ************************************************************************/
923
924 /**
925 * xmlCtxtInitializeLate:
926 * @ctxt: an XML parser context
927 *
928 * Final initialization of the parser context before starting to parse.
929 *
930 * This accounts for users modifying struct members of parser context
931 * directly.
932 */
933 static void
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt)934 xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
935 xmlSAXHandlerPtr sax;
936
937 /* Avoid unused variable warning if features are disabled. */
938 (void) sax;
939
940 /*
941 * Changing the SAX struct directly is still widespread practice
942 * in internal and external code.
943 */
944 if (ctxt == NULL) return;
945 sax = ctxt->sax;
946 #ifdef LIBXML_SAX1_ENABLED
947 /*
948 * Only enable SAX2 if there SAX2 element handlers, except when there
949 * are no element handlers at all.
950 */
951 if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
952 (sax) &&
953 (sax->initialized == XML_SAX2_MAGIC) &&
954 ((sax->startElementNs != NULL) ||
955 (sax->endElementNs != NULL) ||
956 ((sax->startElement == NULL) && (sax->endElement == NULL))))
957 ctxt->sax2 = 1;
958 #else
959 ctxt->sax2 = 1;
960 #endif /* LIBXML_SAX1_ENABLED */
961
962 /*
963 * Some users replace the dictionary directly in the context struct.
964 * We really need an API function to do that cleanly.
965 */
966 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
967 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
968 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
969 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
970 (ctxt->str_xml_ns == NULL)) {
971 xmlErrMemory(ctxt);
972 }
973
974 xmlDictSetLimit(ctxt->dict,
975 (ctxt->options & XML_PARSE_HUGE) ?
976 0 :
977 XML_MAX_DICTIONARY_LIMIT);
978 }
979
980 typedef struct {
981 xmlHashedString prefix;
982 xmlHashedString name;
983 xmlHashedString value;
984 const xmlChar *valueEnd;
985 int external;
986 int expandedSize;
987 } xmlDefAttr;
988
989 typedef struct _xmlDefAttrs xmlDefAttrs;
990 typedef xmlDefAttrs *xmlDefAttrsPtr;
991 struct _xmlDefAttrs {
992 int nbAttrs; /* number of defaulted attributes on that element */
993 int maxAttrs; /* the size of the array */
994 #if __STDC_VERSION__ >= 199901L
995 /* Using a C99 flexible array member avoids UBSan errors. */
996 xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
997 #else
998 xmlDefAttr attrs[1];
999 #endif
1000 };
1001
1002 /**
1003 * xmlAttrNormalizeSpace:
1004 * @src: the source string
1005 * @dst: the target string
1006 *
1007 * Normalize the space in non CDATA attribute values:
1008 * If the attribute type is not CDATA, then the XML processor MUST further
1009 * process the normalized attribute value by discarding any leading and
1010 * trailing space (#x20) characters, and by replacing sequences of space
1011 * (#x20) characters by a single space (#x20) character.
1012 * Note that the size of dst need to be at least src, and if one doesn't need
1013 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1014 * passing src as dst is just fine.
1015 *
1016 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1017 * is needed.
1018 */
1019 static xmlChar *
xmlAttrNormalizeSpace(const xmlChar * src,xmlChar * dst)1020 xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1021 {
1022 if ((src == NULL) || (dst == NULL))
1023 return(NULL);
1024
1025 while (*src == 0x20) src++;
1026 while (*src != 0) {
1027 if (*src == 0x20) {
1028 while (*src == 0x20) src++;
1029 if (*src != 0)
1030 *dst++ = 0x20;
1031 } else {
1032 *dst++ = *src++;
1033 }
1034 }
1035 *dst = 0;
1036 if (dst == src)
1037 return(NULL);
1038 return(dst);
1039 }
1040
1041 /**
1042 * xmlAddDefAttrs:
1043 * @ctxt: an XML parser context
1044 * @fullname: the element fullname
1045 * @fullattr: the attribute fullname
1046 * @value: the attribute value
1047 *
1048 * Add a defaulted attribute for an element
1049 */
1050 static void
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * value)1051 xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1052 const xmlChar *fullname,
1053 const xmlChar *fullattr,
1054 const xmlChar *value) {
1055 xmlDefAttrsPtr defaults;
1056 xmlDefAttr *attr;
1057 int len, expandedSize;
1058 xmlHashedString name;
1059 xmlHashedString prefix;
1060 xmlHashedString hvalue;
1061 const xmlChar *localname;
1062
1063 /*
1064 * Allows to detect attribute redefinitions
1065 */
1066 if (ctxt->attsSpecial != NULL) {
1067 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1068 return;
1069 }
1070
1071 if (ctxt->attsDefault == NULL) {
1072 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1073 if (ctxt->attsDefault == NULL)
1074 goto mem_error;
1075 }
1076
1077 /*
1078 * split the element name into prefix:localname , the string found
1079 * are within the DTD and then not associated to namespace names.
1080 */
1081 localname = xmlSplitQName3(fullname, &len);
1082 if (localname == NULL) {
1083 name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1084 prefix.name = NULL;
1085 } else {
1086 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1087 prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1088 if (prefix.name == NULL)
1089 goto mem_error;
1090 }
1091 if (name.name == NULL)
1092 goto mem_error;
1093
1094 /*
1095 * make sure there is some storage
1096 */
1097 defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1098 if ((defaults == NULL) ||
1099 (defaults->nbAttrs >= defaults->maxAttrs)) {
1100 xmlDefAttrsPtr temp;
1101 int newSize;
1102
1103 newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1104 temp = xmlRealloc(defaults,
1105 sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1106 if (temp == NULL)
1107 goto mem_error;
1108 if (defaults == NULL)
1109 temp->nbAttrs = 0;
1110 temp->maxAttrs = newSize;
1111 defaults = temp;
1112 if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1113 defaults, NULL) < 0) {
1114 xmlFree(defaults);
1115 goto mem_error;
1116 }
1117 }
1118
1119 /*
1120 * Split the attribute name into prefix:localname , the string found
1121 * are within the DTD and hen not associated to namespace names.
1122 */
1123 localname = xmlSplitQName3(fullattr, &len);
1124 if (localname == NULL) {
1125 name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1126 prefix.name = NULL;
1127 } else {
1128 name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1129 prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1130 if (prefix.name == NULL)
1131 goto mem_error;
1132 }
1133 if (name.name == NULL)
1134 goto mem_error;
1135
1136 /* intern the string and precompute the end */
1137 len = strlen((const char *) value);
1138 hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1139 if (hvalue.name == NULL)
1140 goto mem_error;
1141
1142 expandedSize = strlen((const char *) name.name);
1143 if (prefix.name != NULL)
1144 expandedSize += strlen((const char *) prefix.name);
1145 expandedSize += len;
1146
1147 attr = &defaults->attrs[defaults->nbAttrs++];
1148 attr->name = name;
1149 attr->prefix = prefix;
1150 attr->value = hvalue;
1151 attr->valueEnd = hvalue.name + len;
1152 attr->external = PARSER_EXTERNAL(ctxt);
1153 attr->expandedSize = expandedSize;
1154
1155 return;
1156
1157 mem_error:
1158 xmlErrMemory(ctxt);
1159 }
1160
1161 /**
1162 * xmlAddSpecialAttr:
1163 * @ctxt: an XML parser context
1164 * @fullname: the element fullname
1165 * @fullattr: the attribute fullname
1166 * @type: the attribute type
1167 *
1168 * Register this attribute type
1169 */
1170 static void
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,const xmlChar * fullname,const xmlChar * fullattr,int type)1171 xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1172 const xmlChar *fullname,
1173 const xmlChar *fullattr,
1174 int type)
1175 {
1176 if (ctxt->attsSpecial == NULL) {
1177 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1178 if (ctxt->attsSpecial == NULL)
1179 goto mem_error;
1180 }
1181
1182 if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1183 (void *) (ptrdiff_t) type) < 0)
1184 goto mem_error;
1185 return;
1186
1187 mem_error:
1188 xmlErrMemory(ctxt);
1189 }
1190
1191 /**
1192 * xmlCleanSpecialAttrCallback:
1193 *
1194 * Removes CDATA attributes from the special attribute table
1195 */
1196 static void
xmlCleanSpecialAttrCallback(void * payload,void * data,const xmlChar * fullname,const xmlChar * fullattr,const xmlChar * unused ATTRIBUTE_UNUSED)1197 xmlCleanSpecialAttrCallback(void *payload, void *data,
1198 const xmlChar *fullname, const xmlChar *fullattr,
1199 const xmlChar *unused ATTRIBUTE_UNUSED) {
1200 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1201
1202 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1203 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1204 }
1205 }
1206
1207 /**
1208 * xmlCleanSpecialAttr:
1209 * @ctxt: an XML parser context
1210 *
1211 * Trim the list of attributes defined to remove all those of type
1212 * CDATA as they are not special. This call should be done when finishing
1213 * to parse the DTD and before starting to parse the document root.
1214 */
1215 static void
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)1216 xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1217 {
1218 if (ctxt->attsSpecial == NULL)
1219 return;
1220
1221 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1222
1223 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1224 xmlHashFree(ctxt->attsSpecial, NULL);
1225 ctxt->attsSpecial = NULL;
1226 }
1227 }
1228
/*
 * Skip a run of ASCII letters starting at @cur and return a pointer to
 * the first character which is not one (possibly the terminating 0).
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *cur) {
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return(cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * The irregular i-xxx IANA codes and x-xxx user codes are still allowed;
 * as required by productions [36] and [37] at least one letter must
 * follow the "i-" or "x-" and nothing but letters may follow.
 *
 * What the code below accepts besides that:
 *  - a primary tag of 4 to 8 letters with nothing after it;
 *  - a primary tag of 2 or 3 letters optionally followed, in this order
 *    and separated by '-', by one extlang (3 letters), a script
 *    (4 letters), a region (2 letters or 3 digits) and a variant
 *    (5 to 8 letters).
 * Only all-letter variants are recognized. Once a variant has been
 * parsed, whatever follows a '-' is accepted unchecked: the parser
 * doesn't try to cope with extension or privateuse, that could be added
 * but that's not interoperable anyway.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        nxt = xmlLangSkipAlpha(cur);
        /* "i-" or "x-" alone is not a code: require one letter at least */
        return((nxt > cur) && (nxt[0] == 0));
    }

    nxt = xmlLangSkipAlpha(cur);
    if (nxt - cur >= 4) {
        /*
         * Reserved (4 letters) or registered (5 to 8 letters) language
         * subtag, nothing is allowed to follow
         */
        return((nxt - cur <= 8) && (nxt[0] == 0));
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    nxt = xmlLangSkipAlpha(nxt);
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    nxt = xmlLangSkipAlpha(nxt);
    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /* nxt[0] is known to be a digit, a UN M.49 code needs two more */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1424
1425 /************************************************************************
1426 * *
1427 * Parser stacks related functions and macros *
1428 * *
1429 ************************************************************************/
1430
1431 static xmlChar *
1432 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1433
1434 /**
1435 * xmlParserNsCreate:
1436 *
1437 * Create a new namespace database.
1438 *
1439 * Returns the new obejct.
1440 */
1441 xmlParserNsData *
xmlParserNsCreate(void)1442 xmlParserNsCreate(void) {
1443 xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1444
1445 if (nsdb == NULL)
1446 return(NULL);
1447 memset(nsdb, 0, sizeof(*nsdb));
1448 nsdb->defaultNsIndex = INT_MAX;
1449
1450 return(nsdb);
1451 }
1452
1453 /**
1454 * xmlParserNsFree:
1455 * @nsdb: namespace database
1456 *
1457 * Free a namespace database.
1458 */
1459 void
xmlParserNsFree(xmlParserNsData * nsdb)1460 xmlParserNsFree(xmlParserNsData *nsdb) {
1461 if (nsdb == NULL)
1462 return;
1463
1464 xmlFree(nsdb->extra);
1465 xmlFree(nsdb->hash);
1466 xmlFree(nsdb);
1467 }
1468
1469 /**
1470 * xmlParserNsReset:
1471 * @nsdb: namespace database
1472 *
1473 * Reset a namespace database.
1474 */
1475 static void
xmlParserNsReset(xmlParserNsData * nsdb)1476 xmlParserNsReset(xmlParserNsData *nsdb) {
1477 if (nsdb == NULL)
1478 return;
1479
1480 nsdb->hashElems = 0;
1481 nsdb->elementId = 0;
1482 nsdb->defaultNsIndex = INT_MAX;
1483
1484 if (nsdb->hash)
1485 memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1486 }
1487
1488 /**
1489 * xmlParserStartElement:
1490 * @nsdb: namespace database
1491 *
1492 * Signal that a new element has started.
1493 *
1494 * Returns 0 on success, -1 if the element counter overflowed.
1495 */
1496 static int
xmlParserNsStartElement(xmlParserNsData * nsdb)1497 xmlParserNsStartElement(xmlParserNsData *nsdb) {
1498 if (nsdb->elementId == UINT_MAX)
1499 return(-1);
1500 nsdb->elementId++;
1501
1502 return(0);
1503 }
1504
1505 /**
1506 * xmlParserNsLookup:
1507 * @ctxt: parser context
1508 * @prefix: namespace prefix
1509 * @bucketPtr: optional bucket (return value)
1510 *
1511 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1512 * be set to the matching bucket, or the first empty bucket if no match
1513 * was found.
1514 *
1515 * Returns the namespace index on success, INT_MAX if no namespace was
1516 * found.
1517 */
1518 static int
xmlParserNsLookup(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,xmlParserNsBucket ** bucketPtr)1519 xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1520 xmlParserNsBucket **bucketPtr) {
1521 xmlParserNsBucket *bucket, *tombstone;
1522 unsigned index, hashValue;
1523
1524 if (prefix->name == NULL)
1525 return(ctxt->nsdb->defaultNsIndex);
1526
1527 if (ctxt->nsdb->hashSize == 0)
1528 return(INT_MAX);
1529
1530 hashValue = prefix->hashValue;
1531 index = hashValue & (ctxt->nsdb->hashSize - 1);
1532 bucket = &ctxt->nsdb->hash[index];
1533 tombstone = NULL;
1534
1535 while (bucket->hashValue) {
1536 if (bucket->index == INT_MAX) {
1537 if (tombstone == NULL)
1538 tombstone = bucket;
1539 } else if (bucket->hashValue == hashValue) {
1540 if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1541 if (bucketPtr != NULL)
1542 *bucketPtr = bucket;
1543 return(bucket->index);
1544 }
1545 }
1546
1547 index++;
1548 bucket++;
1549 if (index == ctxt->nsdb->hashSize) {
1550 index = 0;
1551 bucket = ctxt->nsdb->hash;
1552 }
1553 }
1554
1555 if (bucketPtr != NULL)
1556 *bucketPtr = tombstone ? tombstone : bucket;
1557 return(INT_MAX);
1558 }
1559
1560 /**
1561 * xmlParserNsLookupUri:
1562 * @ctxt: parser context
1563 * @prefix: namespace prefix
1564 *
1565 * Lookup namespace URI with given prefix.
1566 *
1567 * Returns the namespace URI on success, NULL if no namespace was found.
1568 */
1569 static const xmlChar *
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix)1570 xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1571 const xmlChar *ret;
1572 int nsIndex;
1573
1574 if (prefix->name == ctxt->str_xml)
1575 return(ctxt->str_xml_ns);
1576
1577 /*
1578 * minNsIndex is used when building an entity tree. We must
1579 * ignore namespaces declared outside the entity.
1580 */
1581 nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1582 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1583 return(NULL);
1584
1585 ret = ctxt->nsTab[nsIndex * 2 + 1];
1586 if (ret[0] == 0)
1587 ret = NULL;
1588 return(ret);
1589 }
1590
1591 /**
1592 * xmlParserNsLookupSax:
1593 * @ctxt: parser context
1594 * @prefix: namespace prefix
1595 *
1596 * Lookup extra data for the given prefix. This returns data stored
1597 * with xmlParserNsUdpateSax.
1598 *
1599 * Returns the data on success, NULL if no namespace was found.
1600 */
1601 void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix)1602 xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1603 xmlHashedString hprefix;
1604 int nsIndex;
1605
1606 if (prefix == ctxt->str_xml)
1607 return(NULL);
1608
1609 hprefix.name = prefix;
1610 if (prefix != NULL)
1611 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1612 else
1613 hprefix.hashValue = 0;
1614 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1615 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1616 return(NULL);
1617
1618 return(ctxt->nsdb->extra[nsIndex].saxData);
1619 }
1620
1621 /**
1622 * xmlParserNsUpdateSax:
1623 * @ctxt: parser context
1624 * @prefix: namespace prefix
1625 * @saxData: extra data for SAX handler
1626 *
1627 * Sets or updates extra data for the given prefix. This value will be
1628 * returned by xmlParserNsLookupSax as long as the namespace with the
1629 * given prefix is in scope.
1630 *
1631 * Returns the data on success, NULL if no namespace was found.
1632 */
1633 int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt,const xmlChar * prefix,void * saxData)1634 xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1635 void *saxData) {
1636 xmlHashedString hprefix;
1637 int nsIndex;
1638
1639 if (prefix == ctxt->str_xml)
1640 return(-1);
1641
1642 hprefix.name = prefix;
1643 if (prefix != NULL)
1644 hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1645 else
1646 hprefix.hashValue = 0;
1647 nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1648 if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1649 return(-1);
1650
1651 ctxt->nsdb->extra[nsIndex].saxData = saxData;
1652 return(0);
1653 }
1654
1655 /**
1656 * xmlParserNsGrow:
1657 * @ctxt: parser context
1658 *
1659 * Grows the namespace tables.
1660 *
1661 * Returns 0 on success, -1 if a memory allocation failed.
1662 */
1663 static int
xmlParserNsGrow(xmlParserCtxtPtr ctxt)1664 xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1665 const xmlChar **table;
1666 xmlParserNsExtra *extra;
1667 int newSize;
1668
1669 if (ctxt->nsMax > INT_MAX / 2)
1670 goto error;
1671 newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1672
1673 table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1674 if (table == NULL)
1675 goto error;
1676 ctxt->nsTab = table;
1677
1678 extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1679 if (extra == NULL)
1680 goto error;
1681 ctxt->nsdb->extra = extra;
1682
1683 ctxt->nsMax = newSize;
1684 return(0);
1685
1686 error:
1687 xmlErrMemory(ctxt);
1688 return(-1);
1689 }
1690
1691 /**
1692 * xmlParserNsPush:
1693 * @ctxt: parser context
1694 * @prefix: prefix with hash value
1695 * @uri: uri with hash value
1696 * @saxData: extra data for SAX handler
1697 * @defAttr: whether the namespace comes from a default attribute
1698 *
1699 * Push a new namespace on the table.
1700 *
1701 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1702 * -1 if a memory allocation failed.
1703 */
1704 static int
xmlParserNsPush(xmlParserCtxtPtr ctxt,const xmlHashedString * prefix,const xmlHashedString * uri,void * saxData,int defAttr)1705 xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1706 const xmlHashedString *uri, void *saxData, int defAttr) {
1707 xmlParserNsBucket *bucket = NULL;
1708 xmlParserNsExtra *extra;
1709 const xmlChar **ns;
1710 unsigned hashValue, nsIndex, oldIndex;
1711
1712 if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1713 return(0);
1714
1715 if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1716 xmlErrMemory(ctxt);
1717 return(-1);
1718 }
1719
1720 /*
1721 * Default namespace and 'xml' namespace
1722 */
1723 if ((prefix == NULL) || (prefix->name == NULL)) {
1724 oldIndex = ctxt->nsdb->defaultNsIndex;
1725
1726 if (oldIndex != INT_MAX) {
1727 extra = &ctxt->nsdb->extra[oldIndex];
1728
1729 if (extra->elementId == ctxt->nsdb->elementId) {
1730 if (defAttr == 0)
1731 xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1732 return(0);
1733 }
1734
1735 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1736 (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1737 return(0);
1738 }
1739
1740 ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1741 goto populate_entry;
1742 }
1743
1744 /*
1745 * Hash table lookup
1746 */
1747 oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1748 if (oldIndex != INT_MAX) {
1749 extra = &ctxt->nsdb->extra[oldIndex];
1750
1751 /*
1752 * Check for duplicate definitions on the same element.
1753 */
1754 if (extra->elementId == ctxt->nsdb->elementId) {
1755 if (defAttr == 0)
1756 xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1757 return(0);
1758 }
1759
1760 if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1761 (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1762 return(0);
1763
1764 bucket->index = ctxt->nsNr;
1765 goto populate_entry;
1766 }
1767
1768 /*
1769 * Insert new bucket
1770 */
1771
1772 hashValue = prefix->hashValue;
1773
1774 /*
1775 * Grow hash table, 50% fill factor
1776 */
1777 if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1778 xmlParserNsBucket *newHash;
1779 unsigned newSize, i, index;
1780
1781 if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1782 xmlErrMemory(ctxt);
1783 return(-1);
1784 }
1785 newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1786 newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1787 if (newHash == NULL) {
1788 xmlErrMemory(ctxt);
1789 return(-1);
1790 }
1791 memset(newHash, 0, newSize * sizeof(newHash[0]));
1792
1793 for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1794 unsigned hv = ctxt->nsdb->hash[i].hashValue;
1795 unsigned newIndex;
1796
1797 if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1798 continue;
1799 newIndex = hv & (newSize - 1);
1800
1801 while (newHash[newIndex].hashValue != 0) {
1802 newIndex++;
1803 if (newIndex == newSize)
1804 newIndex = 0;
1805 }
1806
1807 newHash[newIndex] = ctxt->nsdb->hash[i];
1808 }
1809
1810 xmlFree(ctxt->nsdb->hash);
1811 ctxt->nsdb->hash = newHash;
1812 ctxt->nsdb->hashSize = newSize;
1813
1814 /*
1815 * Relookup
1816 */
1817 index = hashValue & (newSize - 1);
1818
1819 while (newHash[index].hashValue != 0) {
1820 index++;
1821 if (index == newSize)
1822 index = 0;
1823 }
1824
1825 bucket = &newHash[index];
1826 }
1827
1828 bucket->hashValue = hashValue;
1829 bucket->index = ctxt->nsNr;
1830 ctxt->nsdb->hashElems++;
1831 oldIndex = INT_MAX;
1832
1833 populate_entry:
1834 nsIndex = ctxt->nsNr;
1835
1836 ns = &ctxt->nsTab[nsIndex * 2];
1837 ns[0] = prefix ? prefix->name : NULL;
1838 ns[1] = uri->name;
1839
1840 extra = &ctxt->nsdb->extra[nsIndex];
1841 extra->saxData = saxData;
1842 extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1843 extra->uriHashValue = uri->hashValue;
1844 extra->elementId = ctxt->nsdb->elementId;
1845 extra->oldIndex = oldIndex;
1846
1847 ctxt->nsNr++;
1848
1849 return(1);
1850 }
1851
1852 /**
1853 * xmlParserNsPop:
1854 * @ctxt: an XML parser context
1855 * @nr: the number to pop
1856 *
1857 * Pops the top @nr namespaces and restores the hash table.
1858 *
1859 * Returns the number of namespaces popped.
1860 */
1861 static int
xmlParserNsPop(xmlParserCtxtPtr ctxt,int nr)1862 xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1863 {
1864 int i;
1865
1866 /* assert(nr <= ctxt->nsNr); */
1867
1868 for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1869 const xmlChar *prefix = ctxt->nsTab[i * 2];
1870 xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1871
1872 if (prefix == NULL) {
1873 ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1874 } else {
1875 xmlHashedString hprefix;
1876 xmlParserNsBucket *bucket = NULL;
1877
1878 hprefix.name = prefix;
1879 hprefix.hashValue = extra->prefixHashValue;
1880 xmlParserNsLookup(ctxt, &hprefix, &bucket);
1881 /* assert(bucket && bucket->hashValue); */
1882 bucket->index = extra->oldIndex;
1883 }
1884 }
1885
1886 ctxt->nsNr -= nr;
1887 return(nr);
1888 }
1889
1890 static int
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt,int nr)1891 xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1892 const xmlChar **atts;
1893 unsigned *attallocs;
1894 int maxatts;
1895
1896 if (nr + 5 > ctxt->maxatts) {
1897 maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1898 atts = (const xmlChar **) xmlMalloc(
1899 maxatts * sizeof(const xmlChar *));
1900 if (atts == NULL) goto mem_error;
1901 attallocs = xmlRealloc(ctxt->attallocs,
1902 (maxatts / 5) * sizeof(attallocs[0]));
1903 if (attallocs == NULL) {
1904 xmlFree(atts);
1905 goto mem_error;
1906 }
1907 if (ctxt->maxatts > 0)
1908 memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1909 xmlFree(ctxt->atts);
1910 ctxt->atts = atts;
1911 ctxt->attallocs = attallocs;
1912 ctxt->maxatts = maxatts;
1913 }
1914 return(ctxt->maxatts);
1915 mem_error:
1916 xmlErrMemory(ctxt);
1917 return(-1);
1918 }
1919
1920 /**
1921 * inputPush:
1922 * @ctxt: an XML parser context
1923 * @value: the parser input
1924 *
1925 * Pushes a new parser input on top of the input stack
1926 *
1927 * Returns -1 in case of error, the index in the stack otherwise
1928 */
1929 int
inputPush(xmlParserCtxtPtr ctxt,xmlParserInputPtr value)1930 inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1931 {
1932 char *directory = NULL;
1933
1934 if ((ctxt == NULL) || (value == NULL))
1935 return(-1);
1936
1937 if (ctxt->inputNr >= ctxt->inputMax) {
1938 size_t newSize = ctxt->inputMax * 2;
1939 xmlParserInputPtr *tmp;
1940
1941 tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1942 newSize * sizeof(*tmp));
1943 if (tmp == NULL) {
1944 xmlErrMemory(ctxt);
1945 return (-1);
1946 }
1947 ctxt->inputTab = tmp;
1948 ctxt->inputMax = newSize;
1949 }
1950
1951 if ((ctxt->inputNr == 0) && (value->filename != NULL)) {
1952 directory = xmlParserGetDirectory(value->filename);
1953 if (directory == NULL) {
1954 xmlErrMemory(ctxt);
1955 return(-1);
1956 }
1957 }
1958
1959 if (ctxt->input_id >= INT_MAX) {
1960 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT, "Input ID overflow\n");
1961 return(-1);
1962 }
1963
1964 ctxt->inputTab[ctxt->inputNr] = value;
1965 ctxt->input = value;
1966
1967 if (ctxt->inputNr == 0) {
1968 xmlFree(ctxt->directory);
1969 ctxt->directory = directory;
1970 }
1971
1972 /*
1973 * Internally, the input ID is only used to detect parameter entity
1974 * boundaries. But there are entity loaders in downstream code that
1975 * detect the main document by checking for "input_id == 1".
1976 */
1977 value->id = ctxt->input_id++;
1978
1979 return(ctxt->inputNr++);
1980 }
1981 /**
1982 * inputPop:
1983 * @ctxt: an XML parser context
1984 *
1985 * Pops the top parser input from the input stack
1986 *
1987 * Returns the input just removed
1988 */
1989 xmlParserInputPtr
inputPop(xmlParserCtxtPtr ctxt)1990 inputPop(xmlParserCtxtPtr ctxt)
1991 {
1992 xmlParserInputPtr ret;
1993
1994 if (ctxt == NULL)
1995 return(NULL);
1996 if (ctxt->inputNr <= 0)
1997 return (NULL);
1998 ctxt->inputNr--;
1999 if (ctxt->inputNr > 0)
2000 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
2001 else
2002 ctxt->input = NULL;
2003 ret = ctxt->inputTab[ctxt->inputNr];
2004 ctxt->inputTab[ctxt->inputNr] = NULL;
2005 return (ret);
2006 }
2007 /**
2008 * nodePush:
2009 * @ctxt: an XML parser context
2010 * @value: the element node
2011 *
2012 * DEPRECATED: Internal function, do not use.
2013 *
2014 * Pushes a new element node on top of the node stack
2015 *
2016 * Returns -1 in case of error, the index in the stack otherwise
2017 */
2018 int
nodePush(xmlParserCtxtPtr ctxt,xmlNodePtr value)2019 nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
2020 {
2021 int maxDepth;
2022
2023 if (ctxt == NULL)
2024 return(0);
2025
2026 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2027 if (ctxt->nodeNr > maxDepth) {
2028 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2029 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2030 ctxt->nodeNr);
2031 xmlHaltParser(ctxt);
2032 return(-1);
2033 }
2034 if (ctxt->nodeNr >= ctxt->nodeMax) {
2035 xmlNodePtr *tmp;
2036
2037 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2038 ctxt->nodeMax * 2 *
2039 sizeof(ctxt->nodeTab[0]));
2040 if (tmp == NULL) {
2041 xmlErrMemory(ctxt);
2042 return (-1);
2043 }
2044 ctxt->nodeTab = tmp;
2045 ctxt->nodeMax *= 2;
2046 }
2047 ctxt->nodeTab[ctxt->nodeNr] = value;
2048 ctxt->node = value;
2049 return (ctxt->nodeNr++);
2050 }
2051
2052 /**
2053 * nodePop:
2054 * @ctxt: an XML parser context
2055 *
2056 * DEPRECATED: Internal function, do not use.
2057 *
2058 * Pops the top element node from the node stack
2059 *
2060 * Returns the node just removed
2061 */
2062 xmlNodePtr
nodePop(xmlParserCtxtPtr ctxt)2063 nodePop(xmlParserCtxtPtr ctxt)
2064 {
2065 xmlNodePtr ret;
2066
2067 if (ctxt == NULL) return(NULL);
2068 if (ctxt->nodeNr <= 0)
2069 return (NULL);
2070 ctxt->nodeNr--;
2071 if (ctxt->nodeNr > 0)
2072 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2073 else
2074 ctxt->node = NULL;
2075 ret = ctxt->nodeTab[ctxt->nodeNr];
2076 ctxt->nodeTab[ctxt->nodeNr] = NULL;
2077 return (ret);
2078 }
2079
2080 /**
2081 * nameNsPush:
2082 * @ctxt: an XML parser context
2083 * @value: the element name
2084 * @prefix: the element prefix
2085 * @URI: the element namespace name
2086 * @line: the current line number for error messages
2087 * @nsNr: the number of namespaces pushed on the namespace table
2088 *
2089 * Pushes a new element name/prefix/URL on top of the name stack
2090 *
2091 * Returns -1 in case of error, the index in the stack otherwise
2092 */
2093 static int
nameNsPush(xmlParserCtxtPtr ctxt,const xmlChar * value,const xmlChar * prefix,const xmlChar * URI,int line,int nsNr)2094 nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2095 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2096 {
2097 xmlStartTag *tag;
2098
2099 if (ctxt->nameNr >= ctxt->nameMax) {
2100 const xmlChar * *tmp;
2101 xmlStartTag *tmp2;
2102 ctxt->nameMax *= 2;
2103 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2104 ctxt->nameMax *
2105 sizeof(ctxt->nameTab[0]));
2106 if (tmp == NULL) {
2107 ctxt->nameMax /= 2;
2108 goto mem_error;
2109 }
2110 ctxt->nameTab = tmp;
2111 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2112 ctxt->nameMax *
2113 sizeof(ctxt->pushTab[0]));
2114 if (tmp2 == NULL) {
2115 ctxt->nameMax /= 2;
2116 goto mem_error;
2117 }
2118 ctxt->pushTab = tmp2;
2119 } else if (ctxt->pushTab == NULL) {
2120 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2121 sizeof(ctxt->pushTab[0]));
2122 if (ctxt->pushTab == NULL)
2123 goto mem_error;
2124 }
2125 ctxt->nameTab[ctxt->nameNr] = value;
2126 ctxt->name = value;
2127 tag = &ctxt->pushTab[ctxt->nameNr];
2128 tag->prefix = prefix;
2129 tag->URI = URI;
2130 tag->line = line;
2131 tag->nsNr = nsNr;
2132 return (ctxt->nameNr++);
2133 mem_error:
2134 xmlErrMemory(ctxt);
2135 return (-1);
2136 }
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt:  an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. ctxt->name
 * is updated to the new top of the stack, or NULL if the stack became
 * empty, and the vacated nameTab slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2163
2164 /**
2165 * namePush:
2166 * @ctxt: an XML parser context
2167 * @value: the element name
2168 *
2169 * DEPRECATED: Internal function, do not use.
2170 *
2171 * Pushes a new element name on top of the name stack
2172 *
2173 * Returns -1 in case of error, the index in the stack otherwise
2174 */
2175 int
namePush(xmlParserCtxtPtr ctxt,const xmlChar * value)2176 namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2177 {
2178 if (ctxt == NULL) return (-1);
2179
2180 if (ctxt->nameNr >= ctxt->nameMax) {
2181 const xmlChar * *tmp;
2182 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2183 ctxt->nameMax * 2 *
2184 sizeof(ctxt->nameTab[0]));
2185 if (tmp == NULL) {
2186 goto mem_error;
2187 }
2188 ctxt->nameTab = tmp;
2189 ctxt->nameMax *= 2;
2190 }
2191 ctxt->nameTab[ctxt->nameNr] = value;
2192 ctxt->name = value;
2193 return (ctxt->nameNr++);
2194 mem_error:
2195 xmlErrMemory(ctxt);
2196 return (-1);
2197 }
2198
2199 /**
2200 * namePop:
2201 * @ctxt: an XML parser context
2202 *
2203 * DEPRECATED: Internal function, do not use.
2204 *
2205 * Pops the top element name from the name stack
2206 *
2207 * Returns the name just removed
2208 */
2209 const xmlChar *
namePop(xmlParserCtxtPtr ctxt)2210 namePop(xmlParserCtxtPtr ctxt)
2211 {
2212 const xmlChar *ret;
2213
2214 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2215 return (NULL);
2216 ctxt->nameNr--;
2217 if (ctxt->nameNr > 0)
2218 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2219 else
2220 ctxt->name = NULL;
2221 ret = ctxt->nameTab[ctxt->nameNr];
2222 ctxt->nameTab[ctxt->nameNr] = NULL;
2223 return (ret);
2224 }
2225
/*
 * Push @val on top of the ctxt->spaceTab stack, doubling the stack when
 * it is full, and make ctxt->space point to the new top entry.
 *
 * Returns the index in the stack, or -1 if a memory allocation failed.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* spaceMax is doubled up front and restored on failure. */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2244
/*
 * Pop the top value from the ctxt->spaceTab stack. ctxt->space is moved
 * to the new top entry, or to the first slot if the stack became empty,
 * and the vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped, top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2257
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skips n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1(l) Skips 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skips to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skips the current unicode character of l xmlChars long.
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copies the current unicode char to the target buffer,
 *           increments the index
 *   GROW, SHRINK  handling of input buffers
 */
2290
2291 #define RAW (*ctxt->input->cur)
2292 #define CUR (*ctxt->input->cur)
2293 #define NXT(val) ctxt->input->cur[(val)]
2294 #define CUR_PTR ctxt->input->cur
2295 #define BASE_PTR ctxt->input->base
2296
2297 #define CMP4( s, c1, c2, c3, c4 ) \
2298 ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2299 ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2300 #define CMP5( s, c1, c2, c3, c4, c5 ) \
2301 ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2302 #define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2303 ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2304 #define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2305 ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2306 #define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2307 ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2308 #define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2309 ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2310 ((unsigned char *) s)[ 8 ] == c9 )
2311 #define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2312 ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2313 ((unsigned char *) s)[ 9 ] == c10 )
2314
2315 #define SKIP(val) do { \
2316 ctxt->input->cur += (val),ctxt->input->col+=(val); \
2317 if (*ctxt->input->cur == 0) \
2318 xmlParserGrow(ctxt); \
2319 } while (0)
2320
2321 #define SKIPL(val) do { \
2322 int skipl; \
2323 for(skipl=0; skipl<val; skipl++) { \
2324 if (*(ctxt->input->cur) == '\n') { \
2325 ctxt->input->line++; ctxt->input->col = 1; \
2326 } else ctxt->input->col++; \
2327 ctxt->input->cur++; \
2328 } \
2329 if (*ctxt->input->cur == 0) \
2330 xmlParserGrow(ctxt); \
2331 } while (0)
2332
2333 #define SHRINK \
2334 if ((!PARSER_PROGRESSIVE(ctxt)) && \
2335 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2336 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2337 xmlParserShrink(ctxt);
2338
2339 #define GROW \
2340 if ((!PARSER_PROGRESSIVE(ctxt)) && \
2341 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2342 xmlParserGrow(ctxt);
2343
2344 #define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2345
2346 #define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2347
2348 #define NEXT xmlNextChar(ctxt)
2349
2350 #define NEXT1 { \
2351 ctxt->input->col++; \
2352 ctxt->input->cur++; \
2353 if (*ctxt->input->cur == 0) \
2354 xmlParserGrow(ctxt); \
2355 }
2356
2357 #define NEXTL(l) do { \
2358 if (*(ctxt->input->cur) == '\n') { \
2359 ctxt->input->line++; ctxt->input->col = 1; \
2360 } else ctxt->input->col++; \
2361 ctxt->input->cur += l; \
2362 } while (0)
2363
2364 #define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2365
2366 #define COPY_BUF(b, i, v) \
2367 if (v < 0x80) b[i++] = v; \
2368 else i += xmlCopyCharMultiByte(&b[i],v)
2369
2370 static int
xmlCurrentCharRecover(xmlParserCtxtPtr ctxt,int * len)2371 xmlCurrentCharRecover(xmlParserCtxtPtr ctxt, int *len) {
2372 int c = xmlCurrentChar(ctxt, len);
2373
2374 if (c == XML_INVALID_CHAR)
2375 c = 0xFFFD; /* replacement character */
2376
2377 return(c);
2378 }
2379
2380 /**
2381 * xmlSkipBlankChars:
2382 * @ctxt: the XML parser context
2383 *
2384 * DEPRECATED: Internal function, do not use.
2385 *
2386 * Skip whitespace in the input stream.
2387 *
2388 * Returns the number of space chars skipped
2389 */
2390 int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt)2391 xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2392 const xmlChar *cur;
2393 int res = 0;
2394
2395 /*
2396 * It's Okay to use CUR/NEXT here since all the blanks are on
2397 * the ASCII range.
2398 */
2399 cur = ctxt->input->cur;
2400 while (IS_BLANK_CH(*cur)) {
2401 if (*cur == '\n') {
2402 ctxt->input->line++; ctxt->input->col = 1;
2403 } else {
2404 ctxt->input->col++;
2405 }
2406 cur++;
2407 if (res < INT_MAX)
2408 res++;
2409 if (*cur == 0) {
2410 ctxt->input->cur = cur;
2411 xmlParserGrow(ctxt);
2412 cur = ctxt->input->cur;
2413 }
2414 }
2415 ctxt->input->cur = cur;
2416
2417 return(res);
2418 }
2419
2420 static void
xmlPopPE(xmlParserCtxtPtr ctxt)2421 xmlPopPE(xmlParserCtxtPtr ctxt) {
2422 unsigned long consumed;
2423 xmlEntityPtr ent;
2424
2425 ent = ctxt->input->entity;
2426
2427 ent->flags &= ~XML_ENT_EXPANDING;
2428
2429 if ((ent->flags & XML_ENT_CHECKED) == 0) {
2430 int result;
2431
2432 /*
2433 * Read the rest of the stream in case of errors. We want
2434 * to account for the whole entity size.
2435 */
2436 do {
2437 ctxt->input->cur = ctxt->input->end;
2438 xmlParserShrink(ctxt);
2439 result = xmlParserGrow(ctxt);
2440 } while (result > 0);
2441
2442 consumed = ctxt->input->consumed;
2443 xmlSaturatedAddSizeT(&consumed,
2444 ctxt->input->end - ctxt->input->base);
2445
2446 xmlSaturatedAdd(&ent->expandedSize, consumed);
2447
2448 /*
2449 * Add to sizeentities when parsing an external entity
2450 * for the first time.
2451 */
2452 if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2453 xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2454 }
2455
2456 ent->flags |= XML_ENT_CHECKED;
2457 }
2458
2459 xmlPopInput(ctxt);
2460
2461 xmlParserEntityCheck(ctxt, ent->expandedSize);
2462 }
2463
2464 /**
2465 * xmlSkipBlankCharsPE:
2466 * @ctxt: the XML parser context
2467 *
2468 * Skip whitespace in the input stream, also handling parameter
2469 * entities.
2470 *
2471 * Returns the number of space chars skipped
2472 */
2473 static int
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt)2474 xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2475 int res = 0;
2476 int inParam;
2477 int expandParam;
2478
2479 inParam = PARSER_IN_PE(ctxt);
2480 expandParam = PARSER_EXTERNAL(ctxt);
2481
2482 if (!inParam && !expandParam)
2483 return(xmlSkipBlankChars(ctxt));
2484
2485 while (PARSER_STOPPED(ctxt) == 0) {
2486 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2487 NEXT;
2488 } else if (CUR == '%') {
2489 if ((expandParam == 0) ||
2490 (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2491 break;
2492
2493 /*
2494 * Expand parameter entity. We continue to consume
2495 * whitespace at the start of the entity and possible
2496 * even consume the whole entity and pop it. We might
2497 * even pop multiple PEs in this loop.
2498 */
2499 xmlParsePEReference(ctxt);
2500
2501 inParam = PARSER_IN_PE(ctxt);
2502 expandParam = PARSER_EXTERNAL(ctxt);
2503 } else if (CUR == 0) {
2504 if (inParam == 0)
2505 break;
2506
2507 xmlPopPE(ctxt);
2508
2509 inParam = PARSER_IN_PE(ctxt);
2510 expandParam = PARSER_EXTERNAL(ctxt);
2511 } else {
2512 break;
2513 }
2514
2515 /*
2516 * Also increase the counter when entering or exiting a PERef.
2517 * The spec says: "When a parameter-entity reference is recognized
2518 * in the DTD and included, its replacement text MUST be enlarged
2519 * by the attachment of one leading and one following space (#x20)
2520 * character."
2521 */
2522 if (res < INT_MAX)
2523 res++;
2524 }
2525
2526 return(res);
2527 }
2528
2529 /************************************************************************
2530 * *
2531 * Commodity functions to handle entities *
2532 * *
2533 ************************************************************************/
2534
2535 /**
2536 * xmlPopInput:
2537 * @ctxt: an XML parser context
2538 *
2539 * xmlPopInput: the current input pointed by ctxt->input came to an end
2540 * pop it and return the next char.
2541 *
2542 * Returns the current xmlChar in the parser context
2543 */
2544 xmlChar
xmlPopInput(xmlParserCtxtPtr ctxt)2545 xmlPopInput(xmlParserCtxtPtr ctxt) {
2546 xmlParserInputPtr input;
2547
2548 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2549 input = inputPop(ctxt);
2550 xmlFreeInputStream(input);
2551 if (*ctxt->input->cur == 0)
2552 xmlParserGrow(ctxt);
2553 return(CUR);
2554 }
2555
2556 /**
2557 * xmlPushInput:
2558 * @ctxt: an XML parser context
2559 * @input: an XML parser input fragment (entity, XML fragment ...).
2560 *
2561 * Push an input stream onto the stack.
2562 *
2563 * Returns -1 in case of error or the index in the input stack
2564 */
2565 int
xmlPushInput(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)2566 xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2567 int maxDepth;
2568 int ret;
2569
2570 if ((ctxt == NULL) || (input == NULL))
2571 return(-1);
2572
2573 maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2574 if (ctxt->inputNr > maxDepth) {
2575 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2576 "Maximum entity nesting depth exceeded");
2577 xmlHaltParser(ctxt);
2578 return(-1);
2579 }
2580 ret = inputPush(ctxt, input);
2581 if (ret >= 0)
2582 GROW;
2583 return(ret);
2584 }
2585
2586 /**
2587 * xmlParseCharRef:
2588 * @ctxt: an XML parser context
2589 *
2590 * DEPRECATED: Internal function, don't use.
2591 *
2592 * Parse a numeric character reference. Always consumes '&'.
2593 *
2594 * [66] CharRef ::= '&#' [0-9]+ ';' |
2595 * '&#x' [0-9a-fA-F]+ ';'
2596 *
2597 * [ WFC: Legal Character ]
2598 * Characters referred to using character references must match the
2599 * production for Char.
2600 *
2601 * Returns the value parsed (as an int), 0 in case of error
2602 */
2603 int
xmlParseCharRef(xmlParserCtxtPtr ctxt)2604 xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2605 int val = 0;
2606 int count = 0;
2607
2608 /*
2609 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2610 */
2611 if ((RAW == '&') && (NXT(1) == '#') &&
2612 (NXT(2) == 'x')) {
2613 SKIP(3);
2614 GROW;
2615 while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2616 if (count++ > 20) {
2617 count = 0;
2618 GROW;
2619 }
2620 if ((RAW >= '0') && (RAW <= '9'))
2621 val = val * 16 + (CUR - '0');
2622 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2623 val = val * 16 + (CUR - 'a') + 10;
2624 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2625 val = val * 16 + (CUR - 'A') + 10;
2626 else {
2627 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2628 val = 0;
2629 break;
2630 }
2631 if (val > 0x110000)
2632 val = 0x110000;
2633
2634 NEXT;
2635 count++;
2636 }
2637 if (RAW == ';') {
2638 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2639 ctxt->input->col++;
2640 ctxt->input->cur++;
2641 }
2642 } else if ((RAW == '&') && (NXT(1) == '#')) {
2643 SKIP(2);
2644 GROW;
2645 while (RAW != ';') { /* loop blocked by count */
2646 if (count++ > 20) {
2647 count = 0;
2648 GROW;
2649 }
2650 if ((RAW >= '0') && (RAW <= '9'))
2651 val = val * 10 + (CUR - '0');
2652 else {
2653 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2654 val = 0;
2655 break;
2656 }
2657 if (val > 0x110000)
2658 val = 0x110000;
2659
2660 NEXT;
2661 count++;
2662 }
2663 if (RAW == ';') {
2664 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2665 ctxt->input->col++;
2666 ctxt->input->cur++;
2667 }
2668 } else {
2669 if (RAW == '&')
2670 SKIP(1);
2671 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2672 }
2673
2674 /*
2675 * [ WFC: Legal Character ]
2676 * Characters referred to using character references must match the
2677 * production for Char.
2678 */
2679 if (val >= 0x110000) {
2680 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2681 "xmlParseCharRef: character reference out of bounds\n",
2682 val);
2683 } else if (IS_CHAR(val)) {
2684 return(val);
2685 } else {
2686 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2687 "xmlParseCharRef: invalid xmlChar value %d\n",
2688 val);
2689 }
2690 return(0);
2691 }
2692
2693 /**
2694 * xmlParseStringCharRef:
2695 * @ctxt: an XML parser context
2696 * @str: a pointer to an index in the string
2697 *
2698 * parse Reference declarations, variant parsing from a string rather
2699 * than an an input flow.
2700 *
2701 * [66] CharRef ::= '&#' [0-9]+ ';' |
2702 * '&#x' [0-9a-fA-F]+ ';'
2703 *
2704 * [ WFC: Legal Character ]
2705 * Characters referred to using character references must match the
2706 * production for Char.
2707 *
2708 * Returns the value parsed (as an int), 0 in case of error, str will be
2709 * updated to the current value of the index
2710 */
2711 static int
xmlParseStringCharRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)2712 xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2713 const xmlChar *ptr;
2714 xmlChar cur;
2715 int val = 0;
2716
2717 if ((str == NULL) || (*str == NULL)) return(0);
2718 ptr = *str;
2719 cur = *ptr;
2720 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2721 ptr += 3;
2722 cur = *ptr;
2723 while (cur != ';') { /* Non input consuming loop */
2724 if ((cur >= '0') && (cur <= '9'))
2725 val = val * 16 + (cur - '0');
2726 else if ((cur >= 'a') && (cur <= 'f'))
2727 val = val * 16 + (cur - 'a') + 10;
2728 else if ((cur >= 'A') && (cur <= 'F'))
2729 val = val * 16 + (cur - 'A') + 10;
2730 else {
2731 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2732 val = 0;
2733 break;
2734 }
2735 if (val > 0x110000)
2736 val = 0x110000;
2737
2738 ptr++;
2739 cur = *ptr;
2740 }
2741 if (cur == ';')
2742 ptr++;
2743 } else if ((cur == '&') && (ptr[1] == '#')){
2744 ptr += 2;
2745 cur = *ptr;
2746 while (cur != ';') { /* Non input consuming loops */
2747 if ((cur >= '0') && (cur <= '9'))
2748 val = val * 10 + (cur - '0');
2749 else {
2750 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2751 val = 0;
2752 break;
2753 }
2754 if (val > 0x110000)
2755 val = 0x110000;
2756
2757 ptr++;
2758 cur = *ptr;
2759 }
2760 if (cur == ';')
2761 ptr++;
2762 } else {
2763 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2764 return(0);
2765 }
2766 *str = ptr;
2767
2768 /*
2769 * [ WFC: Legal Character ]
2770 * Characters referred to using character references must match the
2771 * production for Char.
2772 */
2773 if (val >= 0x110000) {
2774 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2775 "xmlParseStringCharRef: character reference out of bounds\n",
2776 val);
2777 } else if (IS_CHAR(val)) {
2778 return(val);
2779 } else {
2780 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2781 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2782 val);
2783 }
2784 return(0);
2785 }
2786
2787 /**
2788 * xmlParserHandlePEReference:
2789 * @ctxt: the parser context
2790 *
2791 * DEPRECATED: Internal function, do not use.
2792 *
2793 * [69] PEReference ::= '%' Name ';'
2794 *
2795 * [ WFC: No Recursion ]
2796 * A parsed entity must not contain a recursive
2797 * reference to itself, either directly or indirectly.
2798 *
2799 * [ WFC: Entity Declared ]
2800 * In a document without any DTD, a document with only an internal DTD
2801 * subset which contains no parameter entity references, or a document
2802 * with "standalone='yes'", ... ... The declaration of a parameter
2803 * entity must precede any reference to it...
2804 *
2805 * [ VC: Entity Declared ]
2806 * In a document with an external subset or external parameter entities
2807 * with "standalone='no'", ... ... The declaration of a parameter entity
2808 * must precede any reference to it...
2809 *
2810 * [ WFC: In DTD ]
2811 * Parameter-entity references may only appear in the DTD.
2812 * NOTE: misleading but this is handled.
2813 *
2814 * A PEReference may have been detected in the current input stream
2815 * the handling is done accordingly to
2816 * http://www.w3.org/TR/REC-xml#entproc
2817 * i.e.
2818 * - Included in literal in entity values
2819 * - Included as Parameter Entity reference within DTDs
2820 */
2821 void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt)2822 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2823 xmlParsePEReference(ctxt);
2824 }
2825
2826 /**
2827 * xmlStringLenDecodeEntities:
2828 * @ctxt: the parser context
2829 * @str: the input string
2830 * @len: the string length
2831 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2832 * @end: an end marker xmlChar, 0 if none
2833 * @end2: an end marker xmlChar, 0 if none
2834 * @end3: an end marker xmlChar, 0 if none
2835 *
2836 * DEPRECATED: Internal function, don't use.
2837 *
2838 * Returns A newly allocated string with the substitution done. The caller
2839 * must deallocate it !
2840 */
2841 xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2842 xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2843 int what ATTRIBUTE_UNUSED,
2844 xmlChar end, xmlChar end2, xmlChar end3) {
2845 if ((ctxt == NULL) || (str == NULL) || (len < 0))
2846 return(NULL);
2847
2848 if ((str[len] != 0) ||
2849 (end != 0) || (end2 != 0) || (end3 != 0))
2850 return(NULL);
2851
2852 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2853 }
2854
2855 /**
2856 * xmlStringDecodeEntities:
2857 * @ctxt: the parser context
2858 * @str: the input string
2859 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2860 * @end: an end marker xmlChar, 0 if none
2861 * @end2: an end marker xmlChar, 0 if none
2862 * @end3: an end marker xmlChar, 0 if none
2863 *
2864 * DEPRECATED: Internal function, don't use.
2865 *
2866 * Returns A newly allocated string with the substitution done. The caller
2867 * must deallocate it !
2868 */
2869 xmlChar *
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt,const xmlChar * str,int what ATTRIBUTE_UNUSED,xmlChar end,xmlChar end2,xmlChar end3)2870 xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2871 int what ATTRIBUTE_UNUSED,
2872 xmlChar end, xmlChar end2, xmlChar end3) {
2873 if ((ctxt == NULL) || (str == NULL))
2874 return(NULL);
2875
2876 if ((end != 0) || (end2 != 0) || (end3 != 0))
2877 return(NULL);
2878
2879 return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2880 }
2881
2882 /************************************************************************
2883 * *
2884 * Commodity functions, cleanup needed ? *
2885 * *
2886 ************************************************************************/
2887
2888 /**
2889 * areBlanks:
2890 * @ctxt: an XML parser context
2891 * @str: a xmlChar *
2892 * @len: the size of @str
2893 * @blank_chars: we know the chars are blanks
2894 *
2895 * Is this a sequence of blank chars that one can ignore ?
2896 *
2897 * Returns 1 if ignorable 0 otherwise.
2898 */
2899
areBlanks(xmlParserCtxtPtr ctxt,const xmlChar * str,int len,int blank_chars)2900 static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2901 int blank_chars) {
2902 int i;
2903 xmlNodePtr lastChild;
2904
2905 /*
2906 * Don't spend time trying to differentiate them, the same callback is
2907 * used !
2908 */
2909 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2910 return(0);
2911
2912 /*
2913 * Check for xml:space value.
2914 */
2915 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2916 (*(ctxt->space) == -2))
2917 return(0);
2918
2919 /*
2920 * Check that the string is made of blanks
2921 */
2922 if (blank_chars == 0) {
2923 for (i = 0;i < len;i++)
2924 if (!(IS_BLANK_CH(str[i]))) return(0);
2925 }
2926
2927 /*
2928 * Look if the element is mixed content in the DTD if available
2929 */
2930 if (ctxt->node == NULL) return(0);
2931 if (ctxt->myDoc != NULL) {
2932 xmlElementPtr elemDecl = NULL;
2933 xmlDocPtr doc = ctxt->myDoc;
2934 const xmlChar *prefix = NULL;
2935
2936 if (ctxt->node->ns)
2937 prefix = ctxt->node->ns->prefix;
2938 if (doc->intSubset != NULL)
2939 elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2940 prefix);
2941 if ((elemDecl == NULL) && (doc->extSubset != NULL))
2942 elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2943 prefix);
2944 if (elemDecl != NULL) {
2945 if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2946 return(1);
2947 if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2948 (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2949 return(0);
2950 }
2951 }
2952
2953 /*
2954 * Otherwise, heuristic :-\
2955 */
2956 if ((RAW != '<') && (RAW != 0xD)) return(0);
2957 if ((ctxt->node->children == NULL) &&
2958 (RAW == '<') && (NXT(1) == '/')) return(0);
2959
2960 lastChild = xmlGetLastChild(ctxt->node);
2961 if (lastChild == NULL) {
2962 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2963 (ctxt->node->content != NULL)) return(0);
2964 } else if (xmlNodeIsText(lastChild))
2965 return(0);
2966 else if ((ctxt->node->children != NULL) &&
2967 (xmlNodeIsText(ctxt->node->children)))
2968 return(0);
2969 return(1);
2970 }
2971
2972 /************************************************************************
2973 * *
2974 * Extra stuff for namespace support *
2975 * Relates to http://www.w3.org/TR/WD-xml-names *
2976 * *
2977 ************************************************************************/
2978
2979 /**
2980 * xmlSplitQName:
2981 * @ctxt: an XML parser context
2982 * @name: an XML parser context
2983 * @prefixOut: a xmlChar **
2984 *
2985 * DEPRECATED: Don't use.
2986 *
2987 * parse an UTF8 encoded XML qualified name string
2988 *
2989 * [NS 5] QName ::= (Prefix ':')? LocalPart
2990 *
2991 * [NS 6] Prefix ::= NCName
2992 *
2993 * [NS 7] LocalPart ::= NCName
2994 *
2995 * Returns the local part, and prefix is updated
2996 * to get the Prefix if any.
2997 */
2998
2999 xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlChar ** prefixOut)3000 xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
3001 xmlChar buf[XML_MAX_NAMELEN + 5];
3002 xmlChar *buffer = NULL;
3003 int len = 0;
3004 int max = XML_MAX_NAMELEN;
3005 xmlChar *ret = NULL;
3006 xmlChar *prefix;
3007 const xmlChar *cur = name;
3008 int c;
3009
3010 if (prefixOut == NULL) return(NULL);
3011 *prefixOut = NULL;
3012
3013 if (cur == NULL) return(NULL);
3014
3015 /* nasty but well=formed */
3016 if (cur[0] == ':')
3017 return(xmlStrdup(name));
3018
3019 c = *cur++;
3020 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3021 buf[len++] = c;
3022 c = *cur++;
3023 }
3024 if (len >= max) {
3025 /*
3026 * Okay someone managed to make a huge name, so he's ready to pay
3027 * for the processing speed.
3028 */
3029 max = len * 2;
3030
3031 buffer = xmlMalloc(max);
3032 if (buffer == NULL) {
3033 xmlErrMemory(ctxt);
3034 return(NULL);
3035 }
3036 memcpy(buffer, buf, len);
3037 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3038 if (len + 10 > max) {
3039 xmlChar *tmp;
3040
3041 max *= 2;
3042 tmp = (xmlChar *) xmlRealloc(buffer, max);
3043 if (tmp == NULL) {
3044 xmlFree(buffer);
3045 xmlErrMemory(ctxt);
3046 return(NULL);
3047 }
3048 buffer = tmp;
3049 }
3050 buffer[len++] = c;
3051 c = *cur++;
3052 }
3053 buffer[len] = 0;
3054 }
3055
3056 if ((c == ':') && (*cur == 0)) {
3057 if (buffer != NULL)
3058 xmlFree(buffer);
3059 return(xmlStrdup(name));
3060 }
3061
3062 if (buffer == NULL) {
3063 ret = xmlStrndup(buf, len);
3064 if (ret == NULL) {
3065 xmlErrMemory(ctxt);
3066 return(NULL);
3067 }
3068 } else {
3069 ret = buffer;
3070 buffer = NULL;
3071 max = XML_MAX_NAMELEN;
3072 }
3073
3074
3075 if (c == ':') {
3076 c = *cur;
3077 prefix = ret;
3078 if (c == 0) {
3079 ret = xmlStrndup(BAD_CAST "", 0);
3080 if (ret == NULL) {
3081 xmlFree(prefix);
3082 return(NULL);
3083 }
3084 *prefixOut = prefix;
3085 return(ret);
3086 }
3087 len = 0;
3088
3089 /*
3090 * Check that the first character is proper to start
3091 * a new name
3092 */
3093 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3094 ((c >= 0x41) && (c <= 0x5A)) ||
3095 (c == '_') || (c == ':'))) {
3096 int l;
3097 int first = CUR_SCHAR(cur, l);
3098
3099 if (!IS_LETTER(first) && (first != '_')) {
3100 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3101 "Name %s is not XML Namespace compliant\n",
3102 name);
3103 }
3104 }
3105 cur++;
3106
3107 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3108 buf[len++] = c;
3109 c = *cur++;
3110 }
3111 if (len >= max) {
3112 /*
3113 * Okay someone managed to make a huge name, so he's ready to pay
3114 * for the processing speed.
3115 */
3116 max = len * 2;
3117
3118 buffer = xmlMalloc(max);
3119 if (buffer == NULL) {
3120 xmlErrMemory(ctxt);
3121 xmlFree(prefix);
3122 return(NULL);
3123 }
3124 memcpy(buffer, buf, len);
3125 while (c != 0) { /* tested bigname2.xml */
3126 if (len + 10 > max) {
3127 xmlChar *tmp;
3128
3129 max *= 2;
3130 tmp = (xmlChar *) xmlRealloc(buffer, max);
3131 if (tmp == NULL) {
3132 xmlErrMemory(ctxt);
3133 xmlFree(prefix);
3134 xmlFree(buffer);
3135 return(NULL);
3136 }
3137 buffer = tmp;
3138 }
3139 buffer[len++] = c;
3140 c = *cur++;
3141 }
3142 buffer[len] = 0;
3143 }
3144
3145 if (buffer == NULL) {
3146 ret = xmlStrndup(buf, len);
3147 if (ret == NULL) {
3148 xmlFree(prefix);
3149 return(NULL);
3150 }
3151 } else {
3152 ret = buffer;
3153 }
3154
3155 *prefixOut = prefix;
3156 }
3157
3158 return(ret);
3159 }
3160
3161 /************************************************************************
3162 * *
3163 * The parser itself *
3164 * Relates to http://www.w3.org/TR/REC-xml *
3165 * *
3166 ************************************************************************/
3167
3168 /************************************************************************
3169 * *
3170 * Routines to parse Name, NCName and NmToken *
3171 * *
3172 ************************************************************************/
3173
3174 /*
3175 * The two following functions are related to the change of accepted
3176 * characters for Name and NmToken in the Revision 5 of XML-1.0
3177 * They correspond to the modified production [4] and the new production [4a]
3178 * changes in that revision. Also note that the macros used for the
3179 * productions Letter, Digit, CombiningChar and Extender are not needed
3180 * anymore.
3181 * We still keep compatibility to pre-revision5 parsing semantic if the
3182 * new XML_PARSE_OLD10 option is given to the parser.
3183 */
3184 static int
xmlIsNameStartChar(xmlParserCtxtPtr ctxt,int c)3185 xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3186 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3187 /*
3188 * Use the new checks of production [4] [4a] amd [5] of the
3189 * Update 5 of XML-1.0
3190 */
3191 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 (c == '_') || (c == ':') ||
3195 ((c >= 0xC0) && (c <= 0xD6)) ||
3196 ((c >= 0xD8) && (c <= 0xF6)) ||
3197 ((c >= 0xF8) && (c <= 0x2FF)) ||
3198 ((c >= 0x370) && (c <= 0x37D)) ||
3199 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3200 ((c >= 0x200C) && (c <= 0x200D)) ||
3201 ((c >= 0x2070) && (c <= 0x218F)) ||
3202 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 ((c >= 0x10000) && (c <= 0xEFFFF))))
3207 return(1);
3208 } else {
3209 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3210 return(1);
3211 }
3212 return(0);
3213 }
3214
3215 static int
xmlIsNameChar(xmlParserCtxtPtr ctxt,int c)3216 xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218 /*
3219 * Use the new checks of production [4] [4a] amd [5] of the
3220 * Update 5 of XML-1.0
3221 */
3222 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 (((c >= 'a') && (c <= 'z')) ||
3224 ((c >= 'A') && (c <= 'Z')) ||
3225 ((c >= '0') && (c <= '9')) || /* !start */
3226 (c == '_') || (c == ':') ||
3227 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3228 ((c >= 0xC0) && (c <= 0xD6)) ||
3229 ((c >= 0xD8) && (c <= 0xF6)) ||
3230 ((c >= 0xF8) && (c <= 0x2FF)) ||
3231 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3232 ((c >= 0x370) && (c <= 0x37D)) ||
3233 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3234 ((c >= 0x200C) && (c <= 0x200D)) ||
3235 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3236 ((c >= 0x2070) && (c <= 0x218F)) ||
3237 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3238 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3239 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3240 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3241 ((c >= 0x10000) && (c <= 0xEFFFF))))
3242 return(1);
3243 } else {
3244 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3245 (c == '.') || (c == '-') ||
3246 (c == '_') || (c == ':') ||
3247 (IS_COMBINING(c)) ||
3248 (IS_EXTENDER(c)))
3249 return(1);
3250 }
3251 return(0);
3252 }
3253
3254 static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt)3255 xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3256 const xmlChar *ret;
3257 int len = 0, l;
3258 int c;
3259 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3260 XML_MAX_TEXT_LENGTH :
3261 XML_MAX_NAME_LENGTH;
3262
3263 /*
3264 * Handler for more complex cases
3265 */
3266 c = xmlCurrentChar(ctxt, &l);
3267 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3268 /*
3269 * Use the new checks of production [4] [4a] amd [5] of the
3270 * Update 5 of XML-1.0
3271 */
3272 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3273 (!(((c >= 'a') && (c <= 'z')) ||
3274 ((c >= 'A') && (c <= 'Z')) ||
3275 (c == '_') || (c == ':') ||
3276 ((c >= 0xC0) && (c <= 0xD6)) ||
3277 ((c >= 0xD8) && (c <= 0xF6)) ||
3278 ((c >= 0xF8) && (c <= 0x2FF)) ||
3279 ((c >= 0x370) && (c <= 0x37D)) ||
3280 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3281 ((c >= 0x200C) && (c <= 0x200D)) ||
3282 ((c >= 0x2070) && (c <= 0x218F)) ||
3283 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3288 return(NULL);
3289 }
3290 len += l;
3291 NEXTL(l);
3292 c = xmlCurrentChar(ctxt, &l);
3293 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3294 (((c >= 'a') && (c <= 'z')) ||
3295 ((c >= 'A') && (c <= 'Z')) ||
3296 ((c >= '0') && (c <= '9')) || /* !start */
3297 (c == '_') || (c == ':') ||
3298 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3299 ((c >= 0xC0) && (c <= 0xD6)) ||
3300 ((c >= 0xD8) && (c <= 0xF6)) ||
3301 ((c >= 0xF8) && (c <= 0x2FF)) ||
3302 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3303 ((c >= 0x370) && (c <= 0x37D)) ||
3304 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3305 ((c >= 0x200C) && (c <= 0x200D)) ||
3306 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3307 ((c >= 0x2070) && (c <= 0x218F)) ||
3308 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3309 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3310 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3311 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3312 ((c >= 0x10000) && (c <= 0xEFFFF))
3313 )) {
3314 if (len <= INT_MAX - l)
3315 len += l;
3316 NEXTL(l);
3317 c = xmlCurrentChar(ctxt, &l);
3318 }
3319 } else {
3320 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3321 (!IS_LETTER(c) && (c != '_') &&
3322 (c != ':'))) {
3323 return(NULL);
3324 }
3325 len += l;
3326 NEXTL(l);
3327 c = xmlCurrentChar(ctxt, &l);
3328
3329 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3330 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3331 (c == '.') || (c == '-') ||
3332 (c == '_') || (c == ':') ||
3333 (IS_COMBINING(c)) ||
3334 (IS_EXTENDER(c)))) {
3335 if (len <= INT_MAX - l)
3336 len += l;
3337 NEXTL(l);
3338 c = xmlCurrentChar(ctxt, &l);
3339 }
3340 }
3341 if (len > maxLength) {
3342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3343 return(NULL);
3344 }
3345 if (ctxt->input->cur - ctxt->input->base < len) {
3346 /*
3347 * There were a couple of bugs where PERefs lead to to a change
3348 * of the buffer. Check the buffer size to avoid passing an invalid
3349 * pointer to xmlDictLookup.
3350 */
3351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3352 "unexpected change of input buffer");
3353 return (NULL);
3354 }
3355 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3356 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3357 else
3358 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3359 if (ret == NULL)
3360 xmlErrMemory(ctxt);
3361 return(ret);
3362 }
3363
3364 /**
3365 * xmlParseName:
3366 * @ctxt: an XML parser context
3367 *
3368 * DEPRECATED: Internal function, don't use.
3369 *
3370 * parse an XML name.
3371 *
3372 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3373 * CombiningChar | Extender
3374 *
3375 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3376 *
3377 * [6] Names ::= Name (#x20 Name)*
3378 *
3379 * Returns the Name parsed or NULL
3380 */
3381
3382 const xmlChar *
xmlParseName(xmlParserCtxtPtr ctxt)3383 xmlParseName(xmlParserCtxtPtr ctxt) {
3384 const xmlChar *in;
3385 const xmlChar *ret;
3386 size_t count = 0;
3387 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3388 XML_MAX_TEXT_LENGTH :
3389 XML_MAX_NAME_LENGTH;
3390
3391 GROW;
3392
3393 /*
3394 * Accelerator for simple ASCII names
3395 */
3396 in = ctxt->input->cur;
3397 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3398 ((*in >= 0x41) && (*in <= 0x5A)) ||
3399 (*in == '_') || (*in == ':')) {
3400 in++;
3401 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3402 ((*in >= 0x41) && (*in <= 0x5A)) ||
3403 ((*in >= 0x30) && (*in <= 0x39)) ||
3404 (*in == '_') || (*in == '-') ||
3405 (*in == ':') || (*in == '.'))
3406 in++;
3407 if ((*in > 0) && (*in < 0x80)) {
3408 count = in - ctxt->input->cur;
3409 if (count > maxLength) {
3410 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3411 return(NULL);
3412 }
3413 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3414 ctxt->input->cur = in;
3415 ctxt->input->col += count;
3416 if (ret == NULL)
3417 xmlErrMemory(ctxt);
3418 return(ret);
3419 }
3420 }
3421 /* accelerator for special cases */
3422 return(xmlParseNameComplex(ctxt));
3423 }
3424
3425 static xmlHashedString
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt)3426 xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3427 xmlHashedString ret;
3428 int len = 0, l;
3429 int c;
3430 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3431 XML_MAX_TEXT_LENGTH :
3432 XML_MAX_NAME_LENGTH;
3433 size_t startPosition = 0;
3434
3435 ret.name = NULL;
3436 ret.hashValue = 0;
3437
3438 /*
3439 * Handler for more complex cases
3440 */
3441 startPosition = CUR_PTR - BASE_PTR;
3442 c = xmlCurrentChar(ctxt, &l);
3443 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3444 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3445 return(ret);
3446 }
3447
3448 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3449 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3450 if (len <= INT_MAX - l)
3451 len += l;
3452 NEXTL(l);
3453 c = xmlCurrentChar(ctxt, &l);
3454 }
3455 if (len > maxLength) {
3456 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3457 return(ret);
3458 }
3459 ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3460 if (ret.name == NULL)
3461 xmlErrMemory(ctxt);
3462 return(ret);
3463 }
3464
3465 /**
3466 * xmlParseNCName:
3467 * @ctxt: an XML parser context
3468 * @len: length of the string parsed
3469 *
3470 * parse an XML name.
3471 *
3472 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3473 * CombiningChar | Extender
3474 *
3475 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3476 *
3477 * Returns the Name parsed or NULL
3478 */
3479
3480 static xmlHashedString
xmlParseNCName(xmlParserCtxtPtr ctxt)3481 xmlParseNCName(xmlParserCtxtPtr ctxt) {
3482 const xmlChar *in, *e;
3483 xmlHashedString ret;
3484 size_t count = 0;
3485 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3486 XML_MAX_TEXT_LENGTH :
3487 XML_MAX_NAME_LENGTH;
3488
3489 ret.name = NULL;
3490
3491 /*
3492 * Accelerator for simple ASCII names
3493 */
3494 in = ctxt->input->cur;
3495 e = ctxt->input->end;
3496 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3497 ((*in >= 0x41) && (*in <= 0x5A)) ||
3498 (*in == '_')) && (in < e)) {
3499 in++;
3500 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3501 ((*in >= 0x41) && (*in <= 0x5A)) ||
3502 ((*in >= 0x30) && (*in <= 0x39)) ||
3503 (*in == '_') || (*in == '-') ||
3504 (*in == '.')) && (in < e))
3505 in++;
3506 if (in >= e)
3507 goto complex;
3508 if ((*in > 0) && (*in < 0x80)) {
3509 count = in - ctxt->input->cur;
3510 if (count > maxLength) {
3511 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3512 return(ret);
3513 }
3514 ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3515 ctxt->input->cur = in;
3516 ctxt->input->col += count;
3517 if (ret.name == NULL) {
3518 xmlErrMemory(ctxt);
3519 }
3520 return(ret);
3521 }
3522 }
3523 complex:
3524 return(xmlParseNCNameComplex(ctxt));
3525 }
3526
3527 /**
3528 * xmlParseNameAndCompare:
3529 * @ctxt: an XML parser context
3530 *
3531 * parse an XML name and compares for match
3532 * (specialized for endtag parsing)
3533 *
3534 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3535 * and the name for mismatch
3536 */
3537
3538 static const xmlChar *
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * other)3539 xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3540 register const xmlChar *cmp = other;
3541 register const xmlChar *in;
3542 const xmlChar *ret;
3543
3544 GROW;
3545
3546 in = ctxt->input->cur;
3547 while (*in != 0 && *in == *cmp) {
3548 ++in;
3549 ++cmp;
3550 }
3551 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3552 /* success */
3553 ctxt->input->col += in - ctxt->input->cur;
3554 ctxt->input->cur = in;
3555 return (const xmlChar*) 1;
3556 }
3557 /* failure (or end of input buffer), check with full function */
3558 ret = xmlParseName (ctxt);
3559 /* strings coming from the dictionary direct compare possible */
3560 if (ret == other) {
3561 return (const xmlChar*) 1;
3562 }
3563 return ret;
3564 }
3565
3566 /**
3567 * xmlParseStringName:
3568 * @ctxt: an XML parser context
3569 * @str: a pointer to the string pointer (IN/OUT)
3570 *
3571 * parse an XML name.
3572 *
3573 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3574 * CombiningChar | Extender
3575 *
3576 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3577 *
3578 * [6] Names ::= Name (#x20 Name)*
3579 *
3580 * Returns the Name parsed or NULL. The @str pointer
3581 * is updated to the current location in the string.
3582 */
3583
3584 static xmlChar *
xmlParseStringName(xmlParserCtxtPtr ctxt,const xmlChar ** str)3585 xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3586 xmlChar buf[XML_MAX_NAMELEN + 5];
3587 xmlChar *ret;
3588 const xmlChar *cur = *str;
3589 int len = 0, l;
3590 int c;
3591 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3592 XML_MAX_TEXT_LENGTH :
3593 XML_MAX_NAME_LENGTH;
3594
3595 c = CUR_SCHAR(cur, l);
3596 if (!xmlIsNameStartChar(ctxt, c)) {
3597 return(NULL);
3598 }
3599
3600 COPY_BUF(buf, len, c);
3601 cur += l;
3602 c = CUR_SCHAR(cur, l);
3603 while (xmlIsNameChar(ctxt, c)) {
3604 COPY_BUF(buf, len, c);
3605 cur += l;
3606 c = CUR_SCHAR(cur, l);
3607 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3608 /*
3609 * Okay someone managed to make a huge name, so he's ready to pay
3610 * for the processing speed.
3611 */
3612 xmlChar *buffer;
3613 int max = len * 2;
3614
3615 buffer = xmlMalloc(max);
3616 if (buffer == NULL) {
3617 xmlErrMemory(ctxt);
3618 return(NULL);
3619 }
3620 memcpy(buffer, buf, len);
3621 while (xmlIsNameChar(ctxt, c)) {
3622 if (len + 10 > max) {
3623 xmlChar *tmp;
3624
3625 max *= 2;
3626 tmp = (xmlChar *) xmlRealloc(buffer, max);
3627 if (tmp == NULL) {
3628 xmlErrMemory(ctxt);
3629 xmlFree(buffer);
3630 return(NULL);
3631 }
3632 buffer = tmp;
3633 }
3634 COPY_BUF(buffer, len, c);
3635 cur += l;
3636 c = CUR_SCHAR(cur, l);
3637 if (len > maxLength) {
3638 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3639 xmlFree(buffer);
3640 return(NULL);
3641 }
3642 }
3643 buffer[len] = 0;
3644 *str = cur;
3645 return(buffer);
3646 }
3647 }
3648 if (len > maxLength) {
3649 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3650 return(NULL);
3651 }
3652 *str = cur;
3653 ret = xmlStrndup(buf, len);
3654 if (ret == NULL)
3655 xmlErrMemory(ctxt);
3656 return(ret);
3657 }
3658
3659 /**
3660 * xmlParseNmtoken:
3661 * @ctxt: an XML parser context
3662 *
3663 * DEPRECATED: Internal function, don't use.
3664 *
3665 * parse an XML Nmtoken.
3666 *
3667 * [7] Nmtoken ::= (NameChar)+
3668 *
3669 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3670 *
3671 * Returns the Nmtoken parsed or NULL
3672 */
3673
3674 xmlChar *
xmlParseNmtoken(xmlParserCtxtPtr ctxt)3675 xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3676 xmlChar buf[XML_MAX_NAMELEN + 5];
3677 xmlChar *ret;
3678 int len = 0, l;
3679 int c;
3680 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3681 XML_MAX_TEXT_LENGTH :
3682 XML_MAX_NAME_LENGTH;
3683
3684 c = xmlCurrentChar(ctxt, &l);
3685
3686 while (xmlIsNameChar(ctxt, c)) {
3687 COPY_BUF(buf, len, c);
3688 NEXTL(l);
3689 c = xmlCurrentChar(ctxt, &l);
3690 if (len >= XML_MAX_NAMELEN) {
3691 /*
3692 * Okay someone managed to make a huge token, so he's ready to pay
3693 * for the processing speed.
3694 */
3695 xmlChar *buffer;
3696 int max = len * 2;
3697
3698 buffer = xmlMalloc(max);
3699 if (buffer == NULL) {
3700 xmlErrMemory(ctxt);
3701 return(NULL);
3702 }
3703 memcpy(buffer, buf, len);
3704 while (xmlIsNameChar(ctxt, c)) {
3705 if (len + 10 > max) {
3706 xmlChar *tmp;
3707
3708 max *= 2;
3709 tmp = (xmlChar *) xmlRealloc(buffer, max);
3710 if (tmp == NULL) {
3711 xmlErrMemory(ctxt);
3712 xmlFree(buffer);
3713 return(NULL);
3714 }
3715 buffer = tmp;
3716 }
3717 COPY_BUF(buffer, len, c);
3718 if (len > maxLength) {
3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3720 xmlFree(buffer);
3721 return(NULL);
3722 }
3723 NEXTL(l);
3724 c = xmlCurrentChar(ctxt, &l);
3725 }
3726 buffer[len] = 0;
3727 return(buffer);
3728 }
3729 }
3730 if (len == 0)
3731 return(NULL);
3732 if (len > maxLength) {
3733 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3734 return(NULL);
3735 }
3736 ret = xmlStrndup(buf, len);
3737 if (ret == NULL)
3738 xmlErrMemory(ctxt);
3739 return(ret);
3740 }
3741
3742 /**
3743 * xmlExpandPEsInEntityValue:
3744 * @ctxt: parser context
3745 * @buf: string buffer
3746 * @str: entity value
3747 * @length: size of entity value
3748 * @depth: nesting depth
3749 *
3750 * Validate an entity value and expand parameter entities.
3751 */
3752 static void
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,int length,int depth)3753 xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3754 const xmlChar *str, int length, int depth) {
3755 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3756 const xmlChar *end, *chunk;
3757 int c, l;
3758
3759 if (str == NULL)
3760 return;
3761
3762 depth += 1;
3763 if (depth > maxDepth) {
3764 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3765 "Maximum entity nesting depth exceeded");
3766 return;
3767 }
3768
3769 end = str + length;
3770 chunk = str;
3771
3772 while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3773 c = *str;
3774
3775 if (c >= 0x80) {
3776 l = xmlUTF8MultibyteLen(ctxt, str,
3777 "invalid character in entity value\n");
3778 if (l == 0) {
3779 if (chunk < str)
3780 xmlSBufAddString(buf, chunk, str - chunk);
3781 xmlSBufAddReplChar(buf);
3782 str += 1;
3783 chunk = str;
3784 } else {
3785 str += l;
3786 }
3787 } else if (c == '&') {
3788 if (str[1] == '#') {
3789 if (chunk < str)
3790 xmlSBufAddString(buf, chunk, str - chunk);
3791
3792 c = xmlParseStringCharRef(ctxt, &str);
3793 if (c == 0)
3794 return;
3795
3796 xmlSBufAddChar(buf, c);
3797
3798 chunk = str;
3799 } else {
3800 xmlChar *name;
3801
3802 /*
3803 * General entity references are checked for
3804 * syntactic validity.
3805 */
3806 str++;
3807 name = xmlParseStringName(ctxt, &str);
3808
3809 if ((name == NULL) || (*str++ != ';')) {
3810 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3811 "EntityValue: '&' forbidden except for entities "
3812 "references\n");
3813 xmlFree(name);
3814 return;
3815 }
3816
3817 xmlFree(name);
3818 }
3819 } else if (c == '%') {
3820 xmlEntityPtr ent;
3821
3822 if (chunk < str)
3823 xmlSBufAddString(buf, chunk, str - chunk);
3824
3825 ent = xmlParseStringPEReference(ctxt, &str);
3826 if (ent == NULL)
3827 return;
3828
3829 if (!PARSER_EXTERNAL(ctxt)) {
3830 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3831 return;
3832 }
3833
3834 if (ent->content == NULL) {
3835 /*
3836 * Note: external parsed entities will not be loaded,
3837 * it is not required for a non-validating parser to
3838 * complete external PEReferences coming from the
3839 * internal subset
3840 */
3841 if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3842 ((ctxt->replaceEntities) ||
3843 (ctxt->validate))) {
3844 xmlLoadEntityContent(ctxt, ent);
3845 } else {
3846 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3847 "not validating will not read content for "
3848 "PE entity %s\n", ent->name, NULL);
3849 }
3850 }
3851
3852 /*
3853 * TODO: Skip if ent->content is still NULL.
3854 */
3855
3856 if (xmlParserEntityCheck(ctxt, ent->length))
3857 return;
3858
3859 if (ent->flags & XML_ENT_EXPANDING) {
3860 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3861 xmlHaltParser(ctxt);
3862 return;
3863 }
3864
3865 ent->flags |= XML_ENT_EXPANDING;
3866 xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3867 depth);
3868 ent->flags &= ~XML_ENT_EXPANDING;
3869
3870 chunk = str;
3871 } else {
3872 /* Normal ASCII char */
3873 if (!IS_BYTE_CHAR(c)) {
3874 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3875 "invalid character in entity value\n");
3876 if (chunk < str)
3877 xmlSBufAddString(buf, chunk, str - chunk);
3878 xmlSBufAddReplChar(buf);
3879 str += 1;
3880 chunk = str;
3881 } else {
3882 str += 1;
3883 }
3884 }
3885 }
3886
3887 if (chunk < str)
3888 xmlSBufAddString(buf, chunk, str - chunk);
3889 }
3890
3891 /**
3892 * xmlParseEntityValue:
3893 * @ctxt: an XML parser context
3894 * @orig: if non-NULL store a copy of the original entity value
3895 *
3896 * DEPRECATED: Internal function, don't use.
3897 *
3898 * parse a value for ENTITY declarations
3899 *
3900 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3901 * "'" ([^%&'] | PEReference | Reference)* "'"
3902 *
3903 * Returns the EntityValue parsed with reference substituted or NULL
3904 */
3905 xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt,xmlChar ** orig)3906 xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3907 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3908 XML_MAX_HUGE_LENGTH :
3909 XML_MAX_TEXT_LENGTH;
3910 xmlSBuf buf;
3911 const xmlChar *start;
3912 int quote, length;
3913
3914 xmlSBufInit(&buf, maxLength);
3915
3916 GROW;
3917
3918 quote = CUR;
3919 if ((quote != '"') && (quote != '\'')) {
3920 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3921 return(NULL);
3922 }
3923 CUR_PTR++;
3924
3925 length = 0;
3926
3927 /*
3928 * Copy raw content of the entity into a buffer
3929 */
3930 while (1) {
3931 int c;
3932
3933 if (PARSER_STOPPED(ctxt))
3934 goto error;
3935
3936 if (CUR_PTR >= ctxt->input->end) {
3937 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3938 goto error;
3939 }
3940
3941 c = CUR;
3942
3943 if (c == 0) {
3944 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3945 "invalid character in entity value\n");
3946 goto error;
3947 }
3948 if (c == quote)
3949 break;
3950 NEXTL(1);
3951 length += 1;
3952
3953 /*
3954 * TODO: Check growth threshold
3955 */
3956 if (ctxt->input->end - CUR_PTR < 10)
3957 GROW;
3958 }
3959
3960 start = CUR_PTR - length;
3961
3962 if (orig != NULL) {
3963 *orig = xmlStrndup(start, length);
3964 if (*orig == NULL)
3965 xmlErrMemory(ctxt);
3966 }
3967
3968 xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3969
3970 NEXTL(1);
3971
3972 return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3973
3974 error:
3975 xmlSBufCleanup(&buf, ctxt, "entity length too long");
3976 return(NULL);
3977 }
3978
3979 /**
3980 * xmlCheckEntityInAttValue:
3981 * @ctxt: parser context
3982 * @pent: entity
3983 * @depth: nesting depth
3984 *
3985 * Check an entity reference in an attribute value for validity
3986 * without expanding it.
3987 */
3988 static void
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt,xmlEntityPtr pent,int depth)3989 xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3990 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3991 const xmlChar *str;
3992 unsigned long expandedSize = pent->length;
3993 int c, flags;
3994
3995 depth += 1;
3996 if (depth > maxDepth) {
3997 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3998 "Maximum entity nesting depth exceeded");
3999 return;
4000 }
4001
4002 if (pent->flags & XML_ENT_EXPANDING) {
4003 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4004 xmlHaltParser(ctxt);
4005 return;
4006 }
4007
4008 /*
4009 * If we're parsing a default attribute value in DTD content,
4010 * the entity might reference other entities which weren't
4011 * defined yet, so the check isn't reliable.
4012 */
4013 if (ctxt->inSubset == 0)
4014 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4015 else
4016 flags = XML_ENT_VALIDATED;
4017
4018 str = pent->content;
4019 if (str == NULL)
4020 goto done;
4021
4022 /*
4023 * Note that entity values are already validated. We only check
4024 * for illegal less-than signs and compute the expanded size
4025 * of the entity. No special handling for multi-byte characters
4026 * is needed.
4027 */
4028 while (!PARSER_STOPPED(ctxt)) {
4029 c = *str;
4030
4031 if (c != '&') {
4032 if (c == 0)
4033 break;
4034
4035 if (c == '<')
4036 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4037 "'<' in entity '%s' is not allowed in attributes "
4038 "values\n", pent->name);
4039
4040 str += 1;
4041 } else if (str[1] == '#') {
4042 int val;
4043
4044 val = xmlParseStringCharRef(ctxt, &str);
4045 if (val == 0) {
4046 pent->content[0] = 0;
4047 break;
4048 }
4049 } else {
4050 xmlChar *name;
4051 xmlEntityPtr ent;
4052
4053 name = xmlParseStringEntityRef(ctxt, &str);
4054 if (name == NULL) {
4055 pent->content[0] = 0;
4056 break;
4057 }
4058
4059 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4060 xmlFree(name);
4061
4062 if ((ent != NULL) &&
4063 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4064 if ((ent->flags & flags) != flags) {
4065 pent->flags |= XML_ENT_EXPANDING;
4066 xmlCheckEntityInAttValue(ctxt, ent, depth);
4067 pent->flags &= ~XML_ENT_EXPANDING;
4068 }
4069
4070 xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4071 xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4072 }
4073 }
4074 }
4075
4076 done:
4077 if (ctxt->inSubset == 0)
4078 pent->expandedSize = expandedSize;
4079
4080 pent->flags |= flags;
4081 }
4082
4083 /**
4084 * xmlExpandEntityInAttValue:
4085 * @ctxt: parser context
4086 * @buf: string buffer
4087 * @str: entity or attribute value
4088 * @pent: entity for entity value, NULL for attribute values
4089 * @normalize: whether to collapse whitespace
4090 * @inSpace: whitespace state
4091 * @depth: nesting depth
4092 * @check: whether to check for amplification
4093 *
4094 * Expand general entity references in an entity or attribute value.
4095 * Perform attribute value normalization.
4096 */
4097 static void
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt,xmlSBuf * buf,const xmlChar * str,xmlEntityPtr pent,int normalize,int * inSpace,int depth,int check)4098 xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4099 const xmlChar *str, xmlEntityPtr pent, int normalize,
4100 int *inSpace, int depth, int check) {
4101 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4102 int c, chunkSize;
4103
4104 if (str == NULL)
4105 return;
4106
4107 depth += 1;
4108 if (depth > maxDepth) {
4109 xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4110 "Maximum entity nesting depth exceeded");
4111 return;
4112 }
4113
4114 if (pent != NULL) {
4115 if (pent->flags & XML_ENT_EXPANDING) {
4116 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4117 xmlHaltParser(ctxt);
4118 return;
4119 }
4120
4121 if (check) {
4122 if (xmlParserEntityCheck(ctxt, pent->length))
4123 return;
4124 }
4125 }
4126
4127 chunkSize = 0;
4128
4129 /*
4130 * Note that entity values are already validated. No special
4131 * handling for multi-byte characters is needed.
4132 */
4133 while (!PARSER_STOPPED(ctxt)) {
4134 c = *str;
4135
4136 if (c != '&') {
4137 if (c == 0)
4138 break;
4139
4140 /*
4141 * If this function is called without an entity, it is used to
4142 * expand entities in an attribute content where less-than was
4143 * already unscaped and is allowed.
4144 */
4145 if ((pent != NULL) && (c == '<')) {
4146 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4147 "'<' in entity '%s' is not allowed in attributes "
4148 "values\n", pent->name);
4149 break;
4150 }
4151
4152 if (c <= 0x20) {
4153 if ((normalize) && (*inSpace)) {
4154 /* Skip char */
4155 if (chunkSize > 0) {
4156 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4157 chunkSize = 0;
4158 }
4159 } else if (c < 0x20) {
4160 if (chunkSize > 0) {
4161 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4162 chunkSize = 0;
4163 }
4164
4165 xmlSBufAddCString(buf, " ", 1);
4166 } else {
4167 chunkSize += 1;
4168 }
4169
4170 *inSpace = 1;
4171 } else {
4172 chunkSize += 1;
4173 *inSpace = 0;
4174 }
4175
4176 str += 1;
4177 } else if (str[1] == '#') {
4178 int val;
4179
4180 if (chunkSize > 0) {
4181 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4182 chunkSize = 0;
4183 }
4184
4185 val = xmlParseStringCharRef(ctxt, &str);
4186 if (val == 0) {
4187 if (pent != NULL)
4188 pent->content[0] = 0;
4189 break;
4190 }
4191
4192 if (val == ' ') {
4193 if ((!normalize) || (!*inSpace))
4194 xmlSBufAddCString(buf, " ", 1);
4195 *inSpace = 1;
4196 } else {
4197 xmlSBufAddChar(buf, val);
4198 *inSpace = 0;
4199 }
4200 } else {
4201 xmlChar *name;
4202 xmlEntityPtr ent;
4203
4204 if (chunkSize > 0) {
4205 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4206 chunkSize = 0;
4207 }
4208
4209 name = xmlParseStringEntityRef(ctxt, &str);
4210 if (name == NULL) {
4211 if (pent != NULL)
4212 pent->content[0] = 0;
4213 break;
4214 }
4215
4216 ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4217 xmlFree(name);
4218
4219 if ((ent != NULL) &&
4220 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4221 if (ent->content == NULL) {
4222 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4223 "predefined entity has no content\n");
4224 break;
4225 }
4226
4227 xmlSBufAddString(buf, ent->content, ent->length);
4228
4229 *inSpace = 0;
4230 } else if ((ent != NULL) && (ent->content != NULL)) {
4231 if (pent != NULL)
4232 pent->flags |= XML_ENT_EXPANDING;
4233 xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4234 normalize, inSpace, depth, check);
4235 if (pent != NULL)
4236 pent->flags &= ~XML_ENT_EXPANDING;
4237 }
4238 }
4239 }
4240
4241 if (chunkSize > 0)
4242 xmlSBufAddString(buf, str - chunkSize, chunkSize);
4243 }
4244
4245 /**
4246 * xmlExpandEntitiesInAttValue:
4247 * @ctxt: parser context
4248 * @str: entity or attribute value
4249 * @normalize: whether to collapse whitespace
4250 *
4251 * Expand general entity references in an entity or attribute value.
4252 * Perform attribute value normalization.
4253 *
4254 * Returns the expanded attribtue value.
4255 */
4256 xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt,const xmlChar * str,int normalize)4257 xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4258 int normalize) {
4259 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4260 XML_MAX_HUGE_LENGTH :
4261 XML_MAX_TEXT_LENGTH;
4262 xmlSBuf buf;
4263 int inSpace = 1;
4264
4265 xmlSBufInit(&buf, maxLength);
4266
4267 xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4268 ctxt->inputNr, /* check */ 0);
4269
4270 if ((normalize) && (inSpace) && (buf.size > 0))
4271 buf.size--;
4272
4273 return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4274 }
4275
4276 /**
4277 * xmlParseAttValueInternal:
4278 * @ctxt: an XML parser context
4279 * @len: attribute len result
4280 * @alloc: whether the attribute was reallocated as a new string
4281 * @normalize: if 1 then further non-CDATA normalization must be done
4282 *
4283 * parse a value for an attribute.
4284 * NOTE: if no normalization is needed, the routine will return pointers
4285 * directly from the data buffer.
4286 *
4287 * 3.3.3 Attribute-Value Normalization:
4288 * Before the value of an attribute is passed to the application or
4289 * checked for validity, the XML processor must normalize it as follows:
4290 * - a character reference is processed by appending the referenced
4291 * character to the attribute value
4292 * - an entity reference is processed by recursively processing the
4293 * replacement text of the entity
4294 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4295 * appending #x20 to the normalized value, except that only a single
4296 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4297 * parsed entity or the literal entity value of an internal parsed entity
4298 * - other characters are processed by appending them to the normalized value
4299 * If the declared value is not CDATA, then the XML processor must further
4300 * process the normalized attribute value by discarding any leading and
4301 * trailing space (#x20) characters, and by replacing sequences of space
4302 * (#x20) characters by a single space (#x20) character.
4303 * All attributes for which no declaration has been read should be treated
4304 * by a non-validating parser as if declared CDATA.
4305 *
4306 * Returns the AttValue parsed or NULL. The value has to be freed by the
4307 * caller if it was copied, this can be detected by val[*len] == 0.
4308 */
4309 static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,int * attlen,int * alloc,int normalize,int isNamespace)4310 xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4311 int normalize, int isNamespace) {
4312 unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4313 XML_MAX_HUGE_LENGTH :
4314 XML_MAX_TEXT_LENGTH;
4315 xmlSBuf buf;
4316 xmlChar *ret;
4317 int c, l, quote, flags, chunkSize;
4318 int inSpace = 1;
4319 int replaceEntities;
4320
4321 /* Always expand namespace URIs */
4322 replaceEntities = (ctxt->replaceEntities) || (isNamespace);
4323
4324 xmlSBufInit(&buf, maxLength);
4325
4326 GROW;
4327
4328 quote = CUR;
4329 if ((quote != '"') && (quote != '\'')) {
4330 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4331 return(NULL);
4332 }
4333 NEXTL(1);
4334
4335 if (ctxt->inSubset == 0)
4336 flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4337 else
4338 flags = XML_ENT_VALIDATED;
4339
4340 inSpace = 1;
4341 chunkSize = 0;
4342
4343 while (1) {
4344 if (PARSER_STOPPED(ctxt))
4345 goto error;
4346
4347 if (CUR_PTR >= ctxt->input->end) {
4348 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4349 "AttValue: ' expected\n");
4350 goto error;
4351 }
4352
4353 /*
4354 * TODO: Check growth threshold
4355 */
4356 if (ctxt->input->end - CUR_PTR < 10)
4357 GROW;
4358
4359 c = CUR;
4360
4361 if (c >= 0x80) {
4362 l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4363 "invalid character in attribute value\n");
4364 if (l == 0) {
4365 if (chunkSize > 0) {
4366 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367 chunkSize = 0;
4368 }
4369 xmlSBufAddReplChar(&buf);
4370 NEXTL(1);
4371 } else {
4372 chunkSize += l;
4373 NEXTL(l);
4374 }
4375
4376 inSpace = 0;
4377 } else if (c != '&') {
4378 if (c > 0x20) {
4379 if (c == quote)
4380 break;
4381
4382 if (c == '<')
4383 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4384
4385 chunkSize += 1;
4386 inSpace = 0;
4387 } else if (!IS_BYTE_CHAR(c)) {
4388 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4389 "invalid character in attribute value\n");
4390 if (chunkSize > 0) {
4391 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4392 chunkSize = 0;
4393 }
4394 xmlSBufAddReplChar(&buf);
4395 inSpace = 0;
4396 } else {
4397 /* Whitespace */
4398 if ((normalize) && (inSpace)) {
4399 /* Skip char */
4400 if (chunkSize > 0) {
4401 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4402 chunkSize = 0;
4403 }
4404 } else if (c < 0x20) {
4405 /* Convert to space */
4406 if (chunkSize > 0) {
4407 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4408 chunkSize = 0;
4409 }
4410
4411 xmlSBufAddCString(&buf, " ", 1);
4412 } else {
4413 chunkSize += 1;
4414 }
4415
4416 inSpace = 1;
4417
4418 if ((c == 0xD) && (NXT(1) == 0xA))
4419 CUR_PTR++;
4420 }
4421
4422 NEXTL(1);
4423 } else if (NXT(1) == '#') {
4424 int val;
4425
4426 if (chunkSize > 0) {
4427 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4428 chunkSize = 0;
4429 }
4430
4431 val = xmlParseCharRef(ctxt);
4432 if (val == 0)
4433 goto error;
4434
4435 if ((val == '&') && (!replaceEntities)) {
4436 /*
4437 * The reparsing will be done in xmlNodeParseContent()
4438 * called from SAX2.c
4439 */
4440 xmlSBufAddCString(&buf, "&", 5);
4441 inSpace = 0;
4442 } else if (val == ' ') {
4443 if ((!normalize) || (!inSpace))
4444 xmlSBufAddCString(&buf, " ", 1);
4445 inSpace = 1;
4446 } else {
4447 xmlSBufAddChar(&buf, val);
4448 inSpace = 0;
4449 }
4450 } else {
4451 const xmlChar *name;
4452 xmlEntityPtr ent;
4453
4454 if (chunkSize > 0) {
4455 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4456 chunkSize = 0;
4457 }
4458
4459 name = xmlParseEntityRefInternal(ctxt);
4460 if (name == NULL) {
4461 /*
4462 * Probably a literal '&' which wasn't escaped.
4463 * TODO: Handle gracefully in recovery mode.
4464 */
4465 continue;
4466 }
4467
4468 ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4469 if (ent == NULL)
4470 continue;
4471
4472 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4473 if ((ent->content[0] == '&') && (!replaceEntities))
4474 xmlSBufAddCString(&buf, "&", 5);
4475 else
4476 xmlSBufAddString(&buf, ent->content, ent->length);
4477 inSpace = 0;
4478 } else if (replaceEntities) {
4479 xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4480 normalize, &inSpace, ctxt->inputNr,
4481 /* check */ 1);
4482 } else {
4483 if ((ent->flags & flags) != flags)
4484 xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4485
4486 if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4487 ent->content[0] = 0;
4488 goto error;
4489 }
4490
4491 /*
4492 * Just output the reference
4493 */
4494 xmlSBufAddCString(&buf, "&", 1);
4495 xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4496 xmlSBufAddCString(&buf, ";", 1);
4497
4498 inSpace = 0;
4499 }
4500 }
4501 }
4502
4503 if ((buf.mem == NULL) && (alloc != NULL)) {
4504 ret = (xmlChar *) CUR_PTR - chunkSize;
4505
4506 if (attlen != NULL)
4507 *attlen = chunkSize;
4508 if ((normalize) && (inSpace) && (chunkSize > 0))
4509 *attlen -= 1;
4510 *alloc = 0;
4511
4512 /* Report potential error */
4513 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4514 } else {
4515 if (chunkSize > 0)
4516 xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4517
4518 if ((normalize) && (inSpace) && (buf.size > 0))
4519 buf.size--;
4520
4521 ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4522
4523 if (ret != NULL) {
4524 if (attlen != NULL)
4525 *attlen = buf.size;
4526 if (alloc != NULL)
4527 *alloc = 1;
4528 }
4529 }
4530
4531 NEXTL(1);
4532
4533 return(ret);
4534
4535 error:
4536 xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4537 return(NULL);
4538 }
4539
4540 /**
4541 * xmlParseAttValue:
4542 * @ctxt: an XML parser context
4543 *
4544 * DEPRECATED: Internal function, don't use.
4545 *
4546 * parse a value for an attribute
4547 * Note: the parser won't do substitution of entities here, this
4548 * will be handled later in xmlStringGetNodeList
4549 *
4550 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4551 * "'" ([^<&'] | Reference)* "'"
4552 *
4553 * 3.3.3 Attribute-Value Normalization:
4554 * Before the value of an attribute is passed to the application or
4555 * checked for validity, the XML processor must normalize it as follows:
4556 * - a character reference is processed by appending the referenced
4557 * character to the attribute value
4558 * - an entity reference is processed by recursively processing the
4559 * replacement text of the entity
4560 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4561 * appending #x20 to the normalized value, except that only a single
4562 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4563 * parsed entity or the literal entity value of an internal parsed entity
4564 * - other characters are processed by appending them to the normalized value
4565 * If the declared value is not CDATA, then the XML processor must further
4566 * process the normalized attribute value by discarding any leading and
4567 * trailing space (#x20) characters, and by replacing sequences of space
4568 * (#x20) characters by a single space (#x20) character.
4569 * All attributes for which no declaration has been read should be treated
4570 * by a non-validating parser as if declared CDATA.
4571 *
4572 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4573 */
4574
4575
4576 xmlChar *
xmlParseAttValue(xmlParserCtxtPtr ctxt)4577 xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4578 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4579 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0));
4580 }
4581
4582 /**
4583 * xmlParseSystemLiteral:
4584 * @ctxt: an XML parser context
4585 *
4586 * DEPRECATED: Internal function, don't use.
4587 *
4588 * parse an XML Literal
4589 *
4590 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4591 *
4592 * Returns the SystemLiteral parsed or NULL
4593 */
4594
4595 xmlChar *
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt)4596 xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4597 xmlChar *buf = NULL;
4598 int len = 0;
4599 int size = XML_PARSER_BUFFER_SIZE;
4600 int cur, l;
4601 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4602 XML_MAX_TEXT_LENGTH :
4603 XML_MAX_NAME_LENGTH;
4604 xmlChar stop;
4605
4606 if (RAW == '"') {
4607 NEXT;
4608 stop = '"';
4609 } else if (RAW == '\'') {
4610 NEXT;
4611 stop = '\'';
4612 } else {
4613 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4614 return(NULL);
4615 }
4616
4617 buf = xmlMalloc(size);
4618 if (buf == NULL) {
4619 xmlErrMemory(ctxt);
4620 return(NULL);
4621 }
4622 cur = xmlCurrentCharRecover(ctxt, &l);
4623 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4624 if (len + 5 >= size) {
4625 xmlChar *tmp;
4626
4627 size *= 2;
4628 tmp = (xmlChar *) xmlRealloc(buf, size);
4629 if (tmp == NULL) {
4630 xmlFree(buf);
4631 xmlErrMemory(ctxt);
4632 return(NULL);
4633 }
4634 buf = tmp;
4635 }
4636 COPY_BUF(buf, len, cur);
4637 if (len > maxLength) {
4638 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4639 xmlFree(buf);
4640 return(NULL);
4641 }
4642 NEXTL(l);
4643 cur = xmlCurrentCharRecover(ctxt, &l);
4644 }
4645 buf[len] = 0;
4646 if (!IS_CHAR(cur)) {
4647 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4648 } else {
4649 NEXT;
4650 }
4651 return(buf);
4652 }
4653
4654 /**
4655 * xmlParsePubidLiteral:
4656 * @ctxt: an XML parser context
4657 *
4658 * DEPRECATED: Internal function, don't use.
4659 *
4660 * parse an XML public literal
4661 *
4662 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4663 *
4664 * Returns the PubidLiteral parsed or NULL.
4665 */
4666
4667 xmlChar *
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt)4668 xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4669 xmlChar *buf = NULL;
4670 int len = 0;
4671 int size = XML_PARSER_BUFFER_SIZE;
4672 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4673 XML_MAX_TEXT_LENGTH :
4674 XML_MAX_NAME_LENGTH;
4675 xmlChar cur;
4676 xmlChar stop;
4677
4678 if (RAW == '"') {
4679 NEXT;
4680 stop = '"';
4681 } else if (RAW == '\'') {
4682 NEXT;
4683 stop = '\'';
4684 } else {
4685 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4686 return(NULL);
4687 }
4688 buf = xmlMalloc(size);
4689 if (buf == NULL) {
4690 xmlErrMemory(ctxt);
4691 return(NULL);
4692 }
4693 cur = CUR;
4694 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4695 (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4696 if (len + 1 >= size) {
4697 xmlChar *tmp;
4698
4699 size *= 2;
4700 tmp = (xmlChar *) xmlRealloc(buf, size);
4701 if (tmp == NULL) {
4702 xmlErrMemory(ctxt);
4703 xmlFree(buf);
4704 return(NULL);
4705 }
4706 buf = tmp;
4707 }
4708 buf[len++] = cur;
4709 if (len > maxLength) {
4710 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4711 xmlFree(buf);
4712 return(NULL);
4713 }
4714 NEXT;
4715 cur = CUR;
4716 }
4717 buf[len] = 0;
4718 if (cur != stop) {
4719 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4720 } else {
4721 NEXTL(1);
4722 }
4723 return(buf);
4724 }
4725
/*
 * Forward declaration: xmlParseCharDataInternal() calls this routine,
 * which is defined after it.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4727
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning. Entry i is i itself for the bytes that loop may step over
 * (TAB and 0x20-0x7F except '&', '<' and ']') and 0 for every byte at
 * which it has to stop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only 0x9 passes, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80-0xFF (non-ascii): left out, implicitly initialized to 0 */
};
4765
4766 /**
4767 * xmlParseCharDataInternal:
4768 * @ctxt: an XML parser context
4769 * @partial: buffer may contain partial UTF-8 sequences
4770 *
4771 * Parse character data. Always makes progress if the first char isn't
4772 * '<' or '&'.
4773 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
4778 *
4779 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4780 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Line/column snapshot. It is refreshed after each SAX delivery below
     * and written back to the input just before handing over to
     * xmlParseCharDataComplex().
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* Step over a run of spaces and line feeds, tracking line/col. */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        if (*in == '<') {
            /*
             * The run starting at input->cur ended directly at '<':
             * deliver it (if non-empty) and return.
             */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                /* Advance the input before invoking any callback. */
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    /*
                     * The two handlers differ, so areBlanks() picks which
                     * one receives the run (last argument is 1 here, 0 at
                     * the other call sites of this function).
                     */
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /*
         * Table-driven scan: test_char_data[] is non-zero for the bytes
         * that can be stepped over without special handling.
         */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            if ((in[1] == ']') && (in[2] == '>')) {
                /*
                 * "]]>" is not allowed in character data. Report it, move
                 * input->cur to just after the first ']' and return; the
                 * pending run is not delivered on this path.
                 */
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                ctxt->input->cur = in + 1;
                return;
            }
            /* A ']' that does not start "]]>" is ordinary data. */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Deliver everything scanned since input->cur. */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                /*
                 * The run starts with a blank and the two handlers differ:
                 * let areBlanks() decide which one receives it.
                 */
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: restart the next run at the LF so that the CR is
                 * left out of the delivered data, then re-test the loop
                 * condition.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* Lone CR: step back and leave it for the checks below. */
            in--;
        }
        /* Markup or a reference starts here: the caller takes over. */
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        /* Refresh the input buffer and reload the scan pointer. */
        SHRINK;
        GROW;
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the range handled above: write back the
     * saved line/col and let the slow path continue from input->cur.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4918
4919 /**
4920 * xmlParseCharDataComplex:
4921 * @ctxt: an XML parser context
 * @partial: buffer may contain partial UTF-8 sequences
4923 *
4924 * Always makes progress if the first char isn't '<' or '&'.
4925 *
 * Parse a CharData section. This is the fallback function
 * of xmlParseCharData() when the parsing requires handling
 * of non-ASCII characters.
4929 */
4930 static void
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt,int partial)4931 xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4932 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4933 int nbchar = 0;
4934 int cur, l;
4935
4936 cur = xmlCurrentCharRecover(ctxt, &l);
4937 while ((cur != '<') && /* checked */
4938 (cur != '&') &&
4939 (IS_CHAR(cur))) {
4940 if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4941 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4942 }
4943 COPY_BUF(buf, nbchar, cur);
4944 /* move current position before possible calling of ctxt->sax->characters */
4945 NEXTL(l);
4946 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4947 buf[nbchar] = 0;
4948
4949 /*
4950 * OK the segment is to be consumed as chars.
4951 */
4952 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4953 if (areBlanks(ctxt, buf, nbchar, 0)) {
4954 if (ctxt->sax->ignorableWhitespace != NULL)
4955 ctxt->sax->ignorableWhitespace(ctxt->userData,
4956 buf, nbchar);
4957 } else {
4958 if (ctxt->sax->characters != NULL)
4959 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4960 if ((ctxt->sax->characters !=
4961 ctxt->sax->ignorableWhitespace) &&
4962 (*ctxt->space == -1))
4963 *ctxt->space = -2;
4964 }
4965 }
4966 nbchar = 0;
4967 SHRINK;
4968 }
4969 cur = xmlCurrentCharRecover(ctxt, &l);
4970 }
4971 if (nbchar != 0) {
4972 buf[nbchar] = 0;
4973 /*
4974 * OK the segment is to be consumed as chars.
4975 */
4976 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4977 if (areBlanks(ctxt, buf, nbchar, 0)) {
4978 if (ctxt->sax->ignorableWhitespace != NULL)
4979 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4980 } else {
4981 if (ctxt->sax->characters != NULL)
4982 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4983 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4984 (*ctxt->space == -1))
4985 *ctxt->space = -2;
4986 }
4987 }
4988 }
4989 /*
4990 * cur == 0 can mean
4991 *
4992 * - End of buffer.
4993 * - An actual 0 character.
4994 * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4995 */
4996 if (ctxt->input->cur < ctxt->input->end) {
4997 if ((cur == 0) && (CUR != 0)) {
4998 if (partial == 0) {
4999 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5000 "Incomplete UTF-8 sequence starting with %02X\n", CUR);
5001 NEXTL(1);
5002 }
5003 } else if ((cur != '<') && (cur != '&')) {
5004 /* Generate the error and skip the offending character */
5005 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5006 "PCDATA invalid Char value %d\n", cur);
5007 NEXTL(l);
5008 }
5009 }
5010 }
5011
5012 /**
5013 * xmlParseCharData:
5014 * @ctxt: an XML parser context
5015 * @cdata: unused
5016 *
5017 * DEPRECATED: Internal function, don't use.
5018 */
void
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
    /* cdata is ignored; the internal parser is called with partial == 0. */
    xmlParseCharDataInternal(ctxt, 0);
}
5023
5024 /**
5025 * xmlParseExternalID:
5026 * @ctxt: an XML parser context
5027 * @publicID: a xmlChar** receiving PubidLiteral
5028 * @strict: indicate whether we should restrict parsing to only
5029 * production [75], see NOTE below
5030 *
5031 * DEPRECATED: Internal function, don't use.
5032 *
5033 * Parse an External ID or a Public ID
5034 *
5035 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5036 * 'PUBLIC' S PubidLiteral S SystemLiteral
5037 *
5038 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5039 * | 'PUBLIC' S PubidLiteral S SystemLiteral
5040 *
5041 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5042 *
 * Returns the SystemLiteral, or NULL if none was parsed. In the 'PUBLIC'
 * case @publicID receives the PubidLiteral; if strict is off it is
 * possible to return NULL and still have @publicID set.
5046 */
5047
5048 xmlChar *
xmlParseExternalID(xmlParserCtxtPtr ctxt,xmlChar ** publicID,int strict)5049 xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5050 xmlChar *URI = NULL;
5051
5052 *publicID = NULL;
5053 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5054 SKIP(6);
5055 if (SKIP_BLANKS == 0) {
5056 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5057 "Space required after 'SYSTEM'\n");
5058 }
5059 URI = xmlParseSystemLiteral(ctxt);
5060 if (URI == NULL) {
5061 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5062 }
5063 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5064 SKIP(6);
5065 if (SKIP_BLANKS == 0) {
5066 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5067 "Space required after 'PUBLIC'\n");
5068 }
5069 *publicID = xmlParsePubidLiteral(ctxt);
5070 if (*publicID == NULL) {
5071 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5072 }
5073 if (strict) {
5074 /*
5075 * We don't handle [83] so "S SystemLiteral" is required.
5076 */
5077 if (SKIP_BLANKS == 0) {
5078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5079 "Space required after the Public Identifier\n");
5080 }
5081 } else {
5082 /*
5083 * We handle [83] so we return immediately, if
5084 * "S SystemLiteral" is not detected. We skip blanks if no
5085 * system literal was found, but this is harmless since we must
5086 * be at the end of a NotationDecl.
5087 */
5088 if (SKIP_BLANKS == 0) return(NULL);
5089 if ((CUR != '\'') && (CUR != '"')) return(NULL);
5090 }
5091 URI = xmlParseSystemLiteral(ctxt);
5092 if (URI == NULL) {
5093 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5094 }
5095 }
5096 return(URI);
5097 }
5098
5099 /**
5100 * xmlParseCommentComplex:
5101 * @ctxt: an XML parser context
5102 * @buf: the already parsed part of the buffer
5103 * @len: number of bytes in the buffer
5104 * @size: allocated size of the buffer
5105 *
5106 * Skip an XML (SGML) comment <!-- .... -->
5107 * The spec says that "For compatibility, the string "--" (double-hyphen)
5108 * must not occur within comments. "
5109 * This is the slow routine in case the accelerator for ascii didn't work
5110 *
5111 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5112 */
5113 static void
xmlParseCommentComplex(xmlParserCtxtPtr ctxt,xmlChar * buf,size_t len,size_t size)5114 xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5115 size_t len, size_t size) {
5116 int q, ql;
5117 int r, rl;
5118 int cur, l;
5119 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5120 XML_MAX_HUGE_LENGTH :
5121 XML_MAX_TEXT_LENGTH;
5122
5123 if (buf == NULL) {
5124 len = 0;
5125 size = XML_PARSER_BUFFER_SIZE;
5126 buf = xmlMalloc(size);
5127 if (buf == NULL) {
5128 xmlErrMemory(ctxt);
5129 return;
5130 }
5131 }
5132 q = xmlCurrentCharRecover(ctxt, &ql);
5133 if (q == 0)
5134 goto not_terminated;
5135 if (!IS_CHAR(q)) {
5136 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5137 "xmlParseComment: invalid xmlChar value %d\n",
5138 q);
5139 xmlFree (buf);
5140 return;
5141 }
5142 NEXTL(ql);
5143 r = xmlCurrentCharRecover(ctxt, &rl);
5144 if (r == 0)
5145 goto not_terminated;
5146 if (!IS_CHAR(r)) {
5147 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5148 "xmlParseComment: invalid xmlChar value %d\n",
5149 r);
5150 xmlFree (buf);
5151 return;
5152 }
5153 NEXTL(rl);
5154 cur = xmlCurrentCharRecover(ctxt, &l);
5155 if (cur == 0)
5156 goto not_terminated;
5157 while (IS_CHAR(cur) && /* checked */
5158 ((cur != '>') ||
5159 (r != '-') || (q != '-'))) {
5160 if ((r == '-') && (q == '-')) {
5161 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5162 }
5163 if (len + 5 >= size) {
5164 xmlChar *new_buf;
5165 size_t new_size;
5166
5167 new_size = size * 2;
5168 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5169 if (new_buf == NULL) {
5170 xmlFree (buf);
5171 xmlErrMemory(ctxt);
5172 return;
5173 }
5174 buf = new_buf;
5175 size = new_size;
5176 }
5177 COPY_BUF(buf, len, q);
5178 if (len > maxLength) {
5179 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5180 "Comment too big found", NULL);
5181 xmlFree (buf);
5182 return;
5183 }
5184
5185 q = r;
5186 ql = rl;
5187 r = cur;
5188 rl = l;
5189
5190 NEXTL(l);
5191 cur = xmlCurrentCharRecover(ctxt, &l);
5192
5193 }
5194 buf[len] = 0;
5195 if (cur == 0) {
5196 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5197 "Comment not terminated \n<!--%.50s\n", buf);
5198 } else if (!IS_CHAR(cur)) {
5199 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5200 "xmlParseComment: invalid xmlChar value %d\n",
5201 cur);
5202 } else {
5203 NEXT;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5205 (!ctxt->disableSAX))
5206 ctxt->sax->comment(ctxt->userData, buf);
5207 }
5208 xmlFree(buf);
5209 return;
5210 not_terminated:
5211 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5212 "Comment not terminated\n", NULL);
5213 xmlFree(buf);
5214 }
5215
5216 /**
5217 * xmlParseComment:
5218 * @ctxt: an XML parser context
5219 *
5220 * DEPRECATED: Internal function, don't use.
5221 *
5222 * Parse an XML (SGML) comment. Always consumes '<!'.
5223 *
5224 * The spec says that "For compatibility, the string "--" (double-hyphen)
5225 * must not occur within comments. "
5226 *
5227 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5228 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    /* Upper bound on the comment length; larger with XML_PARSE_HUGE. */
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                       XML_MAX_HUGE_LENGTH :
                       XML_MAX_TEXT_LENGTH;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;

    /*
     * Check that there is a comment right here.
     * "<!" is consumed before "--" is tested, so the second early return
     * leaves the input positioned after "<!".
     */
    if ((RAW != '<') || (NXT(1) != '!'))
        return;
    SKIP(2);
    if ((RAW != '-') || (NXT(1) != '-'))
        return;
    SKIP(2);
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /*
         * Step over 0x20-0x7F bytes other than '-', plus TAB, counting
         * columns in a local.
         */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            if (buf == NULL) {
                /*
                 * First chunk: when "--" follows immediately, allocate
                 * just nbchar + 1 bytes, otherwise leave extra room.
                 */
                if ((*in == '-') && (in[1] == '-'))
                    size = nbchar + 1;
                else
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
                buf = xmlMalloc(size);
                if (buf == NULL) {
                    xmlErrMemory(ctxt);
                    return;
                }
                len = 0;
            } else if (len + nbchar + 1 >= size) {
                /* Grow so the chunk plus the terminating 0 fits. */
                xmlChar *new_buf;
                size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                new_buf = (xmlChar *) xmlRealloc(buf, size);
                if (new_buf == NULL) {
                    xmlFree (buf);
                    xmlErrMemory(ctxt);
                    return;
                }
                buf = new_buf;
            }
            memcpy(&buf[len], ctxt->input->cur, nbchar);
            len += nbchar;
            buf[len] = 0;
        }
        if (len > maxLength) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                              "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        /* Everything up to in has been saved: commit the position. */
        ctxt->input->cur = in;
        /*
         * NOTE(review): the scan above already loops on LF before getting
         * here, so this branch looks unreachable - confirm.
         */
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF: restart the next chunk at the LF so that the CR
                 * is left out of the copied data.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                goto get_more;
            }
            /* Lone CR: step back onto it. */
            in--;
        }
        /* Refresh the input buffer and reload the scan pointer. */
        SHRINK;
        GROW;
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" closes the comment: consume and report it. */
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* An empty comment is reported as "". */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    return;
                }
                /*
                 * "--" not followed by '>' is an error, but scanning goes
                 * on. input->cur is not advanced, so both hyphens are part
                 * of the next chunk that gets copied.
                 */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                in++;
                ctxt->input->col++;
            }
            /* Step over the (remaining) hyphen and keep scanning. */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the range handled above: continue in the
     * slow routine, which takes over buf (it may be NULL) and frees it.
     */
    xmlParseCommentComplex(ctxt, buf, len, size);
}
5368
5369
5370 /**
5371 * xmlParsePITarget:
5372 * @ctxt: an XML parser context
5373 *
5374 * DEPRECATED: Internal function, don't use.
5375 *
5376 * parse the name of a PI
5377 *
5378 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5379 *
5380 * Returns the PITarget name or NULL
5381 */
5382
5383 const xmlChar *
xmlParsePITarget(xmlParserCtxtPtr ctxt)5384 xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5385 const xmlChar *name;
5386
5387 name = xmlParseName(ctxt);
5388 if ((name != NULL) &&
5389 ((name[0] == 'x') || (name[0] == 'X')) &&
5390 ((name[1] == 'm') || (name[1] == 'M')) &&
5391 ((name[2] == 'l') || (name[2] == 'L'))) {
5392 int i;
5393 if ((name[0] == 'x') && (name[1] == 'm') &&
5394 (name[2] == 'l') && (name[3] == 0)) {
5395 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5396 "XML declaration allowed only at the start of the document\n");
5397 return(name);
5398 } else if (name[3] == 0) {
5399 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5400 return(name);
5401 }
5402 for (i = 0;;i++) {
5403 if (xmlW3CPIs[i] == NULL) break;
5404 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5405 return(name);
5406 }
5407 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5408 "xmlParsePITarget: invalid name prefix 'xml'\n",
5409 NULL, NULL);
5410 }
5411 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5412 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5413 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5414 }
5415 return(name);
5416 }
5417
5418 #ifdef LIBXML_CATALOG_ENABLED
5419 /**
5420 * xmlParseCatalogPI:
5421 * @ctxt: an XML parser context
5422 * @catalog: the PI value string
5423 *
5424 * parse an XML Catalog Processing Instruction.
5425 *
5426 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5427 *
5428 * Occurs only if allowed by the user and if happening in the Misc
5429 * part of the document before any doctype information
5430 * This will add the given catalog to the parsing context in order
5431 * to be used if there is a resolution need further down in the document
5432 */
5433
5434 static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt,const xmlChar * catalog)5435 xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5436 xmlChar *URL = NULL;
5437 const xmlChar *tmp, *base;
5438 xmlChar marker;
5439
5440 tmp = catalog;
5441 while (IS_BLANK_CH(*tmp)) tmp++;
5442 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5443 goto error;
5444 tmp += 7;
5445 while (IS_BLANK_CH(*tmp)) tmp++;
5446 if (*tmp != '=') {
5447 return;
5448 }
5449 tmp++;
5450 while (IS_BLANK_CH(*tmp)) tmp++;
5451 marker = *tmp;
5452 if ((marker != '\'') && (marker != '"'))
5453 goto error;
5454 tmp++;
5455 base = tmp;
5456 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5457 if (*tmp == 0)
5458 goto error;
5459 URL = xmlStrndup(base, tmp - base);
5460 tmp++;
5461 while (IS_BLANK_CH(*tmp)) tmp++;
5462 if (*tmp != 0)
5463 goto error;
5464
5465 if (URL != NULL) {
5466 /*
5467 * Unfortunately, the catalog API doesn't report OOM errors.
5468 * xmlGetLastError isn't very helpful since we don't know
5469 * where the last error came from. We'd have to reset it
5470 * before this call and restore it afterwards.
5471 */
5472 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5473 xmlFree(URL);
5474 }
5475 return;
5476
5477 error:
5478 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5479 "Catalog PI syntax error: %s\n",
5480 catalog, NULL);
5481 if (URL != NULL)
5482 xmlFree(URL);
5483 }
5484 #endif
5485
5486 /**
5487 * xmlParsePI:
5488 * @ctxt: an XML parser context
5489 *
5490 * DEPRECATED: Internal function, don't use.
5491 *
5492 * parse an XML Processing Instruction.
5493 *
5494 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5495 *
5496 * The processing is transferred to SAX once parsed.
5497 */
5498
5499 void
xmlParsePI(xmlParserCtxtPtr ctxt)5500 xmlParsePI(xmlParserCtxtPtr ctxt) {
5501 xmlChar *buf = NULL;
5502 size_t len = 0;
5503 size_t size = XML_PARSER_BUFFER_SIZE;
5504 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5505 XML_MAX_HUGE_LENGTH :
5506 XML_MAX_TEXT_LENGTH;
5507 int cur, l;
5508 const xmlChar *target;
5509
5510 if ((RAW == '<') && (NXT(1) == '?')) {
5511 /*
5512 * this is a Processing Instruction.
5513 */
5514 SKIP(2);
5515
5516 /*
5517 * Parse the target name and check for special support like
5518 * namespace.
5519 */
5520 target = xmlParsePITarget(ctxt);
5521 if (target != NULL) {
5522 if ((RAW == '?') && (NXT(1) == '>')) {
5523 SKIP(2);
5524
5525 /*
5526 * SAX: PI detected.
5527 */
5528 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5529 (ctxt->sax->processingInstruction != NULL))
5530 ctxt->sax->processingInstruction(ctxt->userData,
5531 target, NULL);
5532 return;
5533 }
5534 buf = xmlMalloc(size);
5535 if (buf == NULL) {
5536 xmlErrMemory(ctxt);
5537 return;
5538 }
5539 if (SKIP_BLANKS == 0) {
5540 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5541 "ParsePI: PI %s space expected\n", target);
5542 }
5543 cur = xmlCurrentCharRecover(ctxt, &l);
5544 while (IS_CHAR(cur) && /* checked */
5545 ((cur != '?') || (NXT(1) != '>'))) {
5546 if (len + 5 >= size) {
5547 xmlChar *tmp;
5548 size_t new_size = size * 2;
5549 tmp = (xmlChar *) xmlRealloc(buf, new_size);
5550 if (tmp == NULL) {
5551 xmlErrMemory(ctxt);
5552 xmlFree(buf);
5553 return;
5554 }
5555 buf = tmp;
5556 size = new_size;
5557 }
5558 COPY_BUF(buf, len, cur);
5559 if (len > maxLength) {
5560 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5561 "PI %s too big found", target);
5562 xmlFree(buf);
5563 return;
5564 }
5565 NEXTL(l);
5566 cur = xmlCurrentCharRecover(ctxt, &l);
5567 }
5568 buf[len] = 0;
5569 if (cur != '?') {
5570 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5571 "ParsePI: PI %s never end ...\n", target);
5572 } else {
5573 SKIP(2);
5574
5575 #ifdef LIBXML_CATALOG_ENABLED
5576 if ((ctxt->inSubset == 0) &&
5577 (xmlStrEqual(target, XML_CATALOG_PI))) {
5578 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5579
5580 if (((ctxt->options & XML_PARSE_NO_CATALOG_PI) == 0) &&
5581 ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5582 (allow == XML_CATA_ALLOW_ALL)))
5583 xmlParseCatalogPI(ctxt, buf);
5584 }
5585 #endif
5586
5587 /*
5588 * SAX: PI detected.
5589 */
5590 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5591 (ctxt->sax->processingInstruction != NULL))
5592 ctxt->sax->processingInstruction(ctxt->userData,
5593 target, buf);
5594 }
5595 xmlFree(buf);
5596 } else {
5597 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5598 }
5599 }
5600 }
5601
5602 /**
5603 * xmlParseNotationDecl:
5604 * @ctxt: an XML parser context
5605 *
5606 * DEPRECATED: Internal function, don't use.
5607 *
5608 * Parse a notation declaration. Always consumes '<!'.
5609 *
5610 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5611 *
 * Hence there are actually 3 choices:
5613 * 'PUBLIC' S PubidLiteral
5614 * 'PUBLIC' S PubidLiteral S SystemLiteral
5615 * and 'SYSTEM' S SystemLiteral
5616 *
5617 * See the NOTE on xmlParseExternalID().
5618 */
5619
5620 void
xmlParseNotationDecl(xmlParserCtxtPtr ctxt)5621 xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5622 const xmlChar *name;
5623 xmlChar *Pubid;
5624 xmlChar *Systemid;
5625
5626 if ((CUR != '<') || (NXT(1) != '!'))
5627 return;
5628 SKIP(2);
5629
5630 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5631 int inputid = ctxt->input->id;
5632 SKIP(8);
5633 if (SKIP_BLANKS_PE == 0) {
5634 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5635 "Space required after '<!NOTATION'\n");
5636 return;
5637 }
5638
5639 name = xmlParseName(ctxt);
5640 if (name == NULL) {
5641 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5642 return;
5643 }
5644 if (xmlStrchr(name, ':') != NULL) {
5645 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5646 "colons are forbidden from notation names '%s'\n",
5647 name, NULL, NULL);
5648 }
5649 if (SKIP_BLANKS_PE == 0) {
5650 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5651 "Space required after the NOTATION name'\n");
5652 return;
5653 }
5654
5655 /*
5656 * Parse the IDs.
5657 */
5658 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5659 SKIP_BLANKS_PE;
5660
5661 if (RAW == '>') {
5662 if (inputid != ctxt->input->id) {
5663 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5664 "Notation declaration doesn't start and stop"
5665 " in the same entity\n");
5666 }
5667 NEXT;
5668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5669 (ctxt->sax->notationDecl != NULL))
5670 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5671 } else {
5672 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5673 }
5674 if (Systemid != NULL) xmlFree(Systemid);
5675 if (Pubid != NULL) xmlFree(Pubid);
5676 }
5677 }
5678
/**
 * xmlParseEntityDecl:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse an entity declaration. Always consumes '<!'.
 *
 * [70] EntityDecl ::= GEDecl | PEDecl
 *
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 *
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 *
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 *
 * [74] PEDef ::= EntityValue | ExternalID
 *
 * [76] NDataDecl ::= S 'NDATA' S Name
 *
 * [ VC: Notation Declared ]
 * The Name must match the declared name of a notation.
 *
 * The declaration is reported through the SAX entityDecl or
 * unparsedEntityDecl callbacks when they are installed and SAX is not
 * disabled. All strings allocated while parsing (entity value, system
 * and public identifiers, raw value) are released before returning,
 * except the raw value when it is handed over to the declared entity.
 */

void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
    const xmlChar *name = NULL;
    xmlChar *value = NULL;
    xmlChar *URI = NULL, *literal = NULL;
    const xmlChar *ndata = NULL;
    int isParameter = 0;
    xmlChar *orig = NULL;

    if ((CUR != '<') || (NXT(1) != '!'))
        return;
    SKIP(2);

    /* GROW; done in the caller */
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
        /* Input at the start, compared at the closing '>' below. */
        int inputid = ctxt->input->id;
        SKIP(6);
        /* Missing blanks are reported but parsing continues. */
        if (SKIP_BLANKS_PE == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after '<!ENTITY'\n");
        }

        /* A '%' here selects the PEDecl production [72]. */
        if (RAW == '%') {
            NEXT;
            if (SKIP_BLANKS_PE == 0) {
                xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                               "Space required after '%%'\n");
            }
            isParameter = 1;
        }

        name = xmlParseName(ctxt);
        if (name == NULL) {
            /* Nothing has been allocated yet, a plain return is enough. */
            xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                           "xmlParseEntityDecl: no name\n");
            return;
        }
        if (xmlStrchr(name, ':') != NULL) {
            xmlNsErr(ctxt, XML_NS_ERR_COLON,
                     "colons are forbidden from entities names '%s'\n",
                     name, NULL, NULL);
        }
        if (SKIP_BLANKS_PE == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "Space required after the entity name\n");
        }

        /*
         * handle the various case of definitions...
         */
        if (isParameter) {
            if ((RAW == '"') || (RAW == '\'')) {
                /* Internal parameter entity: only reported with a value. */
                value = xmlParseEntityValue(ctxt, &orig);
                if (value) {
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_INTERNAL_PARAMETER_ENTITY,
                                    NULL, NULL, value);
                }
            } else {
                /* External parameter entity. */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
                }
                if (URI) {
                    /* A system identifier with a fragment is not declared. */
                    if (xmlStrchr(URI, '#')) {
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
                    } else {
                        if ((ctxt->sax != NULL) &&
                            (!ctxt->disableSAX) &&
                            (ctxt->sax->entityDecl != NULL))
                            ctxt->sax->entityDecl(ctxt->userData, name,
                                        XML_EXTERNAL_PARAMETER_ENTITY,
                                        literal, URI, NULL);
                    }
                }
            }
        } else {
            if ((RAW == '"') || (RAW == '\'')) {
                /*
                 * Internal general entity. Unlike the parameter entity
                 * case, the callback is invoked without checking that
                 * a value was actually parsed.
                 */
                value = xmlParseEntityValue(ctxt, &orig);
                if ((ctxt->sax != NULL) &&
                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                    ctxt->sax->entityDecl(ctxt->userData, name,
                                XML_INTERNAL_GENERAL_ENTITY,
                                NULL, NULL, value);
                /*
                 * For expat compatibility in SAX mode.
                 * When there is no document, or the document carries the
                 * SAX_COMPAT_MODE version marker, the entity is also
                 * registered through xmlSAX2EntityDecl() in a document
                 * and "fake" internal subset created here on demand.
                 */
                if ((ctxt->myDoc == NULL) ||
                    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
                    if (ctxt->myDoc == NULL) {
                        ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                        if (ctxt->myDoc == NULL) {
                            xmlErrMemory(ctxt);
                            goto done;
                        }
                        ctxt->myDoc->properties = XML_DOC_INTERNAL;
                    }
                    if (ctxt->myDoc->intSubset == NULL) {
                        ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                            BAD_CAST "fake", NULL, NULL);
                        if (ctxt->myDoc->intSubset == NULL) {
                            xmlErrMemory(ctxt);
                            goto done;
                        }
                    }

                    xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
                                      NULL, NULL, value);
                }
            } else {
                /* External general entity, possibly unparsed (NDATA). */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
                }
                if (URI) {
                    /* Reported only; the declaration is still processed. */
                    if (xmlStrchr(URI, '#')) {
                        xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
                    }
                }
                if ((RAW != '>') && (SKIP_BLANKS_PE == 0)) {
                    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                   "Space required before 'NDATA'\n");
                }
                if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
                    SKIP(5);
                    if (SKIP_BLANKS_PE == 0) {
                        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                                       "Space required after 'NDATA'\n");
                    }
                    /* The notation name is passed on without a NULL check. */
                    ndata = xmlParseName(ctxt);
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->unparsedEntityDecl != NULL))
                        ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
                                    literal, URI, ndata);
                } else {
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                    literal, URI, NULL);
                    /*
                     * For expat compatibility in SAX mode.
                     * assuming the entity replacement was asked for
                     */
                    if ((ctxt->replaceEntities != 0) &&
                        ((ctxt->myDoc == NULL) ||
                        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
                        if (ctxt->myDoc == NULL) {
                            ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                            if (ctxt->myDoc == NULL) {
                                xmlErrMemory(ctxt);
                                goto done;
                            }
                            ctxt->myDoc->properties = XML_DOC_INTERNAL;
                        }

                        if (ctxt->myDoc->intSubset == NULL) {
                            ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                                BAD_CAST "fake", NULL, NULL);
                            if (ctxt->myDoc->intSubset == NULL) {
                                xmlErrMemory(ctxt);
                                goto done;
                            }
                        }
                        xmlSAX2EntityDecl(ctxt, name,
                                          XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                          literal, URI, NULL);
                    }
                }
            }
        }
        SKIP_BLANKS_PE;
        if (RAW != '>') {
            /* An unterminated declaration stops the parser altogether. */
            xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
                    "xmlParseEntityDecl: entity %s not terminated\n", name);
            xmlHaltParser(ctxt);
        } else {
            if (inputid != ctxt->input->id) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                               "Entity declaration doesn't start and stop in"
                               " the same entity\n");
            }
            NEXT;
        }
        if (orig != NULL) {
            /*
             * Ugly mechanism to save the raw entity value.
             * The entity just declared is looked up again and, when it
             * has no raw value yet, takes ownership of orig.
             */
            xmlEntityPtr cur = NULL;

            if (isParameter) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getParameterEntity != NULL))
                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
            } else {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getEntity != NULL))
                    cur = ctxt->sax->getEntity(ctxt->userData, name);
                /* Fall back to the default lookup when userData is ctxt. */
                if ((cur == NULL) && (ctxt->userData==ctxt)) {
                    cur = xmlSAX2GetEntity(ctxt, name);
                }
            }
            if ((cur != NULL) && (cur->orig == NULL)) {
                cur->orig = orig;
                /* Ownership moved to the entity; skip the free below. */
                orig = NULL;
            }
        }

        /*
         * Common cleanup. The memory error paths jump here directly and
         * therefore skip the '>' check and the raw value bookkeeping.
         */
done:
        if (value != NULL) xmlFree(value);
        if (URI != NULL) xmlFree(URI);
        if (literal != NULL) xmlFree(literal);
        if (orig != NULL) xmlFree(orig);
    }
}
5920
5921 /**
5922 * xmlParseDefaultDecl:
5923 * @ctxt: an XML parser context
5924 * @value: Receive a possible fixed default value for the attribute
5925 *
5926 * DEPRECATED: Internal function, don't use.
5927 *
5928 * Parse an attribute default declaration
5929 *
5930 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5931 *
5932 * [ VC: Required Attribute ]
5933 * if the default declaration is the keyword #REQUIRED, then the
5934 * attribute must be specified for all elements of the type in the
5935 * attribute-list declaration.
5936 *
5937 * [ VC: Attribute Default Legal ]
5938 * The declared default value must meet the lexical constraints of
5939 * the declared attribute type c.f. xmlValidateAttributeDecl()
5940 *
5941 * [ VC: Fixed Attribute Default ]
5942 * if an attribute has a default value declared with the #FIXED
5943 * keyword, instances of that attribute must match the default value.
5944 *
5945 * [ WFC: No < in Attribute Values ]
5946 * handled in xmlParseAttValue()
5947 *
5948 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5949 * or XML_ATTRIBUTE_FIXED.
5950 */
5951
5952 int
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt,xmlChar ** value)5953 xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5954 int val;
5955 xmlChar *ret;
5956
5957 *value = NULL;
5958 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5959 SKIP(9);
5960 return(XML_ATTRIBUTE_REQUIRED);
5961 }
5962 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5963 SKIP(8);
5964 return(XML_ATTRIBUTE_IMPLIED);
5965 }
5966 val = XML_ATTRIBUTE_NONE;
5967 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5968 SKIP(6);
5969 val = XML_ATTRIBUTE_FIXED;
5970 if (SKIP_BLANKS_PE == 0) {
5971 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5972 "Space required after '#FIXED'\n");
5973 }
5974 }
5975 ret = xmlParseAttValue(ctxt);
5976 if (ret == NULL) {
5977 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5978 "Attribute default value declaration error\n");
5979 } else
5980 *value = ret;
5981 return(val);
5982 }
5983
5984 /**
5985 * xmlParseNotationType:
5986 * @ctxt: an XML parser context
5987 *
5988 * DEPRECATED: Internal function, don't use.
5989 *
5990 * parse an Notation attribute type.
5991 *
5992 * Note: the leading 'NOTATION' S part has already being parsed...
5993 *
5994 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5995 *
5996 * [ VC: Notation Attributes ]
5997 * Values of this type must match one of the notation names included
5998 * in the declaration; all notation names in the declaration must be declared.
5999 *
6000 * Returns: the notation attribute tree built while parsing
6001 */
6002
6003 xmlEnumerationPtr
xmlParseNotationType(xmlParserCtxtPtr ctxt)6004 xmlParseNotationType(xmlParserCtxtPtr ctxt) {
6005 const xmlChar *name;
6006 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6007
6008 if (RAW != '(') {
6009 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
6010 return(NULL);
6011 }
6012 do {
6013 NEXT;
6014 SKIP_BLANKS_PE;
6015 name = xmlParseName(ctxt);
6016 if (name == NULL) {
6017 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6018 "Name expected in NOTATION declaration\n");
6019 xmlFreeEnumeration(ret);
6020 return(NULL);
6021 }
6022 tmp = ret;
6023 while (tmp != NULL) {
6024 if (xmlStrEqual(name, tmp->name)) {
6025 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6026 "standalone: attribute notation value token %s duplicated\n",
6027 name, NULL);
6028 if (!xmlDictOwns(ctxt->dict, name))
6029 xmlFree((xmlChar *) name);
6030 break;
6031 }
6032 tmp = tmp->next;
6033 }
6034 if (tmp == NULL) {
6035 cur = xmlCreateEnumeration(name);
6036 if (cur == NULL) {
6037 xmlErrMemory(ctxt);
6038 xmlFreeEnumeration(ret);
6039 return(NULL);
6040 }
6041 if (last == NULL) ret = last = cur;
6042 else {
6043 last->next = cur;
6044 last = cur;
6045 }
6046 }
6047 SKIP_BLANKS_PE;
6048 } while (RAW == '|');
6049 if (RAW != ')') {
6050 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
6051 xmlFreeEnumeration(ret);
6052 return(NULL);
6053 }
6054 NEXT;
6055 return(ret);
6056 }
6057
6058 /**
6059 * xmlParseEnumerationType:
6060 * @ctxt: an XML parser context
6061 *
6062 * DEPRECATED: Internal function, don't use.
6063 *
6064 * parse an Enumeration attribute type.
6065 *
6066 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
6067 *
6068 * [ VC: Enumeration ]
6069 * Values of this type must match one of the Nmtoken tokens in
6070 * the declaration
6071 *
6072 * Returns: the enumeration attribute tree built while parsing
6073 */
6074
6075 xmlEnumerationPtr
xmlParseEnumerationType(xmlParserCtxtPtr ctxt)6076 xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
6077 xmlChar *name;
6078 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
6079
6080 if (RAW != '(') {
6081 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
6082 return(NULL);
6083 }
6084 do {
6085 NEXT;
6086 SKIP_BLANKS_PE;
6087 name = xmlParseNmtoken(ctxt);
6088 if (name == NULL) {
6089 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
6090 return(ret);
6091 }
6092 tmp = ret;
6093 while (tmp != NULL) {
6094 if (xmlStrEqual(name, tmp->name)) {
6095 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
6096 "standalone: attribute enumeration value token %s duplicated\n",
6097 name, NULL);
6098 if (!xmlDictOwns(ctxt->dict, name))
6099 xmlFree(name);
6100 break;
6101 }
6102 tmp = tmp->next;
6103 }
6104 if (tmp == NULL) {
6105 cur = xmlCreateEnumeration(name);
6106 if (!xmlDictOwns(ctxt->dict, name))
6107 xmlFree(name);
6108 if (cur == NULL) {
6109 xmlErrMemory(ctxt);
6110 xmlFreeEnumeration(ret);
6111 return(NULL);
6112 }
6113 if (last == NULL) ret = last = cur;
6114 else {
6115 last->next = cur;
6116 last = cur;
6117 }
6118 }
6119 SKIP_BLANKS_PE;
6120 } while (RAW == '|');
6121 if (RAW != ')') {
6122 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
6123 return(ret);
6124 }
6125 NEXT;
6126 return(ret);
6127 }
6128
6129 /**
6130 * xmlParseEnumeratedType:
6131 * @ctxt: an XML parser context
6132 * @tree: the enumeration tree built while parsing
6133 *
6134 * DEPRECATED: Internal function, don't use.
6135 *
6136 * parse an Enumerated attribute type.
6137 *
6138 * [57] EnumeratedType ::= NotationType | Enumeration
6139 *
6140 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6141 *
6142 *
6143 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6144 */
6145
6146 int
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6147 xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6148 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6149 SKIP(8);
6150 if (SKIP_BLANKS_PE == 0) {
6151 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6152 "Space required after 'NOTATION'\n");
6153 return(0);
6154 }
6155 *tree = xmlParseNotationType(ctxt);
6156 if (*tree == NULL) return(0);
6157 return(XML_ATTRIBUTE_NOTATION);
6158 }
6159 *tree = xmlParseEnumerationType(ctxt);
6160 if (*tree == NULL) return(0);
6161 return(XML_ATTRIBUTE_ENUMERATION);
6162 }
6163
6164 /**
6165 * xmlParseAttributeType:
6166 * @ctxt: an XML parser context
6167 * @tree: the enumeration tree built while parsing
6168 *
6169 * DEPRECATED: Internal function, don't use.
6170 *
6171 * parse the Attribute list def for an element
6172 *
6173 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6174 *
6175 * [55] StringType ::= 'CDATA'
6176 *
6177 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6178 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6179 *
6180 * Validity constraints for attribute values syntax are checked in
6181 * xmlValidateAttributeValue()
6182 *
6183 * [ VC: ID ]
6184 * Values of type ID must match the Name production. A name must not
6185 * appear more than once in an XML document as a value of this type;
6186 * i.e., ID values must uniquely identify the elements which bear them.
6187 *
6188 * [ VC: One ID per Element Type ]
6189 * No element type may have more than one ID attribute specified.
6190 *
6191 * [ VC: ID Attribute Default ]
6192 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6193 *
6194 * [ VC: IDREF ]
6195 * Values of type IDREF must match the Name production, and values
6196 * of type IDREFS must match Names; each IDREF Name must match the value
6197 * of an ID attribute on some element in the XML document; i.e. IDREF
6198 * values must match the value of some ID attribute.
6199 *
6200 * [ VC: Entity Name ]
6201 * Values of type ENTITY must match the Name production, values
6202 * of type ENTITIES must match Names; each Entity Name must match the
6203 * name of an unparsed entity declared in the DTD.
6204 *
6205 * [ VC: Name Token ]
6206 * Values of type NMTOKEN must match the Nmtoken production; values
6207 * of type NMTOKENS must match Nmtokens.
6208 *
6209 * Returns the attribute type
6210 */
6211 int
xmlParseAttributeType(xmlParserCtxtPtr ctxt,xmlEnumerationPtr * tree)6212 xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6213 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6214 SKIP(5);
6215 return(XML_ATTRIBUTE_CDATA);
6216 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6217 SKIP(6);
6218 return(XML_ATTRIBUTE_IDREFS);
6219 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6220 SKIP(5);
6221 return(XML_ATTRIBUTE_IDREF);
6222 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6223 SKIP(2);
6224 return(XML_ATTRIBUTE_ID);
6225 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6226 SKIP(6);
6227 return(XML_ATTRIBUTE_ENTITY);
6228 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6229 SKIP(8);
6230 return(XML_ATTRIBUTE_ENTITIES);
6231 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6232 SKIP(8);
6233 return(XML_ATTRIBUTE_NMTOKENS);
6234 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6235 SKIP(7);
6236 return(XML_ATTRIBUTE_NMTOKEN);
6237 }
6238 return(xmlParseEnumeratedType(ctxt, tree));
6239 }
6240
6241 /**
6242 * xmlParseAttributeListDecl:
6243 * @ctxt: an XML parser context
6244 *
6245 * DEPRECATED: Internal function, don't use.
6246 *
6247 * Parse an attribute list declaration for an element. Always consumes '<!'.
6248 *
6249 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6250 *
6251 * [53] AttDef ::= S Name S AttType S DefaultDecl
6252 *
6253 */
6254 void
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt)6255 xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6256 const xmlChar *elemName;
6257 const xmlChar *attrName;
6258 xmlEnumerationPtr tree;
6259
6260 if ((CUR != '<') || (NXT(1) != '!'))
6261 return;
6262 SKIP(2);
6263
6264 if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6265 int inputid = ctxt->input->id;
6266
6267 SKIP(7);
6268 if (SKIP_BLANKS_PE == 0) {
6269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6270 "Space required after '<!ATTLIST'\n");
6271 }
6272 elemName = xmlParseName(ctxt);
6273 if (elemName == NULL) {
6274 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6275 "ATTLIST: no name for Element\n");
6276 return;
6277 }
6278 SKIP_BLANKS_PE;
6279 GROW;
6280 while ((RAW != '>') && (PARSER_STOPPED(ctxt) == 0)) {
6281 int type;
6282 int def;
6283 xmlChar *defaultValue = NULL;
6284
6285 GROW;
6286 tree = NULL;
6287 attrName = xmlParseName(ctxt);
6288 if (attrName == NULL) {
6289 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6290 "ATTLIST: no name for Attribute\n");
6291 break;
6292 }
6293 GROW;
6294 if (SKIP_BLANKS_PE == 0) {
6295 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6296 "Space required after the attribute name\n");
6297 break;
6298 }
6299
6300 type = xmlParseAttributeType(ctxt, &tree);
6301 if (type <= 0) {
6302 break;
6303 }
6304
6305 GROW;
6306 if (SKIP_BLANKS_PE == 0) {
6307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6308 "Space required after the attribute type\n");
6309 if (tree != NULL)
6310 xmlFreeEnumeration(tree);
6311 break;
6312 }
6313
6314 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6315 if (def <= 0) {
6316 if (defaultValue != NULL)
6317 xmlFree(defaultValue);
6318 if (tree != NULL)
6319 xmlFreeEnumeration(tree);
6320 break;
6321 }
6322 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6323 xmlAttrNormalizeSpace(defaultValue, defaultValue);
6324
6325 GROW;
6326 if (RAW != '>') {
6327 if (SKIP_BLANKS_PE == 0) {
6328 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6329 "Space required after the attribute default value\n");
6330 if (defaultValue != NULL)
6331 xmlFree(defaultValue);
6332 if (tree != NULL)
6333 xmlFreeEnumeration(tree);
6334 break;
6335 }
6336 }
6337 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6338 (ctxt->sax->attributeDecl != NULL))
6339 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6340 type, def, defaultValue, tree);
6341 else if (tree != NULL)
6342 xmlFreeEnumeration(tree);
6343
6344 if ((ctxt->sax2) && (defaultValue != NULL) &&
6345 (def != XML_ATTRIBUTE_IMPLIED) &&
6346 (def != XML_ATTRIBUTE_REQUIRED)) {
6347 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6348 }
6349 if (ctxt->sax2) {
6350 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6351 }
6352 if (defaultValue != NULL)
6353 xmlFree(defaultValue);
6354 GROW;
6355 }
6356 if (RAW == '>') {
6357 if (inputid != ctxt->input->id) {
6358 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6359 "Attribute list declaration doesn't start and"
6360 " stop in the same entity\n");
6361 }
6362 NEXT;
6363 }
6364 }
6365 }
6366
6367 /**
6368 * xmlParseElementMixedContentDecl:
6369 * @ctxt: an XML parser context
6370 * @inputchk: the input used for the current entity, needed for boundary checks
6371 *
6372 * DEPRECATED: Internal function, don't use.
6373 *
6374 * parse the declaration for a Mixed Element content
6375 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6376 *
6377 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6378 * '(' S? '#PCDATA' S? ')'
6379 *
6380 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6381 *
6382 * [ VC: No Duplicate Types ]
6383 * The same name must not appear more than once in a single
6384 * mixed-content declaration.
6385 *
6386 * returns: the list of the xmlElementContentPtr describing the element choices
6387 */
6388 xmlElementContentPtr
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6389 xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6390 xmlElementContentPtr ret = NULL, cur = NULL, n;
6391 const xmlChar *elem = NULL;
6392
6393 GROW;
6394 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6395 SKIP(7);
6396 SKIP_BLANKS_PE;
6397 if (RAW == ')') {
6398 if (ctxt->input->id != inputchk) {
6399 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6400 "Element content declaration doesn't start and"
6401 " stop in the same entity\n");
6402 }
6403 NEXT;
6404 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6405 if (ret == NULL)
6406 goto mem_error;
6407 if (RAW == '*') {
6408 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6409 NEXT;
6410 }
6411 return(ret);
6412 }
6413 if ((RAW == '(') || (RAW == '|')) {
6414 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6415 if (ret == NULL)
6416 goto mem_error;
6417 }
6418 while ((RAW == '|') && (PARSER_STOPPED(ctxt) == 0)) {
6419 NEXT;
6420 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6421 if (n == NULL)
6422 goto mem_error;
6423 if (elem == NULL) {
6424 n->c1 = cur;
6425 if (cur != NULL)
6426 cur->parent = n;
6427 ret = cur = n;
6428 } else {
6429 cur->c2 = n;
6430 n->parent = cur;
6431 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6432 if (n->c1 == NULL)
6433 goto mem_error;
6434 n->c1->parent = n;
6435 cur = n;
6436 }
6437 SKIP_BLANKS_PE;
6438 elem = xmlParseName(ctxt);
6439 if (elem == NULL) {
6440 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6441 "xmlParseElementMixedContentDecl : Name expected\n");
6442 xmlFreeDocElementContent(ctxt->myDoc, ret);
6443 return(NULL);
6444 }
6445 SKIP_BLANKS_PE;
6446 GROW;
6447 }
6448 if ((RAW == ')') && (NXT(1) == '*')) {
6449 if (elem != NULL) {
6450 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6451 XML_ELEMENT_CONTENT_ELEMENT);
6452 if (cur->c2 == NULL)
6453 goto mem_error;
6454 cur->c2->parent = cur;
6455 }
6456 if (ret != NULL)
6457 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6458 if (ctxt->input->id != inputchk) {
6459 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6460 "Element content declaration doesn't start and"
6461 " stop in the same entity\n");
6462 }
6463 SKIP(2);
6464 } else {
6465 xmlFreeDocElementContent(ctxt->myDoc, ret);
6466 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6467 return(NULL);
6468 }
6469
6470 } else {
6471 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6472 }
6473 return(ret);
6474
6475 mem_error:
6476 xmlErrMemory(ctxt);
6477 xmlFreeDocElementContent(ctxt->myDoc, ret);
6478 return(NULL);
6479 }
6480
6481 /**
6482 * xmlParseElementChildrenContentDeclPriv:
6483 * @ctxt: an XML parser context
6484 * @inputchk: the input used for the current entity, needed for boundary checks
6485 * @depth: the level of recursion
6486 *
6487 * parse the declaration for a Mixed Element content
6488 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6489 *
6490 *
6491 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6492 *
6493 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6494 *
6495 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6496 *
6497 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6498 *
6499 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6500 * TODO Parameter-entity replacement text must be properly nested
6501 * with parenthesized groups. That is to say, if either of the
6502 * opening or closing parentheses in a choice, seq, or Mixed
6503 * construct is contained in the replacement text for a parameter
6504 * entity, both must be contained in the same replacement text. For
6505 * interoperability, if a parameter-entity reference appears in a
6506 * choice, seq, or Mixed construct, its replacement text should not
6507 * be empty, and neither the first nor last non-blank character of
6508 * the replacement text should be a connector (| or ,).
6509 *
6510 * Returns the tree of xmlElementContentPtr describing the element
6511 * hierarchy.
6512 */
6513 static xmlElementContentPtr
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt,int inputchk,int depth)6514 xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6515 int depth) {
6516 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
6517 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6518 const xmlChar *elem;
6519 xmlChar type = 0;
6520
6521 if (depth > maxDepth) {
6522 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
6523 "xmlParseElementChildrenContentDecl : depth %d too deep, "
6524 "use XML_PARSE_HUGE\n", depth);
6525 return(NULL);
6526 }
6527 SKIP_BLANKS_PE;
6528 GROW;
6529 if (RAW == '(') {
6530 int inputid = ctxt->input->id;
6531
6532 /* Recurse on first child */
6533 NEXT;
6534 SKIP_BLANKS_PE;
6535 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6536 depth + 1);
6537 if (cur == NULL)
6538 return(NULL);
6539 SKIP_BLANKS_PE;
6540 GROW;
6541 } else {
6542 elem = xmlParseName(ctxt);
6543 if (elem == NULL) {
6544 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6545 return(NULL);
6546 }
6547 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6548 if (cur == NULL) {
6549 xmlErrMemory(ctxt);
6550 return(NULL);
6551 }
6552 GROW;
6553 if (RAW == '?') {
6554 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6555 NEXT;
6556 } else if (RAW == '*') {
6557 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6558 NEXT;
6559 } else if (RAW == '+') {
6560 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6561 NEXT;
6562 } else {
6563 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6564 }
6565 GROW;
6566 }
6567 SKIP_BLANKS_PE;
6568 while ((RAW != ')') && (PARSER_STOPPED(ctxt) == 0)) {
6569 /*
6570 * Each loop we parse one separator and one element.
6571 */
6572 if (RAW == ',') {
6573 if (type == 0) type = CUR;
6574
6575 /*
6576 * Detect "Name | Name , Name" error
6577 */
6578 else if (type != CUR) {
6579 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6580 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6581 type);
6582 if ((last != NULL) && (last != ret))
6583 xmlFreeDocElementContent(ctxt->myDoc, last);
6584 if (ret != NULL)
6585 xmlFreeDocElementContent(ctxt->myDoc, ret);
6586 return(NULL);
6587 }
6588 NEXT;
6589
6590 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6591 if (op == NULL) {
6592 xmlErrMemory(ctxt);
6593 if ((last != NULL) && (last != ret))
6594 xmlFreeDocElementContent(ctxt->myDoc, last);
6595 xmlFreeDocElementContent(ctxt->myDoc, ret);
6596 return(NULL);
6597 }
6598 if (last == NULL) {
6599 op->c1 = ret;
6600 if (ret != NULL)
6601 ret->parent = op;
6602 ret = cur = op;
6603 } else {
6604 cur->c2 = op;
6605 if (op != NULL)
6606 op->parent = cur;
6607 op->c1 = last;
6608 if (last != NULL)
6609 last->parent = op;
6610 cur =op;
6611 last = NULL;
6612 }
6613 } else if (RAW == '|') {
6614 if (type == 0) type = CUR;
6615
6616 /*
6617 * Detect "Name , Name | Name" error
6618 */
6619 else if (type != CUR) {
6620 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6621 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6622 type);
6623 if ((last != NULL) && (last != ret))
6624 xmlFreeDocElementContent(ctxt->myDoc, last);
6625 if (ret != NULL)
6626 xmlFreeDocElementContent(ctxt->myDoc, ret);
6627 return(NULL);
6628 }
6629 NEXT;
6630
6631 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6632 if (op == NULL) {
6633 xmlErrMemory(ctxt);
6634 if ((last != NULL) && (last != ret))
6635 xmlFreeDocElementContent(ctxt->myDoc, last);
6636 if (ret != NULL)
6637 xmlFreeDocElementContent(ctxt->myDoc, ret);
6638 return(NULL);
6639 }
6640 if (last == NULL) {
6641 op->c1 = ret;
6642 if (ret != NULL)
6643 ret->parent = op;
6644 ret = cur = op;
6645 } else {
6646 cur->c2 = op;
6647 if (op != NULL)
6648 op->parent = cur;
6649 op->c1 = last;
6650 if (last != NULL)
6651 last->parent = op;
6652 cur =op;
6653 last = NULL;
6654 }
6655 } else {
6656 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6657 if ((last != NULL) && (last != ret))
6658 xmlFreeDocElementContent(ctxt->myDoc, last);
6659 if (ret != NULL)
6660 xmlFreeDocElementContent(ctxt->myDoc, ret);
6661 return(NULL);
6662 }
6663 GROW;
6664 SKIP_BLANKS_PE;
6665 GROW;
6666 if (RAW == '(') {
6667 int inputid = ctxt->input->id;
6668 /* Recurse on second child */
6669 NEXT;
6670 SKIP_BLANKS_PE;
6671 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6672 depth + 1);
6673 if (last == NULL) {
6674 if (ret != NULL)
6675 xmlFreeDocElementContent(ctxt->myDoc, ret);
6676 return(NULL);
6677 }
6678 SKIP_BLANKS_PE;
6679 } else {
6680 elem = xmlParseName(ctxt);
6681 if (elem == NULL) {
6682 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6683 if (ret != NULL)
6684 xmlFreeDocElementContent(ctxt->myDoc, ret);
6685 return(NULL);
6686 }
6687 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6688 if (last == NULL) {
6689 xmlErrMemory(ctxt);
6690 if (ret != NULL)
6691 xmlFreeDocElementContent(ctxt->myDoc, ret);
6692 return(NULL);
6693 }
6694 if (RAW == '?') {
6695 last->ocur = XML_ELEMENT_CONTENT_OPT;
6696 NEXT;
6697 } else if (RAW == '*') {
6698 last->ocur = XML_ELEMENT_CONTENT_MULT;
6699 NEXT;
6700 } else if (RAW == '+') {
6701 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6702 NEXT;
6703 } else {
6704 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6705 }
6706 }
6707 SKIP_BLANKS_PE;
6708 GROW;
6709 }
6710 if ((cur != NULL) && (last != NULL)) {
6711 cur->c2 = last;
6712 if (last != NULL)
6713 last->parent = cur;
6714 }
6715 if (ctxt->input->id != inputchk) {
6716 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6717 "Element content declaration doesn't start and stop in"
6718 " the same entity\n");
6719 }
6720 NEXT;
6721 if (RAW == '?') {
6722 if (ret != NULL) {
6723 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6724 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6725 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6726 else
6727 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6728 }
6729 NEXT;
6730 } else if (RAW == '*') {
6731 if (ret != NULL) {
6732 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6733 cur = ret;
6734 /*
6735 * Some normalization:
6736 * (a | b* | c?)* == (a | b | c)*
6737 */
6738 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6739 if ((cur->c1 != NULL) &&
6740 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6741 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6742 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6743 if ((cur->c2 != NULL) &&
6744 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6745 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6746 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6747 cur = cur->c2;
6748 }
6749 }
6750 NEXT;
6751 } else if (RAW == '+') {
6752 if (ret != NULL) {
6753 int found = 0;
6754
6755 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6756 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6757 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6758 else
6759 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6760 /*
6761 * Some normalization:
6762 * (a | b*)+ == (a | b)*
6763 * (a | b?)+ == (a | b)*
6764 */
6765 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6766 if ((cur->c1 != NULL) &&
6767 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6768 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6769 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6770 found = 1;
6771 }
6772 if ((cur->c2 != NULL) &&
6773 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6774 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6775 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6776 found = 1;
6777 }
6778 cur = cur->c2;
6779 }
6780 if (found)
6781 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6782 }
6783 NEXT;
6784 }
6785 return(ret);
6786 }
6787
6788 /**
6789 * xmlParseElementChildrenContentDecl:
6790 * @ctxt: an XML parser context
6791 * @inputchk: the input used for the current entity, needed for boundary checks
6792 *
6793 * DEPRECATED: Internal function, don't use.
6794 *
6795 * parse the declaration for a Mixed Element content
6796 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6797 *
6798 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6799 *
6800 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6801 *
6802 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6803 *
6804 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6805 *
6806 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6807 * TODO Parameter-entity replacement text must be properly nested
6808 * with parenthesized groups. That is to say, if either of the
6809 * opening or closing parentheses in a choice, seq, or Mixed
6810 * construct is contained in the replacement text for a parameter
6811 * entity, both must be contained in the same replacement text. For
6812 * interoperability, if a parameter-entity reference appears in a
6813 * choice, seq, or Mixed construct, its replacement text should not
6814 * be empty, and neither the first nor last non-blank character of
6815 * the replacement text should be a connector (| or ,).
6816 *
6817 * Returns the tree of xmlElementContentPtr describing the element
6818 * hierarchy.
6819 */
6820 xmlElementContentPtr
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt,int inputchk)6821 xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6822 /* stub left for API/ABI compat */
6823 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6824 }
6825
6826 /**
6827 * xmlParseElementContentDecl:
6828 * @ctxt: an XML parser context
6829 * @name: the name of the element being defined.
6830 * @result: the Element Content pointer will be stored here if any
6831 *
6832 * DEPRECATED: Internal function, don't use.
6833 *
6834 * parse the declaration for an Element content either Mixed or Children,
6835 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6836 *
6837 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6838 *
6839 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6840 */
6841
6842 int
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,const xmlChar * name,xmlElementContentPtr * result)6843 xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6844 xmlElementContentPtr *result) {
6845
6846 xmlElementContentPtr tree = NULL;
6847 int inputid = ctxt->input->id;
6848 int res;
6849
6850 *result = NULL;
6851
6852 if (RAW != '(') {
6853 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6854 "xmlParseElementContentDecl : %s '(' expected\n", name);
6855 return(-1);
6856 }
6857 NEXT;
6858 GROW;
6859 SKIP_BLANKS_PE;
6860 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6861 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6862 res = XML_ELEMENT_TYPE_MIXED;
6863 } else {
6864 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6865 res = XML_ELEMENT_TYPE_ELEMENT;
6866 }
6867 SKIP_BLANKS_PE;
6868 *result = tree;
6869 return(res);
6870 }
6871
6872 /**
6873 * xmlParseElementDecl:
6874 * @ctxt: an XML parser context
6875 *
6876 * DEPRECATED: Internal function, don't use.
6877 *
6878 * Parse an element declaration. Always consumes '<!'.
6879 *
6880 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6881 *
6882 * [ VC: Unique Element Type Declaration ]
6883 * No element type may be declared more than once
6884 *
6885 * Returns the type of the element, or -1 in case of error
6886 */
6887 int
xmlParseElementDecl(xmlParserCtxtPtr ctxt)6888 xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6889 const xmlChar *name;
6890 int ret = -1;
6891 xmlElementContentPtr content = NULL;
6892
6893 if ((CUR != '<') || (NXT(1) != '!'))
6894 return(ret);
6895 SKIP(2);
6896
6897 /* GROW; done in the caller */
6898 if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6899 int inputid = ctxt->input->id;
6900
6901 SKIP(7);
6902 if (SKIP_BLANKS_PE == 0) {
6903 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6904 "Space required after 'ELEMENT'\n");
6905 return(-1);
6906 }
6907 name = xmlParseName(ctxt);
6908 if (name == NULL) {
6909 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6910 "xmlParseElementDecl: no name for Element\n");
6911 return(-1);
6912 }
6913 if (SKIP_BLANKS_PE == 0) {
6914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6915 "Space required after the element name\n");
6916 }
6917 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6918 SKIP(5);
6919 /*
6920 * Element must always be empty.
6921 */
6922 ret = XML_ELEMENT_TYPE_EMPTY;
6923 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6924 (NXT(2) == 'Y')) {
6925 SKIP(3);
6926 /*
6927 * Element is a generic container.
6928 */
6929 ret = XML_ELEMENT_TYPE_ANY;
6930 } else if (RAW == '(') {
6931 ret = xmlParseElementContentDecl(ctxt, name, &content);
6932 } else {
6933 /*
6934 * [ WFC: PEs in Internal Subset ] error handling.
6935 */
6936 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6937 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6938 return(-1);
6939 }
6940
6941 SKIP_BLANKS_PE;
6942
6943 if (RAW != '>') {
6944 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6945 if (content != NULL) {
6946 xmlFreeDocElementContent(ctxt->myDoc, content);
6947 }
6948 } else {
6949 if (inputid != ctxt->input->id) {
6950 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6951 "Element declaration doesn't start and stop in"
6952 " the same entity\n");
6953 }
6954
6955 NEXT;
6956 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6957 (ctxt->sax->elementDecl != NULL)) {
6958 if (content != NULL)
6959 content->parent = NULL;
6960 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6961 content);
6962 if ((content != NULL) && (content->parent == NULL)) {
6963 /*
6964 * this is a trick: if xmlAddElementDecl is called,
6965 * instead of copying the full tree it is plugged directly
6966 * if called from the parser. Avoid duplicating the
6967 * interfaces or change the API/ABI
6968 */
6969 xmlFreeDocElementContent(ctxt->myDoc, content);
6970 }
6971 } else if (content != NULL) {
6972 xmlFreeDocElementContent(ctxt->myDoc, content);
6973 }
6974 }
6975 }
6976 return(ret);
6977 }
6978
/**
 * xmlParseConditionalSections:
 * @ctxt:  an XML parser context
 *
 * Parse a conditional section. The caller is expected to invoke this
 * with the input positioned on '<![', which is then consumed.
 *
 * [61] conditionalSect ::= includeSect | ignoreSect
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
 *
 * Nested INCLUDE sections are handled iteratively: the input id seen at
 * each open INCLUDE is pushed on a growable array (inputIds) and "depth"
 * counts the currently open INCLUDE sections. IGNORE sections are skipped
 * in place with their own nesting counter. The loop ends once depth is
 * back to zero, the parser is stopped, or an error jumps to the cleanup
 * label.
 */

static void
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
    /* Stack of input ids, one entry per open INCLUDE section. */
    int *inputIds = NULL;
    /* Allocated capacity of inputIds, in elements. */
    size_t inputIdsSize = 0;
    /* Number of INCLUDE sections currently open. */
    size_t depth = 0;

    while (PARSER_STOPPED(ctxt) == 0) {
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
            /* Input in which this section's '<![' was found. */
            int id = ctxt->input->id;

            SKIP(3);
            SKIP_BLANKS_PE;

            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
                SKIP(7);
                SKIP_BLANKS_PE;
                if (RAW != '[') {
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
                    xmlHaltParser(ctxt);
                    goto error;
                }
                /* The keyword and '[' must come from the same input. */
                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
                NEXT;

                /* Grow the id stack (4, then doubling) when it is full. */
                if (inputIdsSize <= depth) {
                    int *tmp;

                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
                    tmp = (int *) xmlRealloc(inputIds,
                            inputIdsSize * sizeof(int));
                    if (tmp == NULL) {
                        /* inputIds is still valid and freed at "error". */
                        xmlErrMemory(ctxt);
                        goto error;
                    }
                    inputIds = tmp;
                }
                inputIds[depth] = id;
                depth++;
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
                /* Nesting level of '<![' seen inside the ignored text. */
                size_t ignoreDepth = 0;

                SKIP(6);
                SKIP_BLANKS_PE;
                if (RAW != '[') {
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
                    xmlHaltParser(ctxt);
                    goto error;
                }
                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
                NEXT;

                /*
                 * Skip the ignored content, only tracking nested
                 * '<![' ... ']]>' pairs until the matching ']]>'.
                 */
                while (PARSER_STOPPED(ctxt) == 0) {
                    if (RAW == 0) {
                        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
                        goto error;
                    }
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                        SKIP(3);
                        ignoreDepth++;
                        /* Check for integer overflow */
                        if (ignoreDepth == 0) {
                            xmlErrMemory(ctxt);
                            goto error;
                        }
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
                               (NXT(2) == '>')) {
                        SKIP(3);
                        /* Level 0 means this ']]>' closes the section. */
                        if (ignoreDepth == 0)
                            break;
                        ignoreDepth--;
                    } else {
                        NEXT;
                    }
                }

                if (ctxt->input->id != id) {
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                   "All markup of the conditional section is"
                                   " not in the same entity\n");
                }
            } else {
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
                xmlHaltParser(ctxt);
                goto error;
            }
        } else if ((depth > 0) &&
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
            /* End of the innermost open INCLUDE section. */
            depth--;
            if (ctxt->input->id != inputIds[depth]) {
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                               "All markup of the conditional section is not"
                               " in the same entity\n");
            }
            SKIP(3);
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
            /* Ordinary declaration inside an INCLUDE section. */
            xmlParseMarkupDecl(ctxt);
        } else {
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
            xmlHaltParser(ctxt);
            goto error;
        }

        /* Every open INCLUDE section has been closed: we are done. */
        if (depth == 0)
            break;

        SKIP_BLANKS_PE;
        SHRINK;
        GROW;
    }

error:
    /* Shared exit for success and failure: release the id stack. */
    xmlFree(inputIds);
}
7113
7114 /**
7115 * xmlParseMarkupDecl:
7116 * @ctxt: an XML parser context
7117 *
7118 * DEPRECATED: Internal function, don't use.
7119 *
7120 * Parse markup declarations. Always consumes '<!' or '<?'.
7121 *
7122 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7123 * NotationDecl | PI | Comment
7124 *
7125 * [ VC: Proper Declaration/PE Nesting ]
7126 * Parameter-entity replacement text must be properly nested with
7127 * markup declarations. That is to say, if either the first character
7128 * or the last character of a markup declaration (markupdecl above) is
7129 * contained in the replacement text for a parameter-entity reference,
7130 * both must be contained in the same replacement text.
7131 *
7132 * [ WFC: PEs in Internal Subset ]
7133 * In the internal DTD subset, parameter-entity references can occur
7134 * only where markup declarations can occur, not within markup declarations.
7135 * (This does not apply to references that occur in external parameter
7136 * entities or to the external subset.)
7137 */
7138 void
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt)7139 xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7140 GROW;
7141 if (CUR == '<') {
7142 if (NXT(1) == '!') {
7143 switch (NXT(2)) {
7144 case 'E':
7145 if (NXT(3) == 'L')
7146 xmlParseElementDecl(ctxt);
7147 else if (NXT(3) == 'N')
7148 xmlParseEntityDecl(ctxt);
7149 else
7150 SKIP(2);
7151 break;
7152 case 'A':
7153 xmlParseAttributeListDecl(ctxt);
7154 break;
7155 case 'N':
7156 xmlParseNotationDecl(ctxt);
7157 break;
7158 case '-':
7159 xmlParseComment(ctxt);
7160 break;
7161 default:
7162 /* there is an error but it will be detected later */
7163 SKIP(2);
7164 break;
7165 }
7166 } else if (NXT(1) == '?') {
7167 xmlParsePI(ctxt);
7168 }
7169 }
7170 }
7171
7172 /**
7173 * xmlParseTextDecl:
7174 * @ctxt: an XML parser context
7175 *
7176 * DEPRECATED: Internal function, don't use.
7177 *
7178 * parse an XML declaration header for external entities
7179 *
7180 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7181 */
7182
7183 void
xmlParseTextDecl(xmlParserCtxtPtr ctxt)7184 xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7185 xmlChar *version;
7186
7187 /*
7188 * We know that '<?xml' is here.
7189 */
7190 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7191 SKIP(5);
7192 } else {
7193 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7194 return;
7195 }
7196
7197 if (SKIP_BLANKS == 0) {
7198 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7199 "Space needed after '<?xml'\n");
7200 }
7201
7202 /*
7203 * We may have the VersionInfo here.
7204 */
7205 version = xmlParseVersionInfo(ctxt);
7206 if (version == NULL) {
7207 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7208 if (version == NULL) {
7209 xmlErrMemory(ctxt);
7210 return;
7211 }
7212 } else {
7213 if (SKIP_BLANKS == 0) {
7214 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7215 "Space needed here\n");
7216 }
7217 }
7218 ctxt->input->version = version;
7219
7220 /*
7221 * We must have the encoding declaration
7222 */
7223 xmlParseEncodingDecl(ctxt);
7224
7225 SKIP_BLANKS;
7226 if ((RAW == '?') && (NXT(1) == '>')) {
7227 SKIP(2);
7228 } else if (RAW == '>') {
7229 /* Deprecated old WD ... */
7230 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7231 NEXT;
7232 } else {
7233 int c;
7234
7235 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7236 while ((PARSER_STOPPED(ctxt) == 0) && ((c = CUR) != 0)) {
7237 NEXT;
7238 if (c == '>')
7239 break;
7240 }
7241 }
7242 }
7243
7244 /**
7245 * xmlParseExternalSubset:
7246 * @ctxt: an XML parser context
7247 * @ExternalID: the external identifier
7248 * @SystemID: the system identifier (or URL)
7249 *
7250 * parse Markup declarations from an external subset
7251 *
7252 * [30] extSubset ::= textDecl? extSubsetDecl
7253 *
7254 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7255 */
7256 void
xmlParseExternalSubset(xmlParserCtxtPtr ctxt,const xmlChar * ExternalID,const xmlChar * SystemID)7257 xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7258 const xmlChar *SystemID) {
7259 int oldInputNr;
7260
7261 xmlCtxtInitializeLate(ctxt);
7262
7263 xmlDetectEncoding(ctxt);
7264
7265 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7266 xmlParseTextDecl(ctxt);
7267 }
7268 if (ctxt->myDoc == NULL) {
7269 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7270 if (ctxt->myDoc == NULL) {
7271 xmlErrMemory(ctxt);
7272 return;
7273 }
7274 ctxt->myDoc->properties = XML_DOC_INTERNAL;
7275 }
7276 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL) &&
7277 (xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID) == NULL)) {
7278 xmlErrMemory(ctxt);
7279 }
7280
7281 ctxt->inSubset = 2;
7282 oldInputNr = ctxt->inputNr;
7283
7284 SKIP_BLANKS_PE;
7285 while (((RAW != 0) || (ctxt->inputNr > oldInputNr)) &&
7286 (!PARSER_STOPPED(ctxt))) {
7287 GROW;
7288 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7289 xmlParseConditionalSections(ctxt);
7290 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7291 xmlParseMarkupDecl(ctxt);
7292 } else {
7293 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7294 xmlHaltParser(ctxt);
7295 return;
7296 }
7297 SKIP_BLANKS_PE;
7298 SHRINK;
7299 }
7300
7301 while (ctxt->inputNr > oldInputNr)
7302 xmlPopPE(ctxt);
7303
7304 if (RAW != 0) {
7305 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7306 }
7307 }
7308
/**
 * xmlParseReference:
 * @ctxt:  an XML parser context
 *
 * DEPRECATED: Internal function, don't use.
 *
 * Parse and handle a reference found in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, in a call to reference() when
 * entities are not substituted, or in the entity's node tree being
 * copied under the current node when they are.
 *
 * Does nothing unless the input is positioned on '&', which is then
 * consumed.
 *
 * [67] Reference ::= EntityRef | CharRef
 */
void
xmlParseReference(xmlParserCtxtPtr ctxt) {
    xmlEntityPtr ent = NULL;
    const xmlChar *name;
    xmlChar *val;

    if (RAW != '&')
        return;

    /*
     * Simple case of a CharRef
     */
    if (NXT(1) == '#') {
        int i = 0;
        xmlChar out[16];
        int value = xmlParseCharRef(ctxt);

        /* A zero value is treated as "nothing to emit". */
        if (value == 0)
            return;

        /*
         * Just encode the value in UTF-8
         */
        COPY_BUF(out, i, value);
        out[i] = 0;
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, out, i);
        return;
    }

    /*
     * We are seeing an entity reference
     */
    name = xmlParseEntityRefInternal(ctxt);
    if (name == NULL)
        return;
    ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0);
    if (ent == NULL) {
        /*
         * Create a reference for undeclared entities.
         */
        if ((ctxt->replaceEntities == 0) &&
            (ctxt->sax != NULL) &&
            (ctxt->disableSAX == 0) &&
            (ctxt->sax->reference != NULL)) {
            ctxt->sax->reference(ctxt->userData, name);
        }
        return;
    }
    /* Nothing more is done once the document is not well-formed. */
    if (!ctxt->wellFormed)
        return;

    /* special case of predefined entities */
    if ((ent->name == NULL) ||
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
        val = ent->content;
        if (val == NULL) return;
        /*
         * inline the entity.
         */
        if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
        return;
    }

    /*
     * Some users try to parse entities on their own and used to set
     * the renamed "checked" member. Fix the flags to cover this
     * case.
     */
    if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
        ent->flags |= XML_ENT_PARSED;

    /*
     * The first reference to the entity triggers a parsing phase
     * where ent->children is filled with the result of the parsing.
     * Note: external parsed entities will not be loaded, it is not
     * required for a non-validating parser, unless the parsing option
     * of validating, or substituting entities were given. Doing so is
     * far more secure as the parser will only process data coming from
     * the document entity by default.
     *
     * FIXME: This doesn't work correctly since entities can be
     * expanded with different namespace declarations in scope.
     * For example:
     *
     * <!DOCTYPE doc [
     *   <!ENTITY ent "<ns:elem/>">
     * ]>
     * <doc>
     *   <decl1 xmlns:ns="urn:ns1">
     *     &ent;
     *   </decl1>
     *   <decl2 xmlns:ns="urn:ns2">
     *     &ent;
     *   </decl2>
     * </doc>
     *
     * Proposed fix:
     *
     * - Ignore current namespace declarations when parsing the
     *   entity. If a prefix can't be resolved, don't report an error
     *   but mark it as unresolved.
     * - Try to resolve these prefixes when expanding the entity.
     *   This will require a specialized version of xmlStaticCopyNode
     *   which can also make use of the namespace hash table to avoid
     *   quadratic behavior.
     *
     * Alternatively, we could simply reparse the entity on each
     * expansion like we already do with custom SAX callbacks.
     * External entity content should be cached in this case.
     */
    if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
        (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
         ((ctxt->replaceEntities) ||
          (ctxt->validate)))) {
        if ((ent->flags & XML_ENT_PARSED) == 0) {
            xmlCtxtParseEntity(ctxt, ent);
        } else if (ent->children == NULL) {
            /*
             * Probably running in SAX mode and the callbacks don't
             * build the entity content. Parse the entity again.
             *
             * This will also be triggered in normal tree builder mode
             * if an entity happens to be empty, causing unnecessary
             * reloads. It's hard to come up with a reliable check in
             * which mode we're running.
             */
            xmlCtxtParseEntity(ctxt, ent);
        }
    }

    /*
     * We also check for amplification if entities aren't substituted.
     * They might be expanded later.
     */
    if (xmlParserEntityCheck(ctxt, ent->expandedSize))
        return;

    if ((ctxt->sax == NULL) || (ctxt->disableSAX))
        return;

    if (ctxt->replaceEntities == 0) {
        /*
         * Create a reference
         */
        if (ctxt->sax->reference != NULL)
            ctxt->sax->reference(ctxt->userData, ent->name);
    } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
        xmlNodePtr copy, cur;

        /*
         * Seems we are generating the DOM content, copy the tree
         *
         * NOTE(review): the characters/cdataBlock calls in this branch
         * pass ctxt itself, whereas the calls above pass
         * ctxt->userData. Presumably this relies on userData == ctxt
         * when a tree is being built; confirm before changing.
         */
        cur = ent->children;

        /*
         * Handle first text node with SAX to coalesce text efficiently
         */
        if ((cur->type == XML_TEXT_NODE) ||
            (cur->type == XML_CDATA_SECTION_NODE)) {
            int len = xmlStrlen(cur->content);

            /* CDATA falls back to characters() without a cdataBlock. */
            if ((cur->type == XML_TEXT_NODE) ||
                (ctxt->sax->cdataBlock == NULL)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt, cur->content, len);
            } else {
                if (ctxt->sax->cdataBlock != NULL)
                    ctxt->sax->cdataBlock(ctxt, cur->content, len);
            }

            cur = cur->next;
        }

        while (cur != NULL) {
            xmlNodePtr last;

            /*
             * Handle last text node with SAX to coalesce text efficiently
             */
            if ((cur->next == NULL) &&
                ((cur->type == XML_TEXT_NODE) ||
                 (cur->type == XML_CDATA_SECTION_NODE))) {
                int len = xmlStrlen(cur->content);

                if ((cur->type == XML_TEXT_NODE) ||
                    (ctxt->sax->cdataBlock == NULL)) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt, cur->content, len);
                } else {
                    if (ctxt->sax->cdataBlock != NULL)
                        ctxt->sax->cdataBlock(ctxt, cur->content, len);
                }

                break;
            }

            /*
             * Reset coalesce buffer stats only for non-text nodes.
             */
            ctxt->nodemem = 0;
            ctxt->nodelen = 0;

            /* Copy the node (third argument 1) into the document. */
            copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);

            if (copy == NULL) {
                xmlErrMemory(ctxt);
                break;
            }

            if (ctxt->parseMode == XML_PARSE_READER) {
                /* Needed for reader */
                copy->extra = cur->extra;
                /* Maybe needed for reader */
                copy->_private = cur->_private;
            }

            /* Append the copy as the last child of the current node. */
            copy->parent = ctxt->node;
            last = ctxt->node->last;
            if (last == NULL) {
                ctxt->node->children = copy;
            } else {
                last->next = copy;
                copy->prev = last;
            }
            ctxt->node->last = copy;

            cur = cur->next;
        }
    }
}
7559
7560 static void
xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt,const xmlChar * name)7561 xmlHandleUndeclaredEntity(xmlParserCtxtPtr ctxt, const xmlChar *name) {
7562 /*
7563 * [ WFC: Entity Declared ]
7564 * In a document without any DTD, a document with only an
7565 * internal DTD subset which contains no parameter entity
7566 * references, or a document with "standalone='yes'", the
7567 * Name given in the entity reference must match that in an
7568 * entity declaration, except that well-formed documents
7569 * need not declare any of the following entities: amp, lt,
7570 * gt, apos, quot.
7571 * The declaration of a parameter entity must precede any
7572 * reference to it.
7573 * Similarly, the declaration of a general entity must
7574 * precede any reference to it which appears in a default
7575 * value in an attribute-list declaration. Note that if
7576 * entities are declared in the external subset or in
7577 * external parameter entities, a non-validating processor
7578 * is not obligated to read and process their declarations;
7579 * for such documents, the rule that an entity must be
7580 * declared is a well-formedness constraint only if
7581 * standalone='yes'.
7582 */
7583 if ((ctxt->standalone == 1) ||
7584 ((ctxt->hasExternalSubset == 0) &&
7585 (ctxt->hasPErefs == 0))) {
7586 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7587 "Entity '%s' not defined\n", name);
7588 } else if (ctxt->validate) {
7589 /*
7590 * [ VC: Entity Declared ]
7591 * In a document with an external subset or external
7592 * parameter entities with "standalone='no'", ...
7593 * ... The declaration of a parameter entity must
7594 * precede any reference to it...
7595 */
7596 xmlValidityError(ctxt, XML_ERR_UNDECLARED_ENTITY,
7597 "Entity '%s' not defined\n", name, NULL);
7598 } else if ((ctxt->loadsubset & ~XML_SKIP_IDS) ||
7599 ((ctxt->replaceEntities) &&
7600 ((ctxt->options & XML_PARSE_NO_XXE) == 0))) {
7601 /*
7602 * Also raise a non-fatal error
7603 *
7604 * - if the external subset is loaded and all entity declarations
7605 * should be available, or
7606 * - entity substition was requested without restricting
7607 * external entity access.
7608 */
7609 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7610 "Entity '%s' not defined\n", name);
7611 } else {
7612 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7613 "Entity '%s' not defined\n", name, NULL);
7614 }
7615
7616 ctxt->valid = 0;
7617 }
7618
7619 static xmlEntityPtr
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt,const xmlChar * name,int inAttr)7620 xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) {
7621 xmlEntityPtr ent = NULL;
7622
7623 /*
7624 * Predefined entities override any extra definition
7625 */
7626 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7627 ent = xmlGetPredefinedEntity(name);
7628 if (ent != NULL)
7629 return(ent);
7630 }
7631
7632 /*
7633 * Ask first SAX for entity resolution, otherwise try the
7634 * entities which may have stored in the parser context.
7635 */
7636 if (ctxt->sax != NULL) {
7637 if (ctxt->sax->getEntity != NULL)
7638 ent = ctxt->sax->getEntity(ctxt->userData, name);
7639 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7640 (ctxt->options & XML_PARSE_OLDSAX))
7641 ent = xmlGetPredefinedEntity(name);
7642 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7643 (ctxt->userData==ctxt)) {
7644 ent = xmlSAX2GetEntity(ctxt, name);
7645 }
7646 }
7647
7648 if (ent == NULL) {
7649 xmlHandleUndeclaredEntity(ctxt, name);
7650 }
7651
7652 /*
7653 * [ WFC: Parsed Entity ]
7654 * An entity reference must not contain the name of an
7655 * unparsed entity
7656 */
7657 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7658 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7659 "Entity reference to unparsed entity %s\n", name);
7660 ent = NULL;
7661 }
7662
7663 /*
7664 * [ WFC: No External Entity References ]
7665 * Attribute values cannot contain direct or indirect
7666 * entity references to external entities.
7667 */
7668 else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7669 if (inAttr) {
7670 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7671 "Attribute references external entity '%s'\n", name);
7672 ent = NULL;
7673 }
7674 }
7675
7676 return(ent);
7677 }
7678
7679 /**
7680 * xmlParseEntityRefInternal:
7681 * @ctxt: an XML parser context
7682 * @inAttr: whether we are in an attribute value
7683 *
7684 * Parse an entity reference. Always consumes '&'.
7685 *
7686 * [68] EntityRef ::= '&' Name ';'
7687 *
7688 * Returns the name, or NULL in case of error.
7689 */
7690 static const xmlChar *
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt)7691 xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt) {
7692 const xmlChar *name;
7693
7694 GROW;
7695
7696 if (RAW != '&')
7697 return(NULL);
7698 NEXT;
7699 name = xmlParseName(ctxt);
7700 if (name == NULL) {
7701 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7702 "xmlParseEntityRef: no name\n");
7703 return(NULL);
7704 }
7705 if (RAW != ';') {
7706 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7707 return(NULL);
7708 }
7709 NEXT;
7710
7711 return(name);
7712 }
7713
7714 /**
7715 * xmlParseEntityRef:
7716 * @ctxt: an XML parser context
7717 *
7718 * DEPRECATED: Internal function, don't use.
7719 *
7720 * Returns the xmlEntityPtr if found, or NULL otherwise.
7721 */
7722 xmlEntityPtr
xmlParseEntityRef(xmlParserCtxtPtr ctxt)7723 xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7724 const xmlChar *name;
7725
7726 if (ctxt == NULL)
7727 return(NULL);
7728
7729 name = xmlParseEntityRefInternal(ctxt);
7730 if (name == NULL)
7731 return(NULL);
7732
7733 return(xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 0));
7734 }
7735
7736 /**
7737 * xmlParseStringEntityRef:
7738 * @ctxt: an XML parser context
7739 * @str: a pointer to an index in the string
7740 *
7741 * parse ENTITY references declarations, but this version parses it from
7742 * a string value.
7743 *
7744 * [68] EntityRef ::= '&' Name ';'
7745 *
7746 * [ WFC: Entity Declared ]
7747 * In a document without any DTD, a document with only an internal DTD
7748 * subset which contains no parameter entity references, or a document
7749 * with "standalone='yes'", the Name given in the entity reference
7750 * must match that in an entity declaration, except that well-formed
7751 * documents need not declare any of the following entities: amp, lt,
7752 * gt, apos, quot. The declaration of a parameter entity must precede
7753 * any reference to it. Similarly, the declaration of a general entity
7754 * must precede any reference to it which appears in a default value in an
7755 * attribute-list declaration. Note that if entities are declared in the
7756 * external subset or in external parameter entities, a non-validating
7757 * processor is not obligated to read and process their declarations;
7758 * for such documents, the rule that an entity must be declared is a
7759 * well-formedness constraint only if standalone='yes'.
7760 *
7761 * [ WFC: Parsed Entity ]
7762 * An entity reference must not contain the name of an unparsed entity
7763 *
7764 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7765 * is updated to the current location in the string.
7766 */
7767 static xmlChar *
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,const xmlChar ** str)7768 xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7769 xmlChar *name;
7770 const xmlChar *ptr;
7771 xmlChar cur;
7772
7773 if ((str == NULL) || (*str == NULL))
7774 return(NULL);
7775 ptr = *str;
7776 cur = *ptr;
7777 if (cur != '&')
7778 return(NULL);
7779
7780 ptr++;
7781 name = xmlParseStringName(ctxt, &ptr);
7782 if (name == NULL) {
7783 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7784 "xmlParseStringEntityRef: no name\n");
7785 *str = ptr;
7786 return(NULL);
7787 }
7788 if (*ptr != ';') {
7789 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7790 xmlFree(name);
7791 *str = ptr;
7792 return(NULL);
7793 }
7794 ptr++;
7795
7796 *str = ptr;
7797 return(name);
7798 }
7799
7800 /**
7801 * xmlParsePEReference:
7802 * @ctxt: an XML parser context
7803 *
7804 * DEPRECATED: Internal function, don't use.
7805 *
7806 * Parse a parameter entity reference. Always consumes '%'.
7807 *
7808 * The entity content is handled directly by pushing it's content as
7809 * a new input stream.
7810 *
7811 * [69] PEReference ::= '%' Name ';'
7812 *
7813 * [ WFC: No Recursion ]
7814 * A parsed entity must not contain a recursive
7815 * reference to itself, either directly or indirectly.
7816 *
7817 * [ WFC: Entity Declared ]
7818 * In a document without any DTD, a document with only an internal DTD
7819 * subset which contains no parameter entity references, or a document
7820 * with "standalone='yes'", ... ... The declaration of a parameter
7821 * entity must precede any reference to it...
7822 *
7823 * [ VC: Entity Declared ]
7824 * In a document with an external subset or external parameter entities
7825 * with "standalone='no'", ... ... The declaration of a parameter entity
7826 * must precede any reference to it...
7827 *
7828 * [ WFC: In DTD ]
7829 * Parameter-entity references may only appear in the DTD.
7830 * NOTE: misleading but this is handled.
7831 */
7832 void
xmlParsePEReference(xmlParserCtxtPtr ctxt)7833 xmlParsePEReference(xmlParserCtxtPtr ctxt)
7834 {
7835 const xmlChar *name;
7836 xmlEntityPtr entity = NULL;
7837 xmlParserInputPtr input;
7838
7839 if (RAW != '%')
7840 return;
7841 NEXT;
7842 name = xmlParseName(ctxt);
7843 if (name == NULL) {
7844 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7845 return;
7846 }
7847 if (RAW != ';') {
7848 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7849 return;
7850 }
7851
7852 NEXT;
7853
7854 /* Must be set before xmlHandleUndeclaredEntity */
7855 ctxt->hasPErefs = 1;
7856
7857 /*
7858 * Request the entity from SAX
7859 */
7860 if ((ctxt->sax != NULL) &&
7861 (ctxt->sax->getParameterEntity != NULL))
7862 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7863
7864 if (entity == NULL) {
7865 xmlHandleUndeclaredEntity(ctxt, name);
7866 } else {
7867 /*
7868 * Internal checking in case the entity quest barfed
7869 */
7870 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7871 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7872 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7873 "Internal: %%%s; is not a parameter entity\n",
7874 name, NULL);
7875 } else {
7876 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7877 ((ctxt->options & XML_PARSE_NO_XXE) ||
7878 ((ctxt->loadsubset == 0) &&
7879 (ctxt->replaceEntities == 0) &&
7880 (ctxt->validate == 0))))
7881 return;
7882
7883 if (entity->flags & XML_ENT_EXPANDING) {
7884 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7885 xmlHaltParser(ctxt);
7886 return;
7887 }
7888
7889 input = xmlNewEntityInputStream(ctxt, entity);
7890 if (xmlPushInput(ctxt, input) < 0) {
7891 xmlFreeInputStream(input);
7892 return;
7893 }
7894
7895 entity->flags |= XML_ENT_EXPANDING;
7896
7897 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7898 xmlDetectEncoding(ctxt);
7899
7900 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7901 (IS_BLANK_CH(NXT(5)))) {
7902 xmlParseTextDecl(ctxt);
7903 }
7904 }
7905 }
7906 }
7907 }
7908
7909 /**
7910 * xmlLoadEntityContent:
7911 * @ctxt: an XML parser context
7912 * @entity: an unloaded system entity
7913 *
7914 * Load the content of an entity.
7915 *
7916 * Returns 0 in case of success and -1 in case of failure
7917 */
7918 static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt,xmlEntityPtr entity)7919 xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7920 xmlParserInputPtr oldinput, input = NULL;
7921 xmlParserInputPtr *oldinputTab;
7922 const xmlChar *oldencoding;
7923 xmlChar *content = NULL;
7924 xmlResourceType rtype;
7925 size_t length, i;
7926 int oldinputNr, oldinputMax;
7927 int ret = -1;
7928 int res;
7929
7930 if ((ctxt == NULL) || (entity == NULL) ||
7931 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7932 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7933 (entity->content != NULL)) {
7934 xmlFatalErr(ctxt, XML_ERR_ARGUMENT,
7935 "xmlLoadEntityContent parameter error");
7936 return(-1);
7937 }
7938
7939 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)
7940 rtype = XML_RESOURCE_PARAMETER_ENTITY;
7941 else
7942 rtype = XML_RESOURCE_GENERAL_ENTITY;
7943
7944 input = xmlLoadResource(ctxt, (char *) entity->URI,
7945 (char *) entity->ExternalID, rtype);
7946 if (input == NULL)
7947 return(-1);
7948
7949 oldinput = ctxt->input;
7950 oldinputNr = ctxt->inputNr;
7951 oldinputMax = ctxt->inputMax;
7952 oldinputTab = ctxt->inputTab;
7953 oldencoding = ctxt->encoding;
7954
7955 ctxt->input = NULL;
7956 ctxt->inputNr = 0;
7957 ctxt->inputMax = 1;
7958 ctxt->encoding = NULL;
7959 ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
7960 if (ctxt->inputTab == NULL) {
7961 xmlErrMemory(ctxt);
7962 xmlFreeInputStream(input);
7963 goto error;
7964 }
7965
7966 xmlBufResetInput(input->buf->buffer, input);
7967
7968 if (inputPush(ctxt, input) < 0) {
7969 xmlFreeInputStream(input);
7970 goto error;
7971 }
7972
7973 xmlDetectEncoding(ctxt);
7974
7975 /*
7976 * Parse a possible text declaration first
7977 */
7978 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7979 xmlParseTextDecl(ctxt);
7980 /*
7981 * An XML-1.0 document can't reference an entity not XML-1.0
7982 */
7983 if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
7984 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
7985 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
7986 "Version mismatch between document and entity\n");
7987 }
7988 }
7989
7990 length = input->cur - input->base;
7991 xmlBufShrink(input->buf->buffer, length);
7992 xmlSaturatedAdd(&ctxt->sizeentities, length);
7993
7994 while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
7995 ;
7996
7997 xmlBufResetInput(input->buf->buffer, input);
7998
7999 if (res < 0) {
8000 xmlCtxtErrIO(ctxt, input->buf->error, NULL);
8001 goto error;
8002 }
8003
8004 length = xmlBufUse(input->buf->buffer);
8005 if (length > INT_MAX) {
8006 xmlErrMemory(ctxt);
8007 goto error;
8008 }
8009
8010 content = xmlStrndup(xmlBufContent(input->buf->buffer), length);
8011 if (content == NULL) {
8012 xmlErrMemory(ctxt);
8013 goto error;
8014 }
8015
8016 for (i = 0; i < length; ) {
8017 int clen = length - i;
8018 int c = xmlGetUTF8Char(content + i, &clen);
8019
8020 if ((c < 0) || (!IS_CHAR(c))) {
8021 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8022 "xmlLoadEntityContent: invalid char value %d\n",
8023 content[i]);
8024 goto error;
8025 }
8026 i += clen;
8027 }
8028
8029 xmlSaturatedAdd(&ctxt->sizeentities, length);
8030 entity->content = content;
8031 entity->length = length;
8032 content = NULL;
8033 ret = 0;
8034
8035 error:
8036 while (ctxt->inputNr > 0)
8037 xmlFreeInputStream(inputPop(ctxt));
8038 xmlFree(ctxt->inputTab);
8039 xmlFree((xmlChar *) ctxt->encoding);
8040
8041 ctxt->input = oldinput;
8042 ctxt->inputNr = oldinputNr;
8043 ctxt->inputMax = oldinputMax;
8044 ctxt->inputTab = oldinputTab;
8045 ctxt->encoding = oldencoding;
8046
8047 xmlFree(content);
8048
8049 return(ret);
8050 }
8051
8052 /**
8053 * xmlParseStringPEReference:
8054 * @ctxt: an XML parser context
8055 * @str: a pointer to an index in the string
8056 *
8057 * parse PEReference declarations
8058 *
8059 * [69] PEReference ::= '%' Name ';'
8060 *
8061 * [ WFC: No Recursion ]
8062 * A parsed entity must not contain a recursive
8063 * reference to itself, either directly or indirectly.
8064 *
8065 * [ WFC: Entity Declared ]
8066 * In a document without any DTD, a document with only an internal DTD
8067 * subset which contains no parameter entity references, or a document
8068 * with "standalone='yes'", ... ... The declaration of a parameter
8069 * entity must precede any reference to it...
8070 *
8071 * [ VC: Entity Declared ]
8072 * In a document with an external subset or external parameter entities
8073 * with "standalone='no'", ... ... The declaration of a parameter entity
8074 * must precede any reference to it...
8075 *
8076 * [ WFC: In DTD ]
8077 * Parameter-entity references may only appear in the DTD.
8078 * NOTE: misleading but this is handled.
8079 *
8080 * Returns the string of the entity content.
8081 * str is updated to the current value of the index
8082 */
8083 static xmlEntityPtr
xmlParseStringPEReference(xmlParserCtxtPtr ctxt,const xmlChar ** str)8084 xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8085 const xmlChar *ptr;
8086 xmlChar cur;
8087 xmlChar *name;
8088 xmlEntityPtr entity = NULL;
8089
8090 if ((str == NULL) || (*str == NULL)) return(NULL);
8091 ptr = *str;
8092 cur = *ptr;
8093 if (cur != '%')
8094 return(NULL);
8095 ptr++;
8096 name = xmlParseStringName(ctxt, &ptr);
8097 if (name == NULL) {
8098 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8099 "xmlParseStringPEReference: no name\n");
8100 *str = ptr;
8101 return(NULL);
8102 }
8103 cur = *ptr;
8104 if (cur != ';') {
8105 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8106 xmlFree(name);
8107 *str = ptr;
8108 return(NULL);
8109 }
8110 ptr++;
8111
8112 /* Must be set before xmlHandleUndeclaredEntity */
8113 ctxt->hasPErefs = 1;
8114
8115 /*
8116 * Request the entity from SAX
8117 */
8118 if ((ctxt->sax != NULL) &&
8119 (ctxt->sax->getParameterEntity != NULL))
8120 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8121
8122 if (entity == NULL) {
8123 xmlHandleUndeclaredEntity(ctxt, name);
8124 } else {
8125 /*
8126 * Internal checking in case the entity quest barfed
8127 */
8128 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8129 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8130 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8131 "%%%s; is not a parameter entity\n",
8132 name, NULL);
8133 }
8134 }
8135
8136 xmlFree(name);
8137 *str = ptr;
8138 return(entity);
8139 }
8140
8141 /**
8142 * xmlParseDocTypeDecl:
8143 * @ctxt: an XML parser context
8144 *
8145 * DEPRECATED: Internal function, don't use.
8146 *
8147 * parse a DOCTYPE declaration
8148 *
8149 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8150 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8151 *
8152 * [ VC: Root Element Type ]
8153 * The Name in the document type declaration must match the element
8154 * type of the root element.
8155 */
8156
8157 void
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt)8158 xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8159 const xmlChar *name = NULL;
8160 xmlChar *ExternalID = NULL;
8161 xmlChar *URI = NULL;
8162
8163 /*
8164 * We know that '<!DOCTYPE' has been detected.
8165 */
8166 SKIP(9);
8167
8168 SKIP_BLANKS;
8169
8170 /*
8171 * Parse the DOCTYPE name.
8172 */
8173 name = xmlParseName(ctxt);
8174 if (name == NULL) {
8175 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8176 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8177 }
8178 ctxt->intSubName = name;
8179
8180 SKIP_BLANKS;
8181
8182 /*
8183 * Check for SystemID and ExternalID
8184 */
8185 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8186
8187 if ((URI != NULL) || (ExternalID != NULL)) {
8188 ctxt->hasExternalSubset = 1;
8189 }
8190 ctxt->extSubURI = URI;
8191 ctxt->extSubSystem = ExternalID;
8192
8193 SKIP_BLANKS;
8194
8195 /*
8196 * Create and update the internal subset.
8197 */
8198 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8199 (!ctxt->disableSAX))
8200 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8201
8202 /*
8203 * Is there any internal subset declarations ?
8204 * they are handled separately in xmlParseInternalSubset()
8205 */
8206 if (RAW == '[')
8207 return;
8208
8209 /*
8210 * We should be at the end of the DOCTYPE declaration.
8211 */
8212 if (RAW != '>') {
8213 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8214 }
8215 NEXT;
8216 }
8217
8218 /**
8219 * xmlParseInternalSubset:
8220 * @ctxt: an XML parser context
8221 *
8222 * parse the internal subset declaration
8223 *
8224 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8225 */
8226
8227 static void
xmlParseInternalSubset(xmlParserCtxtPtr ctxt)8228 xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8229 /*
8230 * Is there any DTD definition ?
8231 */
8232 if (RAW == '[') {
8233 int oldInputNr = ctxt->inputNr;
8234
8235 NEXT;
8236 /*
8237 * Parse the succession of Markup declarations and
8238 * PEReferences.
8239 * Subsequence (markupdecl | PEReference | S)*
8240 */
8241 SKIP_BLANKS;
8242 while (((RAW != ']') || (ctxt->inputNr > oldInputNr)) &&
8243 (PARSER_STOPPED(ctxt) == 0)) {
8244
8245 /*
8246 * Conditional sections are allowed from external entities included
8247 * by PE References in the internal subset.
8248 */
8249 if ((PARSER_EXTERNAL(ctxt)) &&
8250 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8251 xmlParseConditionalSections(ctxt);
8252 } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8253 xmlParseMarkupDecl(ctxt);
8254 } else if (RAW == '%') {
8255 xmlParsePEReference(ctxt);
8256 } else {
8257 xmlFatalErr(ctxt, XML_ERR_INT_SUBSET_NOT_FINISHED, NULL);
8258 break;
8259 }
8260 SKIP_BLANKS_PE;
8261 SHRINK;
8262 GROW;
8263 }
8264
8265 while (ctxt->inputNr > oldInputNr)
8266 xmlPopPE(ctxt);
8267
8268 if (RAW == ']') {
8269 NEXT;
8270 SKIP_BLANKS;
8271 }
8272 }
8273
8274 /*
8275 * We should be at the end of the DOCTYPE declaration.
8276 */
8277 if ((ctxt->wellFormed) && (RAW != '>')) {
8278 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8279 return;
8280 }
8281 NEXT;
8282 }
8283
8284 #ifdef LIBXML_SAX1_ENABLED
8285 /**
8286 * xmlParseAttribute:
8287 * @ctxt: an XML parser context
8288 * @value: a xmlChar ** used to store the value of the attribute
8289 *
8290 * DEPRECATED: Internal function, don't use.
8291 *
8292 * parse an attribute
8293 *
8294 * [41] Attribute ::= Name Eq AttValue
8295 *
8296 * [ WFC: No External Entity References ]
8297 * Attribute values cannot contain direct or indirect entity references
8298 * to external entities.
8299 *
8300 * [ WFC: No < in Attribute Values ]
8301 * The replacement text of any entity referred to directly or indirectly in
8302 * an attribute value (other than "<") must not contain a <.
8303 *
8304 * [ VC: Attribute Value Type ]
8305 * The attribute must have been declared; the value must be of the type
8306 * declared for it.
8307 *
8308 * [25] Eq ::= S? '=' S?
8309 *
8310 * With namespace:
8311 *
8312 * [NS 11] Attribute ::= QName Eq AttValue
8313 *
8314 * Also the case QName == xmlns:??? is handled independently as a namespace
8315 * definition.
8316 *
8317 * Returns the attribute name, and the value in *value.
8318 */
8319
8320 const xmlChar *
xmlParseAttribute(xmlParserCtxtPtr ctxt,xmlChar ** value)8321 xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8322 const xmlChar *name;
8323 xmlChar *val;
8324
8325 *value = NULL;
8326 GROW;
8327 name = xmlParseName(ctxt);
8328 if (name == NULL) {
8329 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8330 "error parsing attribute name\n");
8331 return(NULL);
8332 }
8333
8334 /*
8335 * read the value
8336 */
8337 SKIP_BLANKS;
8338 if (RAW == '=') {
8339 NEXT;
8340 SKIP_BLANKS;
8341 val = xmlParseAttValue(ctxt);
8342 } else {
8343 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8344 "Specification mandates value for attribute %s\n", name);
8345 return(name);
8346 }
8347
8348 /*
8349 * Check that xml:lang conforms to the specification
8350 * No more registered as an error, just generate a warning now
8351 * since this was deprecated in XML second edition
8352 */
8353 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8354 if (!xmlCheckLanguageID(val)) {
8355 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8356 "Malformed value for xml:lang : %s\n",
8357 val, NULL);
8358 }
8359 }
8360
8361 /*
8362 * Check that xml:space conforms to the specification
8363 */
8364 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8365 if (xmlStrEqual(val, BAD_CAST "default"))
8366 *(ctxt->space) = 0;
8367 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8368 *(ctxt->space) = 1;
8369 else {
8370 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8371 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8372 val, NULL);
8373 }
8374 }
8375
8376 *value = val;
8377 return(name);
8378 }
8379
8380 /**
8381 * xmlParseStartTag:
8382 * @ctxt: an XML parser context
8383 *
8384 * DEPRECATED: Internal function, don't use.
8385 *
8386 * Parse a start tag. Always consumes '<'.
8387 *
8388 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8389 *
8390 * [ WFC: Unique Att Spec ]
8391 * No attribute name may appear more than once in the same start-tag or
8392 * empty-element tag.
8393 *
8394 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8395 *
8396 * [ WFC: Unique Att Spec ]
8397 * No attribute name may appear more than once in the same start-tag or
8398 * empty-element tag.
8399 *
8400 * With namespace:
8401 *
8402 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8403 *
8404 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8405 *
8406 * Returns the element name parsed
8407 */
8408
8409 const xmlChar *
xmlParseStartTag(xmlParserCtxtPtr ctxt)8410 xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8411 const xmlChar *name;
8412 const xmlChar *attname;
8413 xmlChar *attvalue;
8414 const xmlChar **atts = ctxt->atts;
8415 int nbatts = 0;
8416 int maxatts = ctxt->maxatts;
8417 int i;
8418
8419 if (RAW != '<') return(NULL);
8420 NEXT1;
8421
8422 name = xmlParseName(ctxt);
8423 if (name == NULL) {
8424 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8425 "xmlParseStartTag: invalid element name\n");
8426 return(NULL);
8427 }
8428
8429 /*
8430 * Now parse the attributes, it ends up with the ending
8431 *
8432 * (S Attribute)* S?
8433 */
8434 SKIP_BLANKS;
8435 GROW;
8436
8437 while (((RAW != '>') &&
8438 ((RAW != '/') || (NXT(1) != '>')) &&
8439 (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
8440 attname = xmlParseAttribute(ctxt, &attvalue);
8441 if (attname == NULL)
8442 break;
8443 if (attvalue != NULL) {
8444 /*
8445 * [ WFC: Unique Att Spec ]
8446 * No attribute name may appear more than once in the same
8447 * start-tag or empty-element tag.
8448 */
8449 for (i = 0; i < nbatts;i += 2) {
8450 if (xmlStrEqual(atts[i], attname)) {
8451 xmlErrAttributeDup(ctxt, NULL, attname);
8452 xmlFree(attvalue);
8453 goto failed;
8454 }
8455 }
8456 /*
8457 * Add the pair to atts
8458 */
8459 if (atts == NULL) {
8460 maxatts = 22; /* allow for 10 attrs by default */
8461 atts = (const xmlChar **)
8462 xmlMalloc(maxatts * sizeof(xmlChar *));
8463 if (atts == NULL) {
8464 xmlErrMemory(ctxt);
8465 if (attvalue != NULL)
8466 xmlFree(attvalue);
8467 goto failed;
8468 }
8469 ctxt->atts = atts;
8470 ctxt->maxatts = maxatts;
8471 } else if (nbatts + 4 > maxatts) {
8472 const xmlChar **n;
8473
8474 maxatts *= 2;
8475 n = (const xmlChar **) xmlRealloc((void *) atts,
8476 maxatts * sizeof(const xmlChar *));
8477 if (n == NULL) {
8478 xmlErrMemory(ctxt);
8479 if (attvalue != NULL)
8480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 atts = n;
8484 ctxt->atts = atts;
8485 ctxt->maxatts = maxatts;
8486 }
8487 atts[nbatts++] = attname;
8488 atts[nbatts++] = attvalue;
8489 atts[nbatts] = NULL;
8490 atts[nbatts + 1] = NULL;
8491 } else {
8492 if (attvalue != NULL)
8493 xmlFree(attvalue);
8494 }
8495
8496 failed:
8497
8498 GROW
8499 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8500 break;
8501 if (SKIP_BLANKS == 0) {
8502 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8503 "attributes construct error\n");
8504 }
8505 SHRINK;
8506 GROW;
8507 }
8508
8509 /*
8510 * SAX: Start of Element !
8511 */
8512 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8513 (!ctxt->disableSAX)) {
8514 if (nbatts > 0)
8515 ctxt->sax->startElement(ctxt->userData, name, atts);
8516 else
8517 ctxt->sax->startElement(ctxt->userData, name, NULL);
8518 }
8519
8520 if (atts != NULL) {
8521 /* Free only the content strings */
8522 for (i = 1;i < nbatts;i+=2)
8523 if (atts[i] != NULL)
8524 xmlFree((xmlChar *) atts[i]);
8525 }
8526 return(name);
8527 }
8528
8529 /**
8530 * xmlParseEndTag1:
8531 * @ctxt: an XML parser context
8532 * @line: line of the start tag
8533 * @nsNr: number of namespaces on the start tag
8534 *
8535 * Parse an end tag. Always consumes '</'.
8536 *
8537 * [42] ETag ::= '</' Name S? '>'
8538 *
8539 * With namespace
8540 *
8541 * [NS 9] ETag ::= '</' QName S? '>'
8542 */
8543
8544 static void
xmlParseEndTag1(xmlParserCtxtPtr ctxt,int line)8545 xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8546 const xmlChar *name;
8547
8548 GROW;
8549 if ((RAW != '<') || (NXT(1) != '/')) {
8550 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8551 "xmlParseEndTag: '</' not found\n");
8552 return;
8553 }
8554 SKIP(2);
8555
8556 name = xmlParseNameAndCompare(ctxt,ctxt->name);
8557
8558 /*
8559 * We should definitely be at the ending "S? '>'" part
8560 */
8561 GROW;
8562 SKIP_BLANKS;
8563 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8564 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8565 } else
8566 NEXT1;
8567
8568 /*
8569 * [ WFC: Element Type Match ]
8570 * The Name in an element's end-tag must match the element type in the
8571 * start-tag.
8572 *
8573 */
8574 if (name != (xmlChar*)1) {
8575 if (name == NULL) name = BAD_CAST "unparsable";
8576 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8577 "Opening and ending tag mismatch: %s line %d and %s\n",
8578 ctxt->name, line, name);
8579 }
8580
8581 /*
8582 * SAX: End of Tag
8583 */
8584 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8585 (!ctxt->disableSAX))
8586 ctxt->sax->endElement(ctxt->userData, ctxt->name);
8587
8588 namePop(ctxt);
8589 spacePop(ctxt);
8590 }
8591
8592 /**
8593 * xmlParseEndTag:
8594 * @ctxt: an XML parser context
8595 *
8596 * DEPRECATED: Internal function, don't use.
8597 *
8598 * parse an end of tag
8599 *
8600 * [42] ETag ::= '</' Name S? '>'
8601 *
8602 * With namespace
8603 *
8604 * [NS 9] ETag ::= '</' QName S? '>'
8605 */
8606
8607 void
xmlParseEndTag(xmlParserCtxtPtr ctxt)8608 xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8609 xmlParseEndTag1(ctxt, 0);
8610 }
8611 #endif /* LIBXML_SAX1_ENABLED */
8612
8613 /************************************************************************
8614 * *
8615 * SAX 2 specific operations *
8616 * *
8617 ************************************************************************/
8618
8619 /**
8620 * xmlParseQNameHashed:
8621 * @ctxt: an XML parser context
8622 * @prefix: pointer to store the prefix part
8623 *
8624 * parse an XML Namespace QName
8625 *
8626 * [6] QName ::= (Prefix ':')? LocalPart
8627 * [7] Prefix ::= NCName
8628 * [8] LocalPart ::= NCName
8629 *
8630 * Returns the Name parsed or NULL
8631 */
8632
8633 static xmlHashedString
xmlParseQNameHashed(xmlParserCtxtPtr ctxt,xmlHashedString * prefix)8634 xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8635 xmlHashedString l, p;
8636 int start, isNCName = 0;
8637
8638 l.name = NULL;
8639 p.name = NULL;
8640
8641 GROW;
8642 start = CUR_PTR - BASE_PTR;
8643
8644 l = xmlParseNCName(ctxt);
8645 if (l.name != NULL) {
8646 isNCName = 1;
8647 if (CUR == ':') {
8648 NEXT;
8649 p = l;
8650 l = xmlParseNCName(ctxt);
8651 }
8652 }
8653 if ((l.name == NULL) || (CUR == ':')) {
8654 xmlChar *tmp;
8655
8656 l.name = NULL;
8657 p.name = NULL;
8658 if ((isNCName == 0) && (CUR != ':'))
8659 return(l);
8660 tmp = xmlParseNmtoken(ctxt);
8661 if (tmp != NULL)
8662 xmlFree(tmp);
8663 l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8664 CUR_PTR - (BASE_PTR + start));
8665 if (l.name == NULL) {
8666 xmlErrMemory(ctxt);
8667 return(l);
8668 }
8669 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8670 "Failed to parse QName '%s'\n", l.name, NULL, NULL);
8671 }
8672
8673 *prefix = p;
8674 return(l);
8675 }
8676
8677 /**
8678 * xmlParseQName:
8679 * @ctxt: an XML parser context
8680 * @prefix: pointer to store the prefix part
8681 *
8682 * parse an XML Namespace QName
8683 *
8684 * [6] QName ::= (Prefix ':')? LocalPart
8685 * [7] Prefix ::= NCName
8686 * [8] LocalPart ::= NCName
8687 *
8688 * Returns the Name parsed or NULL
8689 */
8690
8691 static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt,const xmlChar ** prefix)8692 xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8693 xmlHashedString n, p;
8694
8695 n = xmlParseQNameHashed(ctxt, &p);
8696 if (n.name == NULL)
8697 return(NULL);
8698 *prefix = p.name;
8699 return(n.name);
8700 }
8701
8702 /**
8703 * xmlParseQNameAndCompare:
8704 * @ctxt: an XML parser context
8705 * @name: the localname
8706 * @prefix: the prefix, if any.
8707 *
8708 * parse an XML name and compares for match
8709 * (specialized for endtag parsing)
8710 *
8711 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8712 * and the name for mismatch
8713 */
8714
8715 static const xmlChar *
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt,xmlChar const * name,xmlChar const * prefix)8716 xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8717 xmlChar const *prefix) {
8718 const xmlChar *cmp;
8719 const xmlChar *in;
8720 const xmlChar *ret;
8721 const xmlChar *prefix2;
8722
8723 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8724
8725 GROW;
8726 in = ctxt->input->cur;
8727
8728 cmp = prefix;
8729 while (*in != 0 && *in == *cmp) {
8730 ++in;
8731 ++cmp;
8732 }
8733 if ((*cmp == 0) && (*in == ':')) {
8734 in++;
8735 cmp = name;
8736 while (*in != 0 && *in == *cmp) {
8737 ++in;
8738 ++cmp;
8739 }
8740 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8741 /* success */
8742 ctxt->input->col += in - ctxt->input->cur;
8743 ctxt->input->cur = in;
8744 return((const xmlChar*) 1);
8745 }
8746 }
8747 /*
8748 * all strings coms from the dictionary, equality can be done directly
8749 */
8750 ret = xmlParseQName (ctxt, &prefix2);
8751 if (ret == NULL)
8752 return(NULL);
8753 if ((ret == name) && (prefix == prefix2))
8754 return((const xmlChar*) 1);
8755 return ret;
8756 }
8757
8758 /**
8759 * xmlParseAttribute2:
8760 * @ctxt: an XML parser context
8761 * @pref: the element prefix
8762 * @elem: the element name
8763 * @prefix: a xmlChar ** used to store the value of the attribute prefix
8764 * @value: a xmlChar ** used to store the value of the attribute
8765 * @len: an int * to save the length of the attribute
8766 * @alloc: an int * to indicate if the attribute was allocated
8767 *
8768 * parse an attribute in the new SAX2 framework.
8769 *
8770 * Returns the attribute name, and the value in *value, .
8771 */
8772
8773 static xmlHashedString
xmlParseAttribute2(xmlParserCtxtPtr ctxt,const xmlChar * pref,const xmlChar * elem,xmlHashedString * hprefix,xmlChar ** value,int * len,int * alloc)8774 xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8775 const xmlChar * pref, const xmlChar * elem,
8776 xmlHashedString * hprefix, xmlChar ** value,
8777 int *len, int *alloc)
8778 {
8779 xmlHashedString hname;
8780 const xmlChar *prefix, *name;
8781 xmlChar *val = NULL, *internal_val = NULL;
8782 int normalize = 0;
8783 int isNamespace;
8784
8785 *value = NULL;
8786 GROW;
8787 hname = xmlParseQNameHashed(ctxt, hprefix);
8788 if (hname.name == NULL) {
8789 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8790 "error parsing attribute name\n");
8791 return(hname);
8792 }
8793 name = hname.name;
8794 if (hprefix->name != NULL)
8795 prefix = hprefix->name;
8796 else
8797 prefix = NULL;
8798
8799 /*
8800 * get the type if needed
8801 */
8802 if (ctxt->attsSpecial != NULL) {
8803 int type;
8804
8805 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8806 pref, elem,
8807 prefix, name);
8808 if (type != 0)
8809 normalize = 1;
8810 }
8811
8812 /*
8813 * read the value
8814 */
8815 SKIP_BLANKS;
8816 if (RAW == '=') {
8817 NEXT;
8818 SKIP_BLANKS;
8819 isNamespace = (((prefix == NULL) && (name == ctxt->str_xmlns)) ||
8820 (prefix == ctxt->str_xmlns));
8821 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize,
8822 isNamespace);
8823 if (val == NULL)
8824 goto error;
8825 } else {
8826 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8827 "Specification mandates value for attribute %s\n",
8828 name);
8829 goto error;
8830 }
8831
8832 if (prefix == ctxt->str_xml) {
8833 /*
8834 * Check that xml:lang conforms to the specification
8835 * No more registered as an error, just generate a warning now
8836 * since this was deprecated in XML second edition
8837 */
8838 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8839 internal_val = xmlStrndup(val, *len);
8840 if (internal_val == NULL)
8841 goto mem_error;
8842 if (!xmlCheckLanguageID(internal_val)) {
8843 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8844 "Malformed value for xml:lang : %s\n",
8845 internal_val, NULL);
8846 }
8847 }
8848
8849 /*
8850 * Check that xml:space conforms to the specification
8851 */
8852 if (xmlStrEqual(name, BAD_CAST "space")) {
8853 internal_val = xmlStrndup(val, *len);
8854 if (internal_val == NULL)
8855 goto mem_error;
8856 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8857 *(ctxt->space) = 0;
8858 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8859 *(ctxt->space) = 1;
8860 else {
8861 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8862 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8863 internal_val, NULL);
8864 }
8865 }
8866 if (internal_val) {
8867 xmlFree(internal_val);
8868 }
8869 }
8870
8871 *value = val;
8872 return (hname);
8873
8874 mem_error:
8875 xmlErrMemory(ctxt);
8876 error:
8877 if ((val != NULL) && (*alloc != 0))
8878 xmlFree(val);
8879 return(hname);
8880 }
8881
8882 /**
8883 * xmlAttrHashInsert:
8884 * @ctxt: parser context
8885 * @size: size of the hash table
8886 * @name: attribute name
8887 * @uri: namespace uri
8888 * @hashValue: combined hash value of name and uri
8889 * @aindex: attribute index (this is a multiple of 5)
8890 *
8891 * Inserts a new attribute into the hash table.
8892 *
8893 * Returns INT_MAX if no existing attribute was found, the attribute
8894 * index if an attribute was found, -1 if a memory allocation failed.
8895 */
8896 static int
xmlAttrHashInsert(xmlParserCtxtPtr ctxt,unsigned size,const xmlChar * name,const xmlChar * uri,unsigned hashValue,int aindex)8897 xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
8898 const xmlChar *uri, unsigned hashValue, int aindex) {
8899 xmlAttrHashBucket *table = ctxt->attrHash;
8900 xmlAttrHashBucket *bucket;
8901 unsigned hindex;
8902
8903 hindex = hashValue & (size - 1);
8904 bucket = &table[hindex];
8905
8906 while (bucket->index >= 0) {
8907 const xmlChar **atts = &ctxt->atts[bucket->index];
8908
8909 if (name == atts[0]) {
8910 int nsIndex = (int) (ptrdiff_t) atts[2];
8911
8912 if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
8913 (nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
8914 (uri == ctxt->nsTab[nsIndex * 2 + 1]))
8915 return(bucket->index);
8916 }
8917
8918 hindex++;
8919 bucket++;
8920 if (hindex >= size) {
8921 hindex = 0;
8922 bucket = table;
8923 }
8924 }
8925
8926 bucket->index = aindex;
8927
8928 return(INT_MAX);
8929 }
8930
/**
 * xmlParseStartTag2:
 * @ctxt:  an XML parser context
 * @pref:  receives the element prefix (NULL when the name has no prefix)
 * @URI:  receives the namespace URI looked up for the element prefix
 * @nbNsPtr:  receives the number of namespace bindings pushed by this tag
 *
 * Parse a start tag. Always consumes '<'.
 *
 * This routine is called when running SAX2 parsing
 *
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
 *
 * [ WFC: Unique Att Spec ]
 * No attribute name may appear more than once in the same start-tag or
 * empty-element tag.
 *
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
 *
 * [ WFC: Unique Att Spec ]
 * No attribute name may appear more than once in the same start-tag or
 * empty-element tag.
 *
 * With namespace:
 *
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
 *
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
 *
 * The closing '>' or '/>' is not consumed here. The output parameters
 * are not written when the function returns early with NULL (no '<',
 * namespace bookkeeping failure, invalid element name). @pref and @URI
 * are only written once all attributes have been processed; @nbNsPtr is
 * also written on the paths that jump to the final cleanup.
 *
 * Returns the element name parsed, or NULL on error
 */

static const xmlChar *
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
                  const xmlChar **URI, int *nbNsPtr) {
    xmlHashedString hlocalname;
    xmlHashedString hprefix;
    xmlHashedString hattname;
    xmlHashedString haprefix;
    const xmlChar *localname;
    const xmlChar *prefix;
    const xmlChar *attname;
    const xmlChar *aprefix;
    const xmlChar *uri;
    xmlChar *attvalue = NULL;
    const xmlChar **atts = ctxt->atts;
    unsigned attrHashSize = 0;
    int maxatts = ctxt->maxatts;
    /*
     * nratts: attributes specified in the tag (entries in attallocs)
     * nbatts: used slots in atts (five per attribute)
     * nbdef:  default attributes appended to atts
     */
    int nratts, nbatts, nbdef;
    int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
    int alloc = 0;

    if (RAW != '<') return(NULL);
    NEXT1;

    nbatts = 0;
    nratts = 0;
    nbdef = 0;
    nbNs = 0;
    nbTotalDef = 0;
    attval = 0;

    if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
        xmlErrMemory(ctxt);
        return(NULL);
    }

    hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
    if (hlocalname.name == NULL) {
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
                       "StartTag: invalid element name\n");
        return(NULL);
    }
    localname = hlocalname.name;
    prefix = hprefix.name;

    /*
     * Now parse the attributes, it ends up with the ending
     *
     * (S Attribute)* S?
     */
    SKIP_BLANKS;
    GROW;

    /*
     * The ctxt->atts array will be ultimately passed to the SAX callback
     * containing five xmlChar pointers for each attribute:
     *
     * [0] attribute name
     * [1] attribute prefix
     * [2] namespace URI
     * [3] attribute value
     * [4] end of attribute value
     *
     * To save memory, we reuse this array temporarily and store integers
     * in these pointer variables.
     *
     * [0] attribute name
     * [1] attribute prefix
     * [2] hash value of attribute prefix, and later namespace index
     * [3] for non-allocated values: ptrdiff_t offset into input buffer
     * [4] for non-allocated values: ptrdiff_t offset into input buffer
     *
     * The ctxt->attallocs array contains an additional unsigned int for
     * each attribute, containing the hash value of the attribute name
     * and the alloc flag in bit 31.
     */

    while (((RAW != '>') &&
           ((RAW != '/') || (NXT(1) != '>')) &&
           (IS_BYTE_CHAR(RAW))) && (PARSER_STOPPED(ctxt) == 0)) {
        int len = -1;

        hattname = xmlParseAttribute2(ctxt, prefix, localname,
                                      &haprefix, &attvalue, &len,
                                      &alloc);
        if (hattname.name == NULL)
            break;
        if (attvalue == NULL)
            goto next_attr;
        attname = hattname.name;
        aprefix = haprefix.name;
        if (len < 0) len = xmlStrlen(attvalue);

        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
            /*
             * Default namespace declaration: xmlns="..."
             */
            xmlHashedString huri;
            xmlURIPtr parsedUri;

            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
            uri = huri.name;
            if (uri == NULL) {
                xmlErrMemory(ctxt);
                goto next_attr;
            }
            /* An empty value skips the URI checks and is pushed as is. */
            if (*uri != 0) {
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
                    xmlErrMemory(ctxt);
                    goto next_attr;
                }
                if (parsedUri == NULL) {
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
                             "xmlns: '%s' is not a valid URI\n",
                             uri, NULL, NULL);
                } else {
                    if (parsedUri->scheme == NULL) {
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
                                  "xmlns: URI %s is not absolute\n",
                                  uri, NULL, NULL);
                    }
                    xmlFreeURI(parsedUri);
                }
                if (uri == ctxt->str_xml_ns) {
                    if (attname != ctxt->str_xml) {
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                     "xml namespace URI cannot be the default namespace\n",
                                 NULL, NULL, NULL);
                    }
                    goto next_attr;
                }
                /* 29 is the length of the xmlns namespace name below. */
                if ((len == 29) &&
                    (xmlStrEqual(uri,
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                         "reuse of the xmlns namespace name is forbidden\n",
                             NULL, NULL, NULL);
                    goto next_attr;
                }
            }

            if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
                nbNs++;
        } else if (aprefix == ctxt->str_xmlns) {
            /*
             * Prefixed namespace declaration: xmlns:attname="..."
             */
            xmlHashedString huri;
            xmlURIPtr parsedUri;

            huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
            uri = huri.name;
            if (uri == NULL) {
                xmlErrMemory(ctxt);
                goto next_attr;
            }

            if (attname == ctxt->str_xml) {
                if (uri != ctxt->str_xml_ns) {
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                             "xml namespace prefix mapped to wrong URI\n",
                             NULL, NULL, NULL);
                }
                /*
                 * Do not keep a namespace definition node
                 */
                goto next_attr;
            }
            if (uri == ctxt->str_xml_ns) {
                if (attname != ctxt->str_xml) {
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                             "xml namespace URI mapped to wrong prefix\n",
                             NULL, NULL, NULL);
                }
                goto next_attr;
            }
            if (attname == ctxt->str_xmlns) {
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                         "redefinition of the xmlns prefix is forbidden\n",
                         NULL, NULL, NULL);
                goto next_attr;
            }
            /* 29 is the length of the xmlns namespace name below. */
            if ((len == 29) &&
                (xmlStrEqual(uri,
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                         "reuse of the xmlns namespace name is forbidden\n",
                         NULL, NULL, NULL);
                goto next_attr;
            }
            if ((uri == NULL) || (uri[0] == 0)) {
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
                         "xmlns:%s: Empty XML namespace is not allowed\n",
                         attname, NULL, NULL);
                goto next_attr;
            } else {
                if (xmlParseURISafe((const char *) uri, &parsedUri) < 0) {
                    xmlErrMemory(ctxt);
                    goto next_attr;
                }
                if (parsedUri == NULL) {
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
                             "xmlns:%s: '%s' is not a valid URI\n",
                             attname, uri, NULL);
                } else {
                    /*
                     * Unlike the default namespace case above, the
                     * relative URI warning is only issued in pedantic
                     * mode here.
                     */
                    if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
                                  "xmlns:%s: URI %s is not absolute\n",
                                  attname, uri, NULL);
                    }
                    xmlFreeURI(parsedUri);
                }
            }

            if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
                nbNs++;
        } else {
            /*
             * Populate attributes array, see above for repurposing
             * of xmlChar pointers.
             */
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
                    goto next_attr;
                }
                /* Refresh the local copies after the array was grown. */
                maxatts = ctxt->maxatts;
                atts = ctxt->atts;
            }
            /* Name hash in bits 0-30, alloc flag in bit 31. */
            ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
                                        ((unsigned) alloc << 31);
            atts[nbatts++] = attname;
            atts[nbatts++] = aprefix;
            atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
            if (alloc) {
                atts[nbatts++] = attvalue;
                attvalue += len;
                atts[nbatts++] = attvalue;
            } else {
                /*
                 * attvalue points into the input buffer which can be
                 * reallocated. Store differences to input->base instead.
                 * The pointers will be reconstructed later.
                 */
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
                attvalue += len;
                atts[nbatts++] = (void *) (attvalue - BASE_PTR);
            }
            /*
             * tag if some deallocation is needed
             */
            if (alloc != 0) attval = 1;
            attvalue = NULL; /* moved into atts */
        }

next_attr:
        /*
         * Values that were not moved into atts are released here when
         * they were allocated.
         */
        if ((attvalue != NULL) && (alloc != 0)) {
            xmlFree(attvalue);
            attvalue = NULL;
        }

        GROW
        if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
            break;
        /* Attributes must be separated by white space. */
        if (SKIP_BLANKS == 0) {
            xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
                           "attributes construct error\n");
            break;
        }
        GROW;
    }

    /*
     * Namespaces from default attributes
     *
     * Default attributes that are not namespace declarations are only
     * counted here; they are appended further below.
     */
    if (ctxt->attsDefault != NULL) {
        xmlDefAttrsPtr defaults;

        defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
        if (defaults != NULL) {
            for (i = 0; i < defaults->nbAttrs; i++) {
                xmlDefAttr *attr = &defaults->attrs[i];

                attname = attr->name.name;
                aprefix = attr->prefix.name;

                if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
                    xmlParserEntityCheck(ctxt, attr->expandedSize);

                    if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
                        nbNs++;
                } else if (aprefix == ctxt->str_xmlns) {
                    xmlParserEntityCheck(ctxt, attr->expandedSize);

                    if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
                                        NULL, 1) > 0)
                        nbNs++;
                } else {
                    nbTotalDef += 1;
                }
            }
        }
    }

    /*
     * Resolve attribute namespaces
     *
     * Slot [2] still holds the prefix hash value and is replaced by the
     * namespace index.
     */
    for (i = 0; i < nbatts; i += 5) {
        attname = atts[i];
        aprefix = atts[i+1];

        /*
         * The default namespace does not apply to attribute names.
         */
        if (aprefix == NULL) {
            nsIndex = NS_INDEX_EMPTY;
        } else if (aprefix == ctxt->str_xml) {
            nsIndex = NS_INDEX_XML;
        } else {
            haprefix.name = aprefix;
            haprefix.hashValue = (size_t) atts[i+2];
            nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);

            if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex)) {
                xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
                    "Namespace prefix %s for %s on %s is not defined\n",
                    aprefix, attname, localname);
                nsIndex = NS_INDEX_EMPTY;
            }
        }

        atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
    }

    /*
     * Maximum number of attributes including default attributes.
     */
    maxAtts = nratts + nbTotalDef;

    /*
     * Verify that attribute names are unique.
     *
     * With at most one attribute there is nothing to compare, so the
     * hash table is not set up at all.
     */
    if (maxAtts > 1) {
        /* Smallest power of two that is at least 2 * maxAtts. */
        attrHashSize = 4;
        while (attrHashSize / 2 < (unsigned) maxAtts)
            attrHashSize *= 2;

        if (attrHashSize > ctxt->attrHashMax) {
            xmlAttrHashBucket *tmp;

            tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
            if (tmp == NULL) {
                xmlErrMemory(ctxt);
                /*
                 * localname is left untouched on this path: the element
                 * name is still returned, but *pref and *URI are not
                 * written and the SAX callback is skipped.
                 */
                goto done;
            }

            ctxt->attrHash = tmp;
            ctxt->attrHashMax = attrHashSize;
        }

        /* Fill every byte with 0xFF so that all buckets start unused. */
        memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));

        for (i = 0, j = 0; j < nratts; i += 5, j++) {
            const xmlChar *nsuri;
            unsigned hashValue, nameHashValue, uriHashValue;
            int res;

            attname = atts[i];
            aprefix = atts[i+1];
            nsIndex = (ptrdiff_t) atts[i+2];
            /* Hash values always have bit 31 set, see dict.c */
            nameHashValue = ctxt->attallocs[j] | 0x80000000;

            if (nsIndex == NS_INDEX_EMPTY) {
                /*
                 * Prefix with empty namespace means an undeclared
                 * prefix which was already reported above.
                 */
                if (aprefix != NULL)
                    continue;
                nsuri = NULL;
                uriHashValue = URI_HASH_EMPTY;
            } else if (nsIndex == NS_INDEX_XML) {
                nsuri = ctxt->str_xml_ns;
                uriHashValue = URI_HASH_XML;
            } else {
                nsuri = ctxt->nsTab[nsIndex * 2 + 1];
                uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
            }

            hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
            res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
                                    hashValue, i);
            /* A negative result is treated as a failure and skipped. */
            if (res < 0)
                continue;

            /*
             * [ WFC: Unique Att Spec ]
             * No attribute name may appear more than once in the same
             * start-tag or empty-element tag.
             * As extended by the Namespace in XML REC.
             */
            if (res < INT_MAX) {
                /*
                 * res is the atts index of the earlier attribute with the
                 * same name and namespace. Identical prefixes mean a plain
                 * duplicate, different prefixes a namespace clash.
                 */
                if (aprefix == atts[res+1]) {
                    xmlErrAttributeDup(ctxt, aprefix, attname);
                } else {
                    xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
                             "Namespaced Attribute %s in '%s' redefined\n",
                             attname, nsuri, NULL);
                }
            }
        }
    }

    /*
     * Default attributes
     */
    if (ctxt->attsDefault != NULL) {
        xmlDefAttrsPtr defaults;

        defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
        if (defaults != NULL) {
            for (i = 0; i < defaults->nbAttrs; i++) {
                xmlDefAttr *attr = &defaults->attrs[i];
                const xmlChar *nsuri = NULL;
                unsigned hashValue, uriHashValue = 0;
                int res;

                attname = attr->name.name;
                aprefix = attr->prefix.name;

                /* Namespace declarations were handled earlier. */
                if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
                    continue;
                if (aprefix == ctxt->str_xmlns)
                    continue;

                if (aprefix == NULL) {
                    nsIndex = NS_INDEX_EMPTY;
                    nsuri = NULL;
                    uriHashValue = URI_HASH_EMPTY;
                } else if (aprefix == ctxt->str_xml) {
                    nsIndex = NS_INDEX_XML;
                    nsuri = ctxt->str_xml_ns;
                    uriHashValue = URI_HASH_XML;
                } else {
                    nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
                    if ((nsIndex == INT_MAX) ||
                        (nsIndex < ctxt->nsdb->minNsIndex)) {
                        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
                                 "Namespace prefix %s for %s on %s is not "
                                 "defined\n",
                                 aprefix, attname, localname);
                        nsIndex = NS_INDEX_EMPTY;
                        nsuri = NULL;
                        uriHashValue = URI_HASH_EMPTY;
                    } else {
                        nsuri = ctxt->nsTab[nsIndex * 2 + 1];
                        uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
                    }
                }

                /*
                 * Check whether the attribute exists
                 */
                if (maxAtts > 1) {
                    hashValue = xmlDictCombineHash(attr->name.hashValue,
                                                   uriHashValue);
                    res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
                                            hashValue, nbatts);
                    if (res < 0)
                        continue;
                    if (res < INT_MAX) {
                        /*
                         * Same prefix: the attribute is already present,
                         * so the default is silently dropped.
                         */
                        if (aprefix == atts[res+1])
                            continue;
                        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
                                 "Namespaced Attribute %s in '%s' redefined\n",
                                 attname, nsuri, NULL);
                    }
                }

                xmlParserEntityCheck(ctxt, attr->expandedSize);

                if ((atts == NULL) || (nbatts + 5 > maxatts)) {
                    if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
                        /* Make the whole start tag fail. */
                        localname = NULL;
                        goto done;
                    }
                    maxatts = ctxt->maxatts;
                    atts = ctxt->atts;
                }

                atts[nbatts++] = attname;
                atts[nbatts++] = aprefix;
                atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
                atts[nbatts++] = attr->value.name;
                atts[nbatts++] = attr->valueEnd;
                if ((ctxt->standalone == 1) && (attr->external != 0)) {
                    xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
                            "standalone: attribute %s on %s defaulted "
                            "from external subset\n",
                            attname, localname);
                }
                nbdef++;
            }
        }
    }

    /*
     * Reconstruct attribute pointers
     *
     * Slot [2] goes from namespace index to namespace URI. For specified
     * attributes (j < nratts) whose value was not allocated, slots [3]
     * and [4] go from input buffer offsets back to pointers.
     */
    for (i = 0, j = 0; i < nbatts; i += 5, j++) {
        /* namespace URI */
        nsIndex = (ptrdiff_t) atts[i+2];
        if (nsIndex == INT_MAX)
            atts[i+2] = NULL;
        else if (nsIndex == INT_MAX - 1)
            atts[i+2] = ctxt->str_xml_ns;
        else
            atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];

        if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
            atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3];  /* value */
            atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4];  /* valuend */
        }
    }

    /*
     * Resolve the namespace of the element itself.
     */
    uri = xmlParserNsLookupUri(ctxt, &hprefix);
    if ((prefix != NULL) && (uri == NULL)) {
        xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
                 "Namespace prefix %s on %s is not defined\n",
                 prefix, localname, NULL);
    }
    *pref = prefix;
    *URI = uri;

    /*
     * SAX callback
     *
     * The namespaces pushed for this element are the last nbNs
     * prefix/URI pairs of ctxt->nsTab.
     */
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
        (!ctxt->disableSAX)) {
        if (nbNs > 0)
            ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
                          nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
                          nbatts / 5, nbdef, atts);
        else
            ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
                          0, NULL, nbatts / 5, nbdef, atts);
    }

done:
    /*
     * Free allocated attribute values
     */
    if (attval != 0) {
        for (i = 0, j = 0; j < nratts; i += 5, j++)
            if (ctxt->attallocs[j] & 0x80000000)
                xmlFree((xmlChar *) atts[i+3]);
    }

    *nbNsPtr = nbNs;
    return(localname);
}
9514
9515 /**
9516 * xmlParseEndTag2:
9517 * @ctxt: an XML parser context
9518 * @line: line of the start tag
9519 * @nsNr: number of namespaces on the start tag
9520 *
9521 * Parse an end tag. Always consumes '</'.
9522 *
9523 * [42] ETag ::= '</' Name S? '>'
9524 *
9525 * With namespace
9526 *
9527 * [NS 9] ETag ::= '</' QName S? '>'
9528 */
9529
9530 static void
xmlParseEndTag2(xmlParserCtxtPtr ctxt,const xmlStartTag * tag)9531 xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9532 const xmlChar *name;
9533
9534 GROW;
9535 if ((RAW != '<') || (NXT(1) != '/')) {
9536 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9537 return;
9538 }
9539 SKIP(2);
9540
9541 if (tag->prefix == NULL)
9542 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9543 else
9544 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9545
9546 /*
9547 * We should definitely be at the ending "S? '>'" part
9548 */
9549 GROW;
9550 SKIP_BLANKS;
9551 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9552 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9553 } else
9554 NEXT1;
9555
9556 /*
9557 * [ WFC: Element Type Match ]
9558 * The Name in an element's end-tag must match the element type in the
9559 * start-tag.
9560 *
9561 */
9562 if (name != (xmlChar*)1) {
9563 if (name == NULL) name = BAD_CAST "unparsable";
9564 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9565 "Opening and ending tag mismatch: %s line %d and %s\n",
9566 ctxt->name, tag->line, name);
9567 }
9568
9569 /*
9570 * SAX: End of Tag
9571 */
9572 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9573 (!ctxt->disableSAX))
9574 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9575 tag->URI);
9576
9577 spacePop(ctxt);
9578 if (tag->nsNr != 0)
9579 xmlParserNsPop(ctxt, tag->nsNr);
9580 }
9581
9582 /**
9583 * xmlParseCDSect:
9584 * @ctxt: an XML parser context
9585 *
9586 * DEPRECATED: Internal function, don't use.
9587 *
9588 * Parse escaped pure raw content. Always consumes '<!['.
9589 *
9590 * [18] CDSect ::= CDStart CData CDEnd
9591 *
9592 * [19] CDStart ::= '<![CDATA['
9593 *
9594 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9595 *
9596 * [21] CDEnd ::= ']]>'
9597 */
9598 void
xmlParseCDSect(xmlParserCtxtPtr ctxt)9599 xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9600 xmlChar *buf = NULL;
9601 int len = 0;
9602 int size = XML_PARSER_BUFFER_SIZE;
9603 int r, rl;
9604 int s, sl;
9605 int cur, l;
9606 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9607 XML_MAX_HUGE_LENGTH :
9608 XML_MAX_TEXT_LENGTH;
9609
9610 if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9611 return;
9612 SKIP(3);
9613
9614 if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9615 return;
9616 SKIP(6);
9617
9618 r = xmlCurrentCharRecover(ctxt, &rl);
9619 if (!IS_CHAR(r)) {
9620 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9621 goto out;
9622 }
9623 NEXTL(rl);
9624 s = xmlCurrentCharRecover(ctxt, &sl);
9625 if (!IS_CHAR(s)) {
9626 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9627 goto out;
9628 }
9629 NEXTL(sl);
9630 cur = xmlCurrentCharRecover(ctxt, &l);
9631 buf = xmlMalloc(size);
9632 if (buf == NULL) {
9633 xmlErrMemory(ctxt);
9634 goto out;
9635 }
9636 while (IS_CHAR(cur) &&
9637 ((r != ']') || (s != ']') || (cur != '>'))) {
9638 if (len + 5 >= size) {
9639 xmlChar *tmp;
9640
9641 tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9642 if (tmp == NULL) {
9643 xmlErrMemory(ctxt);
9644 goto out;
9645 }
9646 buf = tmp;
9647 size *= 2;
9648 }
9649 COPY_BUF(buf, len, r);
9650 if (len > maxLength) {
9651 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9652 "CData section too big found\n");
9653 goto out;
9654 }
9655 r = s;
9656 rl = sl;
9657 s = cur;
9658 sl = l;
9659 NEXTL(l);
9660 cur = xmlCurrentCharRecover(ctxt, &l);
9661 }
9662 buf[len] = 0;
9663 if (cur != '>') {
9664 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9665 "CData section not finished\n%.50s\n", buf);
9666 goto out;
9667 }
9668 NEXTL(l);
9669
9670 /*
9671 * OK the buffer is to be consumed as cdata.
9672 */
9673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9674 if (ctxt->sax->cdataBlock != NULL)
9675 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9676 else if (ctxt->sax->characters != NULL)
9677 ctxt->sax->characters(ctxt->userData, buf, len);
9678 }
9679
9680 out:
9681 xmlFree(buf);
9682 }
9683
9684 /**
9685 * xmlParseContentInternal:
9686 * @ctxt: an XML parser context
9687 *
9688 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9689 * unexpected EOF to the caller.
9690 */
9691
9692 static void
xmlParseContentInternal(xmlParserCtxtPtr ctxt)9693 xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9694 int oldNameNr = ctxt->nameNr;
9695 int oldSpaceNr = ctxt->spaceNr;
9696 int oldNodeNr = ctxt->nodeNr;
9697
9698 GROW;
9699 while ((ctxt->input->cur < ctxt->input->end) &&
9700 (PARSER_STOPPED(ctxt) == 0)) {
9701 const xmlChar *cur = ctxt->input->cur;
9702
9703 /*
9704 * First case : a Processing Instruction.
9705 */
9706 if ((*cur == '<') && (cur[1] == '?')) {
9707 xmlParsePI(ctxt);
9708 }
9709
9710 /*
9711 * Second case : a CDSection
9712 */
9713 /* 2.6.0 test was *cur not RAW */
9714 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9715 xmlParseCDSect(ctxt);
9716 }
9717
9718 /*
9719 * Third case : a comment
9720 */
9721 else if ((*cur == '<') && (NXT(1) == '!') &&
9722 (NXT(2) == '-') && (NXT(3) == '-')) {
9723 xmlParseComment(ctxt);
9724 }
9725
9726 /*
9727 * Fourth case : a sub-element.
9728 */
9729 else if (*cur == '<') {
9730 if (NXT(1) == '/') {
9731 if (ctxt->nameNr <= oldNameNr)
9732 break;
9733 xmlParseElementEnd(ctxt);
9734 } else {
9735 xmlParseElementStart(ctxt);
9736 }
9737 }
9738
9739 /*
9740 * Fifth case : a reference. If if has not been resolved,
9741 * parsing returns it's Name, create the node
9742 */
9743
9744 else if (*cur == '&') {
9745 xmlParseReference(ctxt);
9746 }
9747
9748 /*
9749 * Last case, text. Note that References are handled directly.
9750 */
9751 else {
9752 xmlParseCharDataInternal(ctxt, 0);
9753 }
9754
9755 SHRINK;
9756 GROW;
9757 }
9758
9759 if ((ctxt->nameNr > oldNameNr) &&
9760 (ctxt->input->cur >= ctxt->input->end) &&
9761 (ctxt->wellFormed)) {
9762 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9763 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9764 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9765 "Premature end of data in tag %s line %d\n",
9766 name, line, NULL);
9767 }
9768
9769 /*
9770 * Clean up in error case
9771 */
9772
9773 while (ctxt->nodeNr > oldNodeNr)
9774 nodePop(ctxt);
9775
9776 while (ctxt->nameNr > oldNameNr) {
9777 xmlStartTag *tag = &ctxt->pushTab[ctxt->nameNr - 1];
9778
9779 if (tag->nsNr != 0)
9780 xmlParserNsPop(ctxt, tag->nsNr);
9781
9782 namePop(ctxt);
9783 }
9784
9785 while (ctxt->spaceNr > oldSpaceNr)
9786 spacePop(ctxt);
9787 }
9788
9789 /**
9790 * xmlParseContent:
9791 * @ctxt: an XML parser context
9792 *
9793 * Parse XML element content. This is useful if you're only interested
9794 * in custom SAX callbacks. If you want a node list, use
9795 * xmlCtxtParseContent.
9796 */
9797 void
xmlParseContent(xmlParserCtxtPtr ctxt)9798 xmlParseContent(xmlParserCtxtPtr ctxt) {
9799 if ((ctxt == NULL) || (ctxt->input == NULL))
9800 return;
9801
9802 xmlCtxtInitializeLate(ctxt);
9803
9804 xmlParseContentInternal(ctxt);
9805
9806 if (ctxt->input->cur < ctxt->input->end)
9807 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
9808 }
9809
9810 /**
9811 * xmlParseElement:
9812 * @ctxt: an XML parser context
9813 *
9814 * DEPRECATED: Internal function, don't use.
9815 *
9816 * parse an XML element
9817 *
9818 * [39] element ::= EmptyElemTag | STag content ETag
9819 *
9820 * [ WFC: Element Type Match ]
9821 * The Name in an element's end-tag must match the element type in the
9822 * start-tag.
9823 *
9824 */
9825
9826 void
xmlParseElement(xmlParserCtxtPtr ctxt)9827 xmlParseElement(xmlParserCtxtPtr ctxt) {
9828 if (xmlParseElementStart(ctxt) != 0)
9829 return;
9830
9831 xmlParseContentInternal(ctxt);
9832
9833 if (ctxt->input->cur >= ctxt->input->end) {
9834 if (ctxt->wellFormed) {
9835 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9836 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9837 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9838 "Premature end of data in tag %s line %d\n",
9839 name, line, NULL);
9840 }
9841 return;
9842 }
9843
9844 xmlParseElementEnd(ctxt);
9845 }
9846
9847 /**
9848 * xmlParseElementStart:
9849 * @ctxt: an XML parser context
9850 *
9851 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9852 * opening tag was parsed, 1 if an empty element was parsed.
9853 *
9854 * Always consumes '<'.
9855 */
9856 static int
xmlParseElementStart(xmlParserCtxtPtr ctxt)9857 xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9858 int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
9859 const xmlChar *name;
9860 const xmlChar *prefix = NULL;
9861 const xmlChar *URI = NULL;
9862 xmlParserNodeInfo node_info;
9863 int line;
9864 xmlNodePtr cur;
9865 int nbNs = 0;
9866
9867 if (ctxt->nameNr > maxDepth) {
9868 xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
9869 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9870 ctxt->nameNr);
9871 xmlHaltParser(ctxt);
9872 return(-1);
9873 }
9874
9875 /* Capture start position */
9876 if (ctxt->record_info) {
9877 node_info.begin_pos = ctxt->input->consumed +
9878 (CUR_PTR - ctxt->input->base);
9879 node_info.begin_line = ctxt->input->line;
9880 }
9881
9882 if (ctxt->spaceNr == 0)
9883 spacePush(ctxt, -1);
9884 else if (*ctxt->space == -2)
9885 spacePush(ctxt, -1);
9886 else
9887 spacePush(ctxt, *ctxt->space);
9888
9889 line = ctxt->input->line;
9890 #ifdef LIBXML_SAX1_ENABLED
9891 if (ctxt->sax2)
9892 #endif /* LIBXML_SAX1_ENABLED */
9893 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
9894 #ifdef LIBXML_SAX1_ENABLED
9895 else
9896 name = xmlParseStartTag(ctxt);
9897 #endif /* LIBXML_SAX1_ENABLED */
9898 if (name == NULL) {
9899 spacePop(ctxt);
9900 return(-1);
9901 }
9902 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
9903 cur = ctxt->node;
9904
9905 #ifdef LIBXML_VALID_ENABLED
9906 /*
9907 * [ VC: Root Element Type ]
9908 * The Name in the document type declaration must match the element
9909 * type of the root element.
9910 */
9911 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9912 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9913 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9914 #endif /* LIBXML_VALID_ENABLED */
9915
9916 /*
9917 * Check for an Empty Element.
9918 */
9919 if ((RAW == '/') && (NXT(1) == '>')) {
9920 SKIP(2);
9921 if (ctxt->sax2) {
9922 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9923 (!ctxt->disableSAX))
9924 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9925 #ifdef LIBXML_SAX1_ENABLED
9926 } else {
9927 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9928 (!ctxt->disableSAX))
9929 ctxt->sax->endElement(ctxt->userData, name);
9930 #endif /* LIBXML_SAX1_ENABLED */
9931 }
9932 namePop(ctxt);
9933 spacePop(ctxt);
9934 if (nbNs > 0)
9935 xmlParserNsPop(ctxt, nbNs);
9936 if (cur != NULL && ctxt->record_info) {
9937 node_info.node = cur;
9938 node_info.end_pos = ctxt->input->consumed +
9939 (CUR_PTR - ctxt->input->base);
9940 node_info.end_line = ctxt->input->line;
9941 xmlParserAddNodeInfo(ctxt, &node_info);
9942 }
9943 return(1);
9944 }
9945 if (RAW == '>') {
9946 NEXT1;
9947 if (cur != NULL && ctxt->record_info) {
9948 node_info.node = cur;
9949 node_info.end_pos = 0;
9950 node_info.end_line = 0;
9951 xmlParserAddNodeInfo(ctxt, &node_info);
9952 }
9953 } else {
9954 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9955 "Couldn't find end of Start Tag %s line %d\n",
9956 name, line, NULL);
9957
9958 /*
9959 * end of parsing of this node.
9960 */
9961 nodePop(ctxt);
9962 namePop(ctxt);
9963 spacePop(ctxt);
9964 if (nbNs > 0)
9965 xmlParserNsPop(ctxt, nbNs);
9966 return(-1);
9967 }
9968
9969 return(0);
9970 }
9971
9972 /**
9973 * xmlParseElementEnd:
9974 * @ctxt: an XML parser context
9975 *
9976 * Parse the end of an XML element. Always consumes '</'.
9977 */
9978 static void
xmlParseElementEnd(xmlParserCtxtPtr ctxt)9979 xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9980 xmlNodePtr cur = ctxt->node;
9981
9982 if (ctxt->nameNr <= 0) {
9983 if ((RAW == '<') && (NXT(1) == '/'))
9984 SKIP(2);
9985 return;
9986 }
9987
9988 /*
9989 * parse the end of tag: '</' should be here.
9990 */
9991 if (ctxt->sax2) {
9992 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9993 namePop(ctxt);
9994 }
9995 #ifdef LIBXML_SAX1_ENABLED
9996 else
9997 xmlParseEndTag1(ctxt, 0);
9998 #endif /* LIBXML_SAX1_ENABLED */
9999
10000 /*
10001 * Capture end position
10002 */
10003 if (cur != NULL && ctxt->record_info) {
10004 xmlParserNodeInfoPtr node_info;
10005
10006 node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10007 if (node_info != NULL) {
10008 node_info->end_pos = ctxt->input->consumed +
10009 (CUR_PTR - ctxt->input->base);
10010 node_info->end_line = ctxt->input->line;
10011 }
10012 }
10013 }
10014
10015 /**
10016 * xmlParseVersionNum:
10017 * @ctxt: an XML parser context
10018 *
10019 * DEPRECATED: Internal function, don't use.
10020 *
10021 * parse the XML version value.
10022 *
10023 * [26] VersionNum ::= '1.' [0-9]+
10024 *
10025 * In practice allow [0-9].[0-9]+ at that level
10026 *
10027 * Returns the string giving the XML version number, or NULL
10028 */
10029 xmlChar *
xmlParseVersionNum(xmlParserCtxtPtr ctxt)10030 xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10031 xmlChar *buf = NULL;
10032 int len = 0;
10033 int size = 10;
10034 xmlChar cur;
10035
10036 buf = xmlMalloc(size);
10037 if (buf == NULL) {
10038 xmlErrMemory(ctxt);
10039 return(NULL);
10040 }
10041 cur = CUR;
10042 if (!((cur >= '0') && (cur <= '9'))) {
10043 xmlFree(buf);
10044 return(NULL);
10045 }
10046 buf[len++] = cur;
10047 NEXT;
10048 cur=CUR;
10049 if (cur != '.') {
10050 xmlFree(buf);
10051 return(NULL);
10052 }
10053 buf[len++] = cur;
10054 NEXT;
10055 cur=CUR;
10056 while ((cur >= '0') && (cur <= '9')) {
10057 if (len + 1 >= size) {
10058 xmlChar *tmp;
10059
10060 size *= 2;
10061 tmp = (xmlChar *) xmlRealloc(buf, size);
10062 if (tmp == NULL) {
10063 xmlFree(buf);
10064 xmlErrMemory(ctxt);
10065 return(NULL);
10066 }
10067 buf = tmp;
10068 }
10069 buf[len++] = cur;
10070 NEXT;
10071 cur=CUR;
10072 }
10073 buf[len] = 0;
10074 return(buf);
10075 }
10076
10077 /**
10078 * xmlParseVersionInfo:
10079 * @ctxt: an XML parser context
10080 *
10081 * DEPRECATED: Internal function, don't use.
10082 *
10083 * parse the XML version.
10084 *
10085 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10086 *
10087 * [25] Eq ::= S? '=' S?
10088 *
10089 * Returns the version string, e.g. "1.0"
10090 */
10091
10092 xmlChar *
xmlParseVersionInfo(xmlParserCtxtPtr ctxt)10093 xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10094 xmlChar *version = NULL;
10095
10096 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10097 SKIP(7);
10098 SKIP_BLANKS;
10099 if (RAW != '=') {
10100 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10101 return(NULL);
10102 }
10103 NEXT;
10104 SKIP_BLANKS;
10105 if (RAW == '"') {
10106 NEXT;
10107 version = xmlParseVersionNum(ctxt);
10108 if (RAW != '"') {
10109 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10110 } else
10111 NEXT;
10112 } else if (RAW == '\''){
10113 NEXT;
10114 version = xmlParseVersionNum(ctxt);
10115 if (RAW != '\'') {
10116 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10117 } else
10118 NEXT;
10119 } else {
10120 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10121 }
10122 }
10123 return(version);
10124 }
10125
10126 /**
10127 * xmlParseEncName:
10128 * @ctxt: an XML parser context
10129 *
10130 * DEPRECATED: Internal function, don't use.
10131 *
10132 * parse the XML encoding name
10133 *
10134 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10135 *
10136 * Returns the encoding name value or NULL
10137 */
10138 xmlChar *
xmlParseEncName(xmlParserCtxtPtr ctxt)10139 xmlParseEncName(xmlParserCtxtPtr ctxt) {
10140 xmlChar *buf = NULL;
10141 int len = 0;
10142 int size = 10;
10143 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10144 XML_MAX_TEXT_LENGTH :
10145 XML_MAX_NAME_LENGTH;
10146 xmlChar cur;
10147
10148 cur = CUR;
10149 if (((cur >= 'a') && (cur <= 'z')) ||
10150 ((cur >= 'A') && (cur <= 'Z'))) {
10151 buf = xmlMalloc(size);
10152 if (buf == NULL) {
10153 xmlErrMemory(ctxt);
10154 return(NULL);
10155 }
10156
10157 buf[len++] = cur;
10158 NEXT;
10159 cur = CUR;
10160 while (((cur >= 'a') && (cur <= 'z')) ||
10161 ((cur >= 'A') && (cur <= 'Z')) ||
10162 ((cur >= '0') && (cur <= '9')) ||
10163 (cur == '.') || (cur == '_') ||
10164 (cur == '-')) {
10165 if (len + 1 >= size) {
10166 xmlChar *tmp;
10167
10168 size *= 2;
10169 tmp = (xmlChar *) xmlRealloc(buf, size);
10170 if (tmp == NULL) {
10171 xmlErrMemory(ctxt);
10172 xmlFree(buf);
10173 return(NULL);
10174 }
10175 buf = tmp;
10176 }
10177 buf[len++] = cur;
10178 if (len > maxLength) {
10179 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10180 xmlFree(buf);
10181 return(NULL);
10182 }
10183 NEXT;
10184 cur = CUR;
10185 }
10186 buf[len] = 0;
10187 } else {
10188 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10189 }
10190 return(buf);
10191 }
10192
10193 /**
10194 * xmlParseEncodingDecl:
10195 * @ctxt: an XML parser context
10196 *
10197 * DEPRECATED: Internal function, don't use.
10198 *
10199 * parse the XML encoding declaration
10200 *
10201 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10202 *
10203 * this setups the conversion filters.
10204 *
10205 * Returns the encoding value or NULL
10206 */
10207
10208 const xmlChar *
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt)10209 xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10210 xmlChar *encoding = NULL;
10211
10212 SKIP_BLANKS;
10213 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10214 return(NULL);
10215
10216 SKIP(8);
10217 SKIP_BLANKS;
10218 if (RAW != '=') {
10219 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10220 return(NULL);
10221 }
10222 NEXT;
10223 SKIP_BLANKS;
10224 if (RAW == '"') {
10225 NEXT;
10226 encoding = xmlParseEncName(ctxt);
10227 if (RAW != '"') {
10228 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10229 xmlFree((xmlChar *) encoding);
10230 return(NULL);
10231 } else
10232 NEXT;
10233 } else if (RAW == '\''){
10234 NEXT;
10235 encoding = xmlParseEncName(ctxt);
10236 if (RAW != '\'') {
10237 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10238 xmlFree((xmlChar *) encoding);
10239 return(NULL);
10240 } else
10241 NEXT;
10242 } else {
10243 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10244 }
10245
10246 if (encoding == NULL)
10247 return(NULL);
10248
10249 xmlSetDeclaredEncoding(ctxt, encoding);
10250
10251 return(ctxt->encoding);
10252 }
10253
10254 /**
10255 * xmlParseSDDecl:
10256 * @ctxt: an XML parser context
10257 *
10258 * DEPRECATED: Internal function, don't use.
10259 *
10260 * parse the XML standalone declaration
10261 *
10262 * [32] SDDecl ::= S 'standalone' Eq
10263 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10264 *
10265 * [ VC: Standalone Document Declaration ]
10266 * TODO The standalone document declaration must have the value "no"
10267 * if any external markup declarations contain declarations of:
10268 * - attributes with default values, if elements to which these
10269 * attributes apply appear in the document without specifications
10270 * of values for these attributes, or
10271 * - entities (other than amp, lt, gt, apos, quot), if references
10272 * to those entities appear in the document, or
10273 * - attributes with values subject to normalization, where the
10274 * attribute appears in the document with a value which will change
10275 * as a result of normalization, or
10276 * - element types with element content, if white space occurs directly
10277 * within any instance of those types.
10278 *
10279 * Returns:
10280 * 1 if standalone="yes"
10281 * 0 if standalone="no"
10282 * -2 if standalone attribute is missing or invalid
10283 * (A standalone value of -2 means that the XML declaration was found,
10284 * but no value was specified for the standalone attribute).
10285 */
10286
10287 int
xmlParseSDDecl(xmlParserCtxtPtr ctxt)10288 xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10289 int standalone = -2;
10290
10291 SKIP_BLANKS;
10292 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10293 SKIP(10);
10294 SKIP_BLANKS;
10295 if (RAW != '=') {
10296 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10297 return(standalone);
10298 }
10299 NEXT;
10300 SKIP_BLANKS;
10301 if (RAW == '\''){
10302 NEXT;
10303 if ((RAW == 'n') && (NXT(1) == 'o')) {
10304 standalone = 0;
10305 SKIP(2);
10306 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10307 (NXT(2) == 's')) {
10308 standalone = 1;
10309 SKIP(3);
10310 } else {
10311 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10312 }
10313 if (RAW != '\'') {
10314 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10315 } else
10316 NEXT;
10317 } else if (RAW == '"'){
10318 NEXT;
10319 if ((RAW == 'n') && (NXT(1) == 'o')) {
10320 standalone = 0;
10321 SKIP(2);
10322 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10323 (NXT(2) == 's')) {
10324 standalone = 1;
10325 SKIP(3);
10326 } else {
10327 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10328 }
10329 if (RAW != '"') {
10330 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10331 } else
10332 NEXT;
10333 } else {
10334 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10335 }
10336 }
10337 return(standalone);
10338 }
10339
10340 /**
10341 * xmlParseXMLDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * DEPRECATED: Internal function, don't use.
10345 *
10346 * parse an XML declaration header
10347 *
10348 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10349 */
10350
10351 void
xmlParseXMLDecl(xmlParserCtxtPtr ctxt)10352 xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10353 xmlChar *version;
10354
10355 /*
10356 * This value for standalone indicates that the document has an
10357 * XML declaration but it does not have a standalone attribute.
10358 * It will be overwritten later if a standalone attribute is found.
10359 */
10360
10361 ctxt->standalone = -2;
10362
10363 /*
10364 * We know that '<?xml' is here.
10365 */
10366 SKIP(5);
10367
10368 if (!IS_BLANK_CH(RAW)) {
10369 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10370 "Blank needed after '<?xml'\n");
10371 }
10372 SKIP_BLANKS;
10373
10374 /*
10375 * We must have the VersionInfo here.
10376 */
10377 version = xmlParseVersionInfo(ctxt);
10378 if (version == NULL) {
10379 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10380 } else {
10381 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10382 /*
10383 * Changed here for XML-1.0 5th edition
10384 */
10385 if (ctxt->options & XML_PARSE_OLD10) {
10386 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10387 "Unsupported version '%s'\n",
10388 version);
10389 } else {
10390 if ((version[0] == '1') && ((version[1] == '.'))) {
10391 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10392 "Unsupported version '%s'\n",
10393 version, NULL);
10394 } else {
10395 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10396 "Unsupported version '%s'\n",
10397 version);
10398 }
10399 }
10400 }
10401 if (ctxt->version != NULL)
10402 xmlFree((void *) ctxt->version);
10403 ctxt->version = version;
10404 }
10405
10406 /*
10407 * We may have the encoding declaration
10408 */
10409 if (!IS_BLANK_CH(RAW)) {
10410 if ((RAW == '?') && (NXT(1) == '>')) {
10411 SKIP(2);
10412 return;
10413 }
10414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10415 }
10416 xmlParseEncodingDecl(ctxt);
10417
10418 /*
10419 * We may have the standalone status.
10420 */
10421 if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10422 if ((RAW == '?') && (NXT(1) == '>')) {
10423 SKIP(2);
10424 return;
10425 }
10426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10427 }
10428
10429 /*
10430 * We can grow the input buffer freely at that point
10431 */
10432 GROW;
10433
10434 SKIP_BLANKS;
10435 ctxt->standalone = xmlParseSDDecl(ctxt);
10436
10437 SKIP_BLANKS;
10438 if ((RAW == '?') && (NXT(1) == '>')) {
10439 SKIP(2);
10440 } else if (RAW == '>') {
10441 /* Deprecated old WD ... */
10442 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10443 NEXT;
10444 } else {
10445 int c;
10446
10447 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10448 while ((PARSER_STOPPED(ctxt) == 0) &&
10449 ((c = CUR) != 0)) {
10450 NEXT;
10451 if (c == '>')
10452 break;
10453 }
10454 }
10455 }
10456
10457 /**
10458 * xmlCtxtGetVersion:
10459 * ctxt: parser context
10460 *
10461 * Available since 2.14.0.
10462 *
10463 * Returns the version from the XML declaration.
10464 */
10465 const xmlChar *
xmlCtxtGetVersion(xmlParserCtxtPtr ctxt)10466 xmlCtxtGetVersion(xmlParserCtxtPtr ctxt) {
10467 if (ctxt == NULL)
10468 return(NULL);
10469
10470 return(ctxt->version);
10471 }
10472
10473 /**
10474 * xmlCtxtGetStandalone:
10475 * ctxt: parser context
10476 *
10477 * Available since 2.14.0.
10478 *
10479 * Returns the value from the standalone document declaration.
10480 */
10481 int
xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt)10482 xmlCtxtGetStandalone(xmlParserCtxtPtr ctxt) {
10483 if (ctxt == NULL)
10484 return(0);
10485
10486 return(ctxt->standalone);
10487 }
10488
10489 /**
10490 * xmlParseMisc:
10491 * @ctxt: an XML parser context
10492 *
10493 * DEPRECATED: Internal function, don't use.
10494 *
10495 * parse an XML Misc* optional field.
10496 *
10497 * [27] Misc ::= Comment | PI | S
10498 */
10499
10500 void
xmlParseMisc(xmlParserCtxtPtr ctxt)10501 xmlParseMisc(xmlParserCtxtPtr ctxt) {
10502 while (PARSER_STOPPED(ctxt) == 0) {
10503 SKIP_BLANKS;
10504 GROW;
10505 if ((RAW == '<') && (NXT(1) == '?')) {
10506 xmlParsePI(ctxt);
10507 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10508 xmlParseComment(ctxt);
10509 } else {
10510 break;
10511 }
10512 }
10513 }
10514
10515 static void
xmlFinishDocument(xmlParserCtxtPtr ctxt)10516 xmlFinishDocument(xmlParserCtxtPtr ctxt) {
10517 xmlDocPtr doc;
10518
10519 /*
10520 * SAX: end of the document processing.
10521 */
10522 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10523 ctxt->sax->endDocument(ctxt->userData);
10524
10525 doc = ctxt->myDoc;
10526 if (doc != NULL) {
10527 if (ctxt->wellFormed) {
10528 doc->properties |= XML_DOC_WELLFORMED;
10529 if (ctxt->valid)
10530 doc->properties |= XML_DOC_DTDVALID;
10531 if (ctxt->nsWellFormed)
10532 doc->properties |= XML_DOC_NSVALID;
10533 }
10534
10535 if (ctxt->options & XML_PARSE_OLD10)
10536 doc->properties |= XML_DOC_OLD10;
10537
10538 /*
10539 * Remove locally kept entity definitions if the tree was not built
10540 */
10541 if (xmlStrEqual(doc->version, SAX_COMPAT_MODE)) {
10542 xmlFreeDoc(doc);
10543 ctxt->myDoc = NULL;
10544 }
10545 }
10546 }
10547
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * Parse an XML document and invoke the SAX handlers. This is useful
 * if you're only interested in custom SAX callbacks. If you want a
 * document tree, use xmlCtxtParseDocument.
 *
 * The steps are, in order: encoding detection, the optional XML
 * declaration, the startDocument callback, leading Misc items, the
 * optional DOCTYPE declaration with its internal subset, the root
 * element, trailing Misc items, and finally xmlFinishDocument.
 *
 * Returns 0 on success. Returns -1 if @ctxt or its input is NULL, if
 * the input is empty, if the default version string cannot be
 * allocated, or if the document turned out not to be well-formed.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlCtxtInitializeLate(ctxt);

    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
        ctxt->sax->setDocumentLocator(ctxt->userData,
                (xmlSAXLocator *) &xmlDefaultSAXLocator);
    }

    xmlDetectEncoding(ctxt);

    /* Nothing to read after encoding detection: the document is empty. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
        return(-1);
    }

    GROW;
    /* Only '<?xml' followed by a blank starts an XML declaration. */
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        SKIP_BLANKS;
    } else {
        /* No XML declaration: fall back to the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
        if (ctxt->version == NULL) {
            xmlErrMemory(ctxt);
            return(-1);
        }
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);
    /* Copy the input buffer's compression setting to the document. */
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
        ctxt->myDoc->compression = ctxt->input->buf->compressed;
    }

    /*
     * The Misc part of the Prolog
     */
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset is 1 while the declaration and internal subset are parsed. */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         * inSubset is 2 for the duration of the externalSubset callback.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        /* Only reported if no earlier error cleared wellFormed. */
        if (ctxt->wellFormed)
            xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                           "Start tag expected, '<' not found\n");
    } else {
        xmlParseElement(ctxt);

        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        if (ctxt->input->cur < ctxt->input->end) {
            /* Unconsumed input remains after the epilog. */
            if (ctxt->wellFormed)
                xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        } else if ((ctxt->input->buf != NULL) &&
                   (ctxt->input->buf->encoder != NULL) &&
                   (ctxt->input->buf->error == 0) &&
                   (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
            /*
             * All decoded input was consumed, yet the raw buffer of an
             * encoder-backed input without a recorded error is not empty.
             */
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "Truncated multi-byte sequence at EOF\n");
        }
    }

    ctxt->instate = XML_PARSER_EOF;
    xmlFinishDocument(ctxt);

    /* A document that is not well-formed is also marked as not valid. */
    if (! ctxt->wellFormed) {
        ctxt->valid = 0;
        return(-1);
    }

    return(0);
}
10676
10677 /**
10678 * xmlParseExtParsedEnt:
10679 * @ctxt: an XML parser context
10680 *
10681 * parse a general parsed entity
10682 * An external general parsed entity is well-formed if it matches the
10683 * production labeled extParsedEnt.
10684 *
10685 * [78] extParsedEnt ::= TextDecl? content
10686 *
10687 * Returns 0, -1 in case of error. the parser context is augmented
10688 * as a result of the parsing.
10689 */
10690
10691 int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt)10692 xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10693 if ((ctxt == NULL) || (ctxt->input == NULL))
10694 return(-1);
10695
10696 xmlCtxtInitializeLate(ctxt);
10697
10698 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
10699 ctxt->sax->setDocumentLocator(ctxt->userData,
10700 (xmlSAXLocator *) &xmlDefaultSAXLocator);
10701 }
10702
10703 xmlDetectEncoding(ctxt);
10704
10705 if (CUR == 0) {
10706 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10707 }
10708
10709 /*
10710 * Check for the XMLDecl in the Prolog.
10711 */
10712 GROW;
10713 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10714
10715 /*
10716 * Note that we will switch encoding on the fly.
10717 */
10718 xmlParseXMLDecl(ctxt);
10719 SKIP_BLANKS;
10720 } else {
10721 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10722 }
10723 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10724 ctxt->sax->startDocument(ctxt->userData);
10725
10726 /*
10727 * Doing validity checking on chunk doesn't make sense
10728 */
10729 ctxt->options &= ~XML_PARSE_DTDVALID;
10730 ctxt->validate = 0;
10731 ctxt->depth = 0;
10732
10733 xmlParseContentInternal(ctxt);
10734
10735 if (ctxt->input->cur < ctxt->input->end)
10736 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10737
10738 /*
10739 * SAX: end of the document processing.
10740 */
10741 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10742 ctxt->sax->endDocument(ctxt->userData);
10743
10744 if (! ctxt->wellFormed) return(-1);
10745 return(0);
10746 }
10747
10748 #ifdef LIBXML_PUSH_ENABLED
10749 /************************************************************************
10750 * *
10751 * Progressive parsing interfaces *
10752 * *
10753 ************************************************************************/
10754
10755 /**
10756 * xmlParseLookupChar:
10757 * @ctxt: an XML parser context
10758 * @c: character
10759 *
10760 * Check whether the input buffer contains a character.
10761 */
10762 static int
xmlParseLookupChar(xmlParserCtxtPtr ctxt,int c)10763 xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10764 const xmlChar *cur;
10765
10766 if (ctxt->checkIndex == 0) {
10767 cur = ctxt->input->cur + 1;
10768 } else {
10769 cur = ctxt->input->cur + ctxt->checkIndex;
10770 }
10771
10772 if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10773 size_t index = ctxt->input->end - ctxt->input->cur;
10774
10775 if (index > LONG_MAX) {
10776 ctxt->checkIndex = 0;
10777 return(1);
10778 }
10779 ctxt->checkIndex = index;
10780 return(0);
10781 } else {
10782 ctxt->checkIndex = 0;
10783 return(1);
10784 }
10785 }
10786
10787 /**
10788 * xmlParseLookupString:
10789 * @ctxt: an XML parser context
10790 * @startDelta: delta to apply at the start
10791 * @str: string
10792 * @strLen: length of string
10793 *
10794 * Check whether the input buffer contains a string.
10795 */
10796 static const xmlChar *
xmlParseLookupString(xmlParserCtxtPtr ctxt,size_t startDelta,const char * str,size_t strLen)10797 xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10798 const char *str, size_t strLen) {
10799 const xmlChar *cur, *term;
10800
10801 if (ctxt->checkIndex == 0) {
10802 cur = ctxt->input->cur + startDelta;
10803 } else {
10804 cur = ctxt->input->cur + ctxt->checkIndex;
10805 }
10806
10807 term = BAD_CAST strstr((const char *) cur, str);
10808 if (term == NULL) {
10809 const xmlChar *end = ctxt->input->end;
10810 size_t index;
10811
10812 /* Rescan (strLen - 1) characters. */
10813 if ((size_t) (end - cur) < strLen)
10814 end = cur;
10815 else
10816 end -= strLen - 1;
10817 index = end - ctxt->input->cur;
10818 if (index > LONG_MAX) {
10819 ctxt->checkIndex = 0;
10820 return(ctxt->input->end - strLen);
10821 }
10822 ctxt->checkIndex = index;
10823 } else {
10824 ctxt->checkIndex = 0;
10825 }
10826
10827 return(term);
10828 }
10829
10830 /**
10831 * xmlParseLookupCharData:
10832 * @ctxt: an XML parser context
10833 *
10834 * Check whether the input buffer contains terminated char data.
10835 */
10836 static int
xmlParseLookupCharData(xmlParserCtxtPtr ctxt)10837 xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10838 const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10839 const xmlChar *end = ctxt->input->end;
10840 size_t index;
10841
10842 while (cur < end) {
10843 if ((*cur == '<') || (*cur == '&')) {
10844 ctxt->checkIndex = 0;
10845 return(1);
10846 }
10847 cur++;
10848 }
10849
10850 index = cur - ctxt->input->cur;
10851 if (index > LONG_MAX) {
10852 ctxt->checkIndex = 0;
10853 return(1);
10854 }
10855 ctxt->checkIndex = index;
10856 return(0);
10857 }
10858
10859 /**
10860 * xmlParseLookupGt:
10861 * @ctxt: an XML parser context
10862 *
10863 * Check whether there's enough data in the input buffer to finish parsing
10864 * a start tag. This has to take quotes into account.
10865 */
10866 static int
xmlParseLookupGt(xmlParserCtxtPtr ctxt)10867 xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10868 const xmlChar *cur;
10869 const xmlChar *end = ctxt->input->end;
10870 int state = ctxt->endCheckState;
10871 size_t index;
10872
10873 if (ctxt->checkIndex == 0)
10874 cur = ctxt->input->cur + 1;
10875 else
10876 cur = ctxt->input->cur + ctxt->checkIndex;
10877
10878 while (cur < end) {
10879 if (state) {
10880 if (*cur == state)
10881 state = 0;
10882 } else if (*cur == '\'' || *cur == '"') {
10883 state = *cur;
10884 } else if (*cur == '>') {
10885 ctxt->checkIndex = 0;
10886 ctxt->endCheckState = 0;
10887 return(1);
10888 }
10889 cur++;
10890 }
10891
10892 index = cur - ctxt->input->cur;
10893 if (index > LONG_MAX) {
10894 ctxt->checkIndex = 0;
10895 ctxt->endCheckState = 0;
10896 return(1);
10897 }
10898 ctxt->checkIndex = index;
10899 ctxt->endCheckState = state;
10900 return(0);
10901 }
10902
/**
 * xmlParseLookupInternalSubset:
 * @ctxt:  an XML parser context
 *
 * Check whether there's enough data in the input buffer to finish parsing
 * the internal subset.
 *
 * The scanner is a small state machine whose state is saved in
 * ctxt->endCheckState between calls:
 *   0        plain markup
 *   '-'      inside a comment
 *   '"', '\'' inside a quoted literal, until the same quote recurs
 *   ']'      a ']' was seen, a '>' would end the subset
 *   ' '      blanks were seen after ']'
 * The scan position is saved in ctxt->checkIndex.
 *
 * Returns 1 if the end of the internal subset was found (or the resume
 * index cannot be stored), 0 if more input is needed.
 */
static int
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
    /*
     * Sorry, but progressive parsing of the internal subset is not
     * supported. We first check that the full content of the internal
     * subset is available and parsing is launched only at that point.
     * Internal subset ends with "']' S? '>'" in an unescaped section and
     * not in a ']]>' sequence which are conditional sections.
     */
    const xmlChar *cur, *start;
    const xmlChar *end = ctxt->input->end;
    int state = ctxt->endCheckState;
    size_t index;

    /* Resume where the previous call stopped, if it left an index. */
    if (ctxt->checkIndex == 0) {
        cur = ctxt->input->cur + 1;
    } else {
        cur = ctxt->input->cur + ctxt->checkIndex;
    }
    /* 'start' is the lower bound for the rescan done after the loop. */
    start = cur;

    while (cur < end) {
        if (state == '-') {
            /* Inside a comment: only "-->" ends it. */
            if ((*cur == '-') &&
                (cur[1] == '-') &&
                (cur[2] == '>')) {
                state = 0;
                cur += 3;
                start = cur;
                continue;
            }
        }
        else if (state == ']') {
            if (*cur == '>') {
                ctxt->checkIndex = 0;
                ctxt->endCheckState = 0;
                return(1);
            }
            if (IS_BLANK_CH(*cur)) {
                state = ' ';
            } else if (*cur != ']') {
                /*
                 * Not the end of the subset: go back to plain markup and
                 * look at this character again (no advance).
                 */
                state = 0;
                start = cur;
                continue;
            }
        }
        else if (state == ' ') {
            if (*cur == '>') {
                ctxt->checkIndex = 0;
                ctxt->endCheckState = 0;
                return(1);
            }
            if (!IS_BLANK_CH(*cur)) {
                /* Same as above: re-examine this character in state 0. */
                state = 0;
                start = cur;
                continue;
            }
        }
        else if (state != 0) {
            /* Inside a quoted literal: wait for the matching quote. */
            if (*cur == state) {
                state = 0;
                start = cur + 1;
            }
        }
        else if (*cur == '<') {
            if ((cur[1] == '!') &&
                (cur[2] == '-') &&
                (cur[3] == '-')) {
                state = '-';
                cur += 4;
                /* Don't treat <!--> as comment */
                start = cur;
                continue;
            }
        }
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
            state = *cur;
        }

        cur++;
    }

    /*
     * Rescan the three last characters to detect "<!--" and "-->"
     * split across chunks.
     */
    if ((state == 0) || (state == '-')) {
        if (cur - start < 3)
            cur = start;
        else
            cur -= 3;
    }
    index = cur - ctxt->input->cur;
    if (index > LONG_MAX) {
        /* Index cannot be stored: reset the lookahead state and report 1. */
        ctxt->checkIndex = 0;
        ctxt->endCheckState = 0;
        return(1);
    }
    ctxt->checkIndex = index;
    ctxt->endCheckState = state;
    return(0);
}
11012
11013 /**
11014 * xmlParseTryOrFinish:
11015 * @ctxt: an XML parser context
11016 * @terminate: last chunk indicator
11017 *
11018 * Try to progress on parsing
11019 *
11020 * Returns zero if no parsing was possible
11021 */
11022 static int
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt,int terminate)11023 xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11024 int ret = 0;
11025 size_t avail;
11026 xmlChar cur, next;
11027
11028 if (ctxt->input == NULL)
11029 return(0);
11030
11031 if ((ctxt->input != NULL) &&
11032 (ctxt->input->cur - ctxt->input->base > 4096)) {
11033 xmlParserShrink(ctxt);
11034 }
11035
11036 while (ctxt->disableSAX == 0) {
11037 avail = ctxt->input->end - ctxt->input->cur;
11038 if (avail < 1)
11039 goto done;
11040 switch (ctxt->instate) {
11041 case XML_PARSER_EOF:
11042 /*
11043 * Document parsing is done !
11044 */
11045 goto done;
11046 case XML_PARSER_START:
11047 /*
11048 * Very first chars read from the document flow.
11049 */
11050 if ((!terminate) && (avail < 4))
11051 goto done;
11052
11053 /*
11054 * We need more bytes to detect EBCDIC code pages.
11055 * See xmlDetectEBCDIC.
11056 */
11057 if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11058 (!terminate) && (avail < 200))
11059 goto done;
11060
11061 xmlDetectEncoding(ctxt);
11062 ctxt->instate = XML_PARSER_XML_DECL;
11063 break;
11064
11065 case XML_PARSER_XML_DECL:
11066 if ((!terminate) && (avail < 2))
11067 goto done;
11068 cur = ctxt->input->cur[0];
11069 next = ctxt->input->cur[1];
11070 if ((cur == '<') && (next == '?')) {
11071 /* PI or XML decl */
11072 if ((!terminate) &&
11073 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11074 goto done;
11075 if ((ctxt->input->cur[2] == 'x') &&
11076 (ctxt->input->cur[3] == 'm') &&
11077 (ctxt->input->cur[4] == 'l') &&
11078 (IS_BLANK_CH(ctxt->input->cur[5]))) {
11079 ret += 5;
11080 xmlParseXMLDecl(ctxt);
11081 } else {
11082 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11083 if (ctxt->version == NULL) {
11084 xmlErrMemory(ctxt);
11085 break;
11086 }
11087 }
11088 } else {
11089 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11090 if (ctxt->version == NULL) {
11091 xmlErrMemory(ctxt);
11092 break;
11093 }
11094 }
11095 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
11096 ctxt->sax->setDocumentLocator(ctxt->userData,
11097 (xmlSAXLocator *) &xmlDefaultSAXLocator);
11098 }
11099 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11100 (!ctxt->disableSAX))
11101 ctxt->sax->startDocument(ctxt->userData);
11102 ctxt->instate = XML_PARSER_MISC;
11103 break;
11104 case XML_PARSER_START_TAG: {
11105 const xmlChar *name;
11106 const xmlChar *prefix = NULL;
11107 const xmlChar *URI = NULL;
11108 int line = ctxt->input->line;
11109 int nbNs = 0;
11110
11111 if ((!terminate) && (avail < 2))
11112 goto done;
11113 cur = ctxt->input->cur[0];
11114 if (cur != '<') {
11115 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11116 "Start tag expected, '<' not found");
11117 ctxt->instate = XML_PARSER_EOF;
11118 xmlFinishDocument(ctxt);
11119 goto done;
11120 }
11121 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11122 goto done;
11123 if (ctxt->spaceNr == 0)
11124 spacePush(ctxt, -1);
11125 else if (*ctxt->space == -2)
11126 spacePush(ctxt, -1);
11127 else
11128 spacePush(ctxt, *ctxt->space);
11129 #ifdef LIBXML_SAX1_ENABLED
11130 if (ctxt->sax2)
11131 #endif /* LIBXML_SAX1_ENABLED */
11132 name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11133 #ifdef LIBXML_SAX1_ENABLED
11134 else
11135 name = xmlParseStartTag(ctxt);
11136 #endif /* LIBXML_SAX1_ENABLED */
11137 if (name == NULL) {
11138 spacePop(ctxt);
11139 ctxt->instate = XML_PARSER_EOF;
11140 xmlFinishDocument(ctxt);
11141 goto done;
11142 }
11143 #ifdef LIBXML_VALID_ENABLED
11144 /*
11145 * [ VC: Root Element Type ]
11146 * The Name in the document type declaration must match
11147 * the element type of the root element.
11148 */
11149 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11150 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11151 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11152 #endif /* LIBXML_VALID_ENABLED */
11153
11154 /*
11155 * Check for an Empty Element.
11156 */
11157 if ((RAW == '/') && (NXT(1) == '>')) {
11158 SKIP(2);
11159
11160 if (ctxt->sax2) {
11161 if ((ctxt->sax != NULL) &&
11162 (ctxt->sax->endElementNs != NULL) &&
11163 (!ctxt->disableSAX))
11164 ctxt->sax->endElementNs(ctxt->userData, name,
11165 prefix, URI);
11166 if (nbNs > 0)
11167 xmlParserNsPop(ctxt, nbNs);
11168 #ifdef LIBXML_SAX1_ENABLED
11169 } else {
11170 if ((ctxt->sax != NULL) &&
11171 (ctxt->sax->endElement != NULL) &&
11172 (!ctxt->disableSAX))
11173 ctxt->sax->endElement(ctxt->userData, name);
11174 #endif /* LIBXML_SAX1_ENABLED */
11175 }
11176 spacePop(ctxt);
11177 } else if (RAW == '>') {
11178 NEXT;
11179 nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11180 } else {
11181 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11182 "Couldn't find end of Start Tag %s\n",
11183 name);
11184 nodePop(ctxt);
11185 spacePop(ctxt);
11186 if (nbNs > 0)
11187 xmlParserNsPop(ctxt, nbNs);
11188 }
11189
11190 if (ctxt->nameNr == 0)
11191 ctxt->instate = XML_PARSER_EPILOG;
11192 else
11193 ctxt->instate = XML_PARSER_CONTENT;
11194 break;
11195 }
11196 case XML_PARSER_CONTENT: {
11197 cur = ctxt->input->cur[0];
11198
11199 if (cur == '<') {
11200 if ((!terminate) && (avail < 2))
11201 goto done;
11202 next = ctxt->input->cur[1];
11203
11204 if (next == '/') {
11205 ctxt->instate = XML_PARSER_END_TAG;
11206 break;
11207 } else if (next == '?') {
11208 if ((!terminate) &&
11209 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11210 goto done;
11211 xmlParsePI(ctxt);
11212 ctxt->instate = XML_PARSER_CONTENT;
11213 break;
11214 } else if (next == '!') {
11215 if ((!terminate) && (avail < 3))
11216 goto done;
11217 next = ctxt->input->cur[2];
11218
11219 if (next == '-') {
11220 if ((!terminate) && (avail < 4))
11221 goto done;
11222 if (ctxt->input->cur[3] == '-') {
11223 if ((!terminate) &&
11224 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11225 goto done;
11226 xmlParseComment(ctxt);
11227 ctxt->instate = XML_PARSER_CONTENT;
11228 break;
11229 }
11230 } else if (next == '[') {
11231 if ((!terminate) && (avail < 9))
11232 goto done;
11233 if ((ctxt->input->cur[2] == '[') &&
11234 (ctxt->input->cur[3] == 'C') &&
11235 (ctxt->input->cur[4] == 'D') &&
11236 (ctxt->input->cur[5] == 'A') &&
11237 (ctxt->input->cur[6] == 'T') &&
11238 (ctxt->input->cur[7] == 'A') &&
11239 (ctxt->input->cur[8] == '[')) {
11240 if ((!terminate) &&
11241 (!xmlParseLookupString(ctxt, 9, "]]>", 3)))
11242 goto done;
11243 ctxt->instate = XML_PARSER_CDATA_SECTION;
11244 xmlParseCDSect(ctxt);
11245 ctxt->instate = XML_PARSER_CONTENT;
11246 break;
11247 }
11248 }
11249 }
11250 } else if (cur == '&') {
11251 if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11252 goto done;
11253 xmlParseReference(ctxt);
11254 break;
11255 } else {
11256 /* TODO Avoid the extra copy, handle directly !!! */
11257 /*
11258 * Goal of the following test is:
11259 * - minimize calls to the SAX 'character' callback
11260 * when they are mergeable
11261 * - handle an problem for isBlank when we only parse
11262 * a sequence of blank chars and the next one is
11263 * not available to check against '<' presence.
11264 * - tries to homogenize the differences in SAX
11265 * callbacks between the push and pull versions
11266 * of the parser.
11267 */
11268 if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11269 if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11270 goto done;
11271 }
11272 ctxt->checkIndex = 0;
11273 xmlParseCharDataInternal(ctxt, !terminate);
11274 break;
11275 }
11276
11277 ctxt->instate = XML_PARSER_START_TAG;
11278 break;
11279 }
11280 case XML_PARSER_END_TAG:
11281 if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11282 goto done;
11283 if (ctxt->sax2) {
11284 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11285 nameNsPop(ctxt);
11286 }
11287 #ifdef LIBXML_SAX1_ENABLED
11288 else
11289 xmlParseEndTag1(ctxt, 0);
11290 #endif /* LIBXML_SAX1_ENABLED */
11291 if (ctxt->nameNr == 0) {
11292 ctxt->instate = XML_PARSER_EPILOG;
11293 } else {
11294 ctxt->instate = XML_PARSER_CONTENT;
11295 }
11296 break;
11297 case XML_PARSER_MISC:
11298 case XML_PARSER_PROLOG:
11299 case XML_PARSER_EPILOG:
11300 SKIP_BLANKS;
11301 avail = ctxt->input->end - ctxt->input->cur;
11302 if (avail < 1)
11303 goto done;
11304 if (ctxt->input->cur[0] == '<') {
11305 if ((!terminate) && (avail < 2))
11306 goto done;
11307 next = ctxt->input->cur[1];
11308 if (next == '?') {
11309 if ((!terminate) &&
11310 (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11311 goto done;
11312 xmlParsePI(ctxt);
11313 break;
11314 } else if (next == '!') {
11315 if ((!terminate) && (avail < 3))
11316 goto done;
11317
11318 if (ctxt->input->cur[2] == '-') {
11319 if ((!terminate) && (avail < 4))
11320 goto done;
11321 if (ctxt->input->cur[3] == '-') {
11322 if ((!terminate) &&
11323 (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11324 goto done;
11325 xmlParseComment(ctxt);
11326 break;
11327 }
11328 } else if (ctxt->instate == XML_PARSER_MISC) {
11329 if ((!terminate) && (avail < 9))
11330 goto done;
11331 if ((ctxt->input->cur[2] == 'D') &&
11332 (ctxt->input->cur[3] == 'O') &&
11333 (ctxt->input->cur[4] == 'C') &&
11334 (ctxt->input->cur[5] == 'T') &&
11335 (ctxt->input->cur[6] == 'Y') &&
11336 (ctxt->input->cur[7] == 'P') &&
11337 (ctxt->input->cur[8] == 'E')) {
11338 if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11339 goto done;
11340 ctxt->inSubset = 1;
11341 xmlParseDocTypeDecl(ctxt);
11342 if (RAW == '[') {
11343 ctxt->instate = XML_PARSER_DTD;
11344 } else {
11345 /*
11346 * Create and update the external subset.
11347 */
11348 ctxt->inSubset = 2;
11349 if ((ctxt->sax != NULL) &&
11350 (!ctxt->disableSAX) &&
11351 (ctxt->sax->externalSubset != NULL))
11352 ctxt->sax->externalSubset(
11353 ctxt->userData,
11354 ctxt->intSubName,
11355 ctxt->extSubSystem,
11356 ctxt->extSubURI);
11357 ctxt->inSubset = 0;
11358 xmlCleanSpecialAttr(ctxt);
11359 ctxt->instate = XML_PARSER_PROLOG;
11360 }
11361 break;
11362 }
11363 }
11364 }
11365 }
11366
11367 if (ctxt->instate == XML_PARSER_EPILOG) {
11368 if (ctxt->errNo == XML_ERR_OK)
11369 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11370 ctxt->instate = XML_PARSER_EOF;
11371 xmlFinishDocument(ctxt);
11372 } else {
11373 ctxt->instate = XML_PARSER_START_TAG;
11374 }
11375 break;
11376 case XML_PARSER_DTD: {
11377 if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11378 goto done;
11379 xmlParseInternalSubset(ctxt);
11380 ctxt->inSubset = 2;
11381 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11382 (ctxt->sax->externalSubset != NULL))
11383 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11384 ctxt->extSubSystem, ctxt->extSubURI);
11385 ctxt->inSubset = 0;
11386 xmlCleanSpecialAttr(ctxt);
11387 ctxt->instate = XML_PARSER_PROLOG;
11388 break;
11389 }
11390 default:
11391 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
11392 "PP: internal error\n");
11393 ctxt->instate = XML_PARSER_EOF;
11394 break;
11395 }
11396 }
11397 done:
11398 return(ret);
11399 }
11400
11401 /**
11402 * xmlParseChunk:
11403 * @ctxt: an XML parser context
11404 * @chunk: chunk of memory
11405 * @size: size of chunk in bytes
11406 * @terminate: last chunk indicator
11407 *
11408 * Parse a chunk of memory in push parser mode.
11409 *
11410 * Assumes that the parser context was initialized with
11411 * xmlCreatePushParserCtxt.
11412 *
11413 * The last chunk, which will often be empty, must be marked with
11414 * the @terminate flag. With the default SAX callbacks, the resulting
11415 * document will be available in ctxt->myDoc. This pointer will not
11416 * be freed by the library.
11417 *
11418 * If the document isn't well-formed, ctxt->myDoc is set to NULL.
11419 * The push parser doesn't support recovery mode.
11420 *
11421 * Returns an xmlParserErrors code (0 on success).
11422 */
11423 int
xmlParseChunk(xmlParserCtxtPtr ctxt,const char * chunk,int size,int terminate)11424 xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11425 int terminate) {
11426 size_t curBase;
11427 size_t maxLength;
11428 size_t pos;
11429 int end_in_lf = 0;
11430 int res;
11431
11432 if ((ctxt == NULL) || (size < 0))
11433 return(XML_ERR_ARGUMENT);
11434 if ((chunk == NULL) && (size > 0))
11435 return(XML_ERR_ARGUMENT);
11436 if ((ctxt->input == NULL) || (ctxt->input->buf == NULL))
11437 return(XML_ERR_ARGUMENT);
11438 if (ctxt->disableSAX != 0)
11439 return(ctxt->errNo);
11440
11441 ctxt->input->flags |= XML_INPUT_PROGRESSIVE;
11442 if (ctxt->instate == XML_PARSER_START)
11443 xmlCtxtInitializeLate(ctxt);
11444 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11445 (chunk[size - 1] == '\r')) {
11446 end_in_lf = 1;
11447 size--;
11448 }
11449
11450 /*
11451 * Also push an empty chunk to make sure that the raw buffer
11452 * will be flushed if there is an encoder.
11453 */
11454 pos = ctxt->input->cur - ctxt->input->base;
11455 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11456 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11457 if (res < 0) {
11458 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11459 xmlHaltParser(ctxt);
11460 return(ctxt->errNo);
11461 }
11462
11463 xmlParseTryOrFinish(ctxt, terminate);
11464
11465 curBase = ctxt->input->cur - ctxt->input->base;
11466 maxLength = (ctxt->options & XML_PARSE_HUGE) ?
11467 XML_MAX_HUGE_LENGTH :
11468 XML_MAX_LOOKUP_LIMIT;
11469 if (curBase > maxLength) {
11470 xmlFatalErr(ctxt, XML_ERR_RESOURCE_LIMIT,
11471 "Buffer size limit exceeded, try XML_PARSE_HUGE\n");
11472 xmlHaltParser(ctxt);
11473 }
11474
11475 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX != 0))
11476 return(ctxt->errNo);
11477
11478 if (end_in_lf == 1) {
11479 pos = ctxt->input->cur - ctxt->input->base;
11480 res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11481 xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
11482 if (res < 0) {
11483 xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL);
11484 xmlHaltParser(ctxt);
11485 return(ctxt->errNo);
11486 }
11487 }
11488 if (terminate) {
11489 /*
11490 * Check for termination
11491 */
11492 if ((ctxt->instate != XML_PARSER_EOF) &&
11493 (ctxt->instate != XML_PARSER_EPILOG)) {
11494 if (ctxt->nameNr > 0) {
11495 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
11496 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
11497 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
11498 "Premature end of data in tag %s line %d\n",
11499 name, line, NULL);
11500 } else if (ctxt->instate == XML_PARSER_START) {
11501 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11502 } else {
11503 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11504 "Start tag expected, '<' not found\n");
11505 }
11506 } else if ((ctxt->input->buf->encoder != NULL) &&
11507 (ctxt->input->buf->error == 0) &&
11508 (!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11509 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11510 "Truncated multi-byte sequence at EOF\n");
11511 }
11512 if (ctxt->instate != XML_PARSER_EOF) {
11513 ctxt->instate = XML_PARSER_EOF;
11514 xmlFinishDocument(ctxt);
11515 }
11516 }
11517 if (ctxt->wellFormed == 0)
11518 return((xmlParserErrors) ctxt->errNo);
11519 else
11520 return(0);
11521 }
11522
11523 /************************************************************************
11524 * *
11525 * I/O front end functions to the parser *
11526 * *
11527 ************************************************************************/
11528
11529 /**
11530 * xmlCreatePushParserCtxt:
11531 * @sax: a SAX handler (optional)
11532 * @user_data: user data for SAX callbacks (optional)
11533 * @chunk: initial chunk (optional, deprecated)
11534 * @size: size of initial chunk in bytes
11535 * @filename: file name or URI (optional)
11536 *
11537 * Create a parser context for using the XML parser in push mode.
11538 * See xmlParseChunk.
11539 *
11540 * Passing an initial chunk is useless and deprecated.
11541 *
11542 * @filename is used as base URI to fetch external entities and for
11543 * error reports.
11544 *
11545 * Returns the new parser context or NULL if a memory allocation
11546 * failed.
11547 */
11548
11549 xmlParserCtxtPtr
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,void * user_data,const char * chunk,int size,const char * filename)11550 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11551 const char *chunk, int size, const char *filename) {
11552 xmlParserCtxtPtr ctxt;
11553 xmlParserInputPtr input;
11554
11555 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11556 if (ctxt == NULL)
11557 return(NULL);
11558
11559 ctxt->options &= ~XML_PARSE_NODICT;
11560 ctxt->dictNames = 1;
11561
11562 input = xmlNewPushInput(filename, chunk, size);
11563 if (input == NULL) {
11564 xmlFreeParserCtxt(ctxt);
11565 return(NULL);
11566 }
11567 if (inputPush(ctxt, input) < 0) {
11568 xmlFreeInputStream(input);
11569 xmlFreeParserCtxt(ctxt);
11570 return(NULL);
11571 }
11572
11573 return(ctxt);
11574 }
11575 #endif /* LIBXML_PUSH_ENABLED */
11576
11577 /**
11578 * xmlStopParser:
11579 * @ctxt: an XML parser context
11580 *
11581 * Blocks further parser processing
11582 */
11583 void
xmlStopParser(xmlParserCtxtPtr ctxt)11584 xmlStopParser(xmlParserCtxtPtr ctxt) {
11585 if (ctxt == NULL)
11586 return;
11587 xmlHaltParser(ctxt);
11588 if (ctxt->errNo != XML_ERR_NO_MEMORY)
11589 ctxt->errNo = XML_ERR_USER_STOP;
11590 }
11591
11592 /**
11593 * xmlCreateIOParserCtxt:
11594 * @sax: a SAX handler (optional)
11595 * @user_data: user data for SAX callbacks (optional)
11596 * @ioread: an I/O read function
11597 * @ioclose: an I/O close function (optional)
11598 * @ioctx: an I/O handler
11599 * @enc: the charset encoding if known (deprecated)
11600 *
11601 * Create a parser context for using the XML parser with an existing
11602 * I/O stream
11603 *
11604 * Returns the new parser context or NULL
11605 */
11606 xmlParserCtxtPtr
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax,void * user_data,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,xmlCharEncoding enc)11607 xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11608 xmlInputReadCallback ioread,
11609 xmlInputCloseCallback ioclose,
11610 void *ioctx, xmlCharEncoding enc) {
11611 xmlParserCtxtPtr ctxt;
11612 xmlParserInputPtr input;
11613 const char *encoding;
11614
11615 ctxt = xmlNewSAXParserCtxt(sax, user_data);
11616 if (ctxt == NULL)
11617 return(NULL);
11618
11619 encoding = xmlGetCharEncodingName(enc);
11620 input = xmlCtxtNewInputFromIO(ctxt, NULL, ioread, ioclose, ioctx,
11621 encoding, 0);
11622 if (input == NULL) {
11623 xmlFreeParserCtxt(ctxt);
11624 return (NULL);
11625 }
11626 if (inputPush(ctxt, input) < 0) {
11627 xmlFreeInputStream(input);
11628 xmlFreeParserCtxt(ctxt);
11629 return(NULL);
11630 }
11631
11632 return(ctxt);
11633 }
11634
11635 #ifdef LIBXML_VALID_ENABLED
11636 /************************************************************************
11637 * *
11638 * Front ends when parsing a DTD *
11639 * *
11640 ************************************************************************/
11641
11642 /**
11643 * xmlIOParseDTD:
11644 * @sax: the SAX handler block or NULL
11645 * @input: an Input Buffer
11646 * @enc: the charset encoding if known
11647 *
11648 * Load and parse a DTD
11649 *
11650 * Returns the resulting xmlDtdPtr or NULL in case of error.
11651 * @input will be freed by the function in any case.
11652 */
11653
11654 xmlDtdPtr
xmlIOParseDTD(xmlSAXHandlerPtr sax,xmlParserInputBufferPtr input,xmlCharEncoding enc)11655 xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11656 xmlCharEncoding enc) {
11657 xmlDtdPtr ret = NULL;
11658 xmlParserCtxtPtr ctxt;
11659 xmlParserInputPtr pinput = NULL;
11660
11661 if (input == NULL)
11662 return(NULL);
11663
11664 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11665 if (ctxt == NULL) {
11666 xmlFreeParserInputBuffer(input);
11667 return(NULL);
11668 }
11669
11670 /*
11671 * generate a parser input from the I/O handler
11672 */
11673
11674 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
11675 if (pinput == NULL) {
11676 xmlFreeParserInputBuffer(input);
11677 xmlFreeParserCtxt(ctxt);
11678 return(NULL);
11679 }
11680
11681 /*
11682 * plug some encoding conversion routines here.
11683 */
11684 if (xmlPushInput(ctxt, pinput) < 0) {
11685 xmlFreeInputStream(pinput);
11686 xmlFreeParserCtxt(ctxt);
11687 return(NULL);
11688 }
11689 if (enc != XML_CHAR_ENCODING_NONE) {
11690 xmlSwitchEncoding(ctxt, enc);
11691 }
11692
11693 /*
11694 * let's parse that entity knowing it's an external subset.
11695 */
11696 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11697 if (ctxt->myDoc == NULL) {
11698 xmlErrMemory(ctxt);
11699 return(NULL);
11700 }
11701 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11702 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11703 BAD_CAST "none", BAD_CAST "none");
11704
11705 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11706
11707 if (ctxt->myDoc != NULL) {
11708 if (ctxt->wellFormed) {
11709 ret = ctxt->myDoc->extSubset;
11710 ctxt->myDoc->extSubset = NULL;
11711 if (ret != NULL) {
11712 xmlNodePtr tmp;
11713
11714 ret->doc = NULL;
11715 tmp = ret->children;
11716 while (tmp != NULL) {
11717 tmp->doc = NULL;
11718 tmp = tmp->next;
11719 }
11720 }
11721 } else {
11722 ret = NULL;
11723 }
11724 xmlFreeDoc(ctxt->myDoc);
11725 ctxt->myDoc = NULL;
11726 }
11727 xmlFreeParserCtxt(ctxt);
11728
11729 return(ret);
11730 }
11731
11732 /**
11733 * xmlSAXParseDTD:
11734 * @sax: the SAX handler block
11735 * @ExternalID: a NAME* containing the External ID of the DTD
11736 * @SystemID: a NAME* containing the URL to the DTD
11737 *
11738 * DEPRECATED: Don't use.
11739 *
11740 * Load and parse an external subset.
11741 *
11742 * Returns the resulting xmlDtdPtr or NULL in case of error.
11743 */
11744
11745 xmlDtdPtr
xmlSAXParseDTD(xmlSAXHandlerPtr sax,const xmlChar * ExternalID,const xmlChar * SystemID)11746 xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11747 const xmlChar *SystemID) {
11748 xmlDtdPtr ret = NULL;
11749 xmlParserCtxtPtr ctxt;
11750 xmlParserInputPtr input = NULL;
11751 xmlChar* systemIdCanonic;
11752
11753 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11754
11755 ctxt = xmlNewSAXParserCtxt(sax, NULL);
11756 if (ctxt == NULL) {
11757 return(NULL);
11758 }
11759
11760 /*
11761 * Canonicalise the system ID
11762 */
11763 systemIdCanonic = xmlCanonicPath(SystemID);
11764 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
11765 xmlFreeParserCtxt(ctxt);
11766 return(NULL);
11767 }
11768
11769 /*
11770 * Ask the Entity resolver to load the damn thing
11771 */
11772
11773 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
11774 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11775 systemIdCanonic);
11776 if (input == NULL) {
11777 xmlFreeParserCtxt(ctxt);
11778 if (systemIdCanonic != NULL)
11779 xmlFree(systemIdCanonic);
11780 return(NULL);
11781 }
11782
11783 /*
11784 * plug some encoding conversion routines here.
11785 */
11786 if (xmlPushInput(ctxt, input) < 0) {
11787 xmlFreeInputStream(input);
11788 xmlFreeParserCtxt(ctxt);
11789 if (systemIdCanonic != NULL)
11790 xmlFree(systemIdCanonic);
11791 return(NULL);
11792 }
11793
11794 xmlDetectEncoding(ctxt);
11795
11796 if (input->filename == NULL)
11797 input->filename = (char *) systemIdCanonic;
11798 else
11799 xmlFree(systemIdCanonic);
11800
11801 /*
11802 * let's parse that entity knowing it's an external subset.
11803 */
11804 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
11805 if (ctxt->myDoc == NULL) {
11806 xmlErrMemory(ctxt);
11807 xmlFreeParserCtxt(ctxt);
11808 return(NULL);
11809 }
11810 ctxt->myDoc->properties = XML_DOC_INTERNAL;
11811 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11812 ExternalID, SystemID);
11813 if (ctxt->myDoc->extSubset == NULL) {
11814 xmlFreeDoc(ctxt->myDoc);
11815 xmlFreeParserCtxt(ctxt);
11816 return(NULL);
11817 }
11818 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11819
11820 if (ctxt->myDoc != NULL) {
11821 if (ctxt->wellFormed) {
11822 ret = ctxt->myDoc->extSubset;
11823 ctxt->myDoc->extSubset = NULL;
11824 if (ret != NULL) {
11825 xmlNodePtr tmp;
11826
11827 ret->doc = NULL;
11828 tmp = ret->children;
11829 while (tmp != NULL) {
11830 tmp->doc = NULL;
11831 tmp = tmp->next;
11832 }
11833 }
11834 } else {
11835 ret = NULL;
11836 }
11837 xmlFreeDoc(ctxt->myDoc);
11838 ctxt->myDoc = NULL;
11839 }
11840 xmlFreeParserCtxt(ctxt);
11841
11842 return(ret);
11843 }
11844
11845
11846 /**
11847 * xmlParseDTD:
11848 * @ExternalID: a NAME* containing the External ID of the DTD
11849 * @SystemID: a NAME* containing the URL to the DTD
11850 *
11851 * Load and parse an external subset.
11852 *
11853 * Returns the resulting xmlDtdPtr or NULL in case of error.
11854 */
11855
11856 xmlDtdPtr
xmlParseDTD(const xmlChar * ExternalID,const xmlChar * SystemID)11857 xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11858 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11859 }
11860 #endif /* LIBXML_VALID_ENABLED */
11861
11862 /************************************************************************
11863 * *
11864 * Front ends when parsing an Entity *
11865 * *
11866 ************************************************************************/
11867
/*
 * xmlCtxtParseContentInternal:
 * @ctxt:  parser context
 * @input:  input stream to parse; it is pushed onto and popped from the
 *          input stack of @ctxt but never freed here
 * @hasTextDecl:  if non-zero, detect the encoding and parse an optional
 *                text declaration before the content
 * @buildTree:  if non-zero, collect the parsed nodes below a temporary
 *              "#root" node
 *
 * Parse @input with xmlParseContentInternal inside a temporary "#root"
 * name/space/node scope and report XML_ERR_NOT_WELL_BALANCED if input
 * is left over afterwards.
 *
 * Returns the parsed node list, unlinked from the temporary root, if a
 * tree was built and the content was well-formed (or the context is in
 * recovery mode and the error isn't XML_ERR_NO_MEMORY). Returns NULL
 * otherwise.
 */
static xmlNodePtr
xmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                            int hasTextDecl, int buildTree) {
    xmlNodePtr root = NULL;
    xmlNodePtr list = NULL;
    xmlChar *rootName = BAD_CAST "#root";
    int result;

    /* Temporary parent that collects the parsed nodes. */
    if (buildTree) {
        root = xmlNewDocNode(ctxt->myDoc, NULL, rootName, NULL);
        if (root == NULL) {
            xmlErrMemory(ctxt);
            goto error;
        }
    }

    if (xmlPushInput(ctxt, input) < 0)
        goto error;

    /* Open the "#root" scope; it is closed again after parsing. */
    nameNsPush(ctxt, rootName, NULL, NULL, 0, 0);
    spacePush(ctxt, -1);

    if (buildTree)
        nodePush(ctxt, root);

    if (hasTextDecl) {
        xmlDetectEncoding(ctxt);

        /*
         * Parse a possible text declaration first
         */
        if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
            (IS_BLANK_CH(NXT(5)))) {
            xmlParseTextDecl(ctxt);
            /*
             * An XML-1.0 document can't reference an entity not XML-1.0
             */
            if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
                (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
                xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
                               "Version mismatch between document and "
                               "entity\n");
            }
        }
    }

    xmlParseContentInternal(ctxt);

    /* Anything left in the input means the content wasn't balanced. */
    if (ctxt->input->cur < ctxt->input->end)
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);

    if ((ctxt->wellFormed) ||
        ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
        if (root != NULL) {
            xmlNodePtr cur;

            /*
             * Unlink newly created node list.
             */
            list = root->children;
            root->children = NULL;
            root->last = NULL;
            for (cur = list; cur != NULL; cur = cur->next)
                cur->parent = NULL;
        }
    }

    /*
     * Read the rest of the stream in case of errors. We want
     * to account for the whole entity size.
     */
    do {
        ctxt->input->cur = ctxt->input->end;
        xmlParserShrink(ctxt);
        result = xmlParserGrow(ctxt);
    } while (result > 0);

    /* Close the "#root" scope opened above. */
    if (buildTree)
        nodePop(ctxt);

    namePop(ctxt);
    spacePop(ctxt);

    /* xmlPopInput would free the stream */
    inputPop(ctxt);

error:
    /*
     * Free the temporary root. If the list wasn't unlinked above, the
     * root still has its children at this point.
     */
    xmlFreeNode(root);

    return(list);
}
11959
/*
 * xmlCtxtParseEntity:
 * @ctxt:  parser context
 * @ent:  the entity to parse
 *
 * Parse the content of @ent through a new entity input stream.
 *
 * A tree is built if the context currently has a node (ctxt->node).
 * When the entity wasn't flagged XML_ENT_PARSED before, the resulting
 * node list becomes the children of @ent; otherwise the list is freed.
 * The consumed input size is added to the entity's expanded size (if
 * not flagged XML_ENT_CHECKED yet) and, for external entities parsed
 * for the first time, to ctxt->sizeentities.
 *
 * On return, including the error paths, @ent is flagged
 * XML_ENT_PARSED and XML_ENT_CHECKED.
 */
static void
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
    xmlParserInputPtr input;
    xmlNodePtr list;
    unsigned long consumed;
    int isExternal;
    int buildTree;
    int oldMinNsIndex;
    int oldNodelen, oldNodemem;

    isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
    buildTree = (ctxt->node != NULL);

    /*
     * Recursion check
     */
    if (ent->flags & XML_ENT_EXPANDING) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        xmlHaltParser(ctxt);
        goto error;
    }

    /*
     * Load entity
     */
    input = xmlNewEntityInputStream(ctxt, ent);
    if (input == NULL)
        goto error;

    /*
     * When building a tree, we need to limit the scope of namespace
     * declarations, so that entities don't reference xmlNs structs
     * from the parent of a reference.
     */
    oldMinNsIndex = ctxt->nsdb->minNsIndex;
    if (buildTree)
        ctxt->nsdb->minNsIndex = ctxt->nsNr;

    /* Save and reset nodelen/nodemem; restored after parsing. */
    oldNodelen = ctxt->nodelen;
    oldNodemem = ctxt->nodemem;
    ctxt->nodelen = 0;
    ctxt->nodemem = 0;

    /*
     * Parse content
     *
     * This initiates a recursive call chain:
     *
     * - xmlCtxtParseContentInternal
     * - xmlParseContentInternal
     * - xmlParseReference
     * - xmlCtxtParseEntity
     *
     * The nesting depth is limited by the maximum number of inputs,
     * see xmlPushInput.
     *
     * It's possible to make this non-recursive (minNsIndex must be
     * stored in the input struct) at the expense of code readability.
     */

    /* Flag the entity while it is expanded for the recursion check. */
    ent->flags |= XML_ENT_EXPANDING;

    list = xmlCtxtParseContentInternal(ctxt, input, isExternal, buildTree);

    ent->flags &= ~XML_ENT_EXPANDING;

    ctxt->nsdb->minNsIndex = oldMinNsIndex;
    ctxt->nodelen = oldNodelen;
    ctxt->nodemem = oldNodemem;

    /*
     * Entity size accounting
     */
    consumed = input->consumed;
    xmlSaturatedAddSizeT(&consumed, input->end - input->base);

    if ((ent->flags & XML_ENT_CHECKED) == 0)
        xmlSaturatedAdd(&ent->expandedSize, consumed);

    if ((ent->flags & XML_ENT_PARSED) == 0) {
        if (isExternal)
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);

        /* First parse: the node list becomes the entity's children. */
        ent->children = list;

        while (list != NULL) {
            list->parent = (xmlNodePtr) ent;
            if (list->next == NULL)
                ent->last = list;
            list = list->next;
        }
    } else {
        /* The entity was parsed before: drop the new list. */
        xmlFreeNodeList(list);
    }

    /* The stream was only popped, not freed, by the content parser. */
    xmlFreeInputStream(input);

error:
    ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
}
12060
12061 /**
12062 * xmlParseCtxtExternalEntity:
12063 * @ctxt: the existing parsing context
12064 * @URL: the URL for the entity to load
12065 * @ID: the System ID for the entity to load
12066 * @listOut: the return value for the set of parsed nodes
12067 *
12068 * Parse an external general entity within an existing parsing context
12069 * An external general parsed entity is well-formed if it matches the
12070 * production labeled extParsedEnt.
12071 *
12072 * [78] extParsedEnt ::= TextDecl? content
12073 *
12074 * Returns 0 if the entity is well formed, -1 in case of args problem and
12075 * the parser error code otherwise
12076 */
12077
12078 int
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * listOut)12079 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctxt, const xmlChar *URL,
12080 const xmlChar *ID, xmlNodePtr *listOut) {
12081 xmlParserInputPtr input;
12082 xmlNodePtr list;
12083
12084 if (listOut != NULL)
12085 *listOut = NULL;
12086
12087 if (ctxt == NULL)
12088 return(XML_ERR_ARGUMENT);
12089
12090 input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12091 XML_RESOURCE_GENERAL_ENTITY);
12092 if (input == NULL)
12093 return(ctxt->errNo);
12094
12095 xmlCtxtInitializeLate(ctxt);
12096
12097 list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 1, 1);
12098 if (listOut != NULL)
12099 *listOut = list;
12100 else
12101 xmlFreeNodeList(list);
12102
12103 xmlFreeInputStream(input);
12104 return(ctxt->errNo);
12105 }
12106
12107 #ifdef LIBXML_SAX1_ENABLED
12108 /**
12109 * xmlParseExternalEntity:
12110 * @doc: the document the chunk pertains to
12111 * @sax: the SAX handler block (possibly NULL)
12112 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12113 * @depth: Used for loop detection, use 0
12114 * @URL: the URL for the entity to load
12115 * @ID: the System ID for the entity to load
12116 * @list: the return value for the set of parsed nodes
12117 *
12118 * DEPRECATED: Use xmlParseCtxtExternalEntity.
12119 *
12120 * Parse an external general entity
12121 * An external general parsed entity is well-formed if it matches the
12122 * production labeled extParsedEnt.
12123 *
12124 * [78] extParsedEnt ::= TextDecl? content
12125 *
12126 * Returns 0 if the entity is well formed, -1 in case of args problem and
12127 * the parser error code otherwise
12128 */
12129
12130 int
xmlParseExternalEntity(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * URL,const xmlChar * ID,xmlNodePtr * list)12131 xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12132 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
12133 xmlParserCtxtPtr ctxt;
12134 int ret;
12135
12136 if (list != NULL)
12137 *list = NULL;
12138
12139 if (doc == NULL)
12140 return(XML_ERR_ARGUMENT);
12141
12142 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12143 if (ctxt == NULL)
12144 return(XML_ERR_NO_MEMORY);
12145
12146 ctxt->depth = depth;
12147 ctxt->myDoc = doc;
12148 ret = xmlParseCtxtExternalEntity(ctxt, URL, ID, list);
12149
12150 xmlFreeParserCtxt(ctxt);
12151 return(ret);
12152 }
12153
12154 /**
12155 * xmlParseBalancedChunkMemory:
12156 * @doc: the document the chunk pertains to (must not be NULL)
12157 * @sax: the SAX handler block (possibly NULL)
12158 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12159 * @depth: Used for loop detection, use 0
12160 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12161 * @lst: the return value for the set of parsed nodes
12162 *
12163 * Parse a well-balanced chunk of an XML document
12164 * called by the parser
12165 * The allowed sequence for the Well Balanced Chunk is the one defined by
12166 * the content production in the XML grammar:
12167 *
12168 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12169 *
12170 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12171 * the parser error code otherwise
12172 */
12173
12174 int
xmlParseBalancedChunkMemory(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * lst)12175 xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12176 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12177 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12178 depth, string, lst, 0 );
12179 }
12180 #endif /* LIBXML_SAX1_ENABLED */
12181
12182 /**
12183 * xmlCtxtParseContent:
12184 * @ctxt: parser context
12185 * @input: parser input
12186 * @node: target node or document
12187 * @hasTextDecl: whether to parse text declaration
12188 *
12189 * Parse a well-balanced chunk of XML matching the 'content' production.
12190 *
12191 * Namespaces in scope of @node and entities of @node's document are
12192 * recognized. When validating, the DTD of @node's document is used.
12193 *
12194 * Always consumes @input even in error case.
12195 *
12196 * Available since 2.14.0.
12197 *
12198 * Returns a node list or NULL in case of error.
12199 */
12200 xmlNodePtr
xmlCtxtParseContent(xmlParserCtxtPtr ctxt,xmlParserInputPtr input,xmlNodePtr node,int hasTextDecl)12201 xmlCtxtParseContent(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
12202 xmlNodePtr node, int hasTextDecl) {
12203 xmlDocPtr doc;
12204 xmlNodePtr cur, list = NULL;
12205 int nsnr = 0;
12206 xmlDictPtr oldDict;
12207 int oldOptions, oldDictNames, oldLoadSubset;
12208
12209 if ((ctxt == NULL) || (input == NULL) || (node == NULL)) {
12210 xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12211 goto exit;
12212 }
12213
12214 doc = node->doc;
12215 if (doc == NULL) {
12216 xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12217 goto exit;
12218 }
12219
12220 switch (node->type) {
12221 case XML_ELEMENT_NODE:
12222 case XML_DOCUMENT_NODE:
12223 case XML_HTML_DOCUMENT_NODE:
12224 break;
12225
12226 case XML_ATTRIBUTE_NODE:
12227 case XML_TEXT_NODE:
12228 case XML_CDATA_SECTION_NODE:
12229 case XML_ENTITY_REF_NODE:
12230 case XML_PI_NODE:
12231 case XML_COMMENT_NODE:
12232 for (cur = node->parent; cur != NULL; cur = node->parent) {
12233 if ((cur->type == XML_ELEMENT_NODE) ||
12234 (cur->type == XML_DOCUMENT_NODE) ||
12235 (cur->type == XML_HTML_DOCUMENT_NODE)) {
12236 node = cur;
12237 break;
12238 }
12239 }
12240 break;
12241
12242 default:
12243 xmlFatalErr(ctxt, XML_ERR_ARGUMENT, NULL);
12244 goto exit;
12245 }
12246
12247 #ifdef LIBXML_HTML_ENABLED
12248 if (ctxt->html)
12249 htmlCtxtReset(ctxt);
12250 else
12251 #endif
12252 xmlCtxtReset(ctxt);
12253
12254 oldDict = ctxt->dict;
12255 oldOptions = ctxt->options;
12256 oldDictNames = ctxt->dictNames;
12257 oldLoadSubset = ctxt->loadsubset;
12258
12259 /*
12260 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12261 */
12262 if (doc->dict != NULL) {
12263 ctxt->dict = doc->dict;
12264 } else {
12265 ctxt->options |= XML_PARSE_NODICT;
12266 ctxt->dictNames = 0;
12267 }
12268
12269 /*
12270 * Disable IDs
12271 */
12272 ctxt->loadsubset |= XML_SKIP_IDS;
12273
12274 ctxt->myDoc = doc;
12275
12276 #ifdef LIBXML_HTML_ENABLED
12277 if (ctxt->html) {
12278 /*
12279 * When parsing in context, it makes no sense to add implied
12280 * elements like html/body/etc...
12281 */
12282 ctxt->options |= HTML_PARSE_NOIMPLIED;
12283
12284 list = htmlCtxtParseContentInternal(ctxt, input);
12285 } else
12286 #endif
12287 {
12288 xmlCtxtInitializeLate(ctxt);
12289
12290 /*
12291 * This hack lowers the error level of undeclared entities
12292 * from XML_ERR_FATAL (well-formedness error) to XML_ERR_ERROR
12293 * or XML_ERR_WARNING.
12294 */
12295 ctxt->hasExternalSubset = 1;
12296
12297 /*
12298 * initialize the SAX2 namespaces stack
12299 */
12300 cur = node;
12301 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12302 xmlNsPtr ns = cur->nsDef;
12303 xmlHashedString hprefix, huri;
12304
12305 while (ns != NULL) {
12306 hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
12307 huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
12308 if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
12309 nsnr++;
12310 ns = ns->next;
12311 }
12312 cur = cur->parent;
12313 }
12314
12315 list = xmlCtxtParseContentInternal(ctxt, input, hasTextDecl, 1);
12316
12317 if (nsnr > 0)
12318 xmlParserNsPop(ctxt, nsnr);
12319 }
12320
12321 ctxt->dict = oldDict;
12322 ctxt->options = oldOptions;
12323 ctxt->dictNames = oldDictNames;
12324 ctxt->loadsubset = oldLoadSubset;
12325 ctxt->myDoc = NULL;
12326 ctxt->node = NULL;
12327
12328 exit:
12329 xmlFreeInputStream(input);
12330 return(list);
12331 }
12332
12333 /**
12334 * xmlParseInNodeContext:
12335 * @node: the context node
12336 * @data: the input string
12337 * @datalen: the input string length in bytes
12338 * @options: a combination of xmlParserOption
12339 * @listOut: the return value for the set of parsed nodes
12340 *
12341 * Parse a well-balanced chunk of an XML document
12342 * within the context (DTD, namespaces, etc ...) of the given node.
12343 *
12344 * The allowed sequence for the data is a Well Balanced Chunk defined by
12345 * the content production in the XML grammar:
12346 *
12347 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12348 *
12349 * This function assumes the encoding of @node's document which is
12350 * typically not what you want. A better alternative is
12351 * xmlCtxtParseContent.
12352 *
12353 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12354 * error code otherwise
12355 */
12356 xmlParserErrors
xmlParseInNodeContext(xmlNodePtr node,const char * data,int datalen,int options,xmlNodePtr * listOut)12357 xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12358 int options, xmlNodePtr *listOut) {
12359 xmlParserCtxtPtr ctxt;
12360 xmlParserInputPtr input;
12361 xmlDocPtr doc;
12362 xmlNodePtr list;
12363 xmlParserErrors ret;
12364
12365 if (listOut == NULL)
12366 return(XML_ERR_INTERNAL_ERROR);
12367 *listOut = NULL;
12368
12369 if ((node == NULL) || (data == NULL) || (datalen < 0))
12370 return(XML_ERR_INTERNAL_ERROR);
12371
12372 doc = node->doc;
12373 if (doc == NULL)
12374 return(XML_ERR_INTERNAL_ERROR);
12375
12376 #ifdef LIBXML_HTML_ENABLED
12377 if (doc->type == XML_HTML_DOCUMENT_NODE) {
12378 ctxt = htmlNewParserCtxt();
12379 }
12380 else
12381 #endif
12382 ctxt = xmlNewParserCtxt();
12383
12384 if (ctxt == NULL)
12385 return(XML_ERR_NO_MEMORY);
12386
12387 input = xmlCtxtNewInputFromMemory(ctxt, NULL, data, datalen,
12388 (const char *) doc->encoding,
12389 XML_INPUT_BUF_STATIC);
12390 if (input == NULL) {
12391 xmlFreeParserCtxt(ctxt);
12392 return(XML_ERR_NO_MEMORY);
12393 }
12394
12395 xmlCtxtUseOptions(ctxt, options);
12396
12397 list = xmlCtxtParseContent(ctxt, input, node, /* hasTextDecl */ 0);
12398
12399 if (list == NULL) {
12400 ret = ctxt->errNo;
12401 if (ret == XML_ERR_ARGUMENT)
12402 ret = XML_ERR_INTERNAL_ERROR;
12403 } else {
12404 ret = XML_ERR_OK;
12405 *listOut = list;
12406 }
12407
12408 xmlFreeParserCtxt(ctxt);
12409
12410 return(ret);
12411 }
12412
12413 #ifdef LIBXML_SAX1_ENABLED
12414 /**
12415 * xmlParseBalancedChunkMemoryRecover:
12416 * @doc: the document the chunk pertains to (must not be NULL)
12417 * @sax: the SAX handler block (possibly NULL)
12418 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12419 * @depth: Used for loop detection, use 0
12420 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12421 * @listOut: the return value for the set of parsed nodes
12422 * @recover: return nodes even if the data is broken (use 0)
12423 *
12424 * Parse a well-balanced chunk of an XML document
12425 *
12426 * The allowed sequence for the Well Balanced Chunk is the one defined by
12427 * the content production in the XML grammar:
12428 *
12429 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12430 *
12431 * Returns 0 if the chunk is well balanced, or thehe parser error code
12432 * otherwise.
12433 *
12434 * In case recover is set to 1, the nodelist will not be empty even if
12435 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12436 * some extent.
12437 */
12438 int
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,xmlSAXHandlerPtr sax,void * user_data,int depth,const xmlChar * string,xmlNodePtr * listOut,int recover)12439 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12440 void *user_data, int depth, const xmlChar *string, xmlNodePtr *listOut,
12441 int recover) {
12442 xmlParserCtxtPtr ctxt;
12443 xmlParserInputPtr input;
12444 xmlNodePtr list;
12445 int ret;
12446
12447 if (listOut != NULL)
12448 *listOut = NULL;
12449
12450 if (string == NULL)
12451 return(XML_ERR_ARGUMENT);
12452
12453 ctxt = xmlNewSAXParserCtxt(sax, user_data);
12454 if (ctxt == NULL)
12455 return(XML_ERR_NO_MEMORY);
12456
12457 xmlCtxtInitializeLate(ctxt);
12458
12459 ctxt->depth = depth;
12460 ctxt->myDoc = doc;
12461 if (recover) {
12462 ctxt->options |= XML_PARSE_RECOVER;
12463 ctxt->recovery = 1;
12464 }
12465
12466 input = xmlNewStringInputStream(ctxt, string);
12467 if (input == NULL) {
12468 ret = ctxt->errNo;
12469 goto error;
12470 }
12471
12472 list = xmlCtxtParseContentInternal(ctxt, input, /* hasTextDecl */ 0, 1);
12473 if (listOut != NULL)
12474 *listOut = list;
12475 else
12476 xmlFreeNodeList(list);
12477
12478 if (!ctxt->wellFormed)
12479 ret = ctxt->errNo;
12480 else
12481 ret = XML_ERR_OK;
12482
12483 error:
12484 xmlFreeInputStream(input);
12485 xmlFreeParserCtxt(ctxt);
12486 return(ret);
12487 }
12488
12489 /**
12490 * xmlSAXParseEntity:
12491 * @sax: the SAX handler block
12492 * @filename: the filename
12493 *
12494 * DEPRECATED: Don't use.
12495 *
12496 * parse an XML external entity out of context and build a tree.
12497 * It use the given SAX function block to handle the parsing callback.
12498 * If sax is NULL, fallback to the default DOM tree building routines.
12499 *
12500 * [78] extParsedEnt ::= TextDecl? content
12501 *
12502 * This correspond to a "Well Balanced" chunk
12503 *
12504 * Returns the resulting document tree
12505 */
12506
12507 xmlDocPtr
xmlSAXParseEntity(xmlSAXHandlerPtr sax,const char * filename)12508 xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12509 xmlDocPtr ret;
12510 xmlParserCtxtPtr ctxt;
12511
12512 ctxt = xmlCreateFileParserCtxt(filename);
12513 if (ctxt == NULL) {
12514 return(NULL);
12515 }
12516 if (sax != NULL) {
12517 if (sax->initialized == XML_SAX2_MAGIC) {
12518 *ctxt->sax = *sax;
12519 } else {
12520 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12521 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12522 }
12523 ctxt->userData = NULL;
12524 }
12525
12526 xmlParseExtParsedEnt(ctxt);
12527
12528 if (ctxt->wellFormed) {
12529 ret = ctxt->myDoc;
12530 } else {
12531 ret = NULL;
12532 xmlFreeDoc(ctxt->myDoc);
12533 }
12534
12535 xmlFreeParserCtxt(ctxt);
12536
12537 return(ret);
12538 }
12539
12540 /**
12541 * xmlParseEntity:
12542 * @filename: the filename
12543 *
12544 * parse an XML external entity out of context and build a tree.
12545 *
12546 * [78] extParsedEnt ::= TextDecl? content
12547 *
12548 * This correspond to a "Well Balanced" chunk
12549 *
12550 * Returns the resulting document tree
12551 */
12552
12553 xmlDocPtr
xmlParseEntity(const char * filename)12554 xmlParseEntity(const char *filename) {
12555 return(xmlSAXParseEntity(NULL, filename));
12556 }
12557 #endif /* LIBXML_SAX1_ENABLED */
12558
12559 /**
12560 * xmlCreateEntityParserCtxt:
12561 * @URL: the entity URL
12562 * @ID: the entity PUBLIC ID
12563 * @base: a possible base for the target URI
12564 *
12565 * DEPRECATED: Don't use.
12566 *
12567 * Create a parser context for an external entity
12568 * Automatic support for ZLIB/Compress compressed document is provided
12569 * by default if found at compile-time.
12570 *
12571 * Returns the new parser context or NULL
12572 */
12573 xmlParserCtxtPtr
xmlCreateEntityParserCtxt(const xmlChar * URL,const xmlChar * ID,const xmlChar * base)12574 xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12575 const xmlChar *base) {
12576 xmlParserCtxtPtr ctxt;
12577 xmlParserInputPtr input;
12578 xmlChar *uri = NULL;
12579
12580 ctxt = xmlNewParserCtxt();
12581 if (ctxt == NULL)
12582 return(NULL);
12583
12584 if (base != NULL) {
12585 if (xmlBuildURISafe(URL, base, &uri) < 0)
12586 goto error;
12587 if (uri != NULL)
12588 URL = uri;
12589 }
12590
12591 input = xmlLoadResource(ctxt, (char *) URL, (char *) ID,
12592 XML_RESOURCE_UNKNOWN);
12593 if (input == NULL)
12594 goto error;
12595
12596 if (inputPush(ctxt, input) < 0) {
12597 xmlFreeInputStream(input);
12598 goto error;
12599 }
12600
12601 xmlFree(uri);
12602 return(ctxt);
12603
12604 error:
12605 xmlFree(uri);
12606 xmlFreeParserCtxt(ctxt);
12607 return(NULL);
12608 }
12609
12610 /************************************************************************
12611 * *
12612 * Front ends when parsing from a file *
12613 * *
12614 ************************************************************************/
12615
12616 /**
12617 * xmlCreateURLParserCtxt:
12618 * @filename: the filename or URL
12619 * @options: a combination of xmlParserOption
12620 *
12621 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12622 *
12623 * Create a parser context for a file or URL content.
12624 * Automatic support for ZLIB/Compress compressed document is provided
12625 * by default if found at compile-time and for file accesses
12626 *
12627 * Returns the new parser context or NULL
12628 */
12629 xmlParserCtxtPtr
xmlCreateURLParserCtxt(const char * filename,int options)12630 xmlCreateURLParserCtxt(const char *filename, int options)
12631 {
12632 xmlParserCtxtPtr ctxt;
12633 xmlParserInputPtr input;
12634
12635 ctxt = xmlNewParserCtxt();
12636 if (ctxt == NULL)
12637 return(NULL);
12638
12639 xmlCtxtUseOptions(ctxt, options);
12640 ctxt->linenumbers = 1;
12641
12642 input = xmlLoadResource(ctxt, filename, NULL, XML_RESOURCE_MAIN_DOCUMENT);
12643 if (input == NULL) {
12644 xmlFreeParserCtxt(ctxt);
12645 return(NULL);
12646 }
12647 if (inputPush(ctxt, input) < 0) {
12648 xmlFreeInputStream(input);
12649 xmlFreeParserCtxt(ctxt);
12650 return(NULL);
12651 }
12652
12653 return(ctxt);
12654 }
12655
12656 /**
12657 * xmlCreateFileParserCtxt:
12658 * @filename: the filename
12659 *
12660 * DEPRECATED: Use xmlNewParserCtxt and xmlCtxtReadFile.
12661 *
12662 * Create a parser context for a file content.
12663 * Automatic support for ZLIB/Compress compressed document is provided
12664 * by default if found at compile-time.
12665 *
12666 * Returns the new parser context or NULL
12667 */
12668 xmlParserCtxtPtr
xmlCreateFileParserCtxt(const char * filename)12669 xmlCreateFileParserCtxt(const char *filename)
12670 {
12671 return(xmlCreateURLParserCtxt(filename, 0));
12672 }
12673
12674 #ifdef LIBXML_SAX1_ENABLED
12675 /**
12676 * xmlSAXParseFileWithData:
12677 * @sax: the SAX handler block
12678 * @filename: the filename
12679 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12680 * documents
12681 * @data: the userdata
12682 *
12683 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12684 *
12685 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12686 * compressed document is provided by default if found at compile-time.
12687 * It use the given SAX function block to handle the parsing callback.
12688 * If sax is NULL, fallback to the default DOM tree building routines.
12689 *
12690 * User data (void *) is stored within the parser context in the
12691 * context's _private member, so it is available nearly everywhere in libxml
12692 *
12693 * Returns the resulting document tree
12694 */
12695
12696 xmlDocPtr
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax,const char * filename,int recovery,void * data)12697 xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
12698 int recovery, void *data) {
12699 xmlDocPtr ret;
12700 xmlParserCtxtPtr ctxt;
12701 xmlParserInputPtr input;
12702
12703 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12704 if (ctxt == NULL)
12705 return(NULL);
12706
12707 if (data != NULL)
12708 ctxt->_private = data;
12709
12710 if (recovery) {
12711 ctxt->options |= XML_PARSE_RECOVER;
12712 ctxt->recovery = 1;
12713 }
12714
12715 if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
12716 input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, NULL, 0);
12717 else
12718 input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, NULL, 0);
12719
12720 ret = xmlCtxtParseDocument(ctxt, input);
12721
12722 xmlFreeParserCtxt(ctxt);
12723 return(ret);
12724 }
12725
12726 /**
12727 * xmlSAXParseFile:
12728 * @sax: the SAX handler block
12729 * @filename: the filename
12730 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12731 * documents
12732 *
12733 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12734 *
12735 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12736 * compressed document is provided by default if found at compile-time.
12737 * It use the given SAX function block to handle the parsing callback.
12738 * If sax is NULL, fallback to the default DOM tree building routines.
12739 *
12740 * Returns the resulting document tree
12741 */
12742
12743 xmlDocPtr
xmlSAXParseFile(xmlSAXHandlerPtr sax,const char * filename,int recovery)12744 xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
12745 int recovery) {
12746 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
12747 }
12748
12749 /**
12750 * xmlRecoverDoc:
12751 * @cur: a pointer to an array of xmlChar
12752 *
12753 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
12754 *
12755 * parse an XML in-memory document and build a tree.
12756 * In the case the document is not Well Formed, a attempt to build a
12757 * tree is tried anyway
12758 *
12759 * Returns the resulting document tree or NULL in case of failure
12760 */
12761
12762 xmlDocPtr
xmlRecoverDoc(const xmlChar * cur)12763 xmlRecoverDoc(const xmlChar *cur) {
12764 return(xmlSAXParseDoc(NULL, cur, 1));
12765 }
12766
12767 /**
12768 * xmlParseFile:
12769 * @filename: the filename
12770 *
12771 * DEPRECATED: Use xmlReadFile.
12772 *
12773 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12774 * compressed document is provided by default if found at compile-time.
12775 *
12776 * Returns the resulting document tree if the file was wellformed,
12777 * NULL otherwise.
12778 */
12779
12780 xmlDocPtr
xmlParseFile(const char * filename)12781 xmlParseFile(const char *filename) {
12782 return(xmlSAXParseFile(NULL, filename, 0));
12783 }
12784
12785 /**
12786 * xmlRecoverFile:
12787 * @filename: the filename
12788 *
12789 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
12790 *
12791 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
12792 * compressed document is provided by default if found at compile-time.
12793 * In the case the document is not Well Formed, it attempts to build
12794 * a tree anyway
12795 *
12796 * Returns the resulting document tree or NULL in case of failure
12797 */
12798
12799 xmlDocPtr
xmlRecoverFile(const char * filename)12800 xmlRecoverFile(const char *filename) {
12801 return(xmlSAXParseFile(NULL, filename, 1));
12802 }
12803
12804
12805 /**
12806 * xmlSetupParserForBuffer:
12807 * @ctxt: an XML parser context
12808 * @buffer: a xmlChar * buffer
12809 * @filename: a file name
12810 *
12811 * DEPRECATED: Don't use.
12812 *
12813 * Setup the parser context to parse a new buffer; Clears any prior
12814 * contents from the parser context. The buffer parameter must not be
12815 * NULL, but the filename parameter can be
12816 */
12817 void
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt,const xmlChar * buffer,const char * filename)12818 xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
12819 const char* filename)
12820 {
12821 xmlParserInputPtr input;
12822
12823 if ((ctxt == NULL) || (buffer == NULL))
12824 return;
12825
12826 xmlClearParserCtxt(ctxt);
12827
12828 input = xmlCtxtNewInputFromString(ctxt, filename, (const char *) buffer,
12829 NULL, 0);
12830 if (input == NULL)
12831 return;
12832 if (inputPush(ctxt, input) < 0)
12833 xmlFreeInputStream(input);
12834 }
12835
12836 /**
12837 * xmlSAXUserParseFile:
12838 * @sax: a SAX handler
12839 * @user_data: The user data returned on SAX callbacks
12840 * @filename: a file name
12841 *
12842 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
12843 *
12844 * parse an XML file and call the given SAX handler routines.
12845 * Automatic support for ZLIB/Compress compressed document is provided
12846 *
12847 * Returns 0 in case of success or a error number otherwise
12848 */
12849 int
xmlSAXUserParseFile(xmlSAXHandlerPtr sax,void * user_data,const char * filename)12850 xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
12851 const char *filename) {
12852 int ret = 0;
12853 xmlParserCtxtPtr ctxt;
12854
12855 ctxt = xmlCreateFileParserCtxt(filename);
12856 if (ctxt == NULL) return -1;
12857 if (sax != NULL) {
12858 if (sax->initialized == XML_SAX2_MAGIC) {
12859 *ctxt->sax = *sax;
12860 } else {
12861 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
12862 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12863 }
12864 ctxt->userData = user_data;
12865 }
12866
12867 xmlParseDocument(ctxt);
12868
12869 if (ctxt->wellFormed)
12870 ret = 0;
12871 else {
12872 if (ctxt->errNo != 0)
12873 ret = ctxt->errNo;
12874 else
12875 ret = -1;
12876 }
12877 if (ctxt->myDoc != NULL) {
12878 xmlFreeDoc(ctxt->myDoc);
12879 ctxt->myDoc = NULL;
12880 }
12881 xmlFreeParserCtxt(ctxt);
12882
12883 return ret;
12884 }
12885 #endif /* LIBXML_SAX1_ENABLED */
12886
12887 /************************************************************************
12888 * *
12889 * Front ends when parsing from memory *
12890 * *
12891 ************************************************************************/
12892
12893 /**
12894 * xmlCreateMemoryParserCtxt:
12895 * @buffer: a pointer to a char array
12896 * @size: the size of the array
12897 *
12898 * Create a parser context for an XML in-memory document. The input buffer
12899 * must not contain a terminating null byte.
12900 *
12901 * Returns the new parser context or NULL
12902 */
12903 xmlParserCtxtPtr
xmlCreateMemoryParserCtxt(const char * buffer,int size)12904 xmlCreateMemoryParserCtxt(const char *buffer, int size) {
12905 xmlParserCtxtPtr ctxt;
12906 xmlParserInputPtr input;
12907
12908 if (size < 0)
12909 return(NULL);
12910
12911 ctxt = xmlNewParserCtxt();
12912 if (ctxt == NULL)
12913 return(NULL);
12914
12915 input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL, 0);
12916 if (input == NULL) {
12917 xmlFreeParserCtxt(ctxt);
12918 return(NULL);
12919 }
12920 if (inputPush(ctxt, input) < 0) {
12921 xmlFreeInputStream(input);
12922 xmlFreeParserCtxt(ctxt);
12923 return(NULL);
12924 }
12925
12926 return(ctxt);
12927 }
12928
12929 #ifdef LIBXML_SAX1_ENABLED
12930 /**
12931 * xmlSAXParseMemoryWithData:
12932 * @sax: the SAX handler block
12933 * @buffer: an pointer to a char array
12934 * @size: the size of the array
12935 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
12936 * documents
12937 * @data: the userdata
12938 *
12939 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12940 *
12941 * parse an XML in-memory block and use the given SAX function block
12942 * to handle the parsing callback. If sax is NULL, fallback to the default
12943 * DOM tree building routines.
12944 *
12945 * User data (void *) is stored within the parser context in the
12946 * context's _private member, so it is available nearly everywhere in libxml
12947 *
12948 * Returns the resulting document tree
12949 */
12950
12951 xmlDocPtr
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery,void * data)12952 xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
12953 int size, int recovery, void *data) {
12954 xmlDocPtr ret;
12955 xmlParserCtxtPtr ctxt;
12956 xmlParserInputPtr input;
12957
12958 if (size < 0)
12959 return(NULL);
12960
12961 ctxt = xmlNewSAXParserCtxt(sax, NULL);
12962 if (ctxt == NULL)
12963 return(NULL);
12964
12965 if (data != NULL)
12966 ctxt->_private=data;
12967
12968 if (recovery) {
12969 ctxt->options |= XML_PARSE_RECOVER;
12970 ctxt->recovery = 1;
12971 }
12972
12973 input = xmlCtxtNewInputFromMemory(ctxt, NULL, buffer, size, NULL,
12974 XML_INPUT_BUF_STATIC);
12975
12976 ret = xmlCtxtParseDocument(ctxt, input);
12977
12978 xmlFreeParserCtxt(ctxt);
12979 return(ret);
12980 }
12981
12982 /**
12983 * xmlSAXParseMemory:
12984 * @sax: the SAX handler block
12985 * @buffer: an pointer to a char array
12986 * @size: the size of the array
12987 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
12988 * documents
12989 *
12990 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
12991 *
12992 * parse an XML in-memory block and use the given SAX function block
12993 * to handle the parsing callback. If sax is NULL, fallback to the default
12994 * DOM tree building routines.
12995 *
12996 * Returns the resulting document tree
12997 */
12998 xmlDocPtr
xmlSAXParseMemory(xmlSAXHandlerPtr sax,const char * buffer,int size,int recovery)12999 xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13000 int size, int recovery) {
13001 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13002 }
13003
13004 /**
13005 * xmlParseMemory:
13006 * @buffer: an pointer to a char array
13007 * @size: the size of the array
13008 *
13009 * DEPRECATED: Use xmlReadMemory.
13010 *
13011 * parse an XML in-memory block and build a tree.
13012 *
13013 * Returns the resulting document tree
13014 */
13015
xmlParseMemory(const char * buffer,int size)13016 xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13017 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13018 }
13019
13020 /**
13021 * xmlRecoverMemory:
13022 * @buffer: an pointer to a char array
13023 * @size: the size of the array
13024 *
13025 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13026 *
13027 * parse an XML in-memory block and build a tree.
13028 * In the case the document is not Well Formed, an attempt to
13029 * build a tree is tried anyway
13030 *
13031 * Returns the resulting document tree or NULL in case of error
13032 */
13033
xmlRecoverMemory(const char * buffer,int size)13034 xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13035 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13036 }
13037
13038 /**
13039 * xmlSAXUserParseMemory:
13040 * @sax: a SAX handler
13041 * @user_data: The user data returned on SAX callbacks
13042 * @buffer: an in-memory XML document input
13043 * @size: the length of the XML document in bytes
13044 *
13045 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13046 *
13047 * parse an XML in-memory buffer and call the given SAX handler routines.
13048 *
13049 * Returns 0 in case of success or a error number otherwise
13050 */
xmlSAXUserParseMemory(xmlSAXHandlerPtr sax,void * user_data,const char * buffer,int size)13051 int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13052 const char *buffer, int size) {
13053 int ret = 0;
13054 xmlParserCtxtPtr ctxt;
13055
13056 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13057 if (ctxt == NULL) return -1;
13058 if (sax != NULL) {
13059 if (sax->initialized == XML_SAX2_MAGIC) {
13060 *ctxt->sax = *sax;
13061 } else {
13062 memset(ctxt->sax, 0, sizeof(*ctxt->sax));
13063 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
13064 }
13065 ctxt->userData = user_data;
13066 }
13067
13068 xmlParseDocument(ctxt);
13069
13070 if (ctxt->wellFormed)
13071 ret = 0;
13072 else {
13073 if (ctxt->errNo != 0)
13074 ret = ctxt->errNo;
13075 else
13076 ret = -1;
13077 }
13078 if (ctxt->myDoc != NULL) {
13079 xmlFreeDoc(ctxt->myDoc);
13080 ctxt->myDoc = NULL;
13081 }
13082 xmlFreeParserCtxt(ctxt);
13083
13084 return ret;
13085 }
13086 #endif /* LIBXML_SAX1_ENABLED */
13087
13088 /**
13089 * xmlCreateDocParserCtxt:
13090 * @str: a pointer to an array of xmlChar
13091 *
13092 * Creates a parser context for an XML in-memory document.
13093 *
13094 * Returns the new parser context or NULL
13095 */
13096 xmlParserCtxtPtr
xmlCreateDocParserCtxt(const xmlChar * str)13097 xmlCreateDocParserCtxt(const xmlChar *str) {
13098 xmlParserCtxtPtr ctxt;
13099 xmlParserInputPtr input;
13100
13101 ctxt = xmlNewParserCtxt();
13102 if (ctxt == NULL)
13103 return(NULL);
13104
13105 input = xmlCtxtNewInputFromString(ctxt, NULL, (const char *) str, NULL, 0);
13106 if (input == NULL) {
13107 xmlFreeParserCtxt(ctxt);
13108 return(NULL);
13109 }
13110 if (inputPush(ctxt, input) < 0) {
13111 xmlFreeInputStream(input);
13112 xmlFreeParserCtxt(ctxt);
13113 return(NULL);
13114 }
13115
13116 return(ctxt);
13117 }
13118
13119 #ifdef LIBXML_SAX1_ENABLED
13120 /**
13121 * xmlSAXParseDoc:
13122 * @sax: the SAX handler block
13123 * @cur: a pointer to an array of xmlChar
13124 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13125 * documents
13126 *
13127 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13128 *
13129 * parse an XML in-memory document and build a tree.
13130 * It use the given SAX function block to handle the parsing callback.
13131 * If sax is NULL, fallback to the default DOM tree building routines.
13132 *
13133 * Returns the resulting document tree
13134 */
13135
13136 xmlDocPtr
xmlSAXParseDoc(xmlSAXHandlerPtr sax,const xmlChar * cur,int recovery)13137 xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
13138 xmlDocPtr ret;
13139 xmlParserCtxtPtr ctxt;
13140 xmlSAXHandlerPtr oldsax = NULL;
13141
13142 if (cur == NULL) return(NULL);
13143
13144
13145 ctxt = xmlCreateDocParserCtxt(cur);
13146 if (ctxt == NULL) return(NULL);
13147 if (sax != NULL) {
13148 oldsax = ctxt->sax;
13149 ctxt->sax = sax;
13150 ctxt->userData = NULL;
13151 }
13152
13153 xmlParseDocument(ctxt);
13154 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13155 else {
13156 ret = NULL;
13157 xmlFreeDoc(ctxt->myDoc);
13158 ctxt->myDoc = NULL;
13159 }
13160 if (sax != NULL)
13161 ctxt->sax = oldsax;
13162 xmlFreeParserCtxt(ctxt);
13163
13164 return(ret);
13165 }
13166
13167 /**
13168 * xmlParseDoc:
13169 * @cur: a pointer to an array of xmlChar
13170 *
13171 * DEPRECATED: Use xmlReadDoc.
13172 *
13173 * parse an XML in-memory document and build a tree.
13174 *
13175 * Returns the resulting document tree
13176 */
13177
13178 xmlDocPtr
xmlParseDoc(const xmlChar * cur)13179 xmlParseDoc(const xmlChar *cur) {
13180 return(xmlSAXParseDoc(NULL, cur, 0));
13181 }
13182 #endif /* LIBXML_SAX1_ENABLED */
13183
13184 /************************************************************************
13185 * *
13186 * New set (2.6.0) of simpler and more flexible APIs *
13187 * *
13188 ************************************************************************/
13189
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope.
 *
 * The macro expects a variable named "dict" to be visible where it is
 * used. A NULL @str is ignored; when "dict" is NULL the string is
 * always freed.
 *
 * NOTE: the expansion is a bare "if" statement that already ends with
 * a semicolon, so it should only be used as a standalone statement
 * (not as the unbraced body of an if/else).
 */
#define DICT_FREE(str)						\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));
13201
13202 /**
13203 * xmlCtxtReset:
13204 * @ctxt: an XML parser context
13205 *
13206 * Reset a parser context
13207 */
13208 void
xmlCtxtReset(xmlParserCtxtPtr ctxt)13209 xmlCtxtReset(xmlParserCtxtPtr ctxt)
13210 {
13211 xmlParserInputPtr input;
13212 xmlDictPtr dict;
13213
13214 if (ctxt == NULL)
13215 return;
13216
13217 dict = ctxt->dict;
13218
13219 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13220 xmlFreeInputStream(input);
13221 }
13222 ctxt->inputNr = 0;
13223 ctxt->input = NULL;
13224
13225 ctxt->spaceNr = 0;
13226 if (ctxt->spaceTab != NULL) {
13227 ctxt->spaceTab[0] = -1;
13228 ctxt->space = &ctxt->spaceTab[0];
13229 } else {
13230 ctxt->space = NULL;
13231 }
13232
13233
13234 ctxt->nodeNr = 0;
13235 ctxt->node = NULL;
13236
13237 ctxt->nameNr = 0;
13238 ctxt->name = NULL;
13239
13240 ctxt->nsNr = 0;
13241 xmlParserNsReset(ctxt->nsdb);
13242
13243 DICT_FREE(ctxt->version);
13244 ctxt->version = NULL;
13245 DICT_FREE(ctxt->encoding);
13246 ctxt->encoding = NULL;
13247 DICT_FREE(ctxt->extSubURI);
13248 ctxt->extSubURI = NULL;
13249 DICT_FREE(ctxt->extSubSystem);
13250 ctxt->extSubSystem = NULL;
13251
13252 if (ctxt->directory != NULL) {
13253 xmlFree(ctxt->directory);
13254 ctxt->directory = NULL;
13255 }
13256
13257 if (ctxt->myDoc != NULL)
13258 xmlFreeDoc(ctxt->myDoc);
13259 ctxt->myDoc = NULL;
13260
13261 ctxt->standalone = -1;
13262 ctxt->hasExternalSubset = 0;
13263 ctxt->hasPErefs = 0;
13264 ctxt->html = 0;
13265 ctxt->instate = XML_PARSER_START;
13266
13267 ctxt->wellFormed = 1;
13268 ctxt->nsWellFormed = 1;
13269 ctxt->disableSAX = 0;
13270 ctxt->valid = 1;
13271 ctxt->record_info = 0;
13272 ctxt->checkIndex = 0;
13273 ctxt->endCheckState = 0;
13274 ctxt->inSubset = 0;
13275 ctxt->errNo = XML_ERR_OK;
13276 ctxt->depth = 0;
13277 ctxt->catalogs = NULL;
13278 ctxt->sizeentities = 0;
13279 ctxt->sizeentcopy = 0;
13280 xmlInitNodeInfoSeq(&ctxt->node_seq);
13281
13282 if (ctxt->attsDefault != NULL) {
13283 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
13284 ctxt->attsDefault = NULL;
13285 }
13286 if (ctxt->attsSpecial != NULL) {
13287 xmlHashFree(ctxt->attsSpecial, NULL);
13288 ctxt->attsSpecial = NULL;
13289 }
13290
13291 #ifdef LIBXML_CATALOG_ENABLED
13292 if (ctxt->catalogs != NULL)
13293 xmlCatalogFreeLocal(ctxt->catalogs);
13294 #endif
13295 ctxt->nbErrors = 0;
13296 ctxt->nbWarnings = 0;
13297 if (ctxt->lastError.code != XML_ERR_OK)
13298 xmlResetError(&ctxt->lastError);
13299 }
13300
13301 /**
13302 * xmlCtxtResetPush:
13303 * @ctxt: an XML parser context
13304 * @chunk: a pointer to an array of chars
13305 * @size: number of chars in the array
13306 * @filename: an optional file name or URI
13307 * @encoding: the document encoding, or NULL
13308 *
13309 * Reset a push parser context
13310 *
13311 * Returns 0 in case of success and 1 in case of error
13312 */
13313 int
xmlCtxtResetPush(xmlParserCtxtPtr ctxt,const char * chunk,int size,const char * filename,const char * encoding)13314 xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13315 int size, const char *filename, const char *encoding)
13316 {
13317 xmlParserInputPtr input;
13318
13319 if (ctxt == NULL)
13320 return(1);
13321
13322 xmlCtxtReset(ctxt);
13323
13324 input = xmlNewPushInput(filename, chunk, size);
13325 if (input == NULL)
13326 return(1);
13327
13328 if (inputPush(ctxt, input) < 0) {
13329 xmlFreeInputStream(input);
13330 return(1);
13331 }
13332
13333 if (encoding != NULL)
13334 xmlSwitchEncodingName(ctxt, encoding);
13335
13336 return(0);
13337 }
13338
13339 static int
xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt,int options,int keepMask)13340 xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask)
13341 {
13342 int allMask;
13343
13344 if (ctxt == NULL)
13345 return(-1);
13346
13347 /*
13348 * XInclude options aren't handled by the parser.
13349 *
13350 * XML_PARSE_XINCLUDE
13351 * XML_PARSE_NOXINCNODE
13352 * XML_PARSE_NOBASEFIX
13353 */
13354 allMask = XML_PARSE_RECOVER |
13355 XML_PARSE_NOENT |
13356 XML_PARSE_DTDLOAD |
13357 XML_PARSE_DTDATTR |
13358 XML_PARSE_DTDVALID |
13359 XML_PARSE_NOERROR |
13360 XML_PARSE_NOWARNING |
13361 XML_PARSE_PEDANTIC |
13362 XML_PARSE_NOBLANKS |
13363 #ifdef LIBXML_SAX1_ENABLED
13364 XML_PARSE_SAX1 |
13365 #endif
13366 XML_PARSE_NONET |
13367 XML_PARSE_NODICT |
13368 XML_PARSE_NSCLEAN |
13369 XML_PARSE_NOCDATA |
13370 XML_PARSE_COMPACT |
13371 XML_PARSE_OLD10 |
13372 XML_PARSE_HUGE |
13373 XML_PARSE_OLDSAX |
13374 XML_PARSE_IGNORE_ENC |
13375 XML_PARSE_BIG_LINES |
13376 XML_PARSE_NO_XXE |
13377 XML_PARSE_NO_UNZIP |
13378 XML_PARSE_NO_SYS_CATALOG |
13379 XML_PARSE_NO_CATALOG_PI;
13380
13381 ctxt->options = (ctxt->options & keepMask) | (options & allMask);
13382
13383 /*
13384 * For some options, struct members are historically the source
13385 * of truth. The values are initalized from global variables and
13386 * old code could also modify them directly. Several older API
13387 * functions that don't take an options argument rely on these
13388 * deprecated mechanisms.
13389 *
13390 * Once public access to struct members and the globals are
13391 * disabled, we can use the options bitmask as source of
13392 * truth, making all these struct members obsolete.
13393 *
13394 * The XML_DETECT_IDS flags is misnamed. It simply enables
13395 * loading of the external subset.
13396 */
13397 ctxt->recovery = (options & XML_PARSE_RECOVER) ? 1 : 0;
13398 ctxt->replaceEntities = (options & XML_PARSE_NOENT) ? 1 : 0;
13399 ctxt->loadsubset = (options & XML_PARSE_DTDLOAD) ? XML_DETECT_IDS : 0;
13400 ctxt->loadsubset |= (options & XML_PARSE_DTDATTR) ? XML_COMPLETE_ATTRS : 0;
13401 ctxt->validate = (options & XML_PARSE_DTDVALID) ? 1 : 0;
13402 ctxt->pedantic = (options & XML_PARSE_PEDANTIC) ? 1 : 0;
13403 ctxt->keepBlanks = (options & XML_PARSE_NOBLANKS) ? 0 : 1;
13404 ctxt->dictNames = (options & XML_PARSE_NODICT) ? 0 : 1;
13405
13406 /*
13407 * Changing SAX callbacks is a bad idea. This should be fixed.
13408 */
13409 if (options & XML_PARSE_NOBLANKS) {
13410 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
13411 }
13412 if (options & XML_PARSE_NOCDATA) {
13413 ctxt->sax->cdataBlock = NULL;
13414 }
13415 if (options & XML_PARSE_HUGE) {
13416 if (ctxt->dict != NULL)
13417 xmlDictSetLimit(ctxt->dict, 0);
13418 }
13419
13420 ctxt->linenumbers = 1;
13421
13422 return(options & ~allMask);
13423 }
13424
13425 /**
13426 * xmlCtxtSetOptions:
13427 * @ctxt: an XML parser context
13428 * @options: a bitmask of xmlParserOption values
13429 *
13430 * Applies the options to the parser context. Unset options are
13431 * cleared.
13432 *
13433 * Available since 2.13.0. With older versions, you can use
13434 * xmlCtxtUseOptions.
13435 *
13436 * XML_PARSE_RECOVER
13437 *
13438 * Enable "recovery" mode which allows non-wellformed documents.
13439 * How this mode behaves exactly is unspecified and may change
13440 * without further notice. Use of this feature is DISCOURAGED.
13441 *
13442 * XML_PARSE_NOENT
13443 *
13444 * Despite the confusing name, this option enables substitution
13445 * of entities. The resulting tree won't contain any entity
13446 * reference nodes.
13447 *
13448 * This option also enables loading of external entities (both
13449 * general and parameter entities) which is dangerous. If you
13450 * process untrusted data, it's recommended to set the
13451 * XML_PARSE_NO_XXE option to disable loading of external
13452 * entities.
13453 *
13454 * XML_PARSE_DTDLOAD
13455 *
13456 * Enables loading of an external DTD and the loading and
13457 * substitution of external parameter entities. Has no effect
13458 * if XML_PARSE_NO_XXE is set.
13459 *
13460 * XML_PARSE_DTDATTR
13461 *
13462 * Adds default attributes from the DTD to the result document.
13463 *
13464 * Implies XML_PARSE_DTDLOAD, but loading of external content
13465 * can be disabled with XML_PARSE_NO_XXE.
13466 *
13467 * XML_PARSE_DTDVALID
13468 *
 * This option enables DTD validation, which requires loading
 * external DTDs and external entities (both general and
 * parameter entities) unless XML_PARSE_NO_XXE is set.
13472 *
13473 * XML_PARSE_NO_XXE
13474 *
13475 * Disables loading of external DTDs or entities.
13476 *
13477 * Available since 2.13.0.
13478 *
13479 * XML_PARSE_NOERROR
13480 *
13481 * Disable error and warning reports to the error handlers.
13482 * Errors are still accessible with xmlCtxtGetLastError.
13483 *
13484 * XML_PARSE_NOWARNING
13485 *
13486 * Disable warning reports.
13487 *
13488 * XML_PARSE_PEDANTIC
13489 *
13490 * Enable some pedantic warnings.
13491 *
13492 * XML_PARSE_NOBLANKS
13493 *
13494 * Remove some text nodes containing only whitespace from the
13495 * result document. Which nodes are removed depends on DTD
13496 * element declarations or a conservative heuristic. The
13497 * reindenting feature of the serialization code relies on this
13498 * option to be set when parsing. Use of this option is
13499 * DISCOURAGED.
13500 *
13501 * XML_PARSE_SAX1
13502 *
13503 * Always invoke the deprecated SAX1 startElement and endElement
13504 * handlers. This option is DEPRECATED.
13505 *
13506 * XML_PARSE_NONET
13507 *
13508 * Disable network access with the builtin HTTP client.
13509 *
13510 * XML_PARSE_NODICT
13511 *
13512 * Create a document without interned strings, making all
13513 * strings separate memory allocations.
13514 *
13515 * XML_PARSE_NSCLEAN
13516 *
13517 * Remove redundant namespace declarations from the result
13518 * document.
13519 *
13520 * XML_PARSE_NOCDATA
13521 *
13522 * Output normal text nodes instead of CDATA nodes.
13523 *
13524 * XML_PARSE_COMPACT
13525 *
13526 * Store small strings directly in the node struct to save
13527 * memory.
13528 *
13529 * XML_PARSE_OLD10
13530 *
13531 * Use old Name productions from before XML 1.0 Fifth Edition.
 * This option is DEPRECATED.
13533 *
13534 * XML_PARSE_HUGE
13535 *
13536 * Relax some internal limits.
13537 *
13538 * Maximum size of text nodes, tags, comments, processing instructions,
13539 * CDATA sections, entity values
13540 *
13541 * normal: 10M
13542 * huge: 1B
13543 *
13544 * Maximum size of names, system literals, pubid literals
13545 *
13546 * normal: 50K
13547 * huge: 10M
13548 *
13549 * Maximum nesting depth of elements
13550 *
13551 * normal: 256
13552 * huge: 2048
13553 *
13554 * Maximum nesting depth of entities
13555 *
13556 * normal: 20
13557 * huge: 40
13558 *
13559 * XML_PARSE_OLDSAX
13560 *
13561 * Enable an unspecified legacy mode for SAX parsers. This
13562 * option is DEPRECATED.
13563 *
13564 * XML_PARSE_IGNORE_ENC
13565 *
13566 * Ignore the encoding in the XML declaration. This option is
13567 * mostly unneeded these days. The only effect is to enforce
13568 * UTF-8 decoding of ASCII-like data.
13569 *
13570 * XML_PARSE_BIG_LINES
13571 *
13572 * Enable reporting of line numbers larger than 65535.
13573 *
13574 * XML_PARSE_NO_UNZIP
13575 *
13576 * Disables input decompression. Setting this option is recommended
13577 * to avoid zip bombs.
13578 *
13579 * Available since 2.14.0.
13580 *
13581 * XML_PARSE_NO_SYS_CATALOG
13582 *
13583 * Disables the global system XML catalog.
13584 *
13585 * Available since 2.14.0.
13586 *
13587 * XML_PARSE_NO_CATALOG_PI
13588 *
13589 * Ignore XML catalog processing instructions.
13590 *
13591 * Available since 2.14.0.
13592 *
13593 * Returns 0 in case of success, the set of unknown or unimplemented options
13594 * in case of error.
13595 */
13596 int
xmlCtxtSetOptions(xmlParserCtxtPtr ctxt,int options)13597 xmlCtxtSetOptions(xmlParserCtxtPtr ctxt, int options)
13598 {
13599 #ifdef LIBXML_HTML_ENABLED
13600 if ((ctxt != NULL) && (ctxt->html))
13601 return(htmlCtxtSetOptions(ctxt, options));
13602 #endif
13603
13604 return(xmlCtxtSetOptionsInternal(ctxt, options, 0));
13605 }
13606
13607 /**
13608 * xmlCtxtGetOptions:
13609 * @ctxt: an XML parser context
13610 *
13611 * Get the current options of the parser context.
13612 *
13613 * Available since 2.14.0.
13614 *
13615 * Returns the current options set in the parser context, or -1 if ctxt is NULL.
13616 */
13617 int
xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)13618 xmlCtxtGetOptions(xmlParserCtxtPtr ctxt)
13619 {
13620 if (ctxt == NULL)
13621 return(-1);
13622
13623 return(ctxt->options);
13624 }
13625
13626 /**
13627 * xmlCtxtUseOptions:
13628 * @ctxt: an XML parser context
13629 * @options: a combination of xmlParserOption
13630 *
13631 * DEPRECATED: Use xmlCtxtSetOptions.
13632 *
13633 * Applies the options to the parser context. The following options
13634 * are never cleared and can only be enabled:
13635 *
13636 * XML_PARSE_NOERROR
13637 * XML_PARSE_NOWARNING
13638 * XML_PARSE_NONET
13639 * XML_PARSE_NSCLEAN
13640 * XML_PARSE_NOCDATA
13641 * XML_PARSE_COMPACT
13642 * XML_PARSE_OLD10
13643 * XML_PARSE_HUGE
13644 * XML_PARSE_OLDSAX
13645 * XML_PARSE_IGNORE_ENC
13646 * XML_PARSE_BIG_LINES
13647 *
13648 * Returns 0 in case of success, the set of unknown or unimplemented options
13649 * in case of error.
13650 */
13651 int
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt,int options)13652 xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
13653 {
13654 int keepMask;
13655
13656 #ifdef LIBXML_HTML_ENABLED
13657 if ((ctxt != NULL) && (ctxt->html))
13658 return(htmlCtxtUseOptions(ctxt, options));
13659 #endif
13660
13661 /*
13662 * For historic reasons, some options can only be enabled.
13663 */
13664 keepMask = XML_PARSE_NOERROR |
13665 XML_PARSE_NOWARNING |
13666 XML_PARSE_NONET |
13667 XML_PARSE_NSCLEAN |
13668 XML_PARSE_NOCDATA |
13669 XML_PARSE_COMPACT |
13670 XML_PARSE_OLD10 |
13671 XML_PARSE_HUGE |
13672 XML_PARSE_OLDSAX |
13673 XML_PARSE_IGNORE_ENC |
13674 XML_PARSE_BIG_LINES;
13675
13676 return(xmlCtxtSetOptionsInternal(ctxt, options, keepMask));
13677 }
13678
13679 /**
13680 * xmlCtxtSetMaxAmplification:
13681 * @ctxt: an XML parser context
13682 * @maxAmpl: maximum amplification factor
13683 *
13684 * To protect against exponential entity expansion ("billion laughs"), the
13685 * size of serialized output is (roughly) limited to the input size
13686 * multiplied by this factor. The default value is 5.
13687 *
13688 * When working with documents making heavy use of entity expansion, it can
13689 * be necessary to increase the value. For security reasons, this should only
13690 * be considered when processing trusted input.
13691 */
13692 void
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt,unsigned maxAmpl)13693 xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
13694 {
13695 ctxt->maxAmpl = maxAmpl;
13696 }
13697
13698 /**
13699 * xmlCtxtParseDocument:
13700 * @ctxt: an XML parser context
13701 * @input: parser input
13702 *
13703 * Parse an XML document and return the resulting document tree.
13704 * Takes ownership of the input object.
13705 *
13706 * Available since 2.13.0.
13707 *
13708 * Returns the resulting document tree or NULL
13709 */
13710 xmlDocPtr
xmlCtxtParseDocument(xmlParserCtxtPtr ctxt,xmlParserInputPtr input)13711 xmlCtxtParseDocument(xmlParserCtxtPtr ctxt, xmlParserInputPtr input)
13712 {
13713 xmlDocPtr ret = NULL;
13714
13715 if ((ctxt == NULL) || (input == NULL))
13716 return(NULL);
13717
13718 /* assert(ctxt->inputNr == 0); */
13719 while (ctxt->inputNr > 0)
13720 xmlFreeInputStream(inputPop(ctxt));
13721
13722 if (inputPush(ctxt, input) < 0) {
13723 xmlFreeInputStream(input);
13724 return(NULL);
13725 }
13726
13727 xmlParseDocument(ctxt);
13728
13729 if ((ctxt->wellFormed) ||
13730 ((ctxt->recovery) && (ctxt->errNo != XML_ERR_NO_MEMORY))) {
13731 ret = ctxt->myDoc;
13732 } else {
13733 if (ctxt->errNo == XML_ERR_OK)
13734 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR, "unknown error\n");
13735
13736 ret = NULL;
13737 xmlFreeDoc(ctxt->myDoc);
13738 }
13739 ctxt->myDoc = NULL;
13740
13741 /* assert(ctxt->inputNr == 1); */
13742 while (ctxt->inputNr > 0)
13743 xmlFreeInputStream(inputPop(ctxt));
13744
13745 return(ret);
13746 }
13747
13748 /**
13749 * xmlReadDoc:
13750 * @cur: a pointer to a zero terminated string
13751 * @URL: base URL (optional)
13752 * @encoding: the document encoding (optional)
13753 * @options: a combination of xmlParserOption
13754 *
13755 * Convenience function to parse an XML document from a
13756 * zero-terminated string.
13757 *
13758 * See xmlCtxtReadDoc for details.
13759 *
13760 * Returns the resulting document tree
13761 */
13762 xmlDocPtr
xmlReadDoc(const xmlChar * cur,const char * URL,const char * encoding,int options)13763 xmlReadDoc(const xmlChar *cur, const char *URL, const char *encoding,
13764 int options)
13765 {
13766 xmlParserCtxtPtr ctxt;
13767 xmlParserInputPtr input;
13768 xmlDocPtr doc;
13769
13770 ctxt = xmlNewParserCtxt();
13771 if (ctxt == NULL)
13772 return(NULL);
13773
13774 xmlCtxtUseOptions(ctxt, options);
13775
13776 input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) cur, encoding,
13777 XML_INPUT_BUF_STATIC);
13778
13779 doc = xmlCtxtParseDocument(ctxt, input);
13780
13781 xmlFreeParserCtxt(ctxt);
13782 return(doc);
13783 }
13784
13785 /**
13786 * xmlReadFile:
13787 * @filename: a file or URL
13788 * @encoding: the document encoding (optional)
13789 * @options: a combination of xmlParserOption
13790 *
13791 * Convenience function to parse an XML file from the filesystem,
13792 * the network or a global user-define resource loader.
13793 *
13794 * See xmlCtxtReadFile for details.
13795 *
13796 * Returns the resulting document tree
13797 */
13798 xmlDocPtr
xmlReadFile(const char * filename,const char * encoding,int options)13799 xmlReadFile(const char *filename, const char *encoding, int options)
13800 {
13801 xmlParserCtxtPtr ctxt;
13802 xmlParserInputPtr input;
13803 xmlDocPtr doc;
13804
13805 ctxt = xmlNewParserCtxt();
13806 if (ctxt == NULL)
13807 return(NULL);
13808
13809 xmlCtxtUseOptions(ctxt, options);
13810
13811 /*
13812 * Backward compatibility for users of command line utilities like
13813 * xmlstarlet expecting "-" to mean stdin. This is dangerous and
13814 * should be removed at some point.
13815 */
13816 if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
13817 input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
13818 encoding, 0);
13819 else
13820 input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
13821
13822 doc = xmlCtxtParseDocument(ctxt, input);
13823
13824 xmlFreeParserCtxt(ctxt);
13825 return(doc);
13826 }
13827
13828 /**
13829 * xmlReadMemory:
13830 * @buffer: a pointer to a char array
13831 * @size: the size of the array
13832 * @url: base URL (optional)
13833 * @encoding: the document encoding (optional)
13834 * @options: a combination of xmlParserOption
13835 *
13836 * Parse an XML in-memory document and build a tree. The input buffer must
13837 * not contain a terminating null byte.
13838 *
13839 * See xmlCtxtReadMemory for details.
13840 *
13841 * Returns the resulting document tree
13842 */
13843 xmlDocPtr
xmlReadMemory(const char * buffer,int size,const char * url,const char * encoding,int options)13844 xmlReadMemory(const char *buffer, int size, const char *url,
13845 const char *encoding, int options)
13846 {
13847 xmlParserCtxtPtr ctxt;
13848 xmlParserInputPtr input;
13849 xmlDocPtr doc;
13850
13851 if (size < 0)
13852 return(NULL);
13853
13854 ctxt = xmlNewParserCtxt();
13855 if (ctxt == NULL)
13856 return(NULL);
13857
13858 xmlCtxtUseOptions(ctxt, options);
13859
13860 input = xmlCtxtNewInputFromMemory(ctxt, url, buffer, size, encoding,
13861 XML_INPUT_BUF_STATIC);
13862
13863 doc = xmlCtxtParseDocument(ctxt, input);
13864
13865 xmlFreeParserCtxt(ctxt);
13866 return(doc);
13867 }
13868
13869 /**
13870 * xmlReadFd:
13871 * @fd: an open file descriptor
13872 * @URL: base URL (optional)
13873 * @encoding: the document encoding (optional)
13874 * @options: a combination of xmlParserOption
13875 *
13876 * Parse an XML from a file descriptor and build a tree.
13877 *
13878 * See xmlCtxtReadFd for details.
13879 *
13880 * NOTE that the file descriptor will not be closed when the
13881 * context is freed or reset.
13882 *
13883 * Returns the resulting document tree
13884 */
13885 xmlDocPtr
xmlReadFd(int fd,const char * URL,const char * encoding,int options)13886 xmlReadFd(int fd, const char *URL, const char *encoding, int options)
13887 {
13888 xmlParserCtxtPtr ctxt;
13889 xmlParserInputPtr input;
13890 xmlDocPtr doc;
13891
13892 ctxt = xmlNewParserCtxt();
13893 if (ctxt == NULL)
13894 return(NULL);
13895
13896 xmlCtxtUseOptions(ctxt, options);
13897
13898 input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
13899
13900 doc = xmlCtxtParseDocument(ctxt, input);
13901
13902 xmlFreeParserCtxt(ctxt);
13903 return(doc);
13904 }
13905
13906 /**
13907 * xmlReadIO:
13908 * @ioread: an I/O read function
13909 * @ioclose: an I/O close function (optional)
13910 * @ioctx: an I/O handler
13911 * @URL: base URL (optional)
13912 * @encoding: the document encoding (optional)
13913 * @options: a combination of xmlParserOption
13914 *
13915 * Parse an XML document from I/O functions and context and build a tree.
13916 *
13917 * See xmlCtxtReadIO for details.
13918 *
13919 * Returns the resulting document tree
13920 */
13921 xmlDocPtr
xmlReadIO(xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)13922 xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
13923 void *ioctx, const char *URL, const char *encoding, int options)
13924 {
13925 xmlParserCtxtPtr ctxt;
13926 xmlParserInputPtr input;
13927 xmlDocPtr doc;
13928
13929 ctxt = xmlNewParserCtxt();
13930 if (ctxt == NULL)
13931 return(NULL);
13932
13933 xmlCtxtUseOptions(ctxt, options);
13934
13935 input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
13936 encoding, 0);
13937
13938 doc = xmlCtxtParseDocument(ctxt, input);
13939
13940 xmlFreeParserCtxt(ctxt);
13941 return(doc);
13942 }
13943
13944 /**
13945 * xmlCtxtReadDoc:
13946 * @ctxt: an XML parser context
13947 * @str: a pointer to a zero terminated string
13948 * @URL: base URL (optional)
13949 * @encoding: the document encoding (optional)
13950 * @options: a combination of xmlParserOption
13951 *
13952 * Parse an XML in-memory document and build a tree.
13953 *
13954 * @URL is used as base to resolve external entities and for error
13955 * reporting.
13956 *
13957 * See xmlCtxtUseOptions for details.
13958 *
13959 * Returns the resulting document tree
13960 */
13961 xmlDocPtr
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt,const xmlChar * str,const char * URL,const char * encoding,int options)13962 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
13963 const char *URL, const char *encoding, int options)
13964 {
13965 xmlParserInputPtr input;
13966
13967 if (ctxt == NULL)
13968 return(NULL);
13969
13970 xmlCtxtReset(ctxt);
13971 xmlCtxtUseOptions(ctxt, options);
13972
13973 input = xmlCtxtNewInputFromString(ctxt, URL, (const char *) str, encoding,
13974 XML_INPUT_BUF_STATIC);
13975
13976 return(xmlCtxtParseDocument(ctxt, input));
13977 }
13978
13979 /**
13980 * xmlCtxtReadFile:
13981 * @ctxt: an XML parser context
13982 * @filename: a file or URL
13983 * @encoding: the document encoding (optional)
13984 * @options: a combination of xmlParserOption
13985 *
13986 * Parse an XML file from the filesystem, the network or a user-defined
13987 * resource loader.
13988 *
13989 * Returns the resulting document tree
13990 */
13991 xmlDocPtr
xmlCtxtReadFile(xmlParserCtxtPtr ctxt,const char * filename,const char * encoding,int options)13992 xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
13993 const char *encoding, int options)
13994 {
13995 xmlParserInputPtr input;
13996
13997 if (ctxt == NULL)
13998 return(NULL);
13999
14000 xmlCtxtReset(ctxt);
14001 xmlCtxtUseOptions(ctxt, options);
14002
14003 input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
14004
14005 return(xmlCtxtParseDocument(ctxt, input));
14006 }
14007
14008 /**
14009 * xmlCtxtReadMemory:
14010 * @ctxt: an XML parser context
14011 * @buffer: a pointer to a char array
14012 * @size: the size of the array
14013 * @URL: base URL (optional)
14014 * @encoding: the document encoding (optional)
14015 * @options: a combination of xmlParserOption
14016 *
14017 * Parse an XML in-memory document and build a tree. The input buffer must
14018 * not contain a terminating null byte.
14019 *
14020 * @URL is used as base to resolve external entities and for error
14021 * reporting.
14022 *
14023 * See xmlCtxtUseOptions for details.
14024 *
14025 * Returns the resulting document tree
14026 */
14027 xmlDocPtr
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt,const char * buffer,int size,const char * URL,const char * encoding,int options)14028 xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14029 const char *URL, const char *encoding, int options)
14030 {
14031 xmlParserInputPtr input;
14032
14033 if ((ctxt == NULL) || (size < 0))
14034 return(NULL);
14035
14036 xmlCtxtReset(ctxt);
14037 xmlCtxtUseOptions(ctxt, options);
14038
14039 input = xmlCtxtNewInputFromMemory(ctxt, URL, buffer, size, encoding,
14040 XML_INPUT_BUF_STATIC);
14041
14042 return(xmlCtxtParseDocument(ctxt, input));
14043 }
14044
14045 /**
14046 * xmlCtxtReadFd:
14047 * @ctxt: an XML parser context
14048 * @fd: an open file descriptor
14049 * @URL: base URL (optional)
14050 * @encoding: the document encoding (optional)
14051 * @options: a combination of xmlParserOption
14052 *
14053 * Parse an XML document from a file descriptor and build a tree.
14054 *
14055 * NOTE that the file descriptor will not be closed when the
14056 * context is freed or reset.
14057 *
14058 * @URL is used as base to resolve external entities and for error
14059 * reporting.
14060 *
14061 * See xmlCtxtUseOptions for details.
14062 *
14063 * Returns the resulting document tree
14064 */
14065 xmlDocPtr
xmlCtxtReadFd(xmlParserCtxtPtr ctxt,int fd,const char * URL,const char * encoding,int options)14066 xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14067 const char *URL, const char *encoding, int options)
14068 {
14069 xmlParserInputPtr input;
14070
14071 if (ctxt == NULL)
14072 return(NULL);
14073
14074 xmlCtxtReset(ctxt);
14075 xmlCtxtUseOptions(ctxt, options);
14076
14077 input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
14078
14079 return(xmlCtxtParseDocument(ctxt, input));
14080 }
14081
14082 /**
14083 * xmlCtxtReadIO:
14084 * @ctxt: an XML parser context
14085 * @ioread: an I/O read function
14086 * @ioclose: an I/O close function
14087 * @ioctx: an I/O handler
14088 * @URL: the base URL to use for the document
14089 * @encoding: the document encoding, or NULL
14090 * @options: a combination of xmlParserOption
14091 *
14092 * parse an XML document from I/O functions and source and build a tree.
14093 * This reuses the existing @ctxt parser context
14094 *
14095 * @URL is used as base to resolve external entities and for error
14096 * reporting.
14097 *
14098 * See xmlCtxtUseOptions for details.
14099 *
14100 * Returns the resulting document tree
14101 */
14102 xmlDocPtr
xmlCtxtReadIO(xmlParserCtxtPtr ctxt,xmlInputReadCallback ioread,xmlInputCloseCallback ioclose,void * ioctx,const char * URL,const char * encoding,int options)14103 xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14104 xmlInputCloseCallback ioclose, void *ioctx,
14105 const char *URL,
14106 const char *encoding, int options)
14107 {
14108 xmlParserInputPtr input;
14109
14110 if (ctxt == NULL)
14111 return(NULL);
14112
14113 xmlCtxtReset(ctxt);
14114 xmlCtxtUseOptions(ctxt, options);
14115
14116 input = xmlCtxtNewInputFromIO(ctxt, URL, ioread, ioclose, ioctx,
14117 encoding, 0);
14118
14119 return(xmlCtxtParseDocument(ctxt, input));
14120 }
14121
14122