1 #include "Python.h"
2 #include <ctype.h>
3 
4 #include "structmember.h"         // PyMemberDef
5 #include "expat.h"
6 
7 #include "pyexpat.h"
8 
9 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
10    included methods. */
11 /*[clinic input]
12 module pyexpat
13 [clinic start generated code]*/
14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
15 
/* Expat's version folded into one comparable integer:
   major * 10000 + minor * 100 + micro. */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Expat memory-handling suite built from the PyObject_Malloc /
   PyObject_Realloc / PyObject_Free allocator family. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
20 
/* One enumerator per supported Expat callback.  The values are used as
   indices into xmlparseobject.handlers[] (see have_handler()), so the
   order of the enumerators must not be changed casually. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when building against Expat 1.95.4 or newer. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* NOTE(review): looks like a placeholder that lets the conditional
       entry above keep its trailing comma -- confirm. */
    _DummyDecl
};
48 
/* Per-module state, obtained through pyexpat_get_state() or
   PyType_GetModuleState(). */
typedef struct {
    PyTypeObject *xml_parse_type;   /* type used to allocate xmlparseobject instances */
    PyObject *error;                /* exception class instantiated by set_error() */
    PyObject *str_read;             /* attribute name looked up on the file in ParseFile() */
} pyexpat_state;
54 
55 static inline pyexpat_state*
pyexpat_get_state(PyObject * module)56 pyexpat_get_state(PyObject *module)
57 {
58     void *state = PyModule_GetState(module);
59     assert(state != NULL);
60     return (pyexpat_state *)state;
61 }
62 
63 /* ----------------------------------------------------- */
64 
65 /* Declarations for objects of type xmlparser */
66 
/* Instance layout of a parser object wrapping one Expat XML_Parser. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Handler slots indexed by enum HandlerTypes;
                                   a NULL slot means no handler is set */
} xmlparseobject;
82 
83 #include "clinic/pyexpat.c.h"
84 
/* NOTE(review): presumably the default capacity of the character data
   buffer; the use site is not in this part of the file -- confirm. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Function pointer type taking a parser and an untyped callback pointer. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer used to store a C-level callback. */
typedef void* xmlhandler;

/* Description of one handler kind. */
struct HandlerInfo {
    const char *name;           /* handler name */
    xmlhandlersetter setter;    /* presumably installs `handler` on a parser -- confirm */
    xmlhandler handler;         /* the C callback */
    PyGetSetDef getset;         /* get/set descriptor entry for this handler */
};

/* Table of handler descriptions.  NOTE(review): it is populated elsewhere
   in the file; its relation to enum HandlerTypes is not visible here. */
static struct HandlerInfo handler_info[64];
98 
99 /* Set an integer attribute on the error object; return true on success,
100  * false on an exception.
101  */
102 static int
set_error_attr(PyObject * err,const char * name,int value)103 set_error_attr(PyObject *err, const char *name, int value)
104 {
105     PyObject *v = PyLong_FromLong(value);
106 
107     if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108         Py_XDECREF(v);
109         return 0;
110     }
111     Py_DECREF(v);
112     return 1;
113 }
114 
115 /* Build and set an Expat exception, including positioning
116  * information.  Always returns NULL.
117  */
118 static PyObject *
set_error(pyexpat_state * state,xmlparseobject * self,enum XML_Error code)119 set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
120 {
121     PyObject *err;
122     PyObject *buffer;
123     XML_Parser parser = self->itself;
124     int lineno = XML_GetErrorLineNumber(parser);
125     int column = XML_GetErrorColumnNumber(parser);
126 
127     buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128                                   XML_ErrorString(code), lineno, column);
129     if (buffer == NULL)
130         return NULL;
131     err = PyObject_CallOneArg(state->error, buffer);
132     Py_DECREF(buffer);
133     if (  err != NULL
134           && set_error_attr(err, "code", code)
135           && set_error_attr(err, "offset", column)
136           && set_error_attr(err, "lineno", lineno)) {
137         PyErr_SetObject(state->error, err);
138     }
139     Py_XDECREF(err);
140     return NULL;
141 }
142 
143 static int
have_handler(xmlparseobject * self,int type)144 have_handler(xmlparseobject *self, int type)
145 {
146     PyObject *handler = self->handlers[type];
147     return handler != NULL;
148 }
149 
150 /* Convert a string of XML_Chars into a Unicode string.
151    Returns None if str is a null pointer. */
152 
153 static PyObject *
conv_string_to_unicode(const XML_Char * str)154 conv_string_to_unicode(const XML_Char *str)
155 {
156     /* XXX currently this code assumes that XML_Char is 8-bit,
157        and hence in UTF-8.  */
158     /* UTF-8 from Expat, Unicode desired */
159     if (str == NULL) {
160         Py_RETURN_NONE;
161     }
162     return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
163 }
164 
165 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)166 conv_string_len_to_unicode(const XML_Char *str, int len)
167 {
168     /* XXX currently this code assumes that XML_Char is 8-bit,
169        and hence in UTF-8.  */
170     /* UTF-8 from Expat, Unicode desired */
171     if (str == NULL) {
172         Py_RETURN_NONE;
173     }
174     return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
175 }
176 
177 /* Callback routines */
178 
/* Forward declaration: flag_error() calls this before the definition
   has been seen. */
static void clear_handlers(xmlparseobject *self, int initial);
180 
181 /* This handler is used when an error has been detected, in the hope
182    that actual parsing can be terminated early.  This will only help
183    if an external entity reference is encountered. */
184 static int
error_external_entity_ref_handler(XML_Parser parser,const XML_Char * context,const XML_Char * base,const XML_Char * systemId,const XML_Char * publicId)185 error_external_entity_ref_handler(XML_Parser parser,
186                                   const XML_Char *context,
187                                   const XML_Char *base,
188                                   const XML_Char *systemId,
189                                   const XML_Char *publicId)
190 {
191     return 0;
192 }
193 
194 /* Dummy character data handler used when an error (exception) has
195    been detected, and the actual parsing can be terminated early.
196    This is needed since character data handler can't be safely removed
197    from within the character data handler, but can be replaced.  It is
198    used only from the character data handler trampoline, and must be
199    used right after `flag_error()` is called. */
200 static void
noop_character_data_handler(void * userData,const XML_Char * data,int len)201 noop_character_data_handler(void *userData, const XML_Char *data, int len)
202 {
203     /* Do nothing. */
204 }
205 
206 static void
flag_error(xmlparseobject * self)207 flag_error(xmlparseobject *self)
208 {
209     clear_handlers(self, 0);
210     XML_SetExternalEntityRefHandler(self->itself,
211                                     error_external_entity_ref_handler);
212 }
213 
214 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)215 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
216                 xmlparseobject *self)
217 {
218     PyObject *res;
219 
220     res = PyObject_Call(func, args, NULL);
221     if (res == NULL) {
222         _PyTraceback_Add(funcname, __FILE__, lineno);
223         XML_StopParser(self->itself, XML_FALSE);
224     }
225     return res;
226 }
227 
228 static PyObject*
string_intern(xmlparseobject * self,const char * str)229 string_intern(xmlparseobject *self, const char* str)
230 {
231     PyObject *result = conv_string_to_unicode(str);
232     PyObject *value;
233     /* result can be NULL if the unicode conversion failed. */
234     if (!result)
235         return result;
236     if (!self->intern)
237         return result;
238     value = PyDict_GetItemWithError(self->intern, result);
239     if (!value) {
240         if (!PyErr_Occurred() &&
241             PyDict_SetItem(self->intern, result, result) == 0)
242         {
243             return result;
244         }
245         else {
246             Py_DECREF(result);
247             return NULL;
248         }
249     }
250     Py_INCREF(value);
251     Py_DECREF(result);
252     return value;
253 }
254 
255 /* Return 0 on success, -1 on exception.
256  * flag_error() will be called before return if needed.
257  */
258 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)259 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260 {
261     PyObject *args;
262     PyObject *temp;
263 
264     if (!have_handler(self, CharacterData))
265         return -1;
266 
267     args = PyTuple_New(1);
268     if (args == NULL)
269         return -1;
270     temp = (conv_string_len_to_unicode(buffer, len));
271     if (temp == NULL) {
272         Py_DECREF(args);
273         flag_error(self);
274         XML_SetCharacterDataHandler(self->itself,
275                                     noop_character_data_handler);
276         return -1;
277     }
278     PyTuple_SET_ITEM(args, 0, temp);
279     /* temp is now a borrowed reference; consider it unused. */
280     self->in_callback = 1;
281     temp = call_with_frame("CharacterData", __LINE__,
282                            self->handlers[CharacterData], args, self);
283     /* temp is an owned reference again, or NULL */
284     self->in_callback = 0;
285     Py_DECREF(args);
286     if (temp == NULL) {
287         flag_error(self);
288         XML_SetCharacterDataHandler(self->itself,
289                                     noop_character_data_handler);
290         return -1;
291     }
292     Py_DECREF(temp);
293     return 0;
294 }
295 
296 static int
flush_character_buffer(xmlparseobject * self)297 flush_character_buffer(xmlparseobject *self)
298 {
299     int rc;
300     if (self->buffer == NULL || self->buffer_used == 0)
301         return 0;
302     rc = call_character_handler(self, self->buffer, self->buffer_used);
303     self->buffer_used = 0;
304     return rc;
305 }
306 
307 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)308 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
309 {
310     xmlparseobject *self = (xmlparseobject *) userData;
311 
312     if (PyErr_Occurred())
313         return;
314 
315     if (self->buffer == NULL)
316         call_character_handler(self, data, len);
317     else {
318         if ((self->buffer_used + len) > self->buffer_size) {
319             if (flush_character_buffer(self) < 0)
320                 return;
321             /* handler might have changed; drop the rest on the floor
322              * if there isn't a handler anymore
323              */
324             if (!have_handler(self, CharacterData))
325                 return;
326         }
327         if (len > self->buffer_size) {
328             call_character_handler(self, data, len);
329             self->buffer_used = 0;
330         }
331         else {
332             memcpy(self->buffer + self->buffer_used,
333                    data, len * sizeof(XML_Char));
334             self->buffer_used += len;
335         }
336     }
337 }
338 
339 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])340 my_StartElementHandler(void *userData,
341                        const XML_Char *name, const XML_Char *atts[])
342 {
343     xmlparseobject *self = (xmlparseobject *)userData;
344 
345     if (have_handler(self, StartElement)) {
346         PyObject *container, *rv, *args;
347         int i, max;
348 
349         if (PyErr_Occurred())
350             return;
351 
352         if (flush_character_buffer(self) < 0)
353             return;
354         /* Set max to the number of slots filled in atts[]; max/2 is
355          * the number of attributes we need to process.
356          */
357         if (self->specified_attributes) {
358             max = XML_GetSpecifiedAttributeCount(self->itself);
359         }
360         else {
361             max = 0;
362             while (atts[max] != NULL)
363                 max += 2;
364         }
365         /* Build the container. */
366         if (self->ordered_attributes)
367             container = PyList_New(max);
368         else
369             container = PyDict_New();
370         if (container == NULL) {
371             flag_error(self);
372             return;
373         }
374         for (i = 0; i < max; i += 2) {
375             PyObject *n = string_intern(self, (XML_Char *) atts[i]);
376             PyObject *v;
377             if (n == NULL) {
378                 flag_error(self);
379                 Py_DECREF(container);
380                 return;
381             }
382             v = conv_string_to_unicode((XML_Char *) atts[i+1]);
383             if (v == NULL) {
384                 flag_error(self);
385                 Py_DECREF(container);
386                 Py_DECREF(n);
387                 return;
388             }
389             if (self->ordered_attributes) {
390                 PyList_SET_ITEM(container, i, n);
391                 PyList_SET_ITEM(container, i+1, v);
392             }
393             else if (PyDict_SetItem(container, n, v)) {
394                 flag_error(self);
395                 Py_DECREF(n);
396                 Py_DECREF(v);
397                 Py_DECREF(container);
398                 return;
399             }
400             else {
401                 Py_DECREF(n);
402                 Py_DECREF(v);
403             }
404         }
405         args = string_intern(self, name);
406         if (args == NULL) {
407             Py_DECREF(container);
408             return;
409         }
410         args = Py_BuildValue("(NN)", args, container);
411         if (args == NULL) {
412             return;
413         }
414         /* Container is now a borrowed reference; ignore it. */
415         self->in_callback = 1;
416         rv = call_with_frame("StartElement", __LINE__,
417                              self->handlers[StartElement], args, self);
418         self->in_callback = 0;
419         Py_DECREF(args);
420         if (rv == NULL) {
421             flag_error(self);
422             return;
423         }
424         Py_DECREF(rv);
425     }
426 }
427 
/* Generate a C callback named my_<NAME>Handler that forwards an Expat
 * event to the Python handler stored in self->handlers[NAME].
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes enumerator; also pasted into the
 *                 function name and stringified for the traceback entry
 *   PARAMS        parenthesised C parameter list
 *   INIT          statement(s) placed right after the local declarations
 *   PARAM_FORMAT  parenthesised argument list handed to Py_BuildValue
 *   CONVERSION    statement(s) run after a successful call, while `rv`
 *                 is still alive
 *   RETURN        expression used by every return statement
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a handler is installed, the generated body returns early if an
 * exception is already pending or if flushing buffered character data
 * fails, and calls flag_error() when building the arguments or calling
 * the handler fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
458 
/* RC_HANDLER specialisation for callbacks that return nothing: no
   extra initialisation, no result conversion, plain `return ;`.
   The parser object is taken from the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* RC_HANDLER specialisation for callbacks that return an int: `rc`
   starts at 0 and, after a successful call, is set from the Python
   result with PyLong_AsLong().  The parser object is taken from the
   `userData` parameter. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
467 
/* Python handler is called with (name); the name is interned. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* Python handler is called with (target, data); only the target is
   interned. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* Python handler is called with
   (entityName, base, systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* Python handler is called with
   (entityName, is_parameter_entity, value, base, systemId, publicId,
   notationName).  `value` is decoded using the explicit value_length
   rather than being treated as NUL-terminated. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* Python handler is called with (version, encoding, standalone);
   the two strings are converted without interning. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
515 
516 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))517 conv_content_model(XML_Content * const model,
518                    PyObject *(*conv_string)(const XML_Char *))
519 {
520     PyObject *result = NULL;
521     PyObject *children = PyTuple_New(model->numchildren);
522     int i;
523 
524     if (children != NULL) {
525         assert(model->numchildren < INT_MAX);
526         for (i = 0; i < (int)model->numchildren; ++i) {
527             PyObject *child = conv_content_model(&model->children[i],
528                                                  conv_string);
529             if (child == NULL) {
530                 Py_XDECREF(children);
531                 return NULL;
532             }
533             PyTuple_SET_ITEM(children, i, child);
534         }
535         result = Py_BuildValue("(iiO&N)",
536                                model->type, model->quant,
537                                conv_string,model->name, children);
538     }
539     return result;
540 }
541 
542 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)543 my_ElementDeclHandler(void *userData,
544                       const XML_Char *name,
545                       XML_Content *model)
546 {
547     xmlparseobject *self = (xmlparseobject *)userData;
548     PyObject *args = NULL;
549 
550     if (have_handler(self, ElementDecl)) {
551         PyObject *rv = NULL;
552         PyObject *modelobj, *nameobj;
553 
554         if (PyErr_Occurred())
555             return;
556 
557         if (flush_character_buffer(self) < 0)
558             goto finally;
559         modelobj = conv_content_model(model, (conv_string_to_unicode));
560         if (modelobj == NULL) {
561             flag_error(self);
562             goto finally;
563         }
564         nameobj = string_intern(self, name);
565         if (nameobj == NULL) {
566             Py_DECREF(modelobj);
567             flag_error(self);
568             goto finally;
569         }
570         args = Py_BuildValue("NN", nameobj, modelobj);
571         if (args == NULL) {
572             flag_error(self);
573             goto finally;
574         }
575         self->in_callback = 1;
576         rv = call_with_frame("ElementDecl", __LINE__,
577                              self->handlers[ElementDecl], args, self);
578         self->in_callback = 0;
579         if (rv == NULL) {
580             flag_error(self);
581             goto finally;
582         }
583         Py_DECREF(rv);
584     }
585  finally:
586     Py_XDECREF(args);
587     XML_FreeContentModel(self->itself, model);
588     return;
589 }
590 
/* Python handler is called with
   (elname, attname, att_type, dflt, isrequired); the element and
   attribute names are interned. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

/* Only built against Expat 1.95.4 or newer.
   Python handler is called with (entityName, is_parameter_entity). */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* Python handler is called with
   (notationName, base, systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* Python handler is called with (prefix, uri), both interned. */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* Python handler is called with (prefix). */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Python handler is called with (data). */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* Python handler is called with no arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* Python handler is called with no arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Python handler is called with (text), decoded from `len` units of `s`. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* Same argument shape as Default; the generated function is also made
   available under the shorter name my_DefaultHandlerExpand. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* Python handler is called with no arguments; its integer result is
   returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* Python handler is called with (context, base, systemId, publicId);
   its integer result is returned to Expat.  This callback receives the
   parser itself rather than the user data, so the parser object is
   fetched with XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* Python handler is called with
   (doctypeName, sysid, pubid, has_internal_subset). */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* Python handler is called with no arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
683 
684 /* ---------------------------------------------------------------- */
685 /*[clinic input]
686 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687 [clinic start generated code]*/
688 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689 
690 
691 static PyObject *
get_parse_result(pyexpat_state * state,xmlparseobject * self,int rv)692 get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
693 {
694     if (PyErr_Occurred()) {
695         return NULL;
696     }
697     if (rv == 0) {
698         return set_error(state, self, XML_GetErrorCode(self->itself));
699     }
700     if (flush_character_buffer(self) < 0) {
701         return NULL;
702     }
703     return PyLong_FromLong(rv);
704 }
705 
/* Largest number of bytes Parse() hands to a single XML_Parse() call.
   Must not exceed INT_MAX (checked with a static_assert in Parse()). */
#define MAX_CHUNK_SIZE (1 << 20)
707 
708 /*[clinic input]
709 pyexpat.xmlparser.Parse
710 
711     cls: defining_class
712     data: object
713     isfinal: bool(accept={int}) = False
714     /
715 
716 Parse XML data.
717 
718 `isfinal' should be true at end of input.
719 [clinic start generated code]*/
720 
721 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * data,int isfinal)722 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723                              PyObject *data, int isfinal)
724 /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
725 {
726     const char *s;
727     Py_ssize_t slen;
728     Py_buffer view;
729     int rc;
730     pyexpat_state *state = PyType_GetModuleState(cls);
731 
732     if (PyUnicode_Check(data)) {
733         view.buf = NULL;
734         s = PyUnicode_AsUTF8AndSize(data, &slen);
735         if (s == NULL)
736             return NULL;
737         /* Explicitly set UTF-8 encoding. Return code ignored. */
738         (void)XML_SetEncoding(self->itself, "utf-8");
739     }
740     else {
741         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742             return NULL;
743         s = view.buf;
744         slen = view.len;
745     }
746 
747     static_assert(MAX_CHUNK_SIZE <= INT_MAX,
748                   "MAX_CHUNK_SIZE is larger than INT_MAX");
749     while (slen > MAX_CHUNK_SIZE) {
750         rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
751         if (!rc)
752             goto done;
753         s += MAX_CHUNK_SIZE;
754         slen -= MAX_CHUNK_SIZE;
755     }
756 
757     assert(slen <= INT_MAX);
758     rc = XML_Parse(self->itself, s, (int)slen, isfinal);
759 
760 done:
761     if (view.buf != NULL) {
762         PyBuffer_Release(&view);
763     }
764     return get_parse_result(state, self, rc);
765 }
766 
767 /* File reading copied from cPickle */
768 
/* Number of bytes ParseFile() requests from Expat's buffer and from the
   file's read() method on each iteration. */
#define BUF_SIZE 2048
770 
771 static int
readinst(char * buf,int buf_size,PyObject * meth)772 readinst(char *buf, int buf_size, PyObject *meth)
773 {
774     PyObject *str;
775     Py_ssize_t len;
776     const char *ptr;
777 
778     str = PyObject_CallFunction(meth, "i", buf_size);
779     if (str == NULL)
780         goto error;
781 
782     if (PyBytes_Check(str))
783         ptr = PyBytes_AS_STRING(str);
784     else if (PyByteArray_Check(str))
785         ptr = PyByteArray_AS_STRING(str);
786     else {
787         PyErr_Format(PyExc_TypeError,
788                      "read() did not return a bytes object (type=%.400s)",
789                      Py_TYPE(str)->tp_name);
790         goto error;
791     }
792     len = Py_SIZE(str);
793     if (len > buf_size) {
794         PyErr_Format(PyExc_ValueError,
795                      "read() returned too much data: "
796                      "%i bytes requested, %zd returned",
797                      buf_size, len);
798         goto error;
799     }
800     memcpy(buf, ptr, len);
801     Py_DECREF(str);
802     /* len <= buf_size <= INT_MAX */
803     return (int)len;
804 
805 error:
806     Py_XDECREF(str);
807     return -1;
808 }
809 
810 /*[clinic input]
811 pyexpat.xmlparser.ParseFile
812 
813     cls: defining_class
814     file: object
815     /
816 
817 Parse XML data from file-like object.
818 [clinic start generated code]*/
819 
820 static PyObject *
pyexpat_xmlparser_ParseFile_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * file)821 pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
822                                  PyObject *file)
823 /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
824 {
825     int rv = 1;
826     PyObject *readmethod = NULL;
827 
828     pyexpat_state *state = PyType_GetModuleState(cls);
829 
830     if (_PyObject_LookupAttr(file, state->str_read, &readmethod) < 0) {
831         return NULL;
832     }
833     if (readmethod == NULL) {
834         PyErr_SetString(PyExc_TypeError,
835                         "argument must have 'read' attribute");
836         return NULL;
837     }
838     for (;;) {
839         int bytes_read;
840         void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
841         if (buf == NULL) {
842             Py_XDECREF(readmethod);
843             return get_parse_result(state, self, 0);
844         }
845 
846         bytes_read = readinst(buf, BUF_SIZE, readmethod);
847         if (bytes_read < 0) {
848             Py_DECREF(readmethod);
849             return NULL;
850         }
851         rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
852         if (PyErr_Occurred()) {
853             Py_XDECREF(readmethod);
854             return NULL;
855         }
856 
857         if (!rv || bytes_read == 0)
858             break;
859     }
860     Py_XDECREF(readmethod);
861     return get_parse_result(state, self, rv);
862 }
863 
864 /*[clinic input]
865 pyexpat.xmlparser.SetBase
866 
867     base: str
868     /
869 
870 Set the base URL for the parser.
871 [clinic start generated code]*/
872 
873 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)874 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
875 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
876 {
877     if (!XML_SetBase(self->itself, base)) {
878         return PyErr_NoMemory();
879     }
880     Py_RETURN_NONE;
881 }
882 
883 /*[clinic input]
884 pyexpat.xmlparser.GetBase
885 
886 Return base URL string for the parser.
887 [clinic start generated code]*/
888 
889 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)890 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
891 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
892 {
893     return Py_BuildValue("z", XML_GetBase(self->itself));
894 }
895 
896 /*[clinic input]
897 pyexpat.xmlparser.GetInputContext
898 
899 Return the untranslated text of the input that caused the current event.
900 
901 If the event was generated by a large amount of text (such as a start tag
902 for an element with many attributes), not all of the text may be available.
903 [clinic start generated code]*/
904 
905 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)906 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
907 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
908 {
909     if (self->in_callback) {
910         int offset, size;
911         const char *buffer
912             = XML_GetInputContext(self->itself, &offset, &size);
913 
914         if (buffer != NULL)
915             return PyBytes_FromStringAndSize(buffer + offset,
916                                               size - offset);
917         else
918             Py_RETURN_NONE;
919     }
920     else
921         Py_RETURN_NONE;
922 }
923 
924 /*[clinic input]
925 pyexpat.xmlparser.ExternalEntityParserCreate
926 
927     cls: defining_class
928     context: str(accept={str, NoneType})
929     encoding: str = NULL
930     /
931 
932 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
933 [clinic start generated code]*/
934 
935 static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject * self,PyTypeObject * cls,const char * context,const char * encoding)936 pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
937                                                   PyTypeObject *cls,
938                                                   const char *context,
939                                                   const char *encoding)
940 /*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
941 {
942     xmlparseobject *new_parser;
943     int i;
944 
945     pyexpat_state *state = PyType_GetModuleState(cls);
946 
947     new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
948     if (new_parser == NULL) {
949         return NULL;
950     }
951 
952     new_parser->buffer_size = self->buffer_size;
953     new_parser->buffer_used = 0;
954     new_parser->buffer = NULL;
955     new_parser->ordered_attributes = self->ordered_attributes;
956     new_parser->specified_attributes = self->specified_attributes;
957     new_parser->in_callback = 0;
958     new_parser->ns_prefixes = self->ns_prefixes;
959     new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
960                                                         encoding);
961     new_parser->handlers = 0;
962     new_parser->intern = self->intern;
963     Py_XINCREF(new_parser->intern);
964 
965     if (self->buffer != NULL) {
966         new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
967         if (new_parser->buffer == NULL) {
968             Py_DECREF(new_parser);
969             return PyErr_NoMemory();
970         }
971     }
972     if (!new_parser->itself) {
973         Py_DECREF(new_parser);
974         return PyErr_NoMemory();
975     }
976 
977     XML_SetUserData(new_parser->itself, (void *)new_parser);
978 
979     /* allocate and clear handlers first */
980     for (i = 0; handler_info[i].name != NULL; i++)
981         /* do nothing */;
982 
983     new_parser->handlers = PyMem_New(PyObject *, i);
984     if (!new_parser->handlers) {
985         Py_DECREF(new_parser);
986         return PyErr_NoMemory();
987     }
988     clear_handlers(new_parser, 1);
989 
990     /* then copy handlers from self */
991     for (i = 0; handler_info[i].name != NULL; i++) {
992         PyObject *handler = self->handlers[i];
993         if (handler != NULL) {
994             Py_INCREF(handler);
995             new_parser->handlers[i] = handler;
996             handler_info[i].setter(new_parser->itself,
997                                    handler_info[i].handler);
998         }
999     }
1000 
1001     PyObject_GC_Track(new_parser);
1002     return (PyObject *)new_parser;
1003 }
1004 
1005 /*[clinic input]
1006 pyexpat.xmlparser.SetParamEntityParsing
1007 
1008     flag: int
1009     /
1010 
1011 Controls parsing of parameter entities (including the external DTD subset).
1012 
1013 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1014 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1015 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1016 was successful.
1017 [clinic start generated code]*/
1018 
1019 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)1020 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1021 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1022 {
1023     flag = XML_SetParamEntityParsing(self->itself, flag);
1024     return PyLong_FromLong(flag);
1025 }
1026 
1027 
1028 #if XML_COMBINED_VERSION >= 19505
1029 /*[clinic input]
1030 pyexpat.xmlparser.UseForeignDTD
1031 
1032     cls: defining_class
1033     flag: bool = True
1034     /
1035 
1036 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1037 
1038 This readily allows the use of a 'default' document type controlled by the
1039 application, while still getting the advantage of providing document type
1040 information to the parser. 'flag' defaults to True if not provided.
1041 [clinic start generated code]*/
1042 
1043 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,PyTypeObject * cls,int flag)1044 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1045                                      int flag)
1046 /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1047 {
1048     pyexpat_state *state = PyType_GetModuleState(cls);
1049     enum XML_Error rc;
1050 
1051     rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1052     if (rc != XML_ERROR_NONE) {
1053         return set_error(state, self, rc);
1054     }
1055     Py_RETURN_NONE;
1056 }
1057 #endif
1058 
/* Method table for xmlparser objects.  The *_METHODDEF macros are defined
   outside this section (NOTE(review): presumably in the Argument Clinic
   generated header -- confirm).  UseForeignDTD is only listed when
   XML_COMBINED_VERSION is at least 19505, the same guard that surrounds
   its implementation. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1072 
1073 /* ---------- */
1074 
1075 
1076 
1077 /* pyexpat international encoding support.
1078    Make it as simple as possible.
1079 */
1080 
1081 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1082 PyUnknownEncodingHandler(void *encodingHandlerData,
1083                          const XML_Char *name,
1084                          XML_Encoding *info)
1085 {
1086     static unsigned char template_buffer[256] = {0};
1087     PyObject* u;
1088     int i;
1089     const void *data;
1090     unsigned int kind;
1091 
1092     if (PyErr_Occurred())
1093         return XML_STATUS_ERROR;
1094 
1095     if (template_buffer[1] == 0) {
1096         for (i = 0; i < 256; i++)
1097             template_buffer[i] = i;
1098     }
1099 
1100     u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1101     if (u == NULL || PyUnicode_READY(u)) {
1102         Py_XDECREF(u);
1103         return XML_STATUS_ERROR;
1104     }
1105 
1106     if (PyUnicode_GET_LENGTH(u) != 256) {
1107         Py_DECREF(u);
1108         PyErr_SetString(PyExc_ValueError,
1109                         "multi-byte encodings are not supported");
1110         return XML_STATUS_ERROR;
1111     }
1112 
1113     kind = PyUnicode_KIND(u);
1114     data = PyUnicode_DATA(u);
1115     for (i = 0; i < 256; i++) {
1116         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1117         if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1118             info->map[i] = ch;
1119         else
1120             info->map[i] = -1;
1121     }
1122 
1123     info->data = NULL;
1124     info->convert = NULL;
1125     info->release = NULL;
1126     Py_DECREF(u);
1127 
1128     return XML_STATUS_OK;
1129 }
1130 
1131 
1132 static PyObject *
newxmlparseobject(pyexpat_state * state,const char * encoding,const char * namespace_separator,PyObject * intern)1133 newxmlparseobject(pyexpat_state *state, const char *encoding,
1134                   const char *namespace_separator, PyObject *intern)
1135 {
1136     int i;
1137     xmlparseobject *self;
1138 
1139     self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1140     if (self == NULL)
1141         return NULL;
1142 
1143     self->buffer = NULL;
1144     self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1145     self->buffer_used = 0;
1146     self->ordered_attributes = 0;
1147     self->specified_attributes = 0;
1148     self->in_callback = 0;
1149     self->ns_prefixes = 0;
1150     self->handlers = NULL;
1151     self->intern = intern;
1152     Py_XINCREF(self->intern);
1153 
1154     /* namespace_separator is either NULL or contains one char + \0 */
1155     self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1156                                        namespace_separator);
1157     if (self->itself == NULL) {
1158         PyErr_SetString(PyExc_RuntimeError,
1159                         "XML_ParserCreate failed");
1160         Py_DECREF(self);
1161         return NULL;
1162     }
1163 #if XML_COMBINED_VERSION >= 20100
1164     /* This feature was added upstream in libexpat 2.1.0. */
1165     XML_SetHashSalt(self->itself,
1166                     (unsigned long)_Py_HashSecret.expat.hashsalt);
1167 #endif
1168     XML_SetUserData(self->itself, (void *)self);
1169     XML_SetUnknownEncodingHandler(self->itself,
1170                   (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1171 
1172     for (i = 0; handler_info[i].name != NULL; i++)
1173         /* do nothing */;
1174 
1175     self->handlers = PyMem_New(PyObject *, i);
1176     if (!self->handlers) {
1177         Py_DECREF(self);
1178         return PyErr_NoMemory();
1179     }
1180     clear_handlers(self, 1);
1181 
1182     PyObject_GC_Track(self);
1183     return (PyObject*)self;
1184 }
1185 
1186 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1187 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1188 {
1189     for (int i = 0; handler_info[i].name != NULL; i++) {
1190         Py_VISIT(op->handlers[i]);
1191     }
1192     Py_VISIT(Py_TYPE(op));
1193     return 0;
1194 }
1195 
1196 static int
xmlparse_clear(xmlparseobject * op)1197 xmlparse_clear(xmlparseobject *op)
1198 {
1199     clear_handlers(op, 0);
1200     Py_CLEAR(op->intern);
1201     return 0;
1202 }
1203 
1204 static void
xmlparse_dealloc(xmlparseobject * self)1205 xmlparse_dealloc(xmlparseobject *self)
1206 {
1207     PyObject_GC_UnTrack(self);
1208     (void)xmlparse_clear(self);
1209     if (self->itself != NULL)
1210         XML_ParserFree(self->itself);
1211     self->itself = NULL;
1212 
1213     if (self->handlers != NULL) {
1214         PyMem_Free(self->handlers);
1215         self->handlers = NULL;
1216     }
1217     if (self->buffer != NULL) {
1218         PyMem_Free(self->buffer);
1219         self->buffer = NULL;
1220     }
1221     PyTypeObject *tp = Py_TYPE(self);
1222     PyObject_GC_Del(self);
1223     Py_DECREF(tp);
1224 }
1225 
1226 
1227 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1228 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1229 {
1230     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1231     int handlernum = (int)(hi - handler_info);
1232     PyObject *result = self->handlers[handlernum];
1233     if (result == NULL)
1234         result = Py_None;
1235     Py_INCREF(result);
1236     return result;
1237 }
1238 
1239 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1240 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1241 {
1242     assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1243     int handlernum = (int)(hi - handler_info);
1244     if (v == NULL) {
1245         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1246         return -1;
1247     }
1248     if (handlernum == CharacterData) {
1249         /* If we're changing the character data handler, flush all
1250          * cached data with the old handler.  Not sure there's a
1251          * "right" thing to do, though, but this probably won't
1252          * happen.
1253          */
1254         if (flush_character_buffer(self) < 0)
1255             return -1;
1256     }
1257 
1258     xmlhandler c_handler = NULL;
1259     if (v == Py_None) {
1260         /* If this is the character data handler, and a character
1261            data handler is already active, we need to be more
1262            careful.  What we can safely do is replace the existing
1263            character data handler callback function with a no-op
1264            function that will refuse to call Python.  The downside
1265            is that this doesn't completely remove the character
1266            data handler from the C layer if there's any callback
1267            active, so Expat does a little more work than it
1268            otherwise would, but that's really an odd case.  A more
1269            elaborate system of handlers and state could remove the
1270            C handler more effectively. */
1271         if (handlernum == CharacterData && self->in_callback)
1272             c_handler = noop_character_data_handler;
1273         v = NULL;
1274     }
1275     else if (v != NULL) {
1276         Py_INCREF(v);
1277         c_handler = handler_info[handlernum].handler;
1278     }
1279     Py_XSETREF(self->handlers[handlernum], v);
1280     handler_info[handlernum].setter(self->itself, c_handler);
1281     return 0;
1282 }
1283 
1284 #define INT_GETTER(name) \
1285     static PyObject * \
1286     xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
1287     { \
1288         return PyLong_FromLong((long) XML_Get##name(self->itself)); \
1289     }
1290 INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)1291 INT_GETTER(ErrorLineNumber)
1292 INT_GETTER(ErrorColumnNumber)
1293 INT_GETTER(ErrorByteIndex)
1294 INT_GETTER(CurrentLineNumber)
1295 INT_GETTER(CurrentColumnNumber)
1296 INT_GETTER(CurrentByteIndex)
1297 
1298 #undef INT_GETTER
1299 
1300 static PyObject *
1301 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1302 {
1303     return PyBool_FromLong(self->buffer != NULL);
1304 }
1305 
1306 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1307 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1308 {
1309     if (v == NULL) {
1310         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1311         return -1;
1312     }
1313     int b = PyObject_IsTrue(v);
1314     if (b < 0)
1315         return -1;
1316     if (b) {
1317         if (self->buffer == NULL) {
1318             self->buffer = PyMem_Malloc(self->buffer_size);
1319             if (self->buffer == NULL) {
1320                 PyErr_NoMemory();
1321                 return -1;
1322             }
1323             self->buffer_used = 0;
1324         }
1325     }
1326     else if (self->buffer != NULL) {
1327         if (flush_character_buffer(self) < 0)
1328             return -1;
1329         PyMem_Free(self->buffer);
1330         self->buffer = NULL;
1331     }
1332     return 0;
1333 }
1334 
1335 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1336 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1337 {
1338     return PyLong_FromLong((long) self->buffer_size);
1339 }
1340 
1341 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1342 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1343 {
1344     if (v == NULL) {
1345         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1346         return -1;
1347     }
1348     long new_buffer_size;
1349     if (!PyLong_Check(v)) {
1350         PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1351         return -1;
1352     }
1353 
1354     new_buffer_size = PyLong_AsLong(v);
1355     if (new_buffer_size <= 0) {
1356         if (!PyErr_Occurred())
1357             PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1358         return -1;
1359     }
1360 
1361     /* trivial case -- no change */
1362     if (new_buffer_size == self->buffer_size) {
1363         return 0;
1364     }
1365 
1366     /* check maximum */
1367     if (new_buffer_size > INT_MAX) {
1368         char errmsg[100];
1369         sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1370         PyErr_SetString(PyExc_ValueError, errmsg);
1371         return -1;
1372     }
1373 
1374     if (self->buffer != NULL) {
1375         /* there is already a buffer */
1376         if (self->buffer_used != 0) {
1377             if (flush_character_buffer(self) < 0) {
1378                 return -1;
1379             }
1380         }
1381         /* free existing buffer */
1382         PyMem_Free(self->buffer);
1383     }
1384     self->buffer = PyMem_Malloc(new_buffer_size);
1385     if (self->buffer == NULL) {
1386         PyErr_NoMemory();
1387         return -1;
1388     }
1389     self->buffer_size = new_buffer_size;
1390     return 0;
1391 }
1392 
1393 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1394 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1395 {
1396     return PyLong_FromLong((long) self->buffer_used);
1397 }
1398 
1399 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1400 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1401 {
1402     return PyBool_FromLong(self->ns_prefixes);
1403 }
1404 
1405 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1406 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1407 {
1408     if (v == NULL) {
1409         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1410         return -1;
1411     }
1412     int b = PyObject_IsTrue(v);
1413     if (b < 0)
1414         return -1;
1415     self->ns_prefixes = b;
1416     XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1417     return 0;
1418 }
1419 
1420 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1421 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1422 {
1423     return PyBool_FromLong(self->ordered_attributes);
1424 }
1425 
1426 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1427 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1428 {
1429     if (v == NULL) {
1430         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1431         return -1;
1432     }
1433     int b = PyObject_IsTrue(v);
1434     if (b < 0)
1435         return -1;
1436     self->ordered_attributes = b;
1437     return 0;
1438 }
1439 
1440 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1441 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1442 {
1443     return PyBool_FromLong((long) self->specified_attributes);
1444 }
1445 
1446 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1447 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1448 {
1449     if (v == NULL) {
1450         PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1451         return -1;
1452     }
1453     int b = PyObject_IsTrue(v);
1454     if (b < 0)
1455         return -1;
1456     self->specified_attributes = b;
1457     return 0;
1458 }
1459 
/* Plain data members of xmlparser objects: "intern" exposes the object
   stored in the struct's intern field, read-only and without a
   docstring. */
static PyMemberDef xmlparse_members[] = {
    {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    {NULL}
};
1464 
/* Row builders for the getset table below.  Given an attribute name they
   reference xmlparse_<name>_getter (and, for the second form,
   xmlparse_<name>_setter); no docstring is supplied.  Both macros are
   undefined again right after the table. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Computed attributes of xmlparser objects.  Rows built with
   XMLPARSE_GETTER_DEF have no setter. */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},
};

#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1490 
/* Docstring of the xmlparser type. */
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Slots referenced by _xml_parse_type_spec: deallocation, docstring, GC
   traverse/clear, and the method, member and getset tables. */
static PyType_Slot _xml_parse_type_spec_slots[] = {
    {Py_tp_dealloc, xmlparse_dealloc},
    {Py_tp_doc, (void *)Xmlparsetype__doc__},
    {Py_tp_traverse, xmlparse_traverse},
    {Py_tp_clear, xmlparse_clear},
    {Py_tp_methods, xmlparse_methods},
    {Py_tp_members, xmlparse_members},
    {Py_tp_getset, xmlparse_getsetlist},
    {0, 0}
};
1503 
/* Type specification for "pyexpat.xmlparser".  Besides the default flags
   the type is GC-enabled (Py_TPFLAGS_HAVE_GC), cannot be instantiated by
   calling it (Py_TPFLAGS_DISALLOW_INSTANTIATION) and is immutable
   (Py_TPFLAGS_IMMUTABLETYPE). */
static PyType_Spec _xml_parse_type_spec = {
    .name = "pyexpat.xmlparser",
    .basicsize = sizeof(xmlparseobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = _xml_parse_type_spec_slots,
};
1511 
1512 /* End of code for xmlparser objects */
1513 /* -------------------------------------------------------- */
1514 
1515 /*[clinic input]
1516 pyexpat.ParserCreate
1517 
1518     encoding: str(accept={str, NoneType}) = None
1519     namespace_separator: str(accept={str, NoneType}) = None
1520     intern: object = NULL
1521 
1522 Return a new XML parser object.
1523 [clinic start generated code]*/
1524 
1525 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1526 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1527                           const char *namespace_separator, PyObject *intern)
1528 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1529 {
1530     PyObject *result;
1531     int intern_decref = 0;
1532 
1533     if (namespace_separator != NULL
1534         && strlen(namespace_separator) > 1) {
1535         PyErr_SetString(PyExc_ValueError,
1536                         "namespace_separator must be at most one"
1537                         " character, omitted, or None");
1538         return NULL;
1539     }
1540     /* Explicitly passing None means no interning is desired.
1541        Not passing anything means that a new dictionary is used. */
1542     if (intern == Py_None)
1543         intern = NULL;
1544     else if (intern == NULL) {
1545         intern = PyDict_New();
1546         if (!intern)
1547             return NULL;
1548         intern_decref = 1;
1549     }
1550     else if (!PyDict_Check(intern)) {
1551         PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1552         return NULL;
1553     }
1554 
1555     pyexpat_state *state = pyexpat_get_state(module);
1556     result = newxmlparseobject(state, encoding, namespace_separator, intern);
1557     if (intern_decref) {
1558         Py_DECREF(intern);
1559     }
1560     return result;
1561 }
1562 
1563 /*[clinic input]
1564 pyexpat.ErrorString
1565 
1566     code: long
1567     /
1568 
1569 Returns string error for given number.
1570 [clinic start generated code]*/
1571 
1572 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1573 pyexpat_ErrorString_impl(PyObject *module, long code)
1574 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1575 {
1576     return Py_BuildValue("z", XML_ErrorString((int)code));
1577 }
1578 
1579 /* List of methods defined in the module */
1580 
/* Module-level functions: ParserCreate() and ErrorString().  The
   *_METHODDEF macros are defined outside this section. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1586 
/* Module docstring */

PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");

/* Initialization function for the module */

/* MODULE_NAME may be predefined by the build; it defaults to "pyexpat"
   and is used as the prefix of the submodule names created below. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif
1597 
/* Install one getset descriptor per handler_info entry in the xmlparser
   type's dictionary, keeping any entry that already exists under the same
   name (PyDict_SetDefault).  Each descriptor uses the shared handler
   getter/setter with the handler_info entry itself as closure.

   Returns 0 on success and -1 with an exception set on failure. */
static int
init_handler_descrs(pyexpat_state *state)
{
    PyTypeObject *parser_type = state->xml_parse_type;

    assert(!PyType_HasFeature(parser_type, Py_TPFLAGS_VALID_VERSION_TAG));
    for (struct HandlerInfo *hi = handler_info; hi->name != NULL; hi++) {
        hi->getset.name = hi->name;
        hi->getset.get = (getter)xmlparse_handler_getter;
        hi->getset.set = (setter)xmlparse_handler_setter;
        hi->getset.closure = hi;

        PyObject *descr = PyDescr_NewGetSet(parser_type, &hi->getset);
        if (descr == NULL) {
            return -1;
        }

        /* The dict holds its own reference on success, so ours is dropped
           on both outcomes. */
        PyObject *stored = PyDict_SetDefault(parser_type->tp_dict,
                                             PyDescr_NAME(descr), descr);
        Py_DECREF(descr);
        if (stored == NULL) {
            return -1;
        }
    }
    return 0;
}
1621 
1622 static PyObject *
add_submodule(PyObject * mod,const char * fullname)1623 add_submodule(PyObject *mod, const char *fullname)
1624 {
1625     const char *name = strrchr(fullname, '.') + 1;
1626 
1627     PyObject *submodule = PyModule_New(fullname);
1628     if (submodule == NULL) {
1629         return NULL;
1630     }
1631 
1632     PyObject *mod_name = PyUnicode_FromString(fullname);
1633     if (mod_name == NULL) {
1634         Py_DECREF(submodule);
1635         return NULL;
1636     }
1637 
1638     if (_PyImport_SetModule(mod_name, submodule) < 0) {
1639         Py_DECREF(submodule);
1640         Py_DECREF(mod_name);
1641         return NULL;
1642     }
1643     Py_DECREF(mod_name);
1644 
1645     /* gives away the reference to the submodule */
1646     if (PyModule_AddObject(mod, name, submodule) < 0) {
1647         Py_DECREF(submodule);
1648         return NULL;
1649     }
1650 
1651     return submodule;
1652 }
1653 
/* One entry of the error table below. */
struct ErrorInfo {
    const char * name;  /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    const char * description;  /* Error description as returned by XML_ErrorString(<int>) */
};

/* Error constants exported through the errors submodule.  The array index
   of an entry is used as its numeric error code, so the order must not be
   changed.  The table is only ever read, hence `const`. */
static const
struct ErrorInfo error_info_of[] = {
    {NULL, NULL},  /* XML_ERROR_NONE (value 0) is not exposed */

    {"XML_ERROR_NO_MEMORY", "out of memory"},
    {"XML_ERROR_SYNTAX", "syntax error"},
    {"XML_ERROR_NO_ELEMENTS", "no element found"},
    {"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR", "partial character"},
    {"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED", "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
    {"XML_ERROR_ABORTED", "parsing aborted"},
    {"XML_ERROR_FINISHED", "parsing finished"},
    {"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
};
1719 
1720 static int
add_error(PyObject * errors_module,PyObject * codes_dict,PyObject * rev_codes_dict,size_t error_index)1721 add_error(PyObject *errors_module, PyObject *codes_dict,
1722           PyObject *rev_codes_dict, size_t error_index)
1723 {
1724     const char * const name = error_info_of[error_index].name;
1725     const int error_code = (int)error_index;
1726 
1727     /* NOTE: This keeps the source of truth regarding error
1728      *       messages with libexpat and (by definiton) in bulletproof sync
1729      *       with the other uses of the XML_ErrorString function
1730      *       elsewhere within this file.  pyexpat's copy of the messages
1731      *       only acts as a fallback in case of outdated runtime libexpat,
1732      *       where it returns NULL. */
1733     const char *error_string = XML_ErrorString(error_code);
1734     if (error_string == NULL) {
1735         error_string = error_info_of[error_index].description;
1736     }
1737 
1738     if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1739         return -1;
1740     }
1741 
1742     PyObject *num = PyLong_FromLong(error_code);
1743     if (num == NULL) {
1744         return -1;
1745     }
1746 
1747     if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1748         Py_DECREF(num);
1749         return -1;
1750     }
1751 
1752     PyObject *str = PyUnicode_FromString(error_string);
1753     if (str == NULL) {
1754         Py_DECREF(num);
1755         return -1;
1756     }
1757 
1758     int res = PyDict_SetItem(rev_codes_dict, num, str);
1759     Py_DECREF(str);
1760     Py_DECREF(num);
1761     if (res < 0) {
1762         return -1;
1763     }
1764 
1765     return 0;
1766 }
1767 
1768 static int
add_errors_module(PyObject * mod)1769 add_errors_module(PyObject *mod)
1770 {
1771     PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1772     if (errors_module == NULL) {
1773         return -1;
1774     }
1775 
1776     PyObject *codes_dict = PyDict_New();
1777     PyObject *rev_codes_dict = PyDict_New();
1778     if (codes_dict == NULL || rev_codes_dict == NULL) {
1779         goto error;
1780     }
1781 
1782     size_t error_index = 0;
1783     for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1784         if (error_info_of[error_index].name == NULL) {
1785             continue;
1786         }
1787 
1788         if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1789             goto error;
1790         }
1791     }
1792 
1793     if (PyModule_AddStringConstant(errors_module, "__doc__",
1794                                    "Constants used to describe "
1795                                    "error conditions.") < 0) {
1796         goto error;
1797     }
1798 
1799     Py_INCREF(codes_dict);
1800     if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1801         Py_DECREF(codes_dict);
1802         goto error;
1803     }
1804     Py_CLEAR(codes_dict);
1805 
1806     Py_INCREF(rev_codes_dict);
1807     if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1808         Py_DECREF(rev_codes_dict);
1809         goto error;
1810     }
1811     Py_CLEAR(rev_codes_dict);
1812 
1813     return 0;
1814 
1815 error:
1816     Py_XDECREF(codes_dict);
1817     Py_XDECREF(rev_codes_dict);
1818     return -1;
1819 }
1820 
1821 static int
add_model_module(PyObject * mod)1822 add_model_module(PyObject *mod)
1823 {
1824     PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1825     if (model_module == NULL) {
1826         return -1;
1827     }
1828 
1829 #define MYCONST(c)  do {                                        \
1830         if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1831             return -1;                                          \
1832         }                                                       \
1833     } while(0)
1834 
1835     if (PyModule_AddStringConstant(
1836         model_module, "__doc__",
1837         "Constants used to interpret content model information.") < 0) {
1838         return -1;
1839     }
1840 
1841     MYCONST(XML_CTYPE_EMPTY);
1842     MYCONST(XML_CTYPE_ANY);
1843     MYCONST(XML_CTYPE_MIXED);
1844     MYCONST(XML_CTYPE_NAME);
1845     MYCONST(XML_CTYPE_CHOICE);
1846     MYCONST(XML_CTYPE_SEQ);
1847 
1848     MYCONST(XML_CQUANT_NONE);
1849     MYCONST(XML_CQUANT_OPT);
1850     MYCONST(XML_CQUANT_REP);
1851     MYCONST(XML_CQUANT_PLUS);
1852 #undef MYCONST
1853     return 0;
1854 }
1855 
#if XML_COMBINED_VERSION > 19505
/* Publish the feature table returned by XML_GetFeatureList() as the module
 * attribute "features": a list of (name, value) tuples, one per table entry
 * up to (and excluding) the XML_FEATURE_END terminator.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* A variadic argument must have exactly the type its format unit
         * names.  The explicit (long) cast paired with "l" guarantees that
         * whatever integer type expat uses for the field.
         * NOTE(review): expat.h is believed to declare XML_Feature.value as
         * `long int`, which made the previous "i" unit a mismatch -- confirm
         * against the bundled header. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       (long)features[i].value);
        if (item == NULL) {
            goto error;
        }
        /* PyList_Append() does not take over our reference to `item`. */
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* On success the module owns `list`; on failure it is still ours. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1888 
1889 static int
pyexpat_exec(PyObject * mod)1890 pyexpat_exec(PyObject *mod)
1891 {
1892     pyexpat_state *state = pyexpat_get_state(mod);
1893     state->str_read = PyUnicode_InternFromString("read");
1894     if (state->str_read == NULL) {
1895         return -1;
1896     }
1897     state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1898         mod, &_xml_parse_type_spec, NULL);
1899 
1900     if (state->xml_parse_type == NULL) {
1901         return -1;
1902     }
1903 
1904     if (init_handler_descrs(state) < 0) {
1905         return -1;
1906     }
1907     state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1908                                       NULL, NULL);
1909     if (state->error == NULL) {
1910         return -1;
1911     }
1912 
1913     /* Add some symbolic constants to the module */
1914 
1915     if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
1916         return -1;
1917     }
1918 
1919     if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
1920         return -1;
1921     }
1922 
1923     if (PyModule_AddObjectRef(mod, "XMLParserType",
1924                            (PyObject *) state->xml_parse_type) < 0) {
1925         return -1;
1926     }
1927 
1928     if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1929                                    XML_ExpatVersion()) < 0) {
1930         return -1;
1931     }
1932     {
1933         XML_Expat_Version info = XML_ExpatVersionInfo();
1934         PyObject *versionInfo = Py_BuildValue("(iii)",
1935                                               info.major,
1936                                               info.minor,
1937                                               info.micro);
1938         if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1939             Py_DECREF(versionInfo);
1940             return -1;
1941         }
1942     }
1943     /* XXX When Expat supports some way of figuring out how it was
1944        compiled, this should check and set native_encoding
1945        appropriately.
1946     */
1947     if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1948         return -1;
1949     }
1950 
1951     if (add_errors_module(mod) < 0) {
1952         return -1;
1953     }
1954 
1955     if (add_model_module(mod) < 0) {
1956         return -1;
1957     }
1958 
1959 #if XML_COMBINED_VERSION > 19505
1960     if (add_features(mod) < 0) {
1961         return -1;
1962     }
1963 #endif
1964 
1965 #define MYCONST(c) do {                                 \
1966         if (PyModule_AddIntConstant(mod, #c, c) < 0) {  \
1967             return -1;                                  \
1968         }                                               \
1969     } while(0)
1970 
1971     MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1972     MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1973     MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1974 #undef MYCONST
1975 
1976     static struct PyExpat_CAPI capi;
1977     /* initialize pyexpat dispatch table */
1978     capi.size = sizeof(capi);
1979     capi.magic = PyExpat_CAPI_MAGIC;
1980     capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1981     capi.MINOR_VERSION = XML_MINOR_VERSION;
1982     capi.MICRO_VERSION = XML_MICRO_VERSION;
1983     capi.ErrorString = XML_ErrorString;
1984     capi.GetErrorCode = XML_GetErrorCode;
1985     capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1986     capi.GetErrorLineNumber = XML_GetErrorLineNumber;
1987     capi.Parse = XML_Parse;
1988     capi.ParserCreate_MM = XML_ParserCreate_MM;
1989     capi.ParserFree = XML_ParserFree;
1990     capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1991     capi.SetCommentHandler = XML_SetCommentHandler;
1992     capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1993     capi.SetElementHandler = XML_SetElementHandler;
1994     capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1995     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1996     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1997     capi.SetUserData = XML_SetUserData;
1998     capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1999     capi.SetEncoding = XML_SetEncoding;
2000     capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
2001 #if XML_COMBINED_VERSION >= 20100
2002     capi.SetHashSalt = XML_SetHashSalt;
2003 #else
2004     capi.SetHashSalt = NULL;
2005 #endif
2006 
2007     /* export using capsule */
2008     PyObject *capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2009     if (capi_object == NULL) {
2010         return -1;
2011     }
2012 
2013     if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
2014         Py_DECREF(capi_object);
2015         return -1;
2016     }
2017 
2018     return 0;
2019 }
2020 
2021 static int
pyexpat_traverse(PyObject * module,visitproc visit,void * arg)2022 pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
2023 {
2024     pyexpat_state *state = pyexpat_get_state(module);
2025     Py_VISIT(state->xml_parse_type);
2026     Py_VISIT(state->error);
2027     Py_VISIT(state->str_read);
2028     return 0;
2029 }
2030 
2031 static int
pyexpat_clear(PyObject * module)2032 pyexpat_clear(PyObject *module)
2033 {
2034     pyexpat_state *state = pyexpat_get_state(module);
2035     Py_CLEAR(state->xml_parse_type);
2036     Py_CLEAR(state->error);
2037     Py_CLEAR(state->str_read);
2038     return 0;
2039 }
2040 
2041 static void
pyexpat_free(void * module)2042 pyexpat_free(void *module)
2043 {
2044     pyexpat_clear((PyObject *)module);
2045 }
2046 
2047 static PyModuleDef_Slot pyexpat_slots[] = {
2048     {Py_mod_exec, pyexpat_exec},
2049     {0, NULL}
2050 };
2051 
2052 static struct PyModuleDef pyexpatmodule = {
2053     PyModuleDef_HEAD_INIT,
2054     .m_name = MODULE_NAME,
2055     .m_doc = pyexpat_module_documentation,
2056     .m_size = sizeof(pyexpat_state),
2057     .m_methods = pyexpat_methods,
2058     .m_slots = pyexpat_slots,
2059     .m_traverse = pyexpat_traverse,
2060     .m_clear = pyexpat_clear,
2061     .m_free = pyexpat_free
2062 };
2063 
2064 PyMODINIT_FUNC
PyInit_pyexpat(void)2065 PyInit_pyexpat(void)
2066 {
2067     return PyModuleDef_Init(&pyexpatmodule);
2068 }
2069 
2070 static void
clear_handlers(xmlparseobject * self,int initial)2071 clear_handlers(xmlparseobject *self, int initial)
2072 {
2073     int i = 0;
2074 
2075     for (; handler_info[i].name != NULL; i++) {
2076         if (initial)
2077             self->handlers[i] = NULL;
2078         else {
2079             Py_CLEAR(self->handlers[i]);
2080             handler_info[i].setter(self->itself, NULL);
2081         }
2082     }
2083 }
2084 
2085 static struct HandlerInfo handler_info[] = {
2086 
2087 #define HANDLER_INFO(name) \
2088     {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},
2089 
2090     HANDLER_INFO(StartElementHandler)
2091     HANDLER_INFO(EndElementHandler)
2092     HANDLER_INFO(ProcessingInstructionHandler)
2093     HANDLER_INFO(CharacterDataHandler)
2094     HANDLER_INFO(UnparsedEntityDeclHandler)
2095     HANDLER_INFO(NotationDeclHandler)
2096     HANDLER_INFO(StartNamespaceDeclHandler)
2097     HANDLER_INFO(EndNamespaceDeclHandler)
2098     HANDLER_INFO(CommentHandler)
2099     HANDLER_INFO(StartCdataSectionHandler)
2100     HANDLER_INFO(EndCdataSectionHandler)
2101     HANDLER_INFO(DefaultHandler)
2102     HANDLER_INFO(DefaultHandlerExpand)
2103     HANDLER_INFO(NotStandaloneHandler)
2104     HANDLER_INFO(ExternalEntityRefHandler)
2105     HANDLER_INFO(StartDoctypeDeclHandler)
2106     HANDLER_INFO(EndDoctypeDeclHandler)
2107     HANDLER_INFO(EntityDeclHandler)
2108     HANDLER_INFO(XmlDeclHandler)
2109     HANDLER_INFO(ElementDeclHandler)
2110     HANDLER_INFO(AttlistDeclHandler)
2111 #if XML_COMBINED_VERSION >= 19504
2112     HANDLER_INFO(SkippedEntityHandler)
2113 #endif
2114 
2115 #undef HANDLER_INFO
2116 
2117     {NULL, NULL, NULL} /* sentinel */
2118 };
2119