1 #include "Python.h"
2 #include <ctype.h>
3
4 #include "structmember.h" // PyMemberDef
5 #include "expat.h"
6
7 #include "pyexpat.h"
8
9 /* Do not emit Clinic output to a file as that wreaks havoc with conditionally
10 included methods. */
11 /*[clinic input]
12 module pyexpat
13 [clinic start generated code]*/
14 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=b168d503a4490c15]*/
15
/* Expat version folded into a single integer
   (major*10000 + minor*100 + micro) so it can be compared in #if tests. */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Memory handling suite built from the PyObject_* allocator functions; it is
   handed to XML_ParserCreate_MM() when a parser is created. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
20
/* Kinds of callbacks a parser object can hold.  The enumerators are used as
   indices into xmlparseobject.handlers (e.g. self->handlers[StartElement]).
   NOTE(review): the handler_info table appears to be indexed in the same
   order -- confirm against its definition before reordering anything. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only available when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* Last enumerator; it has no trampoline in the visible code. */
    _DummyDecl
};
48
/* Per-module state of the pyexpat module. */
typedef struct {
    PyTypeObject *xml_parse_type;   /* Type used to allocate xmlparseobject
                                       instances. */
    PyObject *error;                /* Exception class instantiated and raised
                                       by set_error(). */
    PyObject *str_read;             /* Attribute name looked up on the file
                                       object in ParseFile(). */
} pyexpat_state;
54
55 static inline pyexpat_state*
pyexpat_get_state(PyObject * module)56 pyexpat_get_state(PyObject *module)
57 {
58 void *state = PyModule_GetState(module);
59 assert(state != NULL);
60 return (pyexpat_state *)state;
61 }
62
63 /* ----------------------------------------------------- */
64
65 /* Declarations for objects of type xmlparser */
66
/* Instance layout of an xmlparser object. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The underlying Expat parser. */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Python callables, indexed by
                                   enum HandlerTypes; NULL entry means
                                   "no handler set". */
} xmlparseobject;
82
83 #include "clinic/pyexpat.c.h"
84
/* Initial value of xmlparseobject.buffer_size for a newly created parser. */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Signature of the functions that install a C-level handler on an Expat
   parser, and the generic type under which those handlers are stored. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
typedef void* xmlhandler;

/* One row of the handler table. */
struct HandlerInfo {
    const char *name;           /* NULL marks the end of the table. */
    xmlhandlersetter setter;    /* Called as setter(parser, handler). */
    xmlhandler handler;         /* C trampoline passed to `setter`. */
    PyGetSetDef getset;         /* NOTE(review): presumably the attribute
                                   descriptor exposing the handler to Python;
                                   its use is not visible here. */
};

/* Handler table; scanned until an entry with a NULL name is found.  Only
   declared at this point -- the contents are provided elsewhere in the
   file. */
static struct HandlerInfo handler_info[64];
98
99 /* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102 static int
set_error_attr(PyObject * err,const char * name,int value)103 set_error_attr(PyObject *err, const char *name, int value)
104 {
105 PyObject *v = PyLong_FromLong(value);
106
107 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
108 Py_XDECREF(v);
109 return 0;
110 }
111 Py_DECREF(v);
112 return 1;
113 }
114
115 /* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
118 static PyObject *
set_error(pyexpat_state * state,xmlparseobject * self,enum XML_Error code)119 set_error(pyexpat_state *state, xmlparseobject *self, enum XML_Error code)
120 {
121 PyObject *err;
122 PyObject *buffer;
123 XML_Parser parser = self->itself;
124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
126
127 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
128 XML_ErrorString(code), lineno, column);
129 if (buffer == NULL)
130 return NULL;
131 err = PyObject_CallOneArg(state->error, buffer);
132 Py_DECREF(buffer);
133 if ( err != NULL
134 && set_error_attr(err, "code", code)
135 && set_error_attr(err, "offset", column)
136 && set_error_attr(err, "lineno", lineno)) {
137 PyErr_SetObject(state->error, err);
138 }
139 Py_XDECREF(err);
140 return NULL;
141 }
142
143 static int
have_handler(xmlparseobject * self,int type)144 have_handler(xmlparseobject *self, int type)
145 {
146 PyObject *handler = self->handlers[type];
147 return handler != NULL;
148 }
149
150 /* Convert a string of XML_Chars into a Unicode string.
151 Returns None if str is a null pointer. */
152
153 static PyObject *
conv_string_to_unicode(const XML_Char * str)154 conv_string_to_unicode(const XML_Char *str)
155 {
156 /* XXX currently this code assumes that XML_Char is 8-bit,
157 and hence in UTF-8. */
158 /* UTF-8 from Expat, Unicode desired */
159 if (str == NULL) {
160 Py_RETURN_NONE;
161 }
162 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
163 }
164
165 static PyObject *
conv_string_len_to_unicode(const XML_Char * str,int len)166 conv_string_len_to_unicode(const XML_Char *str, int len)
167 {
168 /* XXX currently this code assumes that XML_Char is 8-bit,
169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
172 Py_RETURN_NONE;
173 }
174 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
175 }
176
177 /* Callback routines */
178
/* Forward declaration; the definition appears later in the file. */
static void clear_handlers(xmlparseobject *self, int initial);

/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.

   It is installed by flag_error().  All arguments are ignored and the
   handler unconditionally returns 0. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
193
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since character data handler can't be safely removed
   from within the character data handler, but can be replaced.  It is
   used only from the character data handler trampoline, and must be
   used right after `flag_error()` is called.

   All arguments are ignored. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
205
206 static void
flag_error(xmlparseobject * self)207 flag_error(xmlparseobject *self)
208 {
209 clear_handlers(self, 0);
210 XML_SetExternalEntityRefHandler(self->itself,
211 error_external_entity_ref_handler);
212 }
213
214 static PyObject*
call_with_frame(const char * funcname,int lineno,PyObject * func,PyObject * args,xmlparseobject * self)215 call_with_frame(const char *funcname, int lineno, PyObject* func, PyObject* args,
216 xmlparseobject *self)
217 {
218 PyObject *res;
219
220 res = PyObject_Call(func, args, NULL);
221 if (res == NULL) {
222 _PyTraceback_Add(funcname, __FILE__, lineno);
223 XML_StopParser(self->itself, XML_FALSE);
224 }
225 return res;
226 }
227
228 static PyObject*
string_intern(xmlparseobject * self,const char * str)229 string_intern(xmlparseobject *self, const char* str)
230 {
231 PyObject *result = conv_string_to_unicode(str);
232 PyObject *value;
233 /* result can be NULL if the unicode conversion failed. */
234 if (!result)
235 return result;
236 if (!self->intern)
237 return result;
238 value = PyDict_GetItemWithError(self->intern, result);
239 if (!value) {
240 if (!PyErr_Occurred() &&
241 PyDict_SetItem(self->intern, result, result) == 0)
242 {
243 return result;
244 }
245 else {
246 Py_DECREF(result);
247 return NULL;
248 }
249 }
250 Py_INCREF(value);
251 Py_DECREF(result);
252 return value;
253 }
254
255 /* Return 0 on success, -1 on exception.
256 * flag_error() will be called before return if needed.
257 */
258 static int
call_character_handler(xmlparseobject * self,const XML_Char * buffer,int len)259 call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
260 {
261 PyObject *args;
262 PyObject *temp;
263
264 if (!have_handler(self, CharacterData))
265 return -1;
266
267 args = PyTuple_New(1);
268 if (args == NULL)
269 return -1;
270 temp = (conv_string_len_to_unicode(buffer, len));
271 if (temp == NULL) {
272 Py_DECREF(args);
273 flag_error(self);
274 XML_SetCharacterDataHandler(self->itself,
275 noop_character_data_handler);
276 return -1;
277 }
278 PyTuple_SET_ITEM(args, 0, temp);
279 /* temp is now a borrowed reference; consider it unused. */
280 self->in_callback = 1;
281 temp = call_with_frame("CharacterData", __LINE__,
282 self->handlers[CharacterData], args, self);
283 /* temp is an owned reference again, or NULL */
284 self->in_callback = 0;
285 Py_DECREF(args);
286 if (temp == NULL) {
287 flag_error(self);
288 XML_SetCharacterDataHandler(self->itself,
289 noop_character_data_handler);
290 return -1;
291 }
292 Py_DECREF(temp);
293 return 0;
294 }
295
296 static int
flush_character_buffer(xmlparseobject * self)297 flush_character_buffer(xmlparseobject *self)
298 {
299 int rc;
300 if (self->buffer == NULL || self->buffer_used == 0)
301 return 0;
302 rc = call_character_handler(self, self->buffer, self->buffer_used);
303 self->buffer_used = 0;
304 return rc;
305 }
306
307 static void
my_CharacterDataHandler(void * userData,const XML_Char * data,int len)308 my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
309 {
310 xmlparseobject *self = (xmlparseobject *) userData;
311
312 if (PyErr_Occurred())
313 return;
314
315 if (self->buffer == NULL)
316 call_character_handler(self, data, len);
317 else {
318 if ((self->buffer_used + len) > self->buffer_size) {
319 if (flush_character_buffer(self) < 0)
320 return;
321 /* handler might have changed; drop the rest on the floor
322 * if there isn't a handler anymore
323 */
324 if (!have_handler(self, CharacterData))
325 return;
326 }
327 if (len > self->buffer_size) {
328 call_character_handler(self, data, len);
329 self->buffer_used = 0;
330 }
331 else {
332 memcpy(self->buffer + self->buffer_used,
333 data, len * sizeof(XML_Char));
334 self->buffer_used += len;
335 }
336 }
337 }
338
339 static void
my_StartElementHandler(void * userData,const XML_Char * name,const XML_Char * atts[])340 my_StartElementHandler(void *userData,
341 const XML_Char *name, const XML_Char *atts[])
342 {
343 xmlparseobject *self = (xmlparseobject *)userData;
344
345 if (have_handler(self, StartElement)) {
346 PyObject *container, *rv, *args;
347 int i, max;
348
349 if (PyErr_Occurred())
350 return;
351
352 if (flush_character_buffer(self) < 0)
353 return;
354 /* Set max to the number of slots filled in atts[]; max/2 is
355 * the number of attributes we need to process.
356 */
357 if (self->specified_attributes) {
358 max = XML_GetSpecifiedAttributeCount(self->itself);
359 }
360 else {
361 max = 0;
362 while (atts[max] != NULL)
363 max += 2;
364 }
365 /* Build the container. */
366 if (self->ordered_attributes)
367 container = PyList_New(max);
368 else
369 container = PyDict_New();
370 if (container == NULL) {
371 flag_error(self);
372 return;
373 }
374 for (i = 0; i < max; i += 2) {
375 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
376 PyObject *v;
377 if (n == NULL) {
378 flag_error(self);
379 Py_DECREF(container);
380 return;
381 }
382 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
383 if (v == NULL) {
384 flag_error(self);
385 Py_DECREF(container);
386 Py_DECREF(n);
387 return;
388 }
389 if (self->ordered_attributes) {
390 PyList_SET_ITEM(container, i, n);
391 PyList_SET_ITEM(container, i+1, v);
392 }
393 else if (PyDict_SetItem(container, n, v)) {
394 flag_error(self);
395 Py_DECREF(n);
396 Py_DECREF(v);
397 Py_DECREF(container);
398 return;
399 }
400 else {
401 Py_DECREF(n);
402 Py_DECREF(v);
403 }
404 }
405 args = string_intern(self, name);
406 if (args == NULL) {
407 Py_DECREF(container);
408 return;
409 }
410 args = Py_BuildValue("(NN)", args, container);
411 if (args == NULL) {
412 return;
413 }
414 /* Container is now a borrowed reference; ignore it. */
415 self->in_callback = 1;
416 rv = call_with_frame("StartElement", __LINE__,
417 self->handlers[StartElement], args, self);
418 self->in_callback = 0;
419 Py_DECREF(args);
420 if (rv == NULL) {
421 flag_error(self);
422 return;
423 }
424 Py_DECREF(rv);
425 }
426 }
427
/* RC_HANDLER defines a static trampoline function my_<NAME>Handler that
 * forwards an Expat callback to the Python handler stored in
 * self->handlers[NAME].
 *
 *   RC            return type of the generated function
 *   NAME          HandlerTypes enumerator; also the name reported in the
 *                 traceback entry added by call_with_frame()
 *   PARAMS        parenthesized C parameter list of the callback
 *   INIT          extra declarations placed at the top of the function
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue; it may
 *                 refer to `self` and to the callback parameters
 *   CONVERSION    statement(s) executed after a successful call, while the
 *                 result `rv` is still alive
 *   RETURN        expression returned on every exit path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no handler
 * is set, when an exception is already pending, or when flushing the
 * character buffer fails.  flag_error() is called if building the argument
 * tuple or the handler call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(#NAME,__LINE__, \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Trampoline for callbacks that return nothing and receive the parser
   object through a `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline for callbacks that return an int: the handler's result is
   converted with PyLong_AsLong() and returned; 0 is returned on the early
   exit paths.
   NOTE(review): a failure of PyLong_AsLong() is not checked here. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
467
/* Trampolines generated with VOID_HANDLER.  In the Py_BuildValue formats
   below, "N" items are new references produced by string_intern() or
   conv_string_len_to_unicode(), "O&" items are converted from the raw
   XML_Char string by conv_string_to_unicode(), and "i" items are plain
   C ints. */

/* EndElement(name) */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* ProcessingInstruction(target, data) */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* UnparsedEntityDecl(entityName, base, systemId, publicId, notationName) */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* EntityDecl(entityName, is_parameter_entity, value, base, systemId,
              publicId, notationName); `value` is decoded using its explicit
   length rather than as a NUL-terminated string. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* XmlDecl(version, encoding, standalone) */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
515
516 static PyObject *
conv_content_model(XML_Content * const model,PyObject * (* conv_string)(const XML_Char *))517 conv_content_model(XML_Content * const model,
518 PyObject *(*conv_string)(const XML_Char *))
519 {
520 PyObject *result = NULL;
521 PyObject *children = PyTuple_New(model->numchildren);
522 int i;
523
524 if (children != NULL) {
525 assert(model->numchildren < INT_MAX);
526 for (i = 0; i < (int)model->numchildren; ++i) {
527 PyObject *child = conv_content_model(&model->children[i],
528 conv_string);
529 if (child == NULL) {
530 Py_XDECREF(children);
531 return NULL;
532 }
533 PyTuple_SET_ITEM(children, i, child);
534 }
535 result = Py_BuildValue("(iiO&N)",
536 model->type, model->quant,
537 conv_string,model->name, children);
538 }
539 return result;
540 }
541
542 static void
my_ElementDeclHandler(void * userData,const XML_Char * name,XML_Content * model)543 my_ElementDeclHandler(void *userData,
544 const XML_Char *name,
545 XML_Content *model)
546 {
547 xmlparseobject *self = (xmlparseobject *)userData;
548 PyObject *args = NULL;
549
550 if (have_handler(self, ElementDecl)) {
551 PyObject *rv = NULL;
552 PyObject *modelobj, *nameobj;
553
554 if (PyErr_Occurred())
555 return;
556
557 if (flush_character_buffer(self) < 0)
558 goto finally;
559 modelobj = conv_content_model(model, (conv_string_to_unicode));
560 if (modelobj == NULL) {
561 flag_error(self);
562 goto finally;
563 }
564 nameobj = string_intern(self, name);
565 if (nameobj == NULL) {
566 Py_DECREF(modelobj);
567 flag_error(self);
568 goto finally;
569 }
570 args = Py_BuildValue("NN", nameobj, modelobj);
571 if (args == NULL) {
572 flag_error(self);
573 goto finally;
574 }
575 self->in_callback = 1;
576 rv = call_with_frame("ElementDecl", __LINE__,
577 self->handlers[ElementDecl], args, self);
578 self->in_callback = 0;
579 if (rv == NULL) {
580 flag_error(self);
581 goto finally;
582 }
583 Py_DECREF(rv);
584 }
585 finally:
586 Py_XDECREF(args);
587 XML_FreeContentModel(self->itself, model);
588 return;
589 }
590
/* More generated trampolines.  In the Py_BuildValue formats, "N" items are
   new references produced by string_intern() or
   conv_string_len_to_unicode(), "O&" items are converted from the raw
   XML_Char string by conv_string_to_unicode(), and "i" items are plain
   C ints. */

/* AttlistDecl(elname, attname, att_type, dflt, isrequired) */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity(entityName, is_parameter_entity) */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* NotationDecl(notationName, base, systemId, publicId) */
VOID_HANDLER(NotationDecl,
                (void *userData,
                        const XML_Char *notationName,
                        const XML_Char *base,
                        const XML_Char *systemId,
                        const XML_Char *publicId),
                ("(NNNN)",
                 string_intern(self, notationName), string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)))

/* StartNamespaceDecl(prefix, uri) */
VOID_HANDLER(StartNamespaceDecl,
                (void *userData,
                      const XML_Char *prefix,
                      const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* EndNamespaceDecl(prefix) */
VOID_HANDLER(EndNamespaceDecl,
                (void *userData,
                    const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* Comment(data) */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* StartCdataSection() -- no arguments */
VOID_HANDLER(StartCdataSection,
               (void *userData),
                ("()"))

/* EndCdataSection() -- no arguments */
VOID_HANDLER(EndCdataSection,
               (void *userData),
                ("()"))

/* Default(data): `data` is decoded using the explicit length `len`. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* DefaultHandlerExpand(data): same argument shape as Default. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))
/* Alias for the generated my_DefaultHandlerExpandHandler without the
   doubled "Handler" suffix. */
#define my_DefaultHandlerExpand my_DefaultHandlerExpandHandler

/* NotStandalone() -- the handler's integer result is returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* ExternalEntityRef(context, base, systemId, publicId).  This callback
   receives the XML_Parser rather than the user data, so the parser object
   is obtained through XML_GetUserData().  The handler's integer result is
   returned to Expat.
   NOTE(review): a failure of PyLong_AsLong() is not checked here. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* StartDoctypeDecl(doctypeName, sysid, pubid, has_internal_subset) */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* EndDoctypeDecl() -- no arguments */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
683
684 /* ---------------------------------------------------------------- */
685 /*[clinic input]
686 class pyexpat.xmlparser "xmlparseobject *" "&Xmlparsetype"
687 [clinic start generated code]*/
688 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2393162385232e1c]*/
689
690
691 static PyObject *
get_parse_result(pyexpat_state * state,xmlparseobject * self,int rv)692 get_parse_result(pyexpat_state *state, xmlparseobject *self, int rv)
693 {
694 if (PyErr_Occurred()) {
695 return NULL;
696 }
697 if (rv == 0) {
698 return set_error(state, self, XML_GetErrorCode(self->itself));
699 }
700 if (flush_character_buffer(self) < 0) {
701 return NULL;
702 }
703 return PyLong_FromLong(rv);
704 }
705
/* Largest number of bytes handed to a single XML_Parse() call by Parse();
   longer input is fed in pieces of this size.  Must not exceed INT_MAX. */
#define MAX_CHUNK_SIZE (1 << 20)
707
708 /*[clinic input]
709 pyexpat.xmlparser.Parse
710
711 cls: defining_class
712 data: object
713 isfinal: bool(accept={int}) = False
714 /
715
716 Parse XML data.
717
718 `isfinal' should be true at end of input.
719 [clinic start generated code]*/
720
721 static PyObject *
pyexpat_xmlparser_Parse_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * data,int isfinal)722 pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
723 PyObject *data, int isfinal)
724 /*[clinic end generated code: output=8faffe07fe1f862a input=fc97f833558ca715]*/
725 {
726 const char *s;
727 Py_ssize_t slen;
728 Py_buffer view;
729 int rc;
730 pyexpat_state *state = PyType_GetModuleState(cls);
731
732 if (PyUnicode_Check(data)) {
733 view.buf = NULL;
734 s = PyUnicode_AsUTF8AndSize(data, &slen);
735 if (s == NULL)
736 return NULL;
737 /* Explicitly set UTF-8 encoding. Return code ignored. */
738 (void)XML_SetEncoding(self->itself, "utf-8");
739 }
740 else {
741 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
742 return NULL;
743 s = view.buf;
744 slen = view.len;
745 }
746
747 static_assert(MAX_CHUNK_SIZE <= INT_MAX,
748 "MAX_CHUNK_SIZE is larger than INT_MAX");
749 while (slen > MAX_CHUNK_SIZE) {
750 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
751 if (!rc)
752 goto done;
753 s += MAX_CHUNK_SIZE;
754 slen -= MAX_CHUNK_SIZE;
755 }
756
757 assert(slen <= INT_MAX);
758 rc = XML_Parse(self->itself, s, (int)slen, isfinal);
759
760 done:
761 if (view.buf != NULL) {
762 PyBuffer_Release(&view);
763 }
764 return get_parse_result(state, self, rc);
765 }
766
767 /* File reading copied from cPickle */
768
769 #define BUF_SIZE 2048
770
771 static int
readinst(char * buf,int buf_size,PyObject * meth)772 readinst(char *buf, int buf_size, PyObject *meth)
773 {
774 PyObject *str;
775 Py_ssize_t len;
776 const char *ptr;
777
778 str = PyObject_CallFunction(meth, "i", buf_size);
779 if (str == NULL)
780 goto error;
781
782 if (PyBytes_Check(str))
783 ptr = PyBytes_AS_STRING(str);
784 else if (PyByteArray_Check(str))
785 ptr = PyByteArray_AS_STRING(str);
786 else {
787 PyErr_Format(PyExc_TypeError,
788 "read() did not return a bytes object (type=%.400s)",
789 Py_TYPE(str)->tp_name);
790 goto error;
791 }
792 len = Py_SIZE(str);
793 if (len > buf_size) {
794 PyErr_Format(PyExc_ValueError,
795 "read() returned too much data: "
796 "%i bytes requested, %zd returned",
797 buf_size, len);
798 goto error;
799 }
800 memcpy(buf, ptr, len);
801 Py_DECREF(str);
802 /* len <= buf_size <= INT_MAX */
803 return (int)len;
804
805 error:
806 Py_XDECREF(str);
807 return -1;
808 }
809
810 /*[clinic input]
811 pyexpat.xmlparser.ParseFile
812
813 cls: defining_class
814 file: object
815 /
816
817 Parse XML data from file-like object.
818 [clinic start generated code]*/
819
820 static PyObject *
pyexpat_xmlparser_ParseFile_impl(xmlparseobject * self,PyTypeObject * cls,PyObject * file)821 pyexpat_xmlparser_ParseFile_impl(xmlparseobject *self, PyTypeObject *cls,
822 PyObject *file)
823 /*[clinic end generated code: output=34780a094c8ca3ae input=ba4bc9c541684793]*/
824 {
825 int rv = 1;
826 PyObject *readmethod = NULL;
827
828 pyexpat_state *state = PyType_GetModuleState(cls);
829
830 if (_PyObject_LookupAttr(file, state->str_read, &readmethod) < 0) {
831 return NULL;
832 }
833 if (readmethod == NULL) {
834 PyErr_SetString(PyExc_TypeError,
835 "argument must have 'read' attribute");
836 return NULL;
837 }
838 for (;;) {
839 int bytes_read;
840 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
841 if (buf == NULL) {
842 Py_XDECREF(readmethod);
843 return get_parse_result(state, self, 0);
844 }
845
846 bytes_read = readinst(buf, BUF_SIZE, readmethod);
847 if (bytes_read < 0) {
848 Py_DECREF(readmethod);
849 return NULL;
850 }
851 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
852 if (PyErr_Occurred()) {
853 Py_XDECREF(readmethod);
854 return NULL;
855 }
856
857 if (!rv || bytes_read == 0)
858 break;
859 }
860 Py_XDECREF(readmethod);
861 return get_parse_result(state, self, rv);
862 }
863
864 /*[clinic input]
865 pyexpat.xmlparser.SetBase
866
867 base: str
868 /
869
870 Set the base URL for the parser.
871 [clinic start generated code]*/
872
873 static PyObject *
pyexpat_xmlparser_SetBase_impl(xmlparseobject * self,const char * base)874 pyexpat_xmlparser_SetBase_impl(xmlparseobject *self, const char *base)
875 /*[clinic end generated code: output=c212ddceb607b539 input=c684e5de895ee1a8]*/
876 {
877 if (!XML_SetBase(self->itself, base)) {
878 return PyErr_NoMemory();
879 }
880 Py_RETURN_NONE;
881 }
882
883 /*[clinic input]
884 pyexpat.xmlparser.GetBase
885
886 Return base URL string for the parser.
887 [clinic start generated code]*/
888
889 static PyObject *
pyexpat_xmlparser_GetBase_impl(xmlparseobject * self)890 pyexpat_xmlparser_GetBase_impl(xmlparseobject *self)
891 /*[clinic end generated code: output=2886cb21f9a8739a input=918d71c38009620e]*/
892 {
893 return Py_BuildValue("z", XML_GetBase(self->itself));
894 }
895
896 /*[clinic input]
897 pyexpat.xmlparser.GetInputContext
898
899 Return the untranslated text of the input that caused the current event.
900
901 If the event was generated by a large amount of text (such as a start tag
902 for an element with many attributes), not all of the text may be available.
903 [clinic start generated code]*/
904
905 static PyObject *
pyexpat_xmlparser_GetInputContext_impl(xmlparseobject * self)906 pyexpat_xmlparser_GetInputContext_impl(xmlparseobject *self)
907 /*[clinic end generated code: output=a88026d683fc22cc input=034df8712db68379]*/
908 {
909 if (self->in_callback) {
910 int offset, size;
911 const char *buffer
912 = XML_GetInputContext(self->itself, &offset, &size);
913
914 if (buffer != NULL)
915 return PyBytes_FromStringAndSize(buffer + offset,
916 size - offset);
917 else
918 Py_RETURN_NONE;
919 }
920 else
921 Py_RETURN_NONE;
922 }
923
924 /*[clinic input]
925 pyexpat.xmlparser.ExternalEntityParserCreate
926
927 cls: defining_class
928 context: str(accept={str, NoneType})
929 encoding: str = NULL
930 /
931
932 Create a parser for parsing an external entity based on the information passed to the ExternalEntityRefHandler.
933 [clinic start generated code]*/
934
static PyObject *
pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
                                                  PyTypeObject *cls,
                                                  const char *context,
                                                  const char *encoding)
/*[clinic end generated code: output=01d4472b49cb3f92 input=ec70c6b9e6e9619a]*/
{
    /* Implementation of xmlparser.ExternalEntityParserCreate().
     *
     * Creates a new xmlparser object around the Expat parser returned by
     * XML_ExternalEntityParserCreate(self->itself, context, encoding).
     * Settings, the intern dictionary and all handlers are copied from
     * `self`.  Returns the new object, or NULL with MemoryError set when an
     * allocation fails.
     */
    xmlparseobject *new_parser;
    int i;

    pyexpat_state *state = PyType_GetModuleState(cls);

    new_parser = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
    if (new_parser == NULL) {
        return NULL;
    }

    /* Initialise every field before anything below can fail: the error
       paths release the half-built object with Py_DECREF().
       NOTE(review): this presumes the type's deallocator copes with NULL
       `buffer`, `handlers` and `itself` -- confirm. */
    new_parser->buffer_size = self->buffer_size;
    new_parser->buffer_used = 0;
    new_parser->buffer = NULL;
    new_parser->ordered_attributes = self->ordered_attributes;
    new_parser->specified_attributes = self->specified_attributes;
    new_parser->in_callback = 0;
    new_parser->ns_prefixes = self->ns_prefixes;
    new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
                                                        encoding);
    new_parser->handlers = 0;
    /* The intern dictionary is shared with the parent parser. */
    new_parser->intern = self->intern;
    Py_XINCREF(new_parser->intern);

    /* Give the new parser its own character buffer only if the parent has
       buffering enabled.
       NOTE(review): the allocation is buffer_size bytes although buffer_size
       is documented as a count of XML_Char units; the two match only when
       XML_Char is one byte wide. */
    if (self->buffer != NULL) {
        new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
        if (new_parser->buffer == NULL) {
            Py_DECREF(new_parser);
            return PyErr_NoMemory();
        }
    }
    /* Failure to create the Expat parser is reported as MemoryError. */
    if (!new_parser->itself) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }

    XML_SetUserData(new_parser->itself, (void *)new_parser);

    /* allocate and clear handlers first */
    /* This loop only counts the entries of handler_info. */
    for (i = 0; handler_info[i].name != NULL; i++)
        /* do nothing */;

    new_parser->handlers = PyMem_New(PyObject *, i);
    if (!new_parser->handlers) {
        Py_DECREF(new_parser);
        return PyErr_NoMemory();
    }
    clear_handlers(new_parser, 1);

    /* then copy handlers from self */
    for (i = 0; handler_info[i].name != NULL; i++) {
        PyObject *handler = self->handlers[i];
        if (handler != NULL) {
            Py_INCREF(handler);
            new_parser->handlers[i] = handler;
            /* Install the matching C trampoline on the new Expat parser. */
            handler_info[i].setter(new_parser->itself,
                                   handler_info[i].handler);
        }
    }

    /* Start GC tracking only once the object is fully initialised. */
    PyObject_GC_Track(new_parser);
    return (PyObject *)new_parser;
}
1004
1005 /*[clinic input]
1006 pyexpat.xmlparser.SetParamEntityParsing
1007
1008 flag: int
1009 /
1010
1011 Controls parsing of parameter entities (including the external DTD subset).
1012
1013 Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,
1014 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and
1015 XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag
1016 was successful.
1017 [clinic start generated code]*/
1018
1019 static PyObject *
pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject * self,int flag)1020 pyexpat_xmlparser_SetParamEntityParsing_impl(xmlparseobject *self, int flag)
1021 /*[clinic end generated code: output=18668ee8e760d64c input=8aea19b4b15e9af1]*/
1022 {
1023 flag = XML_SetParamEntityParsing(self->itself, flag);
1024 return PyLong_FromLong(flag);
1025 }
1026
1027
1028 #if XML_COMBINED_VERSION >= 19505
1029 /*[clinic input]
1030 pyexpat.xmlparser.UseForeignDTD
1031
1032 cls: defining_class
1033 flag: bool = True
1034 /
1035
1036 Allows the application to provide an artificial external subset if one is not specified as part of the document instance.
1037
1038 This readily allows the use of a 'default' document type controlled by the
1039 application, while still getting the advantage of providing document type
1040 information to the parser. 'flag' defaults to True if not provided.
1041 [clinic start generated code]*/
1042
1043 static PyObject *
pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject * self,PyTypeObject * cls,int flag)1044 pyexpat_xmlparser_UseForeignDTD_impl(xmlparseobject *self, PyTypeObject *cls,
1045 int flag)
1046 /*[clinic end generated code: output=d7d98252bd25a20f input=23440ecb0573fb29]*/
1047 {
1048 pyexpat_state *state = PyType_GetModuleState(cls);
1049 enum XML_Error rc;
1050
1051 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
1052 if (rc != XML_ERROR_NONE) {
1053 return set_error(state, self, rc);
1054 }
1055 Py_RETURN_NONE;
1056 }
1057 #endif
1058
/* Method table of xmlparser objects.  Each *_METHODDEF macro expands to the
   table entry for the corresponding *_impl function above (the macros
   presumably come from the included clinic/pyexpat.c.h).  UseForeignDTD is
   only available with Expat 1.95.5 or newer. */
static struct PyMethodDef xmlparse_methods[] = {
    PYEXPAT_XMLPARSER_PARSE_METHODDEF
    PYEXPAT_XMLPARSER_PARSEFILE_METHODDEF
    PYEXPAT_XMLPARSER_SETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETBASE_METHODDEF
    PYEXPAT_XMLPARSER_GETINPUTCONTEXT_METHODDEF
    PYEXPAT_XMLPARSER_EXTERNALENTITYPARSERCREATE_METHODDEF
    PYEXPAT_XMLPARSER_SETPARAMENTITYPARSING_METHODDEF
#if XML_COMBINED_VERSION >= 19505
    PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF
#endif
    {NULL, NULL}  /* sentinel */
};
1072
1073 /* ---------- */
1074
1075
1076
1077 /* pyexpat international encoding support.
1078 Make it as simple as possible.
1079 */
1080
1081 static int
PyUnknownEncodingHandler(void * encodingHandlerData,const XML_Char * name,XML_Encoding * info)1082 PyUnknownEncodingHandler(void *encodingHandlerData,
1083 const XML_Char *name,
1084 XML_Encoding *info)
1085 {
1086 static unsigned char template_buffer[256] = {0};
1087 PyObject* u;
1088 int i;
1089 const void *data;
1090 unsigned int kind;
1091
1092 if (PyErr_Occurred())
1093 return XML_STATUS_ERROR;
1094
1095 if (template_buffer[1] == 0) {
1096 for (i = 0; i < 256; i++)
1097 template_buffer[i] = i;
1098 }
1099
1100 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1101 if (u == NULL || PyUnicode_READY(u)) {
1102 Py_XDECREF(u);
1103 return XML_STATUS_ERROR;
1104 }
1105
1106 if (PyUnicode_GET_LENGTH(u) != 256) {
1107 Py_DECREF(u);
1108 PyErr_SetString(PyExc_ValueError,
1109 "multi-byte encodings are not supported");
1110 return XML_STATUS_ERROR;
1111 }
1112
1113 kind = PyUnicode_KIND(u);
1114 data = PyUnicode_DATA(u);
1115 for (i = 0; i < 256; i++) {
1116 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1117 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1118 info->map[i] = ch;
1119 else
1120 info->map[i] = -1;
1121 }
1122
1123 info->data = NULL;
1124 info->convert = NULL;
1125 info->release = NULL;
1126 Py_DECREF(u);
1127
1128 return XML_STATUS_OK;
1129 }
1130
1131
1132 static PyObject *
newxmlparseobject(pyexpat_state * state,const char * encoding,const char * namespace_separator,PyObject * intern)1133 newxmlparseobject(pyexpat_state *state, const char *encoding,
1134 const char *namespace_separator, PyObject *intern)
1135 {
1136 int i;
1137 xmlparseobject *self;
1138
1139 self = PyObject_GC_New(xmlparseobject, state->xml_parse_type);
1140 if (self == NULL)
1141 return NULL;
1142
1143 self->buffer = NULL;
1144 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1145 self->buffer_used = 0;
1146 self->ordered_attributes = 0;
1147 self->specified_attributes = 0;
1148 self->in_callback = 0;
1149 self->ns_prefixes = 0;
1150 self->handlers = NULL;
1151 self->intern = intern;
1152 Py_XINCREF(self->intern);
1153
1154 /* namespace_separator is either NULL or contains one char + \0 */
1155 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1156 namespace_separator);
1157 if (self->itself == NULL) {
1158 PyErr_SetString(PyExc_RuntimeError,
1159 "XML_ParserCreate failed");
1160 Py_DECREF(self);
1161 return NULL;
1162 }
1163 #if XML_COMBINED_VERSION >= 20100
1164 /* This feature was added upstream in libexpat 2.1.0. */
1165 XML_SetHashSalt(self->itself,
1166 (unsigned long)_Py_HashSecret.expat.hashsalt);
1167 #endif
1168 XML_SetUserData(self->itself, (void *)self);
1169 XML_SetUnknownEncodingHandler(self->itself,
1170 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
1171
1172 for (i = 0; handler_info[i].name != NULL; i++)
1173 /* do nothing */;
1174
1175 self->handlers = PyMem_New(PyObject *, i);
1176 if (!self->handlers) {
1177 Py_DECREF(self);
1178 return PyErr_NoMemory();
1179 }
1180 clear_handlers(self, 1);
1181
1182 PyObject_GC_Track(self);
1183 return (PyObject*)self;
1184 }
1185
1186 static int
xmlparse_traverse(xmlparseobject * op,visitproc visit,void * arg)1187 xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1188 {
1189 for (int i = 0; handler_info[i].name != NULL; i++) {
1190 Py_VISIT(op->handlers[i]);
1191 }
1192 Py_VISIT(Py_TYPE(op));
1193 return 0;
1194 }
1195
/* tp_clear for xmlparser objects: drops the Python objects the parser
   refers to.  Always returns 0. */
static int
xmlparse_clear(xmlparseobject *op)
{
    /* Non-initial mode: each stored handler is released and the matching
       Expat callback is unregistered. */
    clear_handlers(op, 0);
    Py_CLEAR(op->intern);
    return 0;
}
1203
1204 static void
xmlparse_dealloc(xmlparseobject * self)1205 xmlparse_dealloc(xmlparseobject *self)
1206 {
1207 PyObject_GC_UnTrack(self);
1208 (void)xmlparse_clear(self);
1209 if (self->itself != NULL)
1210 XML_ParserFree(self->itself);
1211 self->itself = NULL;
1212
1213 if (self->handlers != NULL) {
1214 PyMem_Free(self->handlers);
1215 self->handlers = NULL;
1216 }
1217 if (self->buffer != NULL) {
1218 PyMem_Free(self->buffer);
1219 self->buffer = NULL;
1220 }
1221 PyTypeObject *tp = Py_TYPE(self);
1222 PyObject_GC_Del(self);
1223 Py_DECREF(tp);
1224 }
1225
1226
1227 static PyObject *
xmlparse_handler_getter(xmlparseobject * self,struct HandlerInfo * hi)1228 xmlparse_handler_getter(xmlparseobject *self, struct HandlerInfo *hi)
1229 {
1230 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1231 int handlernum = (int)(hi - handler_info);
1232 PyObject *result = self->handlers[handlernum];
1233 if (result == NULL)
1234 result = Py_None;
1235 Py_INCREF(result);
1236 return result;
1237 }
1238
1239 static int
xmlparse_handler_setter(xmlparseobject * self,PyObject * v,struct HandlerInfo * hi)1240 xmlparse_handler_setter(xmlparseobject *self, PyObject *v, struct HandlerInfo *hi)
1241 {
1242 assert((hi - handler_info) < (Py_ssize_t)Py_ARRAY_LENGTH(handler_info));
1243 int handlernum = (int)(hi - handler_info);
1244 if (v == NULL) {
1245 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1246 return -1;
1247 }
1248 if (handlernum == CharacterData) {
1249 /* If we're changing the character data handler, flush all
1250 * cached data with the old handler. Not sure there's a
1251 * "right" thing to do, though, but this probably won't
1252 * happen.
1253 */
1254 if (flush_character_buffer(self) < 0)
1255 return -1;
1256 }
1257
1258 xmlhandler c_handler = NULL;
1259 if (v == Py_None) {
1260 /* If this is the character data handler, and a character
1261 data handler is already active, we need to be more
1262 careful. What we can safely do is replace the existing
1263 character data handler callback function with a no-op
1264 function that will refuse to call Python. The downside
1265 is that this doesn't completely remove the character
1266 data handler from the C layer if there's any callback
1267 active, so Expat does a little more work than it
1268 otherwise would, but that's really an odd case. A more
1269 elaborate system of handlers and state could remove the
1270 C handler more effectively. */
1271 if (handlernum == CharacterData && self->in_callback)
1272 c_handler = noop_character_data_handler;
1273 v = NULL;
1274 }
1275 else if (v != NULL) {
1276 Py_INCREF(v);
1277 c_handler = handler_info[handlernum].handler;
1278 }
1279 Py_XSETREF(self->handlers[handlernum], v);
1280 handler_info[handlernum].setter(self->itself, c_handler);
1281 return 0;
1282 }
1283
/* INT_GETTER(name) defines xmlparse_<name>_getter(), a getter that calls
   Expat's XML_Get<name>() on the wrapped parser and returns the result,
   cast to long, as a Python int.  The closure argument is unused. */
#define INT_GETTER(name) \
    static PyObject * \
    xmlparse_##name##_getter(xmlparseobject *self, void *closure) \
    { \
        return PyLong_FromLong((long) XML_Get##name(self->itself)); \
    }
INT_GETTER(ErrorCode)
INT_GETTER(ErrorLineNumber)
INT_GETTER(ErrorColumnNumber)
INT_GETTER(ErrorByteIndex)
INT_GETTER(CurrentLineNumber)
INT_GETTER(CurrentColumnNumber)
INT_GETTER(CurrentByteIndex)

/* The macro is only needed for the definitions above. */
#undef INT_GETTER
1299
1300 static PyObject *
1301 xmlparse_buffer_text_getter(xmlparseobject *self, void *closure)
1302 {
1303 return PyBool_FromLong(self->buffer != NULL);
1304 }
1305
1306 static int
xmlparse_buffer_text_setter(xmlparseobject * self,PyObject * v,void * closure)1307 xmlparse_buffer_text_setter(xmlparseobject *self, PyObject *v, void *closure)
1308 {
1309 if (v == NULL) {
1310 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1311 return -1;
1312 }
1313 int b = PyObject_IsTrue(v);
1314 if (b < 0)
1315 return -1;
1316 if (b) {
1317 if (self->buffer == NULL) {
1318 self->buffer = PyMem_Malloc(self->buffer_size);
1319 if (self->buffer == NULL) {
1320 PyErr_NoMemory();
1321 return -1;
1322 }
1323 self->buffer_used = 0;
1324 }
1325 }
1326 else if (self->buffer != NULL) {
1327 if (flush_character_buffer(self) < 0)
1328 return -1;
1329 PyMem_Free(self->buffer);
1330 self->buffer = NULL;
1331 }
1332 return 0;
1333 }
1334
1335 static PyObject *
xmlparse_buffer_size_getter(xmlparseobject * self,void * closure)1336 xmlparse_buffer_size_getter(xmlparseobject *self, void *closure)
1337 {
1338 return PyLong_FromLong((long) self->buffer_size);
1339 }
1340
1341 static int
xmlparse_buffer_size_setter(xmlparseobject * self,PyObject * v,void * closure)1342 xmlparse_buffer_size_setter(xmlparseobject *self, PyObject *v, void *closure)
1343 {
1344 if (v == NULL) {
1345 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1346 return -1;
1347 }
1348 long new_buffer_size;
1349 if (!PyLong_Check(v)) {
1350 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1351 return -1;
1352 }
1353
1354 new_buffer_size = PyLong_AsLong(v);
1355 if (new_buffer_size <= 0) {
1356 if (!PyErr_Occurred())
1357 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1358 return -1;
1359 }
1360
1361 /* trivial case -- no change */
1362 if (new_buffer_size == self->buffer_size) {
1363 return 0;
1364 }
1365
1366 /* check maximum */
1367 if (new_buffer_size > INT_MAX) {
1368 char errmsg[100];
1369 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1370 PyErr_SetString(PyExc_ValueError, errmsg);
1371 return -1;
1372 }
1373
1374 if (self->buffer != NULL) {
1375 /* there is already a buffer */
1376 if (self->buffer_used != 0) {
1377 if (flush_character_buffer(self) < 0) {
1378 return -1;
1379 }
1380 }
1381 /* free existing buffer */
1382 PyMem_Free(self->buffer);
1383 }
1384 self->buffer = PyMem_Malloc(new_buffer_size);
1385 if (self->buffer == NULL) {
1386 PyErr_NoMemory();
1387 return -1;
1388 }
1389 self->buffer_size = new_buffer_size;
1390 return 0;
1391 }
1392
1393 static PyObject *
xmlparse_buffer_used_getter(xmlparseobject * self,void * closure)1394 xmlparse_buffer_used_getter(xmlparseobject *self, void *closure)
1395 {
1396 return PyLong_FromLong((long) self->buffer_used);
1397 }
1398
1399 static PyObject *
xmlparse_namespace_prefixes_getter(xmlparseobject * self,void * closure)1400 xmlparse_namespace_prefixes_getter(xmlparseobject *self, void *closure)
1401 {
1402 return PyBool_FromLong(self->ns_prefixes);
1403 }
1404
1405 static int
xmlparse_namespace_prefixes_setter(xmlparseobject * self,PyObject * v,void * closure)1406 xmlparse_namespace_prefixes_setter(xmlparseobject *self, PyObject *v, void *closure)
1407 {
1408 if (v == NULL) {
1409 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1410 return -1;
1411 }
1412 int b = PyObject_IsTrue(v);
1413 if (b < 0)
1414 return -1;
1415 self->ns_prefixes = b;
1416 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1417 return 0;
1418 }
1419
1420 static PyObject *
xmlparse_ordered_attributes_getter(xmlparseobject * self,void * closure)1421 xmlparse_ordered_attributes_getter(xmlparseobject *self, void *closure)
1422 {
1423 return PyBool_FromLong(self->ordered_attributes);
1424 }
1425
1426 static int
xmlparse_ordered_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1427 xmlparse_ordered_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1428 {
1429 if (v == NULL) {
1430 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1431 return -1;
1432 }
1433 int b = PyObject_IsTrue(v);
1434 if (b < 0)
1435 return -1;
1436 self->ordered_attributes = b;
1437 return 0;
1438 }
1439
1440 static PyObject *
xmlparse_specified_attributes_getter(xmlparseobject * self,void * closure)1441 xmlparse_specified_attributes_getter(xmlparseobject *self, void *closure)
1442 {
1443 return PyBool_FromLong((long) self->specified_attributes);
1444 }
1445
1446 static int
xmlparse_specified_attributes_setter(xmlparseobject * self,PyObject * v,void * closure)1447 xmlparse_specified_attributes_setter(xmlparseobject *self, PyObject *v, void *closure)
1448 {
1449 if (v == NULL) {
1450 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1451 return -1;
1452 }
1453 int b = PyObject_IsTrue(v);
1454 if (b < 0)
1455 return -1;
1456 self->specified_attributes = b;
1457 return 0;
1458 }
1459
/* Members exposed directly from the object struct: "intern" is a read-only
   view of the `intern` field. */
static PyMemberDef xmlparse_members[] = {
    {"intern", T_OBJECT, offsetof(xmlparseobject, intern), READONLY, NULL},
    {NULL}
};
1464
/* Build PyGetSetDef entries from the accessor naming scheme used in this
   file: XMLPARSE_GETTER_DEF(name) yields a read-only attribute backed by
   xmlparse_<name>_getter, XMLPARSE_GETTER_SETTER_DEF(name) additionally
   wires xmlparse_<name>_setter.  Neither sets a docstring or a closure. */
#define XMLPARSE_GETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, NULL, NULL},
#define XMLPARSE_GETTER_SETTER_DEF(name) \
    {#name, (getter)xmlparse_##name##_getter, \
            (setter)xmlparse_##name##_setter, NULL},

/* Fixed attributes of xmlparser objects (the per-handler attributes are
   added separately at module initialisation by init_handler_descrs()). */
static PyGetSetDef xmlparse_getsetlist[] = {
    XMLPARSE_GETTER_DEF(ErrorCode)
    XMLPARSE_GETTER_DEF(ErrorLineNumber)
    XMLPARSE_GETTER_DEF(ErrorColumnNumber)
    XMLPARSE_GETTER_DEF(ErrorByteIndex)
    XMLPARSE_GETTER_DEF(CurrentLineNumber)
    XMLPARSE_GETTER_DEF(CurrentColumnNumber)
    XMLPARSE_GETTER_DEF(CurrentByteIndex)
    XMLPARSE_GETTER_SETTER_DEF(buffer_size)
    XMLPARSE_GETTER_SETTER_DEF(buffer_text)
    XMLPARSE_GETTER_DEF(buffer_used)
    XMLPARSE_GETTER_SETTER_DEF(namespace_prefixes)
    XMLPARSE_GETTER_SETTER_DEF(ordered_attributes)
    XMLPARSE_GETTER_SETTER_DEF(specified_attributes)
    {NULL},
};

/* The helper macros are only needed for the table above. */
#undef XMLPARSE_GETTER_DEF
#undef XMLPARSE_GETTER_SETTER_DEF
1490
/* Docstring of the xmlparser type. */
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Slot table of the xmlparser heap type: deallocation and GC support plus
   the method, member and getset tables defined above. */
static PyType_Slot _xml_parse_type_spec_slots[] = {
    {Py_tp_dealloc, xmlparse_dealloc},
    {Py_tp_doc, (void *)Xmlparsetype__doc__},
    {Py_tp_traverse, xmlparse_traverse},
    {Py_tp_clear, xmlparse_clear},
    {Py_tp_methods, xmlparse_methods},
    {Py_tp_members, xmlparse_members},
    {Py_tp_getset, xmlparse_getsetlist},
    {0, 0}  /* sentinel */
};
1503
/* Specification from which pyexpat_exec() creates the "pyexpat.xmlparser"
   type.  The type takes part in garbage collection, and the flags mark it
   as not directly instantiable and as immutable. */
static PyType_Spec _xml_parse_type_spec = {
    .name = "pyexpat.xmlparser",
    .basicsize = sizeof(xmlparseobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = _xml_parse_type_spec_slots,
};
1511
1512 /* End of code for xmlparser objects */
1513 /* -------------------------------------------------------- */
1514
1515 /*[clinic input]
1516 pyexpat.ParserCreate
1517
1518 encoding: str(accept={str, NoneType}) = None
1519 namespace_separator: str(accept={str, NoneType}) = None
1520 intern: object = NULL
1521
1522 Return a new XML parser object.
1523 [clinic start generated code]*/
1524
1525 static PyObject *
pyexpat_ParserCreate_impl(PyObject * module,const char * encoding,const char * namespace_separator,PyObject * intern)1526 pyexpat_ParserCreate_impl(PyObject *module, const char *encoding,
1527 const char *namespace_separator, PyObject *intern)
1528 /*[clinic end generated code: output=295c0cf01ab1146c input=e8da8e8d7122cb5d]*/
1529 {
1530 PyObject *result;
1531 int intern_decref = 0;
1532
1533 if (namespace_separator != NULL
1534 && strlen(namespace_separator) > 1) {
1535 PyErr_SetString(PyExc_ValueError,
1536 "namespace_separator must be at most one"
1537 " character, omitted, or None");
1538 return NULL;
1539 }
1540 /* Explicitly passing None means no interning is desired.
1541 Not passing anything means that a new dictionary is used. */
1542 if (intern == Py_None)
1543 intern = NULL;
1544 else if (intern == NULL) {
1545 intern = PyDict_New();
1546 if (!intern)
1547 return NULL;
1548 intern_decref = 1;
1549 }
1550 else if (!PyDict_Check(intern)) {
1551 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1552 return NULL;
1553 }
1554
1555 pyexpat_state *state = pyexpat_get_state(module);
1556 result = newxmlparseobject(state, encoding, namespace_separator, intern);
1557 if (intern_decref) {
1558 Py_DECREF(intern);
1559 }
1560 return result;
1561 }
1562
1563 /*[clinic input]
1564 pyexpat.ErrorString
1565
1566 code: long
1567 /
1568
1569 Returns string error for given number.
1570 [clinic start generated code]*/
1571
1572 static PyObject *
pyexpat_ErrorString_impl(PyObject * module,long code)1573 pyexpat_ErrorString_impl(PyObject *module, long code)
1574 /*[clinic end generated code: output=2feae50d166f2174 input=cc67de010d9e62b3]*/
1575 {
1576 return Py_BuildValue("z", XML_ErrorString((int)code));
1577 }
1578
1579 /* List of methods defined in the module */
1580
/* Module-level functions: ParserCreate() and ErrorString().  The entries
   are *_METHODDEF macros defined outside this section. */
static struct PyMethodDef pyexpat_methods[] = {
    PYEXPAT_PARSERCREATE_METHODDEF
    PYEXPAT_ERRORSTRING_METHODDEF
    {NULL, NULL}  /* sentinel */
};
1586
1587 /* Module docstring */
1588
1589 PyDoc_STRVAR(pyexpat_module_documentation,
1590 "Python wrapper for Expat parser.");
1591
1592 /* Initialization function for the module */
1593
1594 #ifndef MODULE_NAME
1595 #define MODULE_NAME "pyexpat"
1596 #endif
1597
/* Install one getset descriptor per handler_info entry into the dictionary
   of the xmlparser type.  An attribute that already exists is left alone
   (PyDict_SetDefault).  Returns 0 on success, -1 on error. */
static int init_handler_descrs(pyexpat_state *state)
{
    PyTypeObject *parser_type = state->xml_parse_type;

    /* The type dictionary is modified directly, so the type must not have
       a valid version tag yet. */
    assert(!PyType_HasFeature(parser_type, Py_TPFLAGS_VALID_VERSION_TAG));

    for (struct HandlerInfo *hi = handler_info; hi->name != NULL; hi++) {
        /* Every descriptor shares the generic getter/setter; the closure
           tells them which table entry they serve. */
        hi->getset.name = hi->name;
        hi->getset.get = (getter)xmlparse_handler_getter;
        hi->getset.set = (setter)xmlparse_handler_setter;
        hi->getset.closure = hi;

        PyObject *descr = PyDescr_NewGetSet(parser_type, &hi->getset);
        if (descr == NULL) {
            return -1;
        }

        PyObject *in_dict = PyDict_SetDefault(parser_type->tp_dict,
                                              PyDescr_NAME(descr), descr);
        Py_DECREF(descr);
        if (in_dict == NULL) {
            return -1;
        }
    }
    return 0;
}
1621
1622 static PyObject *
add_submodule(PyObject * mod,const char * fullname)1623 add_submodule(PyObject *mod, const char *fullname)
1624 {
1625 const char *name = strrchr(fullname, '.') + 1;
1626
1627 PyObject *submodule = PyModule_New(fullname);
1628 if (submodule == NULL) {
1629 return NULL;
1630 }
1631
1632 PyObject *mod_name = PyUnicode_FromString(fullname);
1633 if (mod_name == NULL) {
1634 Py_DECREF(submodule);
1635 return NULL;
1636 }
1637
1638 if (_PyImport_SetModule(mod_name, submodule) < 0) {
1639 Py_DECREF(submodule);
1640 Py_DECREF(mod_name);
1641 return NULL;
1642 }
1643 Py_DECREF(mod_name);
1644
1645 /* gives away the reference to the submodule */
1646 if (PyModule_AddObject(mod, name, submodule) < 0) {
1647 Py_DECREF(submodule);
1648 return NULL;
1649 }
1650
1651 return submodule;
1652 }
1653
/* One row of the table that describes the Expat error codes. */
struct ErrorInfo {
    const char * name;  /* Error constant name, e.g. "XML_ERROR_NO_MEMORY" */
    const char * description;  /* Error description as returned by XML_ErrorString(<int>) */
};
1658
/* Table of Expat errors.  The position of an entry is used as its numeric
   error code (see add_error()), so the order of the rows matters.  The
   descriptions are only a fallback for when XML_ErrorString() returns
   NULL. */
static
struct ErrorInfo error_info_of[] = {
    {NULL, NULL},  /* XML_ERROR_NONE (value 0) is not exposed */

    {"XML_ERROR_NO_MEMORY", "out of memory"},
    {"XML_ERROR_SYNTAX", "syntax error"},
    {"XML_ERROR_NO_ELEMENTS", "no element found"},
    {"XML_ERROR_INVALID_TOKEN", "not well-formed (invalid token)"},
    {"XML_ERROR_UNCLOSED_TOKEN", "unclosed token"},
    {"XML_ERROR_PARTIAL_CHAR", "partial character"},
    {"XML_ERROR_TAG_MISMATCH", "mismatched tag"},
    {"XML_ERROR_DUPLICATE_ATTRIBUTE", "duplicate attribute"},
    {"XML_ERROR_JUNK_AFTER_DOC_ELEMENT", "junk after document element"},
    {"XML_ERROR_PARAM_ENTITY_REF", "illegal parameter entity reference"},
    {"XML_ERROR_UNDEFINED_ENTITY", "undefined entity"},
    {"XML_ERROR_RECURSIVE_ENTITY_REF", "recursive entity reference"},
    {"XML_ERROR_ASYNC_ENTITY", "asynchronous entity"},
    {"XML_ERROR_BAD_CHAR_REF", "reference to invalid character number"},
    {"XML_ERROR_BINARY_ENTITY_REF", "reference to binary entity"},
    {"XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF", "reference to external entity in attribute"},
    {"XML_ERROR_MISPLACED_XML_PI", "XML or text declaration not at start of entity"},
    {"XML_ERROR_UNKNOWN_ENCODING", "unknown encoding"},
    {"XML_ERROR_INCORRECT_ENCODING", "encoding specified in XML declaration is incorrect"},
    {"XML_ERROR_UNCLOSED_CDATA_SECTION", "unclosed CDATA section"},
    {"XML_ERROR_EXTERNAL_ENTITY_HANDLING", "error in processing external entity reference"},
    {"XML_ERROR_NOT_STANDALONE", "document is not standalone"},
    {"XML_ERROR_UNEXPECTED_STATE", "unexpected parser state - please send a bug report"},
    {"XML_ERROR_ENTITY_DECLARED_IN_PE", "entity declared in parameter entity"},
    {"XML_ERROR_FEATURE_REQUIRES_XML_DTD", "requested feature requires XML_DTD support in Expat"},
    {"XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING", "cannot change setting once parsing has begun"},

    /* Added in Expat 1.95.7. */
    {"XML_ERROR_UNBOUND_PREFIX", "unbound prefix"},

    /* Added in Expat 1.95.8. */
    {"XML_ERROR_UNDECLARING_PREFIX", "must not undeclare prefix"},
    {"XML_ERROR_INCOMPLETE_PE", "incomplete markup in parameter entity"},
    {"XML_ERROR_XML_DECL", "XML declaration not well-formed"},
    {"XML_ERROR_TEXT_DECL", "text declaration not well-formed"},
    {"XML_ERROR_PUBLICID", "illegal character(s) in public id"},
    {"XML_ERROR_SUSPENDED", "parser suspended"},
    {"XML_ERROR_NOT_SUSPENDED", "parser not suspended"},
    {"XML_ERROR_ABORTED", "parsing aborted"},
    {"XML_ERROR_FINISHED", "parsing finished"},
    {"XML_ERROR_SUSPEND_PE", "cannot suspend in external parameter entity"},

    /* Added in 2.0.0. */
    {"XML_ERROR_RESERVED_PREFIX_XML", "reserved prefix (xml) must not be undeclared or bound to another namespace name"},
    {"XML_ERROR_RESERVED_PREFIX_XMLNS", "reserved prefix (xmlns) must not be declared or undeclared"},
    {"XML_ERROR_RESERVED_NAMESPACE_URI", "prefix must not be bound to one of the reserved namespace names"},

    /* Added in 2.2.1. */
    {"XML_ERROR_INVALID_ARGUMENT", "invalid argument"},

    /* Added in 2.3.0. */
    {"XML_ERROR_NO_BUFFER", "a successful prior call to function XML_GetBuffer is required"},

    /* Added in 2.4.0. */
    {"XML_ERROR_AMPLIFICATION_LIMIT_BREACH", "limit on input amplification factor (from DTD and entities) breached"}
};
1719
1720 static int
add_error(PyObject * errors_module,PyObject * codes_dict,PyObject * rev_codes_dict,size_t error_index)1721 add_error(PyObject *errors_module, PyObject *codes_dict,
1722 PyObject *rev_codes_dict, size_t error_index)
1723 {
1724 const char * const name = error_info_of[error_index].name;
1725 const int error_code = (int)error_index;
1726
1727 /* NOTE: This keeps the source of truth regarding error
1728 * messages with libexpat and (by definiton) in bulletproof sync
1729 * with the other uses of the XML_ErrorString function
1730 * elsewhere within this file. pyexpat's copy of the messages
1731 * only acts as a fallback in case of outdated runtime libexpat,
1732 * where it returns NULL. */
1733 const char *error_string = XML_ErrorString(error_code);
1734 if (error_string == NULL) {
1735 error_string = error_info_of[error_index].description;
1736 }
1737
1738 if (PyModule_AddStringConstant(errors_module, name, error_string) < 0) {
1739 return -1;
1740 }
1741
1742 PyObject *num = PyLong_FromLong(error_code);
1743 if (num == NULL) {
1744 return -1;
1745 }
1746
1747 if (PyDict_SetItemString(codes_dict, error_string, num) < 0) {
1748 Py_DECREF(num);
1749 return -1;
1750 }
1751
1752 PyObject *str = PyUnicode_FromString(error_string);
1753 if (str == NULL) {
1754 Py_DECREF(num);
1755 return -1;
1756 }
1757
1758 int res = PyDict_SetItem(rev_codes_dict, num, str);
1759 Py_DECREF(str);
1760 Py_DECREF(num);
1761 if (res < 0) {
1762 return -1;
1763 }
1764
1765 return 0;
1766 }
1767
1768 static int
add_errors_module(PyObject * mod)1769 add_errors_module(PyObject *mod)
1770 {
1771 PyObject *errors_module = add_submodule(mod, MODULE_NAME ".errors");
1772 if (errors_module == NULL) {
1773 return -1;
1774 }
1775
1776 PyObject *codes_dict = PyDict_New();
1777 PyObject *rev_codes_dict = PyDict_New();
1778 if (codes_dict == NULL || rev_codes_dict == NULL) {
1779 goto error;
1780 }
1781
1782 size_t error_index = 0;
1783 for (; error_index < sizeof(error_info_of) / sizeof(struct ErrorInfo); error_index++) {
1784 if (error_info_of[error_index].name == NULL) {
1785 continue;
1786 }
1787
1788 if (add_error(errors_module, codes_dict, rev_codes_dict, error_index) < 0) {
1789 goto error;
1790 }
1791 }
1792
1793 if (PyModule_AddStringConstant(errors_module, "__doc__",
1794 "Constants used to describe "
1795 "error conditions.") < 0) {
1796 goto error;
1797 }
1798
1799 Py_INCREF(codes_dict);
1800 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0) {
1801 Py_DECREF(codes_dict);
1802 goto error;
1803 }
1804 Py_CLEAR(codes_dict);
1805
1806 Py_INCREF(rev_codes_dict);
1807 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0) {
1808 Py_DECREF(rev_codes_dict);
1809 goto error;
1810 }
1811 Py_CLEAR(rev_codes_dict);
1812
1813 return 0;
1814
1815 error:
1816 Py_XDECREF(codes_dict);
1817 Py_XDECREF(rev_codes_dict);
1818 return -1;
1819 }
1820
1821 static int
add_model_module(PyObject * mod)1822 add_model_module(PyObject *mod)
1823 {
1824 PyObject *model_module = add_submodule(mod, MODULE_NAME ".model");
1825 if (model_module == NULL) {
1826 return -1;
1827 }
1828
1829 #define MYCONST(c) do { \
1830 if (PyModule_AddIntConstant(model_module, #c, c) < 0) { \
1831 return -1; \
1832 } \
1833 } while(0)
1834
1835 if (PyModule_AddStringConstant(
1836 model_module, "__doc__",
1837 "Constants used to interpret content model information.") < 0) {
1838 return -1;
1839 }
1840
1841 MYCONST(XML_CTYPE_EMPTY);
1842 MYCONST(XML_CTYPE_ANY);
1843 MYCONST(XML_CTYPE_MIXED);
1844 MYCONST(XML_CTYPE_NAME);
1845 MYCONST(XML_CTYPE_CHOICE);
1846 MYCONST(XML_CTYPE_SEQ);
1847
1848 MYCONST(XML_CQUANT_NONE);
1849 MYCONST(XML_CQUANT_OPT);
1850 MYCONST(XML_CQUANT_REP);
1851 MYCONST(XML_CQUANT_PLUS);
1852 #undef MYCONST
1853 return 0;
1854 }
1855
#if XML_COMBINED_VERSION > 19505
/* Add the module attribute "features": a list of (name, value) tuples, one
   for each entry reported by XML_GetFeatureList().  Returns 0 on success,
   -1 on error. */
static int
add_features(PyObject *mod)
{
    PyObject *list = PyList_New(0);
    if (list == NULL) {
        return -1;
    }

    const XML_Feature *features = XML_GetFeatureList();
    for (size_t i = 0; features[i].feature != XML_FEATURE_END; ++i) {
        /* XML_Feature.value is a long int, so it has to be converted with
           the "l" format unit; "i" would read an int from the varargs. */
        PyObject *item = Py_BuildValue("sl", features[i].name,
                                       features[i].value);
        if (item == NULL) {
            goto error;
        }
        int ok = PyList_Append(list, item);
        Py_DECREF(item);
        if (ok < 0) {
            goto error;
        }
    }
    /* On success the module takes over the reference to the list. */
    if (PyModule_AddObject(mod, "features", list) < 0) {
        goto error;
    }
    return 0;

error:
    Py_DECREF(list);
    return -1;
}
#endif
1888
1889 static int
pyexpat_exec(PyObject * mod)1890 pyexpat_exec(PyObject *mod)
1891 {
1892 pyexpat_state *state = pyexpat_get_state(mod);
1893 state->str_read = PyUnicode_InternFromString("read");
1894 if (state->str_read == NULL) {
1895 return -1;
1896 }
1897 state->xml_parse_type = (PyTypeObject *)PyType_FromModuleAndSpec(
1898 mod, &_xml_parse_type_spec, NULL);
1899
1900 if (state->xml_parse_type == NULL) {
1901 return -1;
1902 }
1903
1904 if (init_handler_descrs(state) < 0) {
1905 return -1;
1906 }
1907 state->error = PyErr_NewException("xml.parsers.expat.ExpatError",
1908 NULL, NULL);
1909 if (state->error == NULL) {
1910 return -1;
1911 }
1912
1913 /* Add some symbolic constants to the module */
1914
1915 if (PyModule_AddObjectRef(mod, "error", state->error) < 0) {
1916 return -1;
1917 }
1918
1919 if (PyModule_AddObjectRef(mod, "ExpatError", state->error) < 0) {
1920 return -1;
1921 }
1922
1923 if (PyModule_AddObjectRef(mod, "XMLParserType",
1924 (PyObject *) state->xml_parse_type) < 0) {
1925 return -1;
1926 }
1927
1928 if (PyModule_AddStringConstant(mod, "EXPAT_VERSION",
1929 XML_ExpatVersion()) < 0) {
1930 return -1;
1931 }
1932 {
1933 XML_Expat_Version info = XML_ExpatVersionInfo();
1934 PyObject *versionInfo = Py_BuildValue("(iii)",
1935 info.major,
1936 info.minor,
1937 info.micro);
1938 if (PyModule_AddObject(mod, "version_info", versionInfo) < 0) {
1939 Py_DECREF(versionInfo);
1940 return -1;
1941 }
1942 }
1943 /* XXX When Expat supports some way of figuring out how it was
1944 compiled, this should check and set native_encoding
1945 appropriately.
1946 */
1947 if (PyModule_AddStringConstant(mod, "native_encoding", "UTF-8") < 0) {
1948 return -1;
1949 }
1950
1951 if (add_errors_module(mod) < 0) {
1952 return -1;
1953 }
1954
1955 if (add_model_module(mod) < 0) {
1956 return -1;
1957 }
1958
1959 #if XML_COMBINED_VERSION > 19505
1960 if (add_features(mod) < 0) {
1961 return -1;
1962 }
1963 #endif
1964
1965 #define MYCONST(c) do { \
1966 if (PyModule_AddIntConstant(mod, #c, c) < 0) { \
1967 return -1; \
1968 } \
1969 } while(0)
1970
1971 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1972 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1973 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
1974 #undef MYCONST
1975
1976 static struct PyExpat_CAPI capi;
1977 /* initialize pyexpat dispatch table */
1978 capi.size = sizeof(capi);
1979 capi.magic = PyExpat_CAPI_MAGIC;
1980 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1981 capi.MINOR_VERSION = XML_MINOR_VERSION;
1982 capi.MICRO_VERSION = XML_MICRO_VERSION;
1983 capi.ErrorString = XML_ErrorString;
1984 capi.GetErrorCode = XML_GetErrorCode;
1985 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1986 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
1987 capi.Parse = XML_Parse;
1988 capi.ParserCreate_MM = XML_ParserCreate_MM;
1989 capi.ParserFree = XML_ParserFree;
1990 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1991 capi.SetCommentHandler = XML_SetCommentHandler;
1992 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1993 capi.SetElementHandler = XML_SetElementHandler;
1994 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1995 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1996 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1997 capi.SetUserData = XML_SetUserData;
1998 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
1999 capi.SetEncoding = XML_SetEncoding;
2000 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
2001 #if XML_COMBINED_VERSION >= 20100
2002 capi.SetHashSalt = XML_SetHashSalt;
2003 #else
2004 capi.SetHashSalt = NULL;
2005 #endif
2006
2007 /* export using capsule */
2008 PyObject *capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
2009 if (capi_object == NULL) {
2010 return -1;
2011 }
2012
2013 if (PyModule_AddObject(mod, "expat_CAPI", capi_object) < 0) {
2014 Py_DECREF(capi_object);
2015 return -1;
2016 }
2017
2018 return 0;
2019 }
2020
2021 static int
pyexpat_traverse(PyObject * module,visitproc visit,void * arg)2022 pyexpat_traverse(PyObject *module, visitproc visit, void *arg)
2023 {
2024 pyexpat_state *state = pyexpat_get_state(module);
2025 Py_VISIT(state->xml_parse_type);
2026 Py_VISIT(state->error);
2027 Py_VISIT(state->str_read);
2028 return 0;
2029 }
2030
2031 static int
pyexpat_clear(PyObject * module)2032 pyexpat_clear(PyObject *module)
2033 {
2034 pyexpat_state *state = pyexpat_get_state(module);
2035 Py_CLEAR(state->xml_parse_type);
2036 Py_CLEAR(state->error);
2037 Py_CLEAR(state->str_read);
2038 return 0;
2039 }
2040
2041 static void
pyexpat_free(void * module)2042 pyexpat_free(void *module)
2043 {
2044 pyexpat_clear((PyObject *)module);
2045 }
2046
/* Module slots: pyexpat_exec() is the only initialisation step. */
static PyModuleDef_Slot pyexpat_slots[] = {
    {Py_mod_exec, pyexpat_exec},
    {0, NULL}  /* sentinel */
};
2051
/* Module definition.  The per-module state is a pyexpat_state; the
   traverse/clear/free hooks manage the objects stored in it. */
static struct PyModuleDef pyexpatmodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = MODULE_NAME,
    .m_doc = pyexpat_module_documentation,
    .m_size = sizeof(pyexpat_state),
    .m_methods = pyexpat_methods,
    .m_slots = pyexpat_slots,
    .m_traverse = pyexpat_traverse,
    .m_clear = pyexpat_clear,
    .m_free = pyexpat_free
};
2063
/* Entry point called by the import machinery.  It only hands the module
   definition to PyModuleDef_Init(); the module contents are set up by the
   Py_mod_exec slot (pyexpat_exec). */
PyMODINIT_FUNC
PyInit_pyexpat(void)
{
    return PyModuleDef_Init(&pyexpatmodule);
}
2069
2070 static void
clear_handlers(xmlparseobject * self,int initial)2071 clear_handlers(xmlparseobject *self, int initial)
2072 {
2073 int i = 0;
2074
2075 for (; handler_info[i].name != NULL; i++) {
2076 if (initial)
2077 self->handlers[i] = NULL;
2078 else {
2079 Py_CLEAR(self->handlers[i]);
2080 handler_info[i].setter(self->itself, NULL);
2081 }
2082 }
2083 }
2084
/* Table of all Expat handlers that are exposed as xmlparser attributes.
   Each row holds the attribute name, the Expat XML_Set<name>() function
   that registers the callback, and the C callback my_<name>.  The row index
   is the slot number in xmlparseobject.handlers and is compared against
   enum HandlerTypes values (e.g. CharacterData), so the order here has to
   match that enum. */
static struct HandlerInfo handler_info[] = {

#define HANDLER_INFO(name) \
    {#name, (xmlhandlersetter)XML_Set##name, (xmlhandler)my_##name},

    HANDLER_INFO(StartElementHandler)
    HANDLER_INFO(EndElementHandler)
    HANDLER_INFO(ProcessingInstructionHandler)
    HANDLER_INFO(CharacterDataHandler)
    HANDLER_INFO(UnparsedEntityDeclHandler)
    HANDLER_INFO(NotationDeclHandler)
    HANDLER_INFO(StartNamespaceDeclHandler)
    HANDLER_INFO(EndNamespaceDeclHandler)
    HANDLER_INFO(CommentHandler)
    HANDLER_INFO(StartCdataSectionHandler)
    HANDLER_INFO(EndCdataSectionHandler)
    HANDLER_INFO(DefaultHandler)
    HANDLER_INFO(DefaultHandlerExpand)
    HANDLER_INFO(NotStandaloneHandler)
    HANDLER_INFO(ExternalEntityRefHandler)
    HANDLER_INFO(StartDoctypeDeclHandler)
    HANDLER_INFO(EndDoctypeDeclHandler)
    HANDLER_INFO(EntityDeclHandler)
    HANDLER_INFO(XmlDeclHandler)
    HANDLER_INFO(ElementDeclHandler)
    HANDLER_INFO(AttlistDeclHandler)
#if XML_COMBINED_VERSION >= 19504
    /* Same version guard as the SkippedEntity member of HandlerTypes. */
    HANDLER_INFO(SkippedEntityHandler)
#endif

#undef HANDLER_INFO

    {NULL, NULL, NULL}  /* sentinel */
};
2119