xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/xml/etree/ElementTree.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1"""Lightweight XML support for Python.
2
3 XML is an inherently hierarchical data format, and the most natural way to
4 represent it is with a tree.  This module has two classes for this purpose:
5
6    1. ElementTree represents the whole XML document as a tree and
7
8    2. Element represents a single node in this tree.
9
10 Interactions with the whole document (reading and writing to/from files) are
11 usually done on the ElementTree level.  Interactions with a single XML element
12 and its sub-elements are done on the Element level.
13
14 Element is a flexible container object designed to store hierarchical data
15 structures in memory. It can be described as a cross between a list and a
16 dictionary.  Each Element has a number of properties associated with it:
17
18    'tag' - a string containing the element's name.
19
20    'attributes' - a Python dictionary storing the element's attributes.
21
22    'text' - a string containing the element's text content.
23
24    'tail' - an optional string containing text after the element's end tag.
25
26    And a number of child elements stored in a Python sequence.
27
28 To create an element instance, use the Element constructor,
29 or the SubElement factory function.
30
31 You can also use the ElementTree class to wrap an element structure
32 and convert it to and from XML.
33
34"""
35
36#---------------------------------------------------------------------
37# Licensed to PSF under a Contributor Agreement.
38# See https://www.python.org/psf/license for licensing details.
39#
40# ElementTree
41# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
42#
# fredrik@pythonware.com
44# http://www.pythonware.com
45# --------------------------------------------------------------------
46# The ElementTree toolkit is
47#
48# Copyright (c) 1999-2008 by Fredrik Lundh
49#
50# By obtaining, using, and/or copying this software and/or its
51# associated documentation, you agree that you have read, understood,
52# and will comply with the following terms and conditions:
53#
54# Permission to use, copy, modify, and distribute this software and
55# its associated documentation for any purpose and without fee is
56# hereby granted, provided that the above copyright notice appears in
57# all copies, and that both that copyright notice and this permission
58# notice appear in supporting documentation, and that the name of
59# Secret Labs AB or the author not be used in advertising or publicity
60# pertaining to distribution of the software without specific, written
61# prior permission.
62#
63# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
64# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
65# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
66# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
67# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
68# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
69# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
70# OF THIS SOFTWARE.
71# --------------------------------------------------------------------
72
# Names exported by a star-import of this module; together they form the
# module's declared public API.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "indent", "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser", "XMLPullParser",
    "register_namespace",
    "canonicalize", "C14NWriterTarget",
    ]

# Version string, exported through __all__ above.
VERSION = "1.3.0"
94
95import sys
96import re
97import warnings
98import io
99import collections
100import collections.abc
101import contextlib
102
103from . import ElementPath
104
105
class ParseError(SyntaxError):
    """Raised when an XML document cannot be parsed.

    Besides the usual exception value, a ParseError carries two extra
    attributes:
        'code'     - the specific exception code
        'position' - the line and column of the error

    """
116
117# --------------------------------------------------------------------
118
119
def iselement(element):
    """Return True if *element* looks like an Element (it has a 'tag')."""
    missing = object()
    return getattr(element, 'tag', missing) is not missing
123
124
class Element:
    """An XML element.

    This class is the reference implementation of the Element interface.

    An element's length is its number of subelements.  That means if you
    want to check if an element is truly empty, you should check BOTH
    its length AND its text attribute.

    The element tag, attribute names, and attribute values can be either
    bytes or strings.

    *tag* is the element name.  *attrib* is an optional dictionary containing
    element attributes. *extra* are additional element attributes given as
    keyword arguments.

    Example form:
        <tag attrib>text<child/>...</tag>tail

    """

    tag = None
    """The element's name."""

    attrib = None
    """Dictionary of the element's attributes."""

    text = None
    """
    Text before first subelement. This is either a string or the value None.
    Note that if there is no text, this attribute may be either
    None or the empty string, depending on the parser.

    """

    tail = None
    """
    Text after this element's end tag, but before the next sibling element's
    start tag.  This is either a string or the value None.  Note that if there
    was no text, this attribute may be either None or an empty string,
    depending on the parser.

    """

    def __init__(self, tag, attrib={}, **extra):
        """Create an element named *tag*.

        *attrib* must be a dict (TypeError is raised otherwise); its entries
        are merged with the *extra* keyword arguments, which win on
        duplicate keys.
        """
        # The shared default dict is harmless here: it is only read and
        # copied below, never mutated.
        if not isinstance(attrib, dict):
            raise TypeError("attrib must be dict, not %s" % (
                attrib.__class__.__name__,))
        self.tag = tag
        # Always store a fresh dict so the caller's mapping is not aliased.
        self.attrib = {**attrib, **extra}
        self._children = []

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self))

    def makeelement(self, tag, attrib):
        """Create a new element with the same type.

        *tag* is a string containing the element name.
        *attrib* is a dictionary containing the element attributes.

        Do not call this method, use the SubElement factory function instead.

        """
        return self.__class__(tag, attrib)

    def copy(self):
        """Return copy of current element.

        This creates a shallow copy. Subelements will be shared with the
        original tree.

        Deprecated: emits a DeprecationWarning; use copy.copy(elem) instead.

        """
        # stacklevel=2 attributes the warning to the code calling copy(),
        # matching the convention used by __bool__ below.
        warnings.warn(
            "elem.copy() is deprecated. Use copy.copy(elem) instead.",
            DeprecationWarning, stacklevel=2
            )
        return self.__copy__()

    def __copy__(self):
        """Return a shallow copy: new element, same child objects."""
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    def __len__(self):
        """Return the number of direct subelements."""
        return len(self._children)

    def __bool__(self):
        """Return True if the element has subelements (emits FutureWarning)."""
        warnings.warn(
            "The behavior of this method will change in future versions.  "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    def __getitem__(self, index):
        """Return the subelement (or list of subelements) at *index*."""
        return self._children[index]

    def __setitem__(self, index, element):
        """Replace the subelement(s) at *index* after type-checking them."""
        if isinstance(index, slice):
            # Slice assignment: every item of the iterable must be an Element.
            for elt in element:
                self._assert_is_element(elt)
        else:
            self._assert_is_element(element)
        self._children[index] = element

    def __delitem__(self, index):
        """Delete the subelement(s) at *index*."""
        del self._children[index]

    def append(self, subelement):
        """Add *subelement* to the end of this element.

        The new element will appear in document order after the last existing
        subelement (or directly after the text, if it's the first subelement),
        but before the end tag for this element.

        """
        self._assert_is_element(subelement)
        self._children.append(subelement)

    def extend(self, elements):
        """Append subelements from a sequence.

        *elements* is a sequence with zero or more elements.

        Items are checked and appended one at a time, so a TypeError for a
        later item leaves the earlier ones appended.

        """
        for element in elements:
            self._assert_is_element(element)
            self._children.append(element)

    def insert(self, index, subelement):
        """Insert *subelement* at position *index*."""
        self._assert_is_element(subelement)
        self._children.insert(index, subelement)

    def _assert_is_element(self, e):
        """Raise TypeError unless *e* is an instance of the Python Element."""
        # Need to refer to the actual Python implementation, not the
        # shadowing C implementation.
        if not isinstance(e, _Element_Py):
            raise TypeError('expected an Element, not %s' % type(e).__name__)

    def remove(self, subelement):
        """Remove matching subelement.

        Unlike the find methods, this method compares elements based on
        identity, NOT ON tag value or contents.  To remove subelements by
        other means, the easiest way is to use a list comprehension to
        select what elements to keep, and then use slice assignment to update
        the parent element.

        ValueError is raised if a matching element could not be found.

        """
        self._children.remove(subelement)

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        return ElementPath.find(self, path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return text content of first matching element, or default value if
        none was found.  Note that if an element is found having no text
        content, the empty string is returned.

        """
        return ElementPath.findtext(self, path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Returns list containing all matching elements in document order.

        """
        return ElementPath.findall(self, path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        return ElementPath.iterfind(self, path, namespaces)

    def clear(self):
        """Reset element.

        This function removes all subelements, clears all attributes, and sets
        the text and tail attributes to None.

        """
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    def get(self, key, default=None):
        """Get element attribute.

        Equivalent to attrib.get, but some implementations may handle this a
        bit more efficiently.  *key* is what attribute to look for, and
        *default* is what to return if the attribute was not found.

        Returns the attribute value, or the default if the attribute was
        not found.

        """
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set element attribute.

        Equivalent to attrib[key] = value, but some implementations may handle
        this a bit more efficiently.  *key* is what attribute to set, and
        *value* is the attribute value to set it to.

        """
        self.attrib[key] = value

    def keys(self):
        """Get the attribute names.

        Equivalent to attrib.keys(): the result is a dictionary view of the
        names, in the same order as the underlying attribute dict.

        """
        return self.attrib.keys()

    def items(self):
        """Get the element attributes as (name, value) pairs.

        Equivalent to attrib.items(): the result is a dictionary view of
        the pairs, in the same order as the underlying attribute dict.

        """
        return self.attrib.items()

    def iter(self, tag=None):
        """Create tree iterator.

        The iterator loops over the element and all subelements in document
        order, returning all elements with a matching tag.

        If the tree structure is modified during iteration, new or removed
        elements may or may not be included.  To get a stable set, use the
        list() function on the iterator, and loop over the resulting list.

        *tag* is what tags to look for (default is to return all elements)

        Return an iterator containing all the matching elements.

        """
        # "*" is a wildcard and behaves exactly like "no filter".
        if tag == "*":
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for e in self._children:
            yield from e.iter(tag)

    def itertext(self):
        """Create text iterator.

        The iterator loops over the element and all subelements in document
        order, returning all inner text.

        """
        tag = self.tag
        # Elements whose tag is neither a string nor None contribute no text.
        if not isinstance(tag, str) and tag is not None:
            return
        t = self.text
        if t:
            yield t
        for e in self:
            yield from e.itertext()
            t = e.tail
            if t:
                yield t
424
425
def SubElement(parent, tag, attrib={}, **extra):
    """Create a new element and append it to *parent*.

    The element tag, attribute names, and attribute values can be either
    bytes or Unicode strings.

    *parent* is the parent element, *tag* is the subelement's name, *attrib*
    is an optional dictionary containing element attributes, *extra* are
    additional attributes given as keyword arguments (they take precedence
    over entries of *attrib* with the same name).

    The element is built with parent.makeelement(), appended with
    parent.append() and returned.

    """
    # Build a fresh mapping so neither the caller's dict nor the shared
    # default is modified.
    merged = {**attrib}
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
442
443
def Comment(text=None):
    """Comment element factory.

    Build a special element, tagged with the Comment function itself, which
    the standard serializer writes out as an XML comment.

    *text* is a string containing the comment string.

    """
    node = Element(Comment)
    node.text = text
    return node
456
457
def ProcessingInstruction(target, text=None):
    """Processing Instruction element factory.

    Build a special element, tagged with the ProcessingInstruction function
    itself, which the standard serializer writes out as an XML processing
    instruction.

    *target* is a string containing the processing instruction, *text* is a
    string containing the processing instruction contents, if any.  The
    element's text is the target alone, or "target text" when *text* is
    non-empty.

    """
    node = Element(ProcessingInstruction)
    node.text = target + " " + text if text else target
    return node

# Short alias for ProcessingInstruction.
PI = ProcessingInstruction
475
476
class QName:
    """Qualified name wrapper.

    Wrap a QName attribute value so that namespaces are handled properly
    on output.

    *text_or_uri* is a string containing the QName value either in the form
    {uri}local, or if the tag argument is given, the URI part of a QName.

    *tag* is an optional argument which if given, will make the first
    argument (text_or_uri) be interpreted as a URI, and this argument (tag)
    be interpreted as a local name.

    Comparisons and hashing are delegated to the wrapped text; the other
    operand may be another QName or a plain value.

    """
    def __init__(self, text_or_uri, tag=None):
        self.text = "{%s}%s" % (text_or_uri, tag) if tag else text_or_uri

    def __str__(self):
        return self.text

    def __repr__(self):
        return '<%s %r>' % (self.__class__.__name__, self.text)

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text == rhs

    def __lt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text < rhs

    def __le__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text <= rhs

    def __gt__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text > rhs

    def __ge__(self, other):
        rhs = other.text if isinstance(other, QName) else other
        return self.text >= rhs
521
522# --------------------------------------------------------------------
523
524
class ElementTree:
    """An XML element hierarchy.

    This class also provides support for serialization to and from
    standard XML.

    *element* is an optional root element node,
    *file* is an optional file handle or file name of an XML file whose
    contents will be used to initialize the tree with.

    """
    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            self.parse(file)

    def getroot(self):
        """Return root element of this tree."""
        return self._root

    def _setroot(self, element):
        """Replace root element of this tree.

        This will discard the current contents of the tree and replace it
        with the given element.  Use with care!

        """
        self._root = element

    def parse(self, source, parser=None):
        """Load external XML document into element tree.

        *source* is a file name or file object, *parser* is an optional parser
        instance that defaults to XMLParser.

        ParseError is raised if the parser fails to parse the document.

        Returns the root element of the given source document.

        """
        close_source = False
        if not hasattr(source, "read"):
            # Not file-like: treat it as a file name and own the handle.
            source = open(source, "rb")
            close_source = True
        try:
            if parser is None:
                # If no parser was specified, create a default XMLParser
                parser = XMLParser()
                if hasattr(parser, '_parse_whole'):
                    # The default XMLParser, when it comes from an accelerator,
                    # can define an internal _parse_whole API for efficiency.
                    # It can be used to parse the whole source without feeding
                    # it with chunks.
                    self._root = parser._parse_whole(source)
                    return self._root
            while True:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # Only close what this method opened itself.
            if close_source:
                source.close()

    def iter(self, tag=None):
        """Create and return tree iterator for the root element.

        The iterator loops over all elements in this tree, in document order.

        *tag* is a string with the tag name to iterate over
        (default is to return all elements).

        The tree must already have a root element.

        """
        return self._root.iter(tag)

    @staticmethod
    def _normalize_search_path(path):
        """Return *path* with a leading "/" rewritten to "./".

        A FutureWarning is emitted when the path is rewritten.  It is
        attributed to the caller of the public search method
        (stacklevel=3: this helper, the search method, then its caller).

        """
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version.  If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    def find(self, path, namespaces=None):
        """Find first matching element by tag name or path.

        Same as getroot().find(path), which is Element.find()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return the first matching element, or None if no element was found.

        """
        path = self._normalize_search_path(path)
        return self._root.find(path, namespaces)

    def findtext(self, path, default=None, namespaces=None):
        """Find text for first matching element by tag name or path.

        Same as getroot().findtext(path),  which is Element.findtext()

        *path* is a string having either an element tag or an XPath,
        *default* is the value to return if the element was not found,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return whatever the root element's findtext() returns for these
        arguments.

        """
        path = self._normalize_search_path(path)
        return self._root.findtext(path, default, namespaces)

    def findall(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().findall(path), which is Element.findall().

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return list containing all matching elements in document order.

        """
        path = self._normalize_search_path(path)
        return self._root.findall(path, namespaces)

    def iterfind(self, path, namespaces=None):
        """Find all matching subelements by tag name or path.

        Same as getroot().iterfind(path), which is element.iterfind()

        *path* is a string having either an element tag or an XPath,
        *namespaces* is an optional mapping from namespace prefix to full name.

        Return an iterable yielding all matching elements in document order.

        """
        path = self._normalize_search_path(path)
        return self._root.iterfind(path, namespaces)

    def write(self, file_or_filename,
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None, *,
              short_empty_elements=True):
        """Write element tree to a file as XML.

        Arguments:
          *file_or_filename* -- file name or a file object opened for writing

          *encoding* -- the output encoding (default: US-ASCII, or UTF-8
                        when *method* is "c14n")

          *xml_declaration* -- bool indicating if an XML declaration should be
                               added to the output. If None, an XML declaration
                               is added if encoding IS NOT either of:
                               US-ASCII, UTF-8, or Unicode.  A declaration is
                               only ever written for the "xml" method.

          *default_namespace* -- sets the default XML namespace (for "xmlns")

          *method* -- either "xml" (default), "html", "text", or "c14n"

          *short_empty_elements* -- controls the formatting of elements
                                    that contain no content. If True (default)
                                    they are emitted as a single self-closed
                                    tag, otherwise they are emitted as a pair
                                    of start/end tags

        ValueError is raised for an unknown *method*.

        """
        if not method:
            method = "xml"
        elif method not in _serialize:
            raise ValueError("unknown method %r" % method)
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
            if method == "xml" and (xml_declaration or
                    (xml_declaration is None and
                     encoding.lower() != "unicode" and
                     declared_encoding.lower() not in ("utf-8", "us-ascii"))):
                write("<?xml version='1.0' encoding='%s'?>\n" % (
                    declared_encoding,))
            if method == "text":
                _serialize_text(write, self._root)
            else:
                qnames, namespaces = _namespaces(self._root, default_namespace)
                serialize = _serialize[method]
                serialize(write, self._root, qnames, namespaces,
                          short_empty_elements=short_empty_elements)

    def write_c14n(self, file):
        """Write the tree to *file* using the "c14n" output method."""
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
749
750# --------------------------------------------------------------------
751# serialization support
752
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
    """Yield a ``(write, declared_encoding)`` pair for *file_or_filename*.

    *write* accepts text.  Any file or wrapper created here is released
    when the context exits; a file object passed in by the caller is left
    open.
    """
    try:
        write = file_or_filename.write
    except AttributeError:
        # No write() attribute: the argument is a file name.
        if encoding.lower() == "unicode":
            encoding = "utf-8"
        with open(file_or_filename, "w", encoding=encoding,
                  errors="xmlcharrefreplace") as out:
            yield out.write, encoding
    else:
        # A file-like object; the encoding tells whether it takes text
        # or bytes.
        if encoding.lower() == "unicode":
            # Text writer: use its write() unchanged and report its own
            # encoding, falling back to "utf-8" when it has none.
            declared = getattr(file_or_filename, "encoding", None) or "utf-8"
            yield write, declared
            return

        # Binary writer: put a TextIOWrapper on top of it.
        with contextlib.ExitStack() as cleanup:
            if isinstance(file_or_filename, io.BufferedIOBase):
                binary = file_or_filename
            elif isinstance(file_or_filename, io.RawIOBase):
                binary = io.BufferedWriter(file_or_filename)
                # Detach on exit so the caller's raw file stays open once
                # the BufferedWriter goes away.
                cleanup.callback(binary.detach)
            else:
                # Objects outside the IOBase hierarchy that merely offer
                # write(): graft their methods onto a bare buffer object.
                binary = io.BufferedIOBase()
                binary.writable = lambda: True
                binary.write = write
                try:
                    # TextIOWrapper consults these to decide whether a
                    # BOM (for UTF-16, etc) should be added.
                    binary.seekable = file_or_filename.seekable
                    binary.tell = file_or_filename.tell
                except AttributeError:
                    pass
            text = io.TextIOWrapper(binary,
                                    encoding=encoding,
                                    errors="xmlcharrefreplace",
                                    newline="\n")
            # Detach on exit so the underlying file stays open once the
            # TextIOWrapper goes away.
            cleanup.callback(text.detach)
            yield text.write, encoding
802
def _namespaces(elem, default_namespace=None):
    """Collect the qualified names and namespaces used below *elem*.

    Return ``(qnames, namespaces)``: *qnames* maps each name found in the
    tree to the name to serialize (``prefix:local``, or the bare name), and
    *namespaces* maps each namespace URI to its prefix.  *default_namespace*,
    when given, is registered with the empty prefix; non-qualified names
    then raise ValueError.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # work out the serialized form of *qname* and record it
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    prefix = "ns%d" % len(namespaces)
                # the "xml" prefix is never recorded in the namespace table
                if prefix != "xml":
                    namespaces[uri] = prefix
            # an empty prefix means the default namespace: use the bare name
            qnames[qname] = "%s:%s" % (prefix, local) if prefix else local
        except TypeError:
            _raise_serialization_error(qname)

    # walk the tree, filling the qname and namespace tables
    for node in elem.iter():
        tag = node.tag
        if isinstance(tag, QName):
            tag_name = tag.text
            if tag_name not in qnames:
                add_qname(tag_name)
        elif isinstance(tag, str):
            if tag not in qnames:
                add_qname(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            name = key.text if isinstance(key, QName) else key
            if name not in qnames:
                add_qname(name)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        content = node.text
        if isinstance(content, QName) and content.text not in qnames:
            add_qname(content.text)
    return qnames, namespaces
863
def _serialize_xml(write, elem, qnames, namespaces,
                   short_empty_elements, **kwargs):
    """Write *elem* and its subtree as XML by calling *write*.

    *write* is called with one string per output fragment, *qnames* maps
    names to their serialized form, *namespaces* maps namespace URIs to
    the prefixes that are declared on this element (None for nested
    elements).  If *short_empty_elements* is true, an element without text
    and children is written as "<tag />".  Extra keyword arguments are
    ignored.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % text)
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % text)
    else:
        name = qnames[raw_tag]
        if name is None:
            # no tag of its own: only the content is written
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_xml(write, child, qnames, None,
                               short_empty_elements=short_empty_elements)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                # namespace declarations come first, sorted on prefix
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value)
                write(' %s="%s"' % (qnames[key], value))
            if not text and not len(elem) and short_empty_elements:
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text))
                for child in elem:
                    _serialize_xml(write, child, qnames, None,
                                   short_empty_elements=short_empty_elements)
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail))
913
# Lower-case HTML tag names for which _serialize_html() does not write an
# end tag.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "embed", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param", "source",
              "track", "wbr"}
917
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as HTML by calling *write*.

    *write* is called with one string per output fragment, *qnames* maps
    names to their serialized form, *namespaces* maps namespace URIs to
    the prefixes that are declared on this element (None for nested
    elements).  Extra keyword arguments are ignored.

    The text of "script" and "style" elements is written unescaped, and no
    end tag is written for the elements listed in HTML_EMPTY.
    """
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[raw_tag]
        if name is None:
            # no tag of its own: only the content is written
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                # namespace declarations come first, sorted on prefix
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
            for key, value in attributes:
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(' %s="%s"' % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    # raw text elements: content is passed through as is
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail))
967
def _serialize_text(write, elem):
    """Write the text content of *elem* and its subtree, without markup.

    Every string produced by elem.itertext() is passed to *write*,
    followed by the tail of *elem* if it has one.  Nothing is escaped.
    """
    for fragment in elem.itertext():
        write(fragment)
    tail = elem.tail
    if tail:
        write(tail)
973
# Maps the name of an output method to the function that serializes an
# element with it.
# NOTE(review): presumably looked up by ElementTree.write() with its
# *method* argument -- the lookup is not visible in this part of the file.
_serialize = {
    "xml": _serialize_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
981
982
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global: it is shared by everything that serializes
    through this module.  Any existing entry that uses either the given
    prefix or the given namespace URI is removed before the new one is
    added.

    *prefix* is the namespace prefix, *uri* is the namespace URI.  Tags
    and attributes in this namespace will be serialized with the prefix
    if possible.

    ValueError is raised if the prefix has the "ns<number>" form, which is
    reserved for automatically generated prefixes.

    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Collect the conflicting entries first; the registry must not change
    # size while it is being iterated.
    stale = [known_uri
             for known_uri, known_prefix in _namespace_map.items()
             if known_uri == uri or known_prefix == prefix]
    for known_uri in stale:
        del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
1001
# Global registry mapping namespace URIs to their preferred prefixes.
# _namespaces() consults it when it needs a prefix for a URI, and
# register_namespace() modifies it in place.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting: expose the registry as an attribute of
# register_namespace().
register_namespace._namespace_map = _namespace_map
1016
def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized.

    The message contains the repr() of *text* and the name of its type.
    This function never returns.
    """
    kind = type(text).__name__
    raise TypeError(f"cannot serialize {text!r} (type {kind})")
1021
def _escape_cdata(text):
    """Return *text* with "&", "<" and ">" replaced by entity references.

    A value that does not behave like a string is reported through
    _raise_serialization_error().
    """
    # "&" has to be handled first, otherwise the ampersands of the
    # entities inserted for "<" and ">" would be escaped again.
    replacements = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
    try:
        for char, entity in replacements:
            # Most strings are short and need no escaping at all, so test
            # for the character before paying for a replace() call.
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1037
def _escape_attrib(text):
    """Return *text* escaped for use inside a double-quoted XML attribute.

    "&", "<", ">" and the double quote become entity references; carriage
    return, newline and tab become numeric character references.  A value
    that does not behave like a string is reported through
    _raise_serialization_error().
    """
    replacements = (
        # "&" must come first so that the entities inserted below are not
        # escaped a second time
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("\"", "&quot;"),
        # Section 2.11 of the XML specification says that CR and CR LF are
        # to be replaced with a single LF, but that only concerns the line
        # ends that organize the file into lines.  Inside attribute values
        # these characters are written as character references, so the
        # rule does not apply to them.
        # http://www.w3.org/TR/REC-xml/#sec-line-ends
        # This handling of CR, LF and TAB was discussed in issues 17582
        # and 39011.
        ("\r", "&#13;"),
        ("\n", "&#10;"),
        ("\t", "&#09;"),
    )
    try:
        for char, entity in replacements:
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1065
def _escape_attrib_html(text):
    """Return *text* escaped for use inside a double-quoted HTML attribute.

    Only "&", ">" and the double quote are replaced; "<" is left alone.
    A value that does not behave like a string is reported through
    _raise_serialization_error().
    """
    # "&" first, so the entities inserted afterwards stay intact
    replacements = (("&", "&amp;"), (">", "&gt;"), ("\"", "&quot;"))
    try:
        for char, entity in replacements:
            if char in text:
                text = text.replace(char, entity)
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1078
1079# --------------------------------------------------------------------
1080
def tostring(element, encoding=None, method=None, *,
             xml_declaration=None, default_namespace=None,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned.  Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output method
    which can be one of "xml" (default), "html", "text" or "c14n",
    *default_namespace* sets the default XML namespace (for "xmlns").
    *xml_declaration* and *short_empty_elements* are passed on unchanged
    to ElementTree.write().

    Returns an (optionally) encoded string containing the XML data.

    """
    # ElementTree.write() needs a text stream for "unicode" output and a
    # binary stream for everything else.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    document = ElementTree(element)
    document.write(buffer, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1104
class _ListDataStream(io.BufferedIOBase):
    """An auxiliary stream accumulating into a list reference.

    Every object passed to write() is appended to the list given to the
    constructor; the list is never copied, so the caller sees the chunks
    as they arrive.
    """
    def __init__(self, lst):
        # the caller's list; written chunks are appended to it in place
        self.lst = lst

    def writable(self):
        """Return True: the stream accepts write() calls."""
        return True

    def seekable(self):
        """Return True, so that tell() may be called on the stream."""
        return True

    def write(self, b):
        """Append the chunk *b* to the list and return its length.

        Returning the number of items written follows the contract of
        io.BufferedIOBase.write().
        """
        self.lst.append(b)
        return len(b)

    def tell(self):
        """Return the number of chunks written so far.

        Note that this is a count of write() calls, not of bytes.
        """
        return len(self.lst)
1121
def tostringlist(element, encoding=None, method=None, *,
                 xml_declaration=None, default_namespace=None,
                 short_empty_elements=True):
    """Generate string representation of XML element as a list of chunks.

    The arguments are the same as for tostring() and are passed on to
    ElementTree.write() in the same way.

    Returns the list of objects that were written to the output stream,
    in the order in which they were written.

    """
    fragments = []
    sink = _ListDataStream(fragments)
    document = ElementTree(element)
    document.write(sink, encoding,
                   xml_declaration=xml_declaration,
                   default_namespace=default_namespace,
                   method=method,
                   short_empty_elements=short_empty_elements)
    return fragments
1133
1134
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file, followed by a newline unless the tail of the root
    element already ends with one.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    tail = tree.getroot().tail
    # make sure that the output ends with a newline
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1152
1153
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself is left alone; the text of every element that has
    children and the tail of every element below the root are replaced
    when they are empty or contain only whitespace.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.  ValueError is raised if it is negative.
    """
    root = tree.getroot() if isinstance(tree, ElementTree) else tree
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if len(root) == 0:
        # nothing to indent without children
        return

    # One indentation string per nesting depth, created on demand and then
    # shared by all elements of that depth to save memory.
    cache = ["\n" + level * space]

    def _indent_children(parent, depth):
        # Look up (or create) the indentation used in front of the children.
        inner_depth = depth + 1
        if inner_depth >= len(cache):
            cache.append(cache[depth] + space)
        inner = cache[inner_depth]

        parent_text = parent.text
        if not (parent_text and parent_text.strip()):
            parent.text = inner

        for child in parent:
            if len(child):
                _indent_children(child, inner_depth)
            child_tail = child.tail
            if not (child_tail and child_tail.strip()):
                child.tail = inner

        # The last child is followed by the end tag of the parent, so it
        # gets the shallower indentation of the parent instead.
        if not child.tail.strip():
            child.tail = cache[depth]

    _indent_children(root, 0)
1202
1203
1204# --------------------------------------------------------------------
1205# parsing
1206
1207
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return the ElementTree instance that was filled from *source*.

    """
    # Start from an empty tree and let it load itself from the source.
    document = ElementTree()
    document.parse(source, parser)
    return document
1220
1221
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is called with.  The events are handed to XMLPullParser,
    which accepts the strings "start", "end", "start-ns", "end-ns",
    "comment" and "pi" (the "ns" events are used to get detailed namespace
    information).  If *events* is omitted, only "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.  The iterator has a
    *root* attribute, which is None until the input has been read
    completely and is then set to the root element.  A *source* given as a
    filename is opened before this function returns and is closed when
    the iterator finishes.

    """
    # Use the internal, undocumented _parser argument for now; When the
    # parser argument of iterparse is removed, this can be killed.
    pullparser = XMLPullParser(events=events, _parser=parser)

    def iterator(source):
        # Generator that does the actual work.  Only a file opened here is
        # closed here; a file object passed in by the caller is left open.
        close_source = False
        try:
            if not hasattr(source, "read"):
                source = open(source, "rb")
                close_source = True
            # Dummy value consumed by the next(it) call below, so that the
            # code above runs before iterparse() returns.
            yield None
            while True:
                # hand out everything that is already available
                yield from pullparser.read_events()
                # load event buffer
                data = source.read(16 * 1024)
                if not data:
                    break
                pullparser.feed(data)
            root = pullparser._close_and_return_root()
            # events that were produced while closing the parser
            yield from pullparser.read_events()
            it.root = root
        finally:
            if close_source:
                source.close()

    # The iterator object is an instance of a throwaway class whose
    # __next__ is the bound __next__ of a single generator instance.
    class IterParseIterator(collections.abc.Iterator):
        __next__ = iterator(source).__next__
    it = IterParseIterator()
    it.root = None
    # NOTE(review): presumably drops references that are no longer needed
    # so that they are not kept alive by this frame -- confirm.
    del iterator, IterParseIterator

    # Run the generator up to its first yield (see above).
    next(it)
    return it
1270
1271
class XMLPullParser:
    """Parser that is fed data in pieces and hands out events on request.

    Data is passed in with feed(); the events produced so far are
    retrieved with read_events().

    *events* is a sequence of the event names to report, defaulting to
    ("end",).
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See https://bugs.python.org/issue17741 for more details.

        self._events_queue = collections.deque()
        if _parser:
            self._parser = _parser
        else:
            self._parser = XMLParser(target=TreeBuilder())
        # wire up the parser for event reporting
        reported = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, reported)

    def feed(self, data):
        """Feed encoded data to parser.

        A SyntaxError raised by the parser is not propagated; it is put
        into the event queue and raised by read_events() when its turn
        comes.  ValueError is raised if the parser was closed already.
        """
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        document_root = self._parser.close()
        # a parser of None marks the end of the stream for feed()
        self._parser = None
        return document_root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.  An exception found in the queue is
        raised instead of being returned.
        """
        pending = self._events_queue
        while pending:
            item = pending.popleft()
            if isinstance(item, Exception):
                raise item
            yield item
1323
1324
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance: whatever the close() method of the
    parser returns.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1340
1341
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the dict maps the value of
    each non-empty "id" attribute to the element that carries it.  If an
    id is used more than once, the last element in document order wins.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    active.feed(text)
    root = active.close()
    by_id = {}
    for node in root.iter():
        ident = node.get("id")
        if ident:
            by_id[ident] = node
    return root, by_id
1362
# Parse XML document from string constant.  fromstring is the very same
# function object as XML(), available under a second name.
fromstring = XML
1365
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other iterable of fragments, which are fed to
    the parser one after the other, *parser* is an optional parser
    instance, defaulting to the standard XMLParser.

    Returns an Element instance: whatever the close() method of the
    parser returns.

    """
    active = parser if parser else XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1380
1381# --------------------------------------------------------------------
1382
1383
class TreeBuilder:
    """Generic element structure builder.

    This builder turns a sequence of start(), data() and end() calls into
    an element structure; close() checks that the calls were balanced and
    returns the result.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    with a tag and an attribute dict to create new elements; Element is
    used if it is omitted.

    *comment_factory* is a factory to create comments to be used instead of
    the standard factory.  If *insert_comments* is false (the default),
    comments will not be inserted into the tree.

    *pi_factory* is a factory to create processing instructions to be used
    instead of the standard factory.  If *insert_pis* is false (the default),
    processing instructions will not be inserted into the tree.
    """
    def __init__(self, element_factory=None, *,
                 comment_factory=None, pi_factory=None,
                 insert_comments=False, insert_pis=False):
        self._data = []     # text chunks not yet attached to an element
        self._elem = []     # stack of the elements that are still open
        self._last = None   # element that receives the pending text
        self._root = None   # first element that was started at top level
        self._tail = None   # true if pending text is tail, not text
        self._comment_factory = (
            Comment if comment_factory is None else comment_factory)
        self.insert_comments = insert_comments
        self._pi_factory = (
            ProcessingInstruction if pi_factory is None else pi_factory)
        self.insert_pis = insert_pis
        self._factory = (
            Element if element_factory is None else element_factory)

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._root is not None, "missing toplevel element"
        return self._root

    def _flush(self):
        # Attach the collected text chunks to the last element, as its
        # tail or as its text, and start a new collection.
        chunks = self._data
        if not chunks:
            return
        owner = self._last
        if owner is not None:
            joined = "".join(chunks)
            if self._tail:
                assert owner.tail is None, "internal error (tail)"
                owner.tail = joined
            else:
                assert owner.text is None, "internal error (text)"
                owner.text = joined
        self._data = []

    def data(self, data):
        """Add text to current element.

        The text is only collected here; it is attached to an element
        the next time the builder flushes its buffer.
        """
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        stack = self._elem
        if stack:
            stack[-1].append(opened)
        elif self._root is None:
            self._root = opened
        stack.append(opened)
        # text that follows belongs to the new element itself
        self._tail = 0
        return opened

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name; it has to match the tag of the element
        that was opened last.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        # text that follows comes after the end tag
        self._tail = 1
        return closed

    def comment(self, text):
        """Create a comment using the comment_factory.

        *text* is the text of the comment.
        """
        return self._handle_single(
            self._comment_factory, self.insert_comments, text)

    def pi(self, target, text=None):
        """Create a processing instruction using the pi_factory.

        *target* is the target name of the processing instruction.
        *text* is the data of the processing instruction, or ''.
        """
        return self._handle_single(
            self._pi_factory, self.insert_pis, target, text)

    def _handle_single(self, factory, insert, *args):
        # Create a node that has no end tag of its own.  It is returned in
        # any case, but only becomes part of the tree if *insert* is true.
        node = factory(*args)
        if insert:
            self._flush()
            self._last = node
            if self._elem:
                self._elem[-1].append(node)
            self._tail = 1
        return node
1503
1504
1505# also see ElementTree and TreeBuilder
1506class XMLParser:
1507    """Element structure builder for XML source data based on the expat parser.
1508
1509    *target* is an optional target object which defaults to an instance of the
1510    standard TreeBuilder class, *encoding* is an optional encoding string
1511    which if given, overrides the encoding specified in the XML file:
1512    http://www.iana.org/assignments/character-sets
1513
1514    """
1515
1516    def __init__(self, *, target=None, encoding=None):
1517        try:
1518            from xml.parsers import expat
1519        except ImportError:
1520            try:
1521                import pyexpat as expat
1522            except ImportError:
1523                raise ImportError(
1524                    "No module named expat; use SimpleXMLTreeBuilder instead"
1525                    )
1526        parser = expat.ParserCreate(encoding, "}")
1527        if target is None:
1528            target = TreeBuilder()
1529        # underscored names are provided for compatibility only
1530        self.parser = self._parser = parser
1531        self.target = self._target = target
1532        self._error = expat.error
1533        self._names = {} # name memo cache
1534        # main callbacks
1535        parser.DefaultHandlerExpand = self._default
1536        if hasattr(target, 'start'):
1537            parser.StartElementHandler = self._start
1538        if hasattr(target, 'end'):
1539            parser.EndElementHandler = self._end
1540        if hasattr(target, 'start_ns'):
1541            parser.StartNamespaceDeclHandler = self._start_ns
1542        if hasattr(target, 'end_ns'):
1543            parser.EndNamespaceDeclHandler = self._end_ns
1544        if hasattr(target, 'data'):
1545            parser.CharacterDataHandler = target.data
1546        # miscellaneous callbacks
1547        if hasattr(target, 'comment'):
1548            parser.CommentHandler = target.comment
1549        if hasattr(target, 'pi'):
1550            parser.ProcessingInstructionHandler = target.pi
1551        # Configure pyexpat: buffering, new-style attribute handling.
1552        parser.buffer_text = 1
1553        parser.ordered_attributes = 1
1554        self._doctype = None
1555        self.entity = {}
1556        try:
1557            self.version = "Expat %d.%d.%d" % expat.version_info
1558        except AttributeError:
1559            pass # unknown
1560
    def _setevents(self, events_queue, events_to_report):
        # Internal API for XMLPullParser
        # events_to_report: a list of events to report during parsing (same as
        # the *events* of XMLPullParser's constructor).
        # events_queue: a list of actual parsing events that will be populated
        # by the underlying parser.
        #
        # For every requested event the matching expat handler is replaced
        # by a function that appends an (event, value) tuple to the queue.
        # The handlers receive the values they need as default arguments,
        # which binds them at definition time, once per loop iteration.
        # ValueError is raised for an unknown event name.
        parser = self._parser
        append = events_queue.append
        for event_name in events_to_report:
            if event_name == "start":
                parser.ordered_attributes = 1
                def handler(tag, attrib_in, event=event_name, append=append,
                            start=self._start):
                    append((event, start(tag, attrib_in)))
                parser.StartElementHandler = handler
            elif event_name == "end":
                def handler(tag, event=event_name, append=append,
                            end=self._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event_name == "start-ns":
                # TreeBuilder does not implement .start_ns()
                # Without it, the (prefix, uri) pair itself is reported.
                if hasattr(self.target, "start_ns"):
                    def handler(prefix, uri, event=event_name, append=append,
                                start_ns=self._start_ns):
                        append((event, start_ns(prefix, uri)))
                else:
                    def handler(prefix, uri, event=event_name, append=append):
                        append((event, (prefix or '', uri or '')))
                parser.StartNamespaceDeclHandler = handler
            elif event_name == "end-ns":
                # TreeBuilder does not implement .end_ns()
                # Without it, None is reported as the value of the event.
                if hasattr(self.target, "end_ns"):
                    def handler(prefix, event=event_name, append=append,
                                end_ns=self._end_ns):
                        append((event, end_ns(prefix)))
                else:
                    def handler(prefix, event=event_name, append=append):
                        append((event, None))
                parser.EndNamespaceDeclHandler = handler
            elif event_name == 'comment':
                # the value is whatever the comment() method of the target
                # returns
                def handler(text, event=event_name, append=append, self=self):
                    append((event, self.target.comment(text)))
                parser.CommentHandler = handler
            elif event_name == 'pi':
                # the value is whatever the pi() method of the target returns
                def handler(pi_target, data, event=event_name, append=append,
                            self=self):
                    append((event, self.target.pi(pi_target, data)))
                parser.ProcessingInstructionHandler = handler
            else:
                raise ValueError("unknown event %r" % event_name)
1613
1614    def _raiseerror(self, value):
1615        err = ParseError(value)
1616        err.code = value.code
1617        err.position = value.lineno, value.offset
1618        raise err
1619
1620    def _fixname(self, key):
1621        # expand qname, and convert name string to ascii, if possible
1622        try:
1623            name = self._names[key]
1624        except KeyError:
1625            name = key
1626            if "}" in name:
1627                name = "{" + name
1628            self._names[key] = name
1629        return name
1630
1631    def _start_ns(self, prefix, uri):
1632        return self.target.start_ns(prefix or '', uri or '')
1633
1634    def _end_ns(self, prefix):
1635        return self.target.end_ns(prefix or '')
1636
1637    def _start(self, tag, attr_list):
1638        # Handler for expat's StartElementHandler. Since ordered_attributes
1639        # is set, the attributes are reported as a list of alternating
1640        # attribute name,value.
1641        fixname = self._fixname
1642        tag = fixname(tag)
1643        attrib = {}
1644        if attr_list:
1645            for i in range(0, len(attr_list), 2):
1646                attrib[fixname(attr_list[i])] = attr_list[i+1]
1647        return self.target.start(tag, attrib)
1648
1649    def _end(self, tag):
1650        return self.target.end(self._fixname(tag))
1651
1652    def _default(self, text):
1653        prefix = text[:1]
1654        if prefix == "&":
1655            # deal with undefined entities
1656            try:
1657                data_handler = self.target.data
1658            except AttributeError:
1659                return
1660            try:
1661                data_handler(self.entity[text[1:-1]])
1662            except KeyError:
1663                from xml.parsers import expat
1664                err = expat.error(
1665                    "undefined entity %s: line %d, column %d" %
1666                    (text, self.parser.ErrorLineNumber,
1667                    self.parser.ErrorColumnNumber)
1668                    )
1669                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
1670                err.lineno = self.parser.ErrorLineNumber
1671                err.offset = self.parser.ErrorColumnNumber
1672                raise err
1673        elif prefix == "<" and text[:9] == "<!DOCTYPE":
1674            self._doctype = [] # inside a doctype declaration
1675        elif self._doctype is not None:
1676            # parse doctype contents
1677            if prefix == ">":
1678                self._doctype = None
1679                return
1680            text = text.strip()
1681            if not text:
1682                return
1683            self._doctype.append(text)
1684            n = len(self._doctype)
1685            if n > 2:
1686                type = self._doctype[1]
1687                if type == "PUBLIC" and n == 4:
1688                    name, type, pubid, system = self._doctype
1689                    if pubid:
1690                        pubid = pubid[1:-1]
1691                elif type == "SYSTEM" and n == 3:
1692                    name, type, system = self._doctype
1693                    pubid = None
1694                else:
1695                    return
1696                if hasattr(self.target, "doctype"):
1697                    self.target.doctype(name, pubid, system[1:-1])
1698                elif hasattr(self, "doctype"):
1699                    warnings.warn(
1700                        "The doctype() method of XMLParser is ignored.  "
1701                        "Define doctype() method on the TreeBuilder target.",
1702                        RuntimeWarning)
1703
1704                self._doctype = None
1705
1706    def feed(self, data):
1707        """Feed encoded data to parser."""
1708        try:
1709            self.parser.Parse(data, False)
1710        except self._error as v:
1711            self._raiseerror(v)
1712
1713    def close(self):
1714        """Finish feeding data to parser and return element structure."""
1715        try:
1716            self.parser.Parse(b"", True) # end of data
1717        except self._error as v:
1718            self._raiseerror(v)
1719        try:
1720            close_handler = self.target.close
1721        except AttributeError:
1722            pass
1723        else:
1724            return close_handler()
1725        finally:
1726            # get rid of circular references
1727            del self.parser, self._parser
1728            del self.target, self._target
1729
1730
1731# --------------------------------------------------------------------
1732# C14N 2.0
1733
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    Input comes from *xml_data* (an XML string) or, when that is None, from
    *from_file* (a file path or file-like object); at least one of them must
    be given, otherwise ValueError is raised.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its
    ``.write()`` method, and the function returns None.  To write to a file,
    open it in text mode with encoding "utf-8".  If *out* is not provided,
    the output is returned as a text string.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory only when the caller supplied no sink.
    buffer = io.StringIO() if out is None else None
    sink = out if buffer is None else buffer

    writer = C14NWriterTarget(sink.write, **options)
    parser = XMLParser(target=writer)

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # from_file cannot be None here (checked on entry)
        parse(from_file, parser=parser)

    if buffer is None:
        return None
    return buffer.getvalue()
1762
1763
1764_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
1765
1766
class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised

    The target methods called by the parser are start_ns(), start(), data(),
    end(), comment() and pi(); everything is emitted through *write* as the
    events arrive.
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        """Set up the writer; see the class docstring for the options."""
        self._write = write
        # Text chunks buffered by data() until the next event flushes them.
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        # Empty/None collections are stored as None so that the checks in
        # start()/_start() can skip the filtering entirely.
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Bound set.intersection: called with an attribute dict it returns
            # the qname aware attribute names that are present in it.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            # Only when prefixes are kept: seed the stack with the module-level
            # _namespace_map (presumably a uri -> prefix mapping, which matches
            # the (uri, prefix) pair layout used here).
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        # uri -> generated "n{number}" prefix; only filled when rewriting.
        self._prefix_map = {}
        # Stack of xml:space states: one entry is pushed per written start tag
        # and popped again in end().
        self._preserve_space = [False]
        # (tag, attrs, new_namespaces) of a qname aware element whose start
        # tag is held back until its text content has been seen.
        self._pending_start = None
        # Set once the first start tag has been written.
        self._root_seen = False
        # Set by end() when the element stack is empty again.
        self._root_done = False
        # Nesting depth inside an excluded element; 0 means "not excluded".
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, last level first.

        Levels without declarations are skipped.  *_reversed* is not meant to
        be passed by callers (presumably a default-argument binding that
        keeps the builtin a fast local lookup).
        """
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Return "{uri}name" for a "prefix:name" string.

        The prefix is looked up in the user declared namespaces, most
        recently declared first.  Raises ValueError if it is not declared.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return ``(output name, local name, uri)`` for *qname*.

        *qname* is a "{uri}local" or plain "local" name; when *uri* is given,
        *qname* is taken to be the local name already.  The output name
        carries the prefix to write ("prefix:local", or just "local" when no
        prefix applies).  A namespace that has not been written out yet is
        appended to the innermost level of ``_declared_ns_stack`` as a side
        effect.  Raises ValueError for a namespace URI that is not in scope.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # Look for an existing declaration, innermost level first.  A prefix
        # that was already seen at an inner position is skipped, since it has
        # been bound again there and no longer refers to this entry.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            # Generated prefixes are numbered in order of first use and are
            # reused for the same URI for the lifetime of this writer.
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        # Fall back to the prefixes declared in the input document and
        # record the one that is used as newly declared.
        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Buffer text content, unless inside an excluded element."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write out the buffered text and any start tag that was held back.

        The buffered chunks are joined and the buffer is emptied.  The text
        is stripped if *strip_text* is set and xml:space="preserve" is not in
        effect.  Text is only written once the root start tag has been seen.
        *_join_text* is not meant to be passed by callers.
        """
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            # A qname aware element was waiting for its text: now that the
            # text is known, write the start tag.
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() has written the resolved name as the text content.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration made in the input document."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an element start event.

        Elements listed in *exclude_tags*, and everything nested in them,
        are only counted, not written.  For a qname aware tag the start tag
        is held back until the following text is flushed.
        """
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        # The same list object is pushed on the stack and handed to _start(),
        # so declarations added by _qname() show up in it.
        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag for *tag* with its namespaces and attributes.

        *new_namespaces* is the innermost level of ``_declared_ns_stack``; it
        receives the declarations that turn out to be needed.  *qname_text*,
        if given, is "prefix:name" text content of the element that is
        written in resolved form right after the start tag.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        # All names that need a prefix decision: the tag, the attribute names
        # and (added below) names found in qname aware text and attributes.
        qnames = {tag, *attrs}
        # Maps "prefix:name" strings from the input to their "{uri}name" form.
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        # (The sort key splits "{uri}local" at the first "}", so names are
        # ordered by URI part first and by local name second.)
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    # Replace a "prefix:name" value by the name as it is
                    # prefixed in the output.
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        # Without the attribute (or with an empty value) the state of the
        # parent element is inherited.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        # Open a fresh level for namespaces declared by child elements.
        self._ns_stack.append([])

    def end(self, tag):
        """Handle an element end event: write the end tag and leave its scope."""
        if self._ignored_depth:
            # Still inside (or just leaving) an excluded element.
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the root element is closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment, if *with_comments* is set.

        A comment before the root element is followed by a newline, one
        after the root element is preceded by a newline.
        """
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction.

        The newline handling before and after the root element is the same
        as for comments.  The *with_comments* option is not consulted here.
        """
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')
2023
2024
2025def _escape_cdata_c14n(text):
2026    # escape character data
2027    try:
2028        # it's worth avoiding do-nothing calls for strings that are
2029        # shorter than 500 character, or so.  assume that's, by far,
2030        # the most common case in most applications.
2031        if '&' in text:
2032            text = text.replace('&', '&amp;')
2033        if '<' in text:
2034            text = text.replace('<', '&lt;')
2035        if '>' in text:
2036            text = text.replace('>', '&gt;')
2037        if '\r' in text:
2038            text = text.replace('\r', '&#xD;')
2039        return text
2040    except (TypeError, AttributeError):
2041        _raise_serialization_error(text)
2042
2043
2044def _escape_attrib_c14n(text):
2045    # escape attribute value
2046    try:
2047        if '&' in text:
2048            text = text.replace('&', '&amp;')
2049        if '<' in text:
2050            text = text.replace('<', '&lt;')
2051        if '"' in text:
2052            text = text.replace('"', '&quot;')
2053        if '\t' in text:
2054            text = text.replace('\t', '&#x9;')
2055        if '\n' in text:
2056            text = text.replace('\n', '&#xA;')
2057        if '\r' in text:
2058            text = text.replace('\r', '&#xD;')
2059        return text
2060    except (TypeError, AttributeError):
2061        _raise_serialization_error(text)
2062
2063
2064# --------------------------------------------------------------------
2065
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests)
    _Element_Py = Element

    # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories
    # (_set_factories is imported by name as well, since a star import
    # normally skips names that start with an underscore.)
    from _elementtree import *
    from _elementtree import _set_factories
except ImportError:
    # No accelerator module available: the pure Python definitions above
    # stay in place.
    pass
else:
    # Pass the Python-level Comment and ProcessingInstruction factories to the
    # C module (presumably so that it creates such nodes through them).
    _set_factories(Comment, ProcessingInstruction)
2080