1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
2# to ensure consistency between the C implementation and the Python
3# implementation.
4#
5# For this purpose, the module-level "ET" symbol is temporarily
6# monkey-patched when running the "test_xml_etree_c" test suite.
7
8import copy
9import functools
10import html
11import io
12import itertools
13import operator
14import os
15import pickle
16import sys
17import textwrap
18import types
19import unittest
20import warnings
21import weakref
22
23from functools import partial
24from itertools import product, islice
25from test import support
26from test.support import os_helper
27from test.support import warnings_helper
28from test.support import findfile, gc_collect, swap_attr, swap_item
29from test.support.import_helper import import_fresh_module
30from test.support.os_helper import TESTFN
31
32
33# pyET is the pure-Python implementation.
34#
35# ET is pyET in test_xml_etree and is the C accelerated version in
36# test_xml_etree_c.
# Both names start out as None; nothing in this block binds them to a module.
pyET = None
ET = None

# Fixture paths are resolved through findfile() in the "xmltestdata" subdir.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Probe only: the encoded bytes are discarded.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Raised at module level, i.e. outside of any individual test.
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
47
# <body> holding two <tag> children and a <section> that wraps a third <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> with a <tag>, a <nexttag> and a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same element layout as SAMPLE_XML but without attributes and with a default
# namespace on <body>.  Note the literal starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling <table> elements bound to different prefixes ("h" and "f").
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Declares an external parameter entity and references the general entity
# &entity;, which the visible internal subset does not declare.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Declares &entity; as an external entity pointing at a file:// URL.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""

# Internal DTD with an ATTLIST default for xml:lang and an internal entity.
ATTLIST_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Foo [
<!ELEMENT foo (bar*)>
<!ELEMENT bar (#PCDATA)*>
<!ATTLIST bar xml:lang CDATA "eng">
<!ENTITY qux "quux">
]>
<foo>
<bar>&qux;</bar>
</foo>
"""
122
def checkwarnings(*filters, quiet=False):
    """Return a decorator that runs the decorated callable under
    ``warnings_helper.check_warnings(*filters, quiet=quiet)``.

    The wrapper copies the wrapped callable's metadata and discards its
    return value.
    """
    def decorator(test):
        @functools.wraps(test)
        def wrapper(*args, **kwargs):
            with warnings_helper.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        return wrapper
    return decorator
131
def convlinesep(data):
    """Return *data* (bytes) with each LF replaced by ``os.linesep``."""
    native_newline = os.linesep.encode()
    return data.replace(b'\n', native_newline)
134
135
class ModuleTest(unittest.TestCase):
    # Module-level smoke tests.

    def test_sanity(self):
        # Import sanity: the imports themselves are the test, the names
        # are intentionally unused.
        from xml.etree import ElementTree, ElementInclude, ElementPath

    def test_all(self):
        # Delegate the __all__ verification of ET to the support helper.
        module_names = ("xml.etree.ElementTree", "_elementtree")
        hidden = ("HTML_EMPTY",)
        support.check__all__(self, ET, module_names, not_exported=hidden)
147
148
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ``ET.ElementTree.write`` into an in-memory file.

    A text buffer is used when *encoding* is ``'unicode'``, a bytes buffer
    otherwise.  Extra *options* are forwarded to ``write()``.  Returns the
    buffer contents when *to_string* is true, else the buffer itself rewound
    to its start.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if to_string:
        return buffer.getvalue()
    buffer.seek(0)
    return buffer
161
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of every item in *seq*."""
    tags = [node.tag for node in seq]
    return tags
164
165
class ElementTestCase:
    # Helper mix-in: the assert* methods it calls must be supplied by the
    # class it is combined with.

    @classmethod
    def setUpClass(cls):
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        # Pickle *obj* with sys.modules[name] swapped to *dumper*, then
        # unpickle it with sys.modules[name] swapped to *loader*.
        try:
            with swap_item(sys.modules, name, dumper):
                pickled = pickle.dumps(obj, proto)
            with swap_item(sys.modules, name, loader):
                restored = pickle.loads(pickled)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            details = (obj,
                       labels.get(dumper, dumper),
                       labels.get(loader, loader))
            raise support.TestFailed(
                "Failed to round-trip %r from %r to %r" % details) from pe
        return restored

    def assertEqualElements(self, alice, bob):
        # Recursively compare two elements: same number of children, equal
        # children pairwise, and equal tag/tail/text/attrib.
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        snapshot = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(snapshot(alice), snapshot(bob))
194
195# --------------------------------------------------------------------
196# element tree tests
197
198class ElementTreeTest(unittest.TestCase):
199
200    def serialize_check(self, elem, expected):
201        self.assertEqual(serialize(elem), expected)
202
203    def test_interface(self):
204        # Test element tree interface.
205
206        def check_element(element):
207            self.assertTrue(ET.iselement(element), msg="not an element")
208            direlem = dir(element)
209            for attr in 'tag', 'attrib', 'text', 'tail':
210                self.assertTrue(hasattr(element, attr),
211                        msg='no %s member' % attr)
212                self.assertIn(attr, direlem,
213                        msg='no %s visible by dir' % attr)
214
215            self.assertIsInstance(element.tag, str)
216            self.assertIsInstance(element.attrib, dict)
217            if element.text is not None:
218                self.assertIsInstance(element.text, str)
219            if element.tail is not None:
220                self.assertIsInstance(element.tail, str)
221            for elem in element:
222                check_element(elem)
223
224        element = ET.Element("tag")
225        check_element(element)
226        tree = ET.ElementTree(element)
227        check_element(tree.getroot())
228        element = ET.Element("t\xe4g", key="value")
229        tree = ET.ElementTree(element)
230        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
231        element = ET.Element("tag", key="value")
232
233        # Make sure all standard element methods exist.
234
235        def check_method(method):
236            self.assertTrue(hasattr(method, '__call__'),
237                    msg="%s not callable" % method)
238
239        check_method(element.append)
240        check_method(element.extend)
241        check_method(element.insert)
242        check_method(element.remove)
243        check_method(element.find)
244        check_method(element.iterfind)
245        check_method(element.findall)
246        check_method(element.findtext)
247        check_method(element.clear)
248        check_method(element.get)
249        check_method(element.set)
250        check_method(element.keys)
251        check_method(element.items)
252        check_method(element.iter)
253        check_method(element.itertext)
254
255        # These methods return an iterable. See bug 6472.
256
257        def check_iter(it):
258            check_method(it.__next__)
259
260        check_iter(element.iterfind("tag"))
261        check_iter(element.iterfind("*"))
262        check_iter(tree.iterfind("tag"))
263        check_iter(tree.iterfind("*"))
264
265        # These aliases are provided:
266
267        self.assertEqual(ET.XML, ET.fromstring)
268        self.assertEqual(ET.PI, ET.ProcessingInstruction)
269
270    def test_set_attribute(self):
271        element = ET.Element('tag')
272
273        self.assertEqual(element.tag, 'tag')
274        element.tag = 'Tag'
275        self.assertEqual(element.tag, 'Tag')
276        element.tag = 'TAG'
277        self.assertEqual(element.tag, 'TAG')
278
279        self.assertIsNone(element.text)
280        element.text = 'Text'
281        self.assertEqual(element.text, 'Text')
282        element.text = 'TEXT'
283        self.assertEqual(element.text, 'TEXT')
284
285        self.assertIsNone(element.tail)
286        element.tail = 'Tail'
287        self.assertEqual(element.tail, 'Tail')
288        element.tail = 'TAIL'
289        self.assertEqual(element.tail, 'TAIL')
290
291        self.assertEqual(element.attrib, {})
292        element.attrib = {'a': 'b', 'c': 'd'}
293        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
294        element.attrib = {'A': 'B', 'C': 'D'}
295        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
296
297    def test_simpleops(self):
298        # Basic method sanity checks.
299
300        elem = ET.XML("<body><tag/></body>")
301        self.serialize_check(elem, '<body><tag /></body>')
302        e = ET.Element("tag2")
303        elem.append(e)
304        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
305        elem.remove(e)
306        self.serialize_check(elem, '<body><tag /></body>')
307        elem.insert(0, e)
308        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
309        elem.remove(e)
310        elem.extend([e])
311        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
312        elem.remove(e)
313        elem.extend(iter([e]))
314        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
315        elem.remove(e)
316
317        element = ET.Element("tag", key="value")
318        self.serialize_check(element, '<tag key="value" />') # 1
319        subelement = ET.Element("subtag")
320        element.append(subelement)
321        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
322        element.insert(0, subelement)
323        self.serialize_check(element,
324                '<tag key="value"><subtag /><subtag /></tag>') # 3
325        element.remove(subelement)
326        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
327        element.remove(subelement)
328        self.serialize_check(element, '<tag key="value" />') # 5
329        with self.assertRaises(ValueError) as cm:
330            element.remove(subelement)
331        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
332        self.serialize_check(element, '<tag key="value" />') # 6
333        element[0:0] = [subelement, subelement, subelement]
334        self.serialize_check(element[1], '<subtag />')
335        self.assertEqual(element[1:9], [element[1], element[2]])
336        self.assertEqual(element[:9:2], [element[0], element[2]])
337        del element[1:2]
338        self.serialize_check(element,
339                '<tag key="value"><subtag /><subtag /></tag>')
340
341    def test_cdata(self):
342        # Test CDATA handling (etc).
343
344        self.serialize_check(ET.XML("<tag>hello</tag>"),
345                '<tag>hello</tag>')
346        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
347                '<tag>hello</tag>')
348        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
349                '<tag>hello</tag>')
350
351    def test_file_init(self):
352        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
353        tree = ET.ElementTree(file=stringfile)
354        self.assertEqual(tree.find("tag").tag, 'tag')
355        self.assertEqual(tree.find("section/tag").tag, 'tag')
356
357        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
358        self.assertEqual(tree.find("element").tag, 'element')
359        self.assertEqual(tree.find("element/../empty-element").tag,
360                'empty-element')
361
362    def test_path_cache(self):
363        # Check that the path cache behaves sanely.
364
365        from xml.etree import ElementPath
366
367        elem = ET.XML(SAMPLE_XML)
368        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
369        cache_len_10 = len(ElementPath._cache)
370        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
371        self.assertEqual(len(ElementPath._cache), cache_len_10)
372        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
373        self.assertGreater(len(ElementPath._cache), cache_len_10)
374        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
375        self.assertLess(len(ElementPath._cache), 500)
376
377    def test_copy(self):
378        # Test copy handling (etc).
379
380        import copy
381        e1 = ET.XML("<tag>hello<foo/></tag>")
382        e2 = copy.copy(e1)
383        e3 = copy.deepcopy(e1)
384        e1.find("foo").tag = "bar"
385        self.serialize_check(e1, '<tag>hello<bar /></tag>')
386        self.serialize_check(e2, '<tag>hello<bar /></tag>')
387        self.serialize_check(e3, '<tag>hello<foo /></tag>')
388
389    def test_attrib(self):
390        # Test attribute handling.
391
392        elem = ET.Element("tag")
393        elem.get("key") # 1.1
394        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
395
396        elem.set("key", "value")
397        self.assertEqual(elem.get("key"), 'value') # 1.3
398
399        elem = ET.Element("tag", key="value")
400        self.assertEqual(elem.get("key"), 'value') # 2.1
401        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
402
403        attrib = {"key": "value"}
404        elem = ET.Element("tag", attrib)
405        attrib.clear() # check for aliasing issues
406        self.assertEqual(elem.get("key"), 'value') # 3.1
407        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
408
409        attrib = {"key": "value"}
410        elem = ET.Element("tag", **attrib)
411        attrib.clear() # check for aliasing issues
412        self.assertEqual(elem.get("key"), 'value') # 4.1
413        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
414
415        elem = ET.Element("tag", {"key": "other"}, key="value")
416        self.assertEqual(elem.get("key"), 'value') # 5.1
417        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
418
419        elem = ET.Element('test')
420        elem.text = "aa"
421        elem.set('testa', 'testval')
422        elem.set('testb', 'test2')
423        self.assertEqual(ET.tostring(elem),
424                b'<test testa="testval" testb="test2">aa</test>')
425        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
426        self.assertEqual(sorted(elem.items()),
427                [('testa', 'testval'), ('testb', 'test2')])
428        self.assertEqual(elem.attrib['testb'], 'test2')
429        elem.attrib['testb'] = 'test1'
430        elem.attrib['testc'] = 'test2'
431        self.assertEqual(ET.tostring(elem),
432                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
433
434        # Test preserving white space chars in attributes
435        elem = ET.Element('test')
436        elem.set('a', '\r')
437        elem.set('b', '\r\n')
438        elem.set('c', '\t\n\r ')
439        elem.set('d', '\n\n\r\r\t\t  ')
440        self.assertEqual(ET.tostring(elem),
441                b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09;  " />')
442
443    def test_makeelement(self):
444        # Test makeelement handling.
445
446        elem = ET.Element("tag")
447        attrib = {"key": "value"}
448        subelem = elem.makeelement("subtag", attrib)
449        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
450        elem.append(subelem)
451        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
452
453        elem.clear()
454        self.serialize_check(elem, '<tag />')
455        elem.append(subelem)
456        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
457        elem.extend([subelem, subelem])
458        self.serialize_check(elem,
459            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
460        elem[:] = [subelem]
461        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
462        elem[:] = tuple([subelem])
463        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
464
465    def test_parsefile(self):
466        # Test parsing from file.
467
468        tree = ET.parse(SIMPLE_XMLFILE)
469        stream = io.StringIO()
470        tree.write(stream, encoding='unicode')
471        self.assertEqual(stream.getvalue(),
472                '<root>\n'
473                '   <element key="value">text</element>\n'
474                '   <element>text</element>tail\n'
475                '   <empty-element />\n'
476                '</root>')
477        tree = ET.parse(SIMPLE_NS_XMLFILE)
478        stream = io.StringIO()
479        tree.write(stream, encoding='unicode')
480        self.assertEqual(stream.getvalue(),
481                '<ns0:root xmlns:ns0="namespace">\n'
482                '   <ns0:element key="value">text</ns0:element>\n'
483                '   <ns0:element>text</ns0:element>tail\n'
484                '   <ns0:empty-element />\n'
485                '</ns0:root>')
486
487        with open(SIMPLE_XMLFILE) as f:
488            data = f.read()
489
490        parser = ET.XMLParser()
491        self.assertRegex(parser.version, r'^Expat ')
492        parser.feed(data)
493        self.serialize_check(parser.close(),
494                '<root>\n'
495                '   <element key="value">text</element>\n'
496                '   <element>text</element>tail\n'
497                '   <empty-element />\n'
498                '</root>')
499
500        target = ET.TreeBuilder()
501        parser = ET.XMLParser(target=target)
502        parser.feed(data)
503        self.serialize_check(parser.close(),
504                '<root>\n'
505                '   <element key="value">text</element>\n'
506                '   <element>text</element>tail\n'
507                '   <empty-element />\n'
508                '</root>')
509
510    def test_parseliteral(self):
511        element = ET.XML("<html><body>text</body></html>")
512        self.assertEqual(ET.tostring(element, encoding='unicode'),
513                '<html><body>text</body></html>')
514        element = ET.fromstring("<html><body>text</body></html>")
515        self.assertEqual(ET.tostring(element, encoding='unicode'),
516                '<html><body>text</body></html>')
517        sequence = ["<html><body>", "text</bo", "dy></html>"]
518        element = ET.fromstringlist(sequence)
519        self.assertEqual(ET.tostring(element),
520                b'<html><body>text</body></html>')
521        self.assertEqual(b"".join(ET.tostringlist(element)),
522                b'<html><body>text</body></html>')
523        self.assertEqual(ET.tostring(element, "ascii"),
524                b"<?xml version='1.0' encoding='ascii'?>\n"
525                b"<html><body>text</body></html>")
526        _, ids = ET.XMLID("<html><body>text</body></html>")
527        self.assertEqual(len(ids), 0)
528        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
529        self.assertEqual(len(ids), 1)
530        self.assertEqual(ids["body"].tag, 'body')
531
    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        # No events argument: per the expectations below only 'end' events
        # are produced, and the iterator exposes the root afterwards.
        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        self.assertEqual(context.root.tag, 'root')

        # Namespaced document: tags are reported in '{uri}local' form.
        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', '{namespace}element'),
                ('end', '{namespace}element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])

        # An empty events tuple, passed positionally, yields nothing.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        # Same, passed by keyword.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        # For namespace events the second item is not an element, so it is
        # compared as-is: a (prefix, uri) pair for 'start-ns', None for
        # 'end-ns'.
        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                             else (action, elem)
                          for action, elem in context], [
                ('start-ns', ('', 'namespace')),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
                ('end-ns', None),
            ])

        # An empty default-namespace declaration still produces both events.
        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        res = [action for action, elem in context]
        self.assertEqual(res, ['start-ns', 'end-ns'])

        # An unknown event name is rejected by the iterparse() call itself;
        # a file object supplied by the caller must be left open.
        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as cm:
                iterparse(f, events)
            self.assertFalse(f.closed)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")

        # Same failure when a filename is passed: no ResourceWarning may be
        # emitted.  NOTE(review): 'del cm' presumably drops the saved
        # exception before the no-warning check exits -- confirm.
        with warnings_helper.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as cm:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
            del cm

        # ISO-8859-1 input with a character reference in the default
        # namespace URI and a non-ASCII byte in a prefix name.
        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
            ])

        # Trailing junk: the element is delivered first, the ParseError
        # surfaces on the following next() call.
        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')

        # Same scenario through a real file opened by iterparse(); the
        # parse error must not produce a ResourceWarning.
        self.addCleanup(os_helper.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with warnings_helper.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as cm:
                next(it)
            self.assertEqual(str(cm.exception),
                    'junk after document element: line 1, column 12')
            del cm, it

        # Not exhausting the iterator still closes the resource (bpo-43292)
        with warnings_helper.check_no_resource_warning(self):
            it = iterparse(TESTFN)
            del it

        # A missing file is reported by the iterparse() call itself.
        with self.assertRaises(FileNotFoundError):
            iterparse("nonexistent")
651
652    def test_writefile(self):
653        elem = ET.Element("tag")
654        elem.text = "text"
655        self.serialize_check(elem, '<tag>text</tag>')
656        ET.SubElement(elem, "subtag").text = "subtext"
657        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
658
659        # Test tag suppression
660        elem.tag = None
661        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
662        elem.insert(0, ET.Comment("comment"))
663        self.serialize_check(elem,
664                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
665
666        elem[0] = ET.PI("key", "value")
667        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
668
669    def test_custom_builder(self):
670        # Test parser w. custom builder.
671
672        with open(SIMPLE_XMLFILE) as f:
673            data = f.read()
674        class Builder(list):
675            def start(self, tag, attrib):
676                self.append(("start", tag))
677            def end(self, tag):
678                self.append(("end", tag))
679            def data(self, text):
680                pass
681        builder = Builder()
682        parser = ET.XMLParser(target=builder)
683        parser.feed(data)
684        self.assertEqual(builder, [
685                ('start', 'root'),
686                ('start', 'element'),
687                ('end', 'element'),
688                ('start', 'element'),
689                ('end', 'element'),
690                ('start', 'empty-element'),
691                ('end', 'empty-element'),
692                ('end', 'root'),
693            ])
694
695        with open(SIMPLE_NS_XMLFILE) as f:
696            data = f.read()
697        class Builder(list):
698            def start(self, tag, attrib):
699                self.append(("start", tag))
700            def end(self, tag):
701                self.append(("end", tag))
702            def data(self, text):
703                pass
704            def pi(self, target, data):
705                self.append(("pi", target, data))
706            def comment(self, data):
707                self.append(("comment", data))
708            def start_ns(self, prefix, uri):
709                self.append(("start-ns", prefix, uri))
710            def end_ns(self, prefix):
711                self.append(("end-ns", prefix))
712        builder = Builder()
713        parser = ET.XMLParser(target=builder)
714        parser.feed(data)
715        self.assertEqual(builder, [
716                ('pi', 'pi', 'data'),
717                ('comment', ' comment '),
718                ('start-ns', '', 'namespace'),
719                ('start', '{namespace}root'),
720                ('start', '{namespace}element'),
721                ('end', '{namespace}element'),
722                ('start', '{namespace}element'),
723                ('end', '{namespace}element'),
724                ('start', '{namespace}empty-element'),
725                ('end', '{namespace}empty-element'),
726                ('end', '{namespace}root'),
727                ('end-ns', ''),
728            ])
729
730    def test_custom_builder_only_end_ns(self):
731        class Builder(list):
732            def end_ns(self, prefix):
733                self.append(("end-ns", prefix))
734
735        builder = Builder()
736        parser = ET.XMLParser(target=builder)
737        parser.feed(textwrap.dedent("""\
738            <?pi data?>
739            <!-- comment -->
740            <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
741               <a:element key='value'>text</a:element>
742               <p:element>text</p:element>tail
743               <empty-element/>
744            </root>
745            """))
746        self.assertEqual(builder, [
747                ('end-ns', 'a'),
748                ('end-ns', 'p'),
749                ('end-ns', ''),
750            ])
751
752    def test_initialize_parser_without_target(self):
753        # Explicit None
754        parser = ET.XMLParser(target=None)
755        self.assertIsInstance(parser.target, ET.TreeBuilder)
756
757        # Implicit None
758        parser2 = ET.XMLParser()
759        self.assertIsInstance(parser2.target, ET.TreeBuilder)
760
761    def test_children(self):
762        # Test Element children iteration
763
764        with open(SIMPLE_XMLFILE, "rb") as f:
765            tree = ET.parse(f)
766        self.assertEqual([summarize_list(elem)
767                          for elem in tree.getroot().iter()], [
768                ['element', 'element', 'empty-element'],
769                [],
770                [],
771                [],
772            ])
773        self.assertEqual([summarize_list(elem)
774                          for elem in tree.iter()], [
775                ['element', 'element', 'empty-element'],
776                [],
777                [],
778                [],
779            ])
780
781        elem = ET.XML(SAMPLE_XML)
782        self.assertEqual(len(list(elem)), 3)
783        self.assertEqual(len(list(elem[2])), 1)
784        self.assertEqual(elem[:], list(elem))
785        child1 = elem[0]
786        child2 = elem[2]
787        del elem[1:2]
788        self.assertEqual(len(list(elem)), 2)
789        self.assertEqual(child1, elem[0])
790        self.assertEqual(child2, elem[1])
791        elem[0:2] = [child2, child1]
792        self.assertEqual(child2, elem[0])
793        self.assertEqual(child1, elem[1])
794        self.assertNotEqual(child1, elem[0])
795        elem.clear()
796        self.assertEqual(list(elem), [])
797
798    def test_writestring(self):
799        elem = ET.XML("<html><body>text</body></html>")
800        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
801        elem = ET.fromstring("<html><body>text</body></html>")
802        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
803
804    def test_indent(self):
805        elem = ET.XML("<root></root>")
806        ET.indent(elem)
807        self.assertEqual(ET.tostring(elem), b'<root />')
808
809        elem = ET.XML("<html><body>text</body></html>")
810        ET.indent(elem)
811        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
812
813        elem = ET.XML("<html> <body>text</body>  </html>")
814        ET.indent(elem)
815        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
816
817        elem = ET.XML("<html><body>text</body>tail</html>")
818        ET.indent(elem)
819        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')
820
821        elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
822        ET.indent(elem)
823        self.assertEqual(
824            ET.tostring(elem),
825            b'<html>\n'
826            b'  <body>\n'
827            b'    <p>par</p>\n'
828            b'    <p>text</p>\n'
829            b'    <p>\n'
830            b'      <br />\n'
831            b'    </p>\n'
832            b'  </body>\n'
833            b'</html>'
834        )
835
836        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
837        ET.indent(elem)
838        self.assertEqual(
839            ET.tostring(elem),
840            b'<html>\n'
841            b'  <body>\n'
842            b'    <p>pre<br />post</p>\n'
843            b'    <p>text</p>\n'
844            b'  </body>\n'
845            b'</html>'
846        )
847
848    def test_indent_space(self):
849        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
850        ET.indent(elem, space='\t')
851        self.assertEqual(
852            ET.tostring(elem),
853            b'<html>\n'
854            b'\t<body>\n'
855            b'\t\t<p>pre<br />post</p>\n'
856            b'\t\t<p>text</p>\n'
857            b'\t</body>\n'
858            b'</html>'
859        )
860
861        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
862        ET.indent(elem, space='')
863        self.assertEqual(
864            ET.tostring(elem),
865            b'<html>\n'
866            b'<body>\n'
867            b'<p>pre<br />post</p>\n'
868            b'<p>text</p>\n'
869            b'</body>\n'
870            b'</html>'
871        )
872
873    def test_indent_space_caching(self):
874        elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
875        ET.indent(elem)
876        self.assertEqual(
877            {el.tail for el in elem.iter()},
878            {None, "\n", "\n  ", "\n    "}
879        )
880        self.assertEqual(
881            {el.text for el in elem.iter()},
882            {None, "\n  ", "\n    ", "\n      ", "par", "text"}
883        )
884        self.assertEqual(
885            len({el.tail for el in elem.iter()}),
886            len({id(el.tail) for el in elem.iter()}),
887        )
888
889    def test_indent_level(self):
890        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
891        with self.assertRaises(ValueError):
892            ET.indent(elem, level=-1)
893        self.assertEqual(
894            ET.tostring(elem),
895            b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
896        )
897
898        ET.indent(elem, level=2)
899        self.assertEqual(
900            ET.tostring(elem),
901            b'<html>\n'
902            b'      <body>\n'
903            b'        <p>pre<br />post</p>\n'
904            b'        <p>text</p>\n'
905            b'      </body>\n'
906            b'    </html>'
907        )
908
909        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
910        ET.indent(elem, level=1, space=' ')
911        self.assertEqual(
912            ET.tostring(elem),
913            b'<html>\n'
914            b'  <body>\n'
915            b'   <p>pre<br />post</p>\n'
916            b'   <p>text</p>\n'
917            b'  </body>\n'
918            b' </html>'
919        )
920
921    def test_tostring_default_namespace(self):
922        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
923        self.assertEqual(
924            ET.tostring(elem, encoding='unicode'),
925            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
926        )
927        self.assertEqual(
928            ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'),
929            '<body xmlns="http://effbot.org/ns"><tag /></body>'
930        )
931
932    def test_tostring_default_namespace_different_namespace(self):
933        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
934        self.assertEqual(
935            ET.tostring(elem, encoding='unicode', default_namespace='foobar'),
936            '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
937        )
938
939    def test_tostring_default_namespace_original_no_namespace(self):
940        elem = ET.XML('<body><tag/></body>')
941        EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$'
942        with self.assertRaisesRegex(ValueError, EXPECTED_MSG):
943            ET.tostring(elem, encoding='unicode', default_namespace='foobar')
944
945    def test_tostring_no_xml_declaration(self):
946        elem = ET.XML('<body><tag/></body>')
947        self.assertEqual(
948            ET.tostring(elem, encoding='unicode'),
949            '<body><tag /></body>'
950        )
951
952    def test_tostring_xml_declaration(self):
953        elem = ET.XML('<body><tag/></body>')
954        self.assertEqual(
955            ET.tostring(elem, encoding='utf8', xml_declaration=True),
956            b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
957        )
958
959    def test_tostring_xml_declaration_unicode_encoding(self):
960        elem = ET.XML('<body><tag/></body>')
961        self.assertEqual(
962            ET.tostring(elem, encoding='unicode', xml_declaration=True),
963            "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
964        )
965
966    def test_tostring_xml_declaration_cases(self):
967        elem = ET.XML('<body><tag>ø</tag></body>')
968        TESTCASES = [
969        #   (expected_retval,                  encoding, xml_declaration)
970            # ... xml_declaration = None
971            (b'<body><tag>&#248;</tag></body>', None, None),
972            (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
973            (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
974            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
975             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
976            ('<body><tag>ø</tag></body>', 'unicode', None),
977
978            # ... xml_declaration = False
979            (b"<body><tag>&#248;</tag></body>", None, False),
980            (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
981            (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
982            (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
983            ("<body><tag>ø</tag></body>", 'unicode', False),
984
985            # ... xml_declaration = True
986            (b"<?xml version='1.0' encoding='us-ascii'?>\n"
987             b"<body><tag>&#248;</tag></body>", None, True),
988            (b"<?xml version='1.0' encoding='UTF-8'?>\n"
989             b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
990            (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
991             b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
992            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
993             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
994            ("<?xml version='1.0' encoding='utf-8'?>\n"
995             "<body><tag>ø</tag></body>", 'unicode', True),
996
997        ]
998        for expected_retval, encoding, xml_declaration in TESTCASES:
999            with self.subTest(f'encoding={encoding} '
1000                              f'xml_declaration={xml_declaration}'):
1001                self.assertEqual(
1002                    ET.tostring(
1003                        elem,
1004                        encoding=encoding,
1005                        xml_declaration=xml_declaration
1006                    ),
1007                    expected_retval
1008                )
1009
1010    def test_tostringlist_default_namespace(self):
1011        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1012        self.assertEqual(
1013            ''.join(ET.tostringlist(elem, encoding='unicode')),
1014            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
1015        )
1016        self.assertEqual(
1017            ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')),
1018            '<body xmlns="http://effbot.org/ns"><tag /></body>'
1019        )
1020
1021    def test_tostringlist_xml_declaration(self):
1022        elem = ET.XML('<body><tag/></body>')
1023        self.assertEqual(
1024            ''.join(ET.tostringlist(elem, encoding='unicode')),
1025            '<body><tag /></body>'
1026        )
1027        self.assertEqual(
1028            b''.join(ET.tostringlist(elem, xml_declaration=True)),
1029            b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
1030        )
1031
1032        stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
1033        self.assertEqual(
1034            ''.join(stringlist),
1035            "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
1036        )
1037        self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
1038        self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
1039
1040    def test_encoding(self):
1041        def check(encoding, body=''):
1042            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
1043                   (encoding, body))
1044            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
1045            self.assertEqual(ET.XML(xml).text, body)
1046        check("ascii", 'a')
1047        check("us-ascii", 'a')
1048        check("iso-8859-1", '\xbd')
1049        check("iso-8859-15", '\u20ac')
1050        check("cp437", '\u221a')
1051        check("mac-roman", '\u02da')
1052
1053        def xml(encoding):
1054            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
1055        def bxml(encoding):
1056            return xml(encoding).encode(encoding)
1057        supported_encodings = [
1058            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
1059            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
1060            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
1061            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
1062            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
1063            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
1064            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
1065            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1066            'cp1256', 'cp1257', 'cp1258',
1067            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
1068            'mac-roman', 'mac-turkish',
1069            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
1070            'iso2022-jp-3', 'iso2022-jp-ext',
1071            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
1072            'hz', 'ptcp154',
1073        ]
1074        for encoding in supported_encodings:
1075            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
1076
1077        unsupported_ascii_compatible_encodings = [
1078            'big5', 'big5hkscs',
1079            'cp932', 'cp949', 'cp950',
1080            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
1081            'gb2312', 'gbk', 'gb18030',
1082            'iso2022-kr', 'johab',
1083            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
1084            'utf-7',
1085        ]
1086        for encoding in unsupported_ascii_compatible_encodings:
1087            self.assertRaises(ValueError, ET.XML, bxml(encoding))
1088
1089        unsupported_ascii_incompatible_encodings = [
1090            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
1091            'utf_32', 'utf_32_be', 'utf_32_le',
1092        ]
1093        for encoding in unsupported_ascii_incompatible_encodings:
1094            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
1095
1096        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
1097        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
1098
1099    def test_methods(self):
1100        # Test serialization methods.
1101
1102        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
1103        e.tail = "\n"
1104        self.assertEqual(serialize(e),
1105                '<html><link /><script>1 &lt; 2</script></html>\n')
1106        self.assertEqual(serialize(e, method=None),
1107                '<html><link /><script>1 &lt; 2</script></html>\n')
1108        self.assertEqual(serialize(e, method="xml"),
1109                '<html><link /><script>1 &lt; 2</script></html>\n')
1110        self.assertEqual(serialize(e, method="html"),
1111                '<html><link><script>1 < 2</script></html>\n')
1112        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
1113
1114    def test_issue18347(self):
1115        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
1116        self.assertEqual(serialize(e),
1117                '<html><CamelCase>text</CamelCase></html>')
1118        self.assertEqual(serialize(e, method="html"),
1119                '<html><CamelCase>text</CamelCase></html>')
1120
1121    def test_entity(self):
1122        # Test entity handling.
1123
1124        # 1) good entities
1125
1126        e = ET.XML("<document title='&#x8230;'>test</document>")
1127        self.assertEqual(serialize(e, encoding="us-ascii"),
1128                b'<document title="&#33328;">test</document>')
1129        self.serialize_check(e, '<document title="\u8230">test</document>')
1130
1131        # 2) bad entities
1132
1133        with self.assertRaises(ET.ParseError) as cm:
1134            ET.XML("<document>&entity;</document>")
1135        self.assertEqual(str(cm.exception),
1136                'undefined entity: line 1, column 10')
1137
1138        with self.assertRaises(ET.ParseError) as cm:
1139            ET.XML(ENTITY_XML)
1140        self.assertEqual(str(cm.exception),
1141                'undefined entity &entity;: line 5, column 10')
1142
1143        # 3) custom entity
1144
1145        parser = ET.XMLParser()
1146        parser.entity["entity"] = "text"
1147        parser.feed(ENTITY_XML)
1148        root = parser.close()
1149        self.serialize_check(root, '<document>text</document>')
1150
1151        # 4) external (SYSTEM) entity
1152
1153        with self.assertRaises(ET.ParseError) as cm:
1154            ET.XML(EXTERNAL_ENTITY_XML)
1155        self.assertEqual(str(cm.exception),
1156                'undefined entity &entity;: line 4, column 10')
1157
1158    def test_namespace(self):
1159        # Test namespace issues.
1160
1161        # 1) xml namespace
1162
1163        elem = ET.XML("<tag xml:lang='en' />")
1164        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
1165
1166        # 2) other "well-known" namespaces
1167
1168        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
1169        self.serialize_check(elem,
1170            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
1171
1172        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
1173        self.serialize_check(elem,
1174            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
1175
1176        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
1177        self.serialize_check(elem,
1178            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
1179
1180        # 3) unknown namespaces
1181        elem = ET.XML(SAMPLE_XML_NS)
1182        self.serialize_check(elem,
1183            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
1184            '  <ns0:tag>text</ns0:tag>\n'
1185            '  <ns0:tag />\n'
1186            '  <ns0:section>\n'
1187            '    <ns0:tag>subtext</ns0:tag>\n'
1188            '  </ns0:section>\n'
1189            '</ns0:body>')
1190
1191    def test_qname(self):
1192        # Test QName handling.
1193
1194        # 1) decorated tags
1195
1196        elem = ET.Element("{uri}tag")
1197        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
1198        elem = ET.Element(ET.QName("{uri}tag"))
1199        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
1200        elem = ET.Element(ET.QName("uri", "tag"))
1201        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
1202        elem = ET.Element(ET.QName("uri", "tag"))
1203        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
1204        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
1205        self.serialize_check(elem,
1206            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
1207
1208        # 2) decorated attributes
1209
1210        elem.clear()
1211        elem.attrib["{uri}key"] = "value"
1212        self.serialize_check(elem,
1213            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
1214
1215        elem.clear()
1216        elem.attrib[ET.QName("{uri}key")] = "value"
1217        self.serialize_check(elem,
1218            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
1219
1220        # 3) decorated values are not converted by default, but the
1221        # QName wrapper can be used for values
1222
1223        elem.clear()
1224        elem.attrib["{uri}key"] = "{uri}value"
1225        self.serialize_check(elem,
1226            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
1227
1228        elem.clear()
1229        elem.attrib["{uri}key"] = ET.QName("{uri}value")
1230        self.serialize_check(elem,
1231            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
1232
1233        elem.clear()
1234        subelem = ET.Element("tag")
1235        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
1236        elem.append(subelem)
1237        elem.append(subelem)
1238        self.serialize_check(elem,
1239            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
1240            '<tag ns1:key="ns2:value" />'
1241            '<tag ns1:key="ns2:value" />'
1242            '</ns0:tag>') # 3.3
1243
1244        # 4) Direct QName tests
1245
1246        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
1247        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
1248        q1 = ET.QName('ns', 'tag')
1249        q2 = ET.QName('ns', 'tag')
1250        self.assertEqual(q1, q2)
1251        q2 = ET.QName('ns', 'other-tag')
1252        self.assertNotEqual(q1, q2)
1253        self.assertNotEqual(q1, 'ns:tag')
1254        self.assertEqual(q1, '{ns}tag')
1255
1256    def test_doctype_public(self):
1257        # Test PUBLIC doctype.
1258
1259        elem = ET.XML('<!DOCTYPE html PUBLIC'
1260                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1261                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1262                '<html>text</html>')
1263
1264    def test_xpath_tokenizer(self):
1265        # Test the XPath tokenizer.
1266        from xml.etree import ElementPath
1267        def check(p, expected, namespaces=None):
1268            self.assertEqual([op or tag
1269                              for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
1270                             expected)
1271
1272        # tests from the xml specification
1273        check("*", ['*'])
1274        check("text()", ['text', '()'])
1275        check("@name", ['@', 'name'])
1276        check("@*", ['@', '*'])
1277        check("para[1]", ['para', '[', '1', ']'])
1278        check("para[last()]", ['para', '[', 'last', '()', ']'])
1279        check("*/para", ['*', '/', 'para'])
1280        check("/doc/chapter[5]/section[2]",
1281              ['/', 'doc', '/', 'chapter', '[', '5', ']',
1282               '/', 'section', '[', '2', ']'])
1283        check("chapter//para", ['chapter', '//', 'para'])
1284        check("//para", ['//', 'para'])
1285        check("//olist/item", ['//', 'olist', '/', 'item'])
1286        check(".", ['.'])
1287        check(".//para", ['.', '//', 'para'])
1288        check("..", ['..'])
1289        check("../@lang", ['..', '/', '@', 'lang'])
1290        check("chapter[title]", ['chapter', '[', 'title', ']'])
1291        check("employee[@secretary and @assistant]", ['employee',
1292              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
1293
1294        # additional tests
1295        check("@{ns}attr", ['@', '{ns}attr'])
1296        check("{http://spam}egg", ['{http://spam}egg'])
1297        check("./spam.egg", ['.', '/', 'spam.egg'])
1298        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
1299
1300        # wildcard tags
1301        check("{ns}*", ['{ns}*'])
1302        check("{}*", ['{}*'])
1303        check("{*}tag", ['{*}tag'])
1304        check("{*}*", ['{*}*'])
1305        check(".//{*}tag", ['.', '//', '{*}tag'])
1306
1307        # namespace prefix resolution
1308        check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
1309              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1310        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
1311              {'': 'http://www.w3.org/2001/XMLSchema'})
1312        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
1313              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1314        check("@type", ['@', 'type'],
1315              {'': 'http://www.w3.org/2001/XMLSchema'})
1316        check("@{*}type", ['@', '{*}type'],
1317              {'': 'http://www.w3.org/2001/XMLSchema'})
1318        check("@{ns}attr", ['@', '{ns}attr'],
1319              {'': 'http://www.w3.org/2001/XMLSchema',
1320               'ns': 'http://www.w3.org/2001/XMLSchema'})
1321
1322    def test_processinginstruction(self):
1323        # Test ProcessingInstruction directly
1324
1325        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1326                b'<?test instruction?>')
1327        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1328                b'<?test instruction?>')
1329
1330        # Issue #2746
1331
1332        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1333                b'<?test <testing&>?>')
1334        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1335                b"<?xml version='1.0' encoding='latin-1'?>\n"
1336                b"<?test <testing&>\xe3?>")
1337
1338    def test_html_empty_elems_serialization(self):
1339        # issue 15970
1340        # from http://www.w3.org/TR/html401/index/elements.html
1341        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME',
1342                        'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM',
1343                        'SOURCE', 'TRACK', 'WBR']:
1344            for elem in [element, element.lower()]:
1345                expected = '<%s>' % elem
1346                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1347                self.assertEqual(serialized, expected)
1348                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1349                                       method='html')
1350                self.assertEqual(serialized, expected)
1351
1352    def test_dump_attribute_order(self):
1353        # See BPO 34160
1354        e = ET.Element('cirriculum', status='public', company='example')
1355        with support.captured_stdout() as stdout:
1356            ET.dump(e)
1357        self.assertEqual(stdout.getvalue(),
1358                         '<cirriculum status="public" company="example" />\n')
1359
1360    def test_tree_write_attribute_order(self):
1361        # See BPO 34160
1362        root = ET.Element('cirriculum', status='public', company='example')
1363        self.assertEqual(serialize(root),
1364                         '<cirriculum status="public" company="example" />')
1365        self.assertEqual(serialize(root, method='html'),
1366                '<cirriculum status="public" company="example"></cirriculum>')
1367
1368    def test_attlist_default(self):
1369        # Test default attribute values; See BPO 42151.
1370        root = ET.fromstring(ATTLIST_XML)
1371        self.assertEqual(root[0].attrib,
1372                         {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
1373
1374
1375class XMLPullParserTest(unittest.TestCase):
1376
1377    def _feed(self, parser, data, chunk_size=None):
1378        if chunk_size is None:
1379            parser.feed(data)
1380        else:
1381            for i in range(0, len(data), chunk_size):
1382                parser.feed(data[i:i+chunk_size])
1383
1384    def assert_events(self, parser, expected, max_events=None):
1385        self.assertEqual(
1386            [(event, (elem.tag, elem.text))
1387             for event, elem in islice(parser.read_events(), max_events)],
1388            expected)
1389
1390    def assert_event_tuples(self, parser, expected, max_events=None):
1391        self.assertEqual(
1392            list(islice(parser.read_events(), max_events)),
1393            expected)
1394
1395    def assert_event_tags(self, parser, expected, max_events=None):
1396        events = islice(parser.read_events(), max_events)
1397        self.assertEqual([(action, elem.tag) for action, elem in events],
1398                         expected)
1399
1400    def test_simple_xml(self):
1401        for chunk_size in (None, 1, 5):
1402            with self.subTest(chunk_size=chunk_size):
1403                parser = ET.XMLPullParser()
1404                self.assert_event_tags(parser, [])
1405                self._feed(parser, "<!-- comment -->\n", chunk_size)
1406                self.assert_event_tags(parser, [])
1407                self._feed(parser,
1408                           "<root>\n  <element key='value'>text</element",
1409                           chunk_size)
1410                self.assert_event_tags(parser, [])
1411                self._feed(parser, ">\n", chunk_size)
1412                self.assert_event_tags(parser, [('end', 'element')])
1413                self._feed(parser, "<element>text</element>tail\n", chunk_size)
1414                self._feed(parser, "<empty-element/>\n", chunk_size)
1415                self.assert_event_tags(parser, [
1416                    ('end', 'element'),
1417                    ('end', 'empty-element'),
1418                    ])
1419                self._feed(parser, "</root>\n", chunk_size)
1420                self.assert_event_tags(parser, [('end', 'root')])
1421                self.assertIsNone(parser.close())
1422
1423    def test_feed_while_iterating(self):
1424        parser = ET.XMLPullParser()
1425        it = parser.read_events()
1426        self._feed(parser, "<root>\n  <element key='value'>text</element>\n")
1427        action, elem = next(it)
1428        self.assertEqual((action, elem.tag), ('end', 'element'))
1429        self._feed(parser, "</root>\n")
1430        action, elem = next(it)
1431        self.assertEqual((action, elem.tag), ('end', 'root'))
1432        with self.assertRaises(StopIteration):
1433            next(it)
1434
1435    def test_simple_xml_with_ns(self):
1436        parser = ET.XMLPullParser()
1437        self.assert_event_tags(parser, [])
1438        self._feed(parser, "<!-- comment -->\n")
1439        self.assert_event_tags(parser, [])
1440        self._feed(parser, "<root xmlns='namespace'>\n")
1441        self.assert_event_tags(parser, [])
1442        self._feed(parser, "<element key='value'>text</element")
1443        self.assert_event_tags(parser, [])
1444        self._feed(parser, ">\n")
1445        self.assert_event_tags(parser, [('end', '{namespace}element')])
1446        self._feed(parser, "<element>text</element>tail\n")
1447        self._feed(parser, "<empty-element/>\n")
1448        self.assert_event_tags(parser, [
1449            ('end', '{namespace}element'),
1450            ('end', '{namespace}empty-element'),
1451            ])
1452        self._feed(parser, "</root>\n")
1453        self.assert_event_tags(parser, [('end', '{namespace}root')])
1454        self.assertIsNone(parser.close())
1455
1456    def test_ns_events(self):
1457        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
1458        self._feed(parser, "<!-- comment -->\n")
1459        self._feed(parser, "<root xmlns='namespace'>\n")
1460        self.assertEqual(
1461            list(parser.read_events()),
1462            [('start-ns', ('', 'namespace'))])
1463        self._feed(parser, "<element key='value'>text</element")
1464        self._feed(parser, ">\n")
1465        self._feed(parser, "<element>text</element>tail\n")
1466        self._feed(parser, "<empty-element/>\n")
1467        self._feed(parser, "</root>\n")
1468        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
1469        self.assertIsNone(parser.close())
1470
1471    def test_ns_events_start(self):
1472        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
1473        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1474        self.assert_event_tuples(parser, [
1475            ('start-ns', ('', 'abc')),
1476            ('start-ns', ('p', 'xyz')),
1477        ], max_events=2)
1478        self.assert_event_tags(parser, [
1479            ('start', '{abc}tag'),
1480        ], max_events=1)
1481
1482        self._feed(parser, "<child />\n")
1483        self.assert_event_tags(parser, [
1484            ('start', '{abc}child'),
1485            ('end', '{abc}child'),
1486        ])
1487
1488        self._feed(parser, "</tag>\n")
1489        parser.close()
1490        self.assert_event_tags(parser, [
1491            ('end', '{abc}tag'),
1492        ])
1493
1494    def test_ns_events_start_end(self):
1495        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
1496        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1497        self.assert_event_tuples(parser, [
1498            ('start-ns', ('', 'abc')),
1499            ('start-ns', ('p', 'xyz')),
1500        ], max_events=2)
1501        self.assert_event_tags(parser, [
1502            ('start', '{abc}tag'),
1503        ], max_events=1)
1504
1505        self._feed(parser, "<child />\n")
1506        self.assert_event_tags(parser, [
1507            ('start', '{abc}child'),
1508            ('end', '{abc}child'),
1509        ])
1510
1511        self._feed(parser, "</tag>\n")
1512        parser.close()
1513        self.assert_event_tags(parser, [
1514            ('end', '{abc}tag'),
1515        ], max_events=1)
1516        self.assert_event_tuples(parser, [
1517            ('end-ns', None),
1518            ('end-ns', None),
1519        ])
1520
1521    def test_events(self):
1522        parser = ET.XMLPullParser(events=())
1523        self._feed(parser, "<root/>\n")
1524        self.assert_event_tags(parser, [])
1525
1526        parser = ET.XMLPullParser(events=('start', 'end'))
1527        self._feed(parser, "<!-- text here -->\n")
1528        self.assert_events(parser, [])
1529
1530        parser = ET.XMLPullParser(events=('start', 'end'))
1531        self._feed(parser, "<root>\n")
1532        self.assert_event_tags(parser, [('start', 'root')])
1533        self._feed(parser, "<element key='value'>text</element")
1534        self.assert_event_tags(parser, [('start', 'element')])
1535        self._feed(parser, ">\n")
1536        self.assert_event_tags(parser, [('end', 'element')])
1537        self._feed(parser,
1538                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1539        self.assert_event_tags(parser, [
1540            ('start', '{foo}element'),
1541            ('start', '{foo}empty-element'),
1542            ('end', '{foo}empty-element'),
1543            ('end', '{foo}element'),
1544            ])
1545        self._feed(parser, "</root>")
1546        self.assertIsNone(parser.close())
1547        self.assert_event_tags(parser, [('end', 'root')])
1548
1549        parser = ET.XMLPullParser(events=('start',))
1550        self._feed(parser, "<!-- comment -->\n")
1551        self.assert_event_tags(parser, [])
1552        self._feed(parser, "<root>\n")
1553        self.assert_event_tags(parser, [('start', 'root')])
1554        self._feed(parser, "<element key='value'>text</element")
1555        self.assert_event_tags(parser, [('start', 'element')])
1556        self._feed(parser, ">\n")
1557        self.assert_event_tags(parser, [])
1558        self._feed(parser,
1559                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1560        self.assert_event_tags(parser, [
1561            ('start', '{foo}element'),
1562            ('start', '{foo}empty-element'),
1563            ])
1564        self._feed(parser, "</root>")
1565        self.assertIsNone(parser.close())
1566
1567    def test_events_comment(self):
1568        parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
1569        self._feed(parser, "<!-- text here -->\n")
1570        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1571        self._feed(parser, "<!-- more text here -->\n")
1572        self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
1573        self._feed(parser, "<root-tag>text")
1574        self.assert_event_tags(parser, [('start', 'root-tag')])
1575        self._feed(parser, "<!-- inner comment-->\n")
1576        self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
1577        self._feed(parser, "</root-tag>\n")
1578        self.assert_event_tags(parser, [('end', 'root-tag')])
1579        self._feed(parser, "<!-- outer comment -->\n")
1580        self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
1581
1582        parser = ET.XMLPullParser(events=('comment',))
1583        self._feed(parser, "<!-- text here -->\n")
1584        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1585
1586    def test_events_pi(self):
1587        parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
1588        self._feed(parser, "<?pitarget?>\n")
1589        self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
1590        parser = ET.XMLPullParser(events=('pi',))
1591        self._feed(parser, "<?pitarget some text ?>\n")
1592        self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
1593
1594    def test_events_sequence(self):
1595        # Test that events can be some sequence that's not just a tuple or list
1596        eventset = {'end', 'start'}
1597        parser = ET.XMLPullParser(events=eventset)
1598        self._feed(parser, "<foo>bar</foo>")
1599        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1600
1601        class DummyIter:
1602            def __init__(self):
1603                self.events = iter(['start', 'end', 'start-ns'])
1604            def __iter__(self):
1605                return self
1606            def __next__(self):
1607                return next(self.events)
1608
1609        parser = ET.XMLPullParser(events=DummyIter())
1610        self._feed(parser, "<foo>bar</foo>")
1611        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1612
1613    def test_unknown_event(self):
1614        with self.assertRaises(ValueError):
1615            ET.XMLPullParser(events=('start', 'end', 'bogus'))
1616
1617
1618#
1619# xinclude tests (samples from appendix C of the xinclude specification)
1620
# In-memory "files" for the XInclude tests, keyed by the href used to
# reference them.  XIncludeTest.xinclude_loader looks resources up here.
XINCLUDE = {}

# Document that pulls in "disclaimer.xml" as XML.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

# Target of the include in "C1.xml".
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# Document that pulls in "count.txt" with parse="text".
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Plain-text resource included by "C2.xml" and "C2b.xml".
XINCLUDE["count.txt"] = "324387"

# Variant of "C2.xml" where the text include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Document that pulls in the XML resource "data.xml" with parse="text".
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# Target of the text include in "C3.xml".
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# Nested xi:fallback example; neither "example.txt" nor
# "fallback-example.txt" has an entry in this dictionary.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:[email protected]">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# Document whose include href is the (HTML-escaped) path of SIMPLE_XMLFILE.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

# Document that includes "C1.xml" four times in a row.
XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
</document>
"""
1704
1705#
1706# badly formatted xi:include tags
1707
# Malformed XInclude samples, parsed directly by test_xinclude_failures.
XINCLUDE_BAD = {}

# xi:include with an unsupported value for the "parse" attribute.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not nested inside an xi:include element.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""
1724
# Three samples that include each other in a cycle:
# Recursive1.xml -> Recursive2.xml -> Recursive3.xml -> Recursive1.xml.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive2.xml:</p>
  <xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive3.xml:</p>
  <xi:include href="Recursive3.xml"/>
</document>
"""

# Closes the cycle by including Recursive1.xml again.
XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="Recursive1.xml"/>
</document>
"""
1748
1749
1750class XIncludeTest(unittest.TestCase):
1751
1752    def xinclude_loader(self, href, parse="xml", encoding=None):
1753        try:
1754            data = XINCLUDE[href]
1755        except KeyError:
1756            raise OSError("resource not found")
1757        if parse == "xml":
1758            data = ET.XML(data)
1759        return data
1760
1761    def none_loader(self, href, parser, encoding=None):
1762        return None
1763
1764    def _my_loader(self, href, parse):
1765        # Used to avoid a test-dependency problem where the default loader
1766        # of ElementInclude uses the pyET parser for cET tests.
1767        if parse == 'xml':
1768            with open(href, 'rb') as f:
1769                return ET.parse(f).getroot()
1770        else:
1771            return None
1772
1773    def test_xinclude_default(self):
1774        from xml.etree import ElementInclude
1775        doc = self.xinclude_loader('default.xml')
1776        ElementInclude.include(doc, self._my_loader)
1777        self.assertEqual(serialize(doc),
1778            '<document>\n'
1779            '  <p>Example.</p>\n'
1780            '  <root>\n'
1781            '   <element key="value">text</element>\n'
1782            '   <element>text</element>tail\n'
1783            '   <empty-element />\n'
1784            '</root>\n'
1785            '</document>')
1786
1787    def test_xinclude(self):
1788        from xml.etree import ElementInclude
1789
1790        # Basic inclusion example (XInclude C.1)
1791        document = self.xinclude_loader("C1.xml")
1792        ElementInclude.include(document, self.xinclude_loader)
1793        self.assertEqual(serialize(document),
1794            '<document>\n'
1795            '  <p>120 Mz is adequate for an average home user.</p>\n'
1796            '  <disclaimer>\n'
1797            '  <p>The opinions represented herein represent those of the individual\n'
1798            '  and should not be interpreted as official policy endorsed by this\n'
1799            '  organization.</p>\n'
1800            '</disclaimer>\n'
1801            '</document>') # C1
1802
1803        # Textual inclusion example (XInclude C.2)
1804        document = self.xinclude_loader("C2.xml")
1805        ElementInclude.include(document, self.xinclude_loader)
1806        self.assertEqual(serialize(document),
1807            '<document>\n'
1808            '  <p>This document has been accessed\n'
1809            '  324387 times.</p>\n'
1810            '</document>') # C2
1811
1812        # Textual inclusion after sibling element (based on modified XInclude C.2)
1813        document = self.xinclude_loader("C2b.xml")
1814        ElementInclude.include(document, self.xinclude_loader)
1815        self.assertEqual(serialize(document),
1816            '<document>\n'
1817            '  <p>This document has been <em>accessed</em>\n'
1818            '  324387 times.</p>\n'
1819            '</document>') # C2b
1820
1821        # Textual inclusion of XML example (XInclude C.3)
1822        document = self.xinclude_loader("C3.xml")
1823        ElementInclude.include(document, self.xinclude_loader)
1824        self.assertEqual(serialize(document),
1825            '<document>\n'
1826            '  <p>The following is the source of the "data.xml" resource:</p>\n'
1827            "  <example>&lt;?xml version='1.0'?&gt;\n"
1828            '&lt;data&gt;\n'
1829            '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
1830            '&lt;/data&gt;\n'
1831            '</example>\n'
1832            '</document>') # C3
1833
1834        # Fallback example (XInclude C.5)
1835        # Note! Fallback support is not yet implemented
1836        document = self.xinclude_loader("C5.xml")
1837        with self.assertRaises(OSError) as cm:
1838            ElementInclude.include(document, self.xinclude_loader)
1839        self.assertEqual(str(cm.exception), 'resource not found')
1840        self.assertEqual(serialize(document),
1841            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
1842            '  <ns0:include href="example.txt" parse="text">\n'
1843            '    <ns0:fallback>\n'
1844            '      <ns0:include href="fallback-example.txt" parse="text">\n'
1845            '        <ns0:fallback><a href="mailto:[email protected]">Report error</a></ns0:fallback>\n'
1846            '      </ns0:include>\n'
1847            '    </ns0:fallback>\n'
1848            '  </ns0:include>\n'
1849            '</div>') # C5
1850
1851    def test_xinclude_repeated(self):
1852        from xml.etree import ElementInclude
1853
1854        document = self.xinclude_loader("include_c1_repeated.xml")
1855        ElementInclude.include(document, self.xinclude_loader)
1856        self.assertEqual(1+4*2, len(document.findall(".//p")))
1857
1858    def test_xinclude_failures(self):
1859        from xml.etree import ElementInclude
1860
1861        # Test failure to locate included XML file.
1862        document = ET.XML(XINCLUDE["C1.xml"])
1863        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1864            ElementInclude.include(document, loader=self.none_loader)
1865        self.assertEqual(str(cm.exception),
1866                "cannot load 'disclaimer.xml' as 'xml'")
1867
1868        # Test failure to locate included text file.
1869        document = ET.XML(XINCLUDE["C2.xml"])
1870        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1871            ElementInclude.include(document, loader=self.none_loader)
1872        self.assertEqual(str(cm.exception),
1873                "cannot load 'count.txt' as 'text'")
1874
1875        # Test bad parse type.
1876        document = ET.XML(XINCLUDE_BAD["B1.xml"])
1877        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1878            ElementInclude.include(document, loader=self.none_loader)
1879        self.assertEqual(str(cm.exception),
1880                "unknown parse type in xi:include tag ('BAD_TYPE')")
1881
1882        # Test xi:fallback outside xi:include.
1883        document = ET.XML(XINCLUDE_BAD["B2.xml"])
1884        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1885            ElementInclude.include(document, loader=self.none_loader)
1886        self.assertEqual(str(cm.exception),
1887                "xi:fallback tag must be child of xi:include "
1888                "('{http://www.w3.org/2001/XInclude}fallback')")
1889
1890        # Test infinitely recursive includes.
1891        document = self.xinclude_loader("Recursive1.xml")
1892        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1893            ElementInclude.include(document, self.xinclude_loader)
1894        self.assertEqual(str(cm.exception),
1895                "recursive include of Recursive2.xml")
1896
1897        # Test 'max_depth' limitation.
1898        document = self.xinclude_loader("Recursive1.xml")
1899        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1900            ElementInclude.include(document, self.xinclude_loader, max_depth=None)
1901        self.assertEqual(str(cm.exception),
1902                "recursive include of Recursive2.xml")
1903
1904        document = self.xinclude_loader("Recursive1.xml")
1905        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1906            ElementInclude.include(document, self.xinclude_loader, max_depth=0)
1907        self.assertEqual(str(cm.exception),
1908                "maximum xinclude depth reached when including file Recursive2.xml")
1909
1910        document = self.xinclude_loader("Recursive1.xml")
1911        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1912            ElementInclude.include(document, self.xinclude_loader, max_depth=1)
1913        self.assertEqual(str(cm.exception),
1914                "maximum xinclude depth reached when including file Recursive3.xml")
1915
1916        document = self.xinclude_loader("Recursive1.xml")
1917        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1918            ElementInclude.include(document, self.xinclude_loader, max_depth=2)
1919        self.assertEqual(str(cm.exception),
1920                "maximum xinclude depth reached when including file Recursive1.xml")
1921
1922        document = self.xinclude_loader("Recursive1.xml")
1923        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1924            ElementInclude.include(document, self.xinclude_loader, max_depth=3)
1925        self.assertEqual(str(cm.exception),
1926                "recursive include of Recursive2.xml")
1927
1928
1929# --------------------------------------------------------------------
1930# reported bugs
1931
1932class BugsTest(unittest.TestCase):
1933
1934    def test_bug_xmltoolkit21(self):
1935        # marshaller gives obscure errors for non-string values
1936
1937        def check(elem):
1938            with self.assertRaises(TypeError) as cm:
1939                serialize(elem)
1940            self.assertEqual(str(cm.exception),
1941                    'cannot serialize 123 (type int)')
1942
1943        elem = ET.Element(123)
1944        check(elem) # tag
1945
1946        elem = ET.Element("elem")
1947        elem.text = 123
1948        check(elem) # text
1949
1950        elem = ET.Element("elem")
1951        elem.tail = 123
1952        check(elem) # tail
1953
1954        elem = ET.Element("elem")
1955        elem.set(123, "123")
1956        check(elem) # attribute key
1957
1958        elem = ET.Element("elem")
1959        elem.set("123", 123)
1960        check(elem) # attribute value
1961
1962    def test_bug_xmltoolkit25(self):
1963        # typo in ElementTree.findtext
1964
1965        elem = ET.XML(SAMPLE_XML)
1966        tree = ET.ElementTree(elem)
1967        self.assertEqual(tree.findtext("tag"), 'text')
1968        self.assertEqual(tree.findtext("section/tag"), 'subtext')
1969
1970    def test_bug_xmltoolkit28(self):
1971        # .//tag causes exceptions
1972
1973        tree = ET.XML("<doc><table><tbody/></table></doc>")
1974        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
1975        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
1976
1977    def test_bug_xmltoolkitX1(self):
1978        # dump() doesn't flush the output buffer
1979
1980        tree = ET.XML("<doc><table><tbody/></table></doc>")
1981        with support.captured_stdout() as stdout:
1982            ET.dump(tree)
1983            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
1984
1985    def test_bug_xmltoolkit39(self):
1986        # non-ascii element and attribute names doesn't work
1987
1988        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
1989        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
1990
1991        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1992                      b"<tag \xe4ttr='v&#228;lue' />")
1993        self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
1994        self.assertEqual(ET.tostring(tree, "utf-8"),
1995                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
1996
1997        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1998                      b'<t\xe4g>text</t\xe4g>')
1999        self.assertEqual(ET.tostring(tree, "utf-8"),
2000                b'<t\xc3\xa4g>text</t\xc3\xa4g>')
2001
2002        tree = ET.Element("t\u00e4g")
2003        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
2004
2005        tree = ET.Element("tag")
2006        tree.set("\u00e4ttr", "v\u00e4lue")
2007        self.assertEqual(ET.tostring(tree, "utf-8"),
2008                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
2009
2010    def test_bug_xmltoolkit54(self):
2011        # problems handling internally defined entities
2012
2013        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
2014                   '<doc>&ldots;</doc>')
2015        self.assertEqual(serialize(e, encoding="us-ascii"),
2016                b'<doc>&#33328;</doc>')
2017        self.assertEqual(serialize(e), '<doc>\u8230</doc>')
2018
2019    def test_bug_xmltoolkit55(self):
2020        # make sure we're reporting the first error, not the last
2021
2022        with self.assertRaises(ET.ParseError) as cm:
2023            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
2024                   b'<doc>&ldots;&ndots;&rdots;</doc>')
2025        self.assertEqual(str(cm.exception),
2026                'undefined entity &ldots;: line 1, column 36')
2027
2028    def test_bug_xmltoolkit60(self):
2029        # Handle crash in stream source.
2030
2031        class ExceptionFile:
2032            def read(self, x):
2033                raise OSError
2034
2035        self.assertRaises(OSError, ET.parse, ExceptionFile())
2036
2037    def test_bug_xmltoolkit62(self):
2038        # Don't crash when using custom entities.
2039
2040        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
2041        parser = ET.XMLParser()
2042        parser.entity.update(ENTITIES)
2043        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
2044<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
2045<patent-application-publication>
2046<subdoc-abstract>
2047<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
2048</subdoc-abstract>
2049</patent-application-publication>""")
2050        t = parser.close()
2051        self.assertEqual(t.find('.//paragraph').text,
2052            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
2053
    # Skipped whenever a trace function is installed (sys.gettrace() is
    # truthy), e.g. under coverage.
    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
    def test_bug_xmltoolkit63(self):
        # Check reference leak.
        def xmltoolkit63():
            # Build a single element with text through a fresh TreeBuilder.
            tree = ET.TreeBuilder()
            tree.start("tag", {})
            tree.data("text")
            tree.end("tag")

        # Run once before taking the baseline reference count of None.
        xmltoolkit63()
        count = sys.getrefcount(None)
        for i in range(1000):
            xmltoolkit63()
        # The count must be unchanged after 1000 further runs.
        self.assertEqual(sys.getrefcount(None), count)
2068
2069    def test_bug_200708_newline(self):
2070        # Preserve newlines in attributes.
2071
2072        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
2073        self.assertEqual(ET.tostring(e),
2074                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
2075        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
2076                'def _f():\n  return 3\n')
2077        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
2078                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
2079
2080    def test_bug_200708_close(self):
2081        # Test default builder.
2082        parser = ET.XMLParser() # default
2083        parser.feed("<element>some text</element>")
2084        self.assertEqual(parser.close().tag, 'element')
2085
2086        # Test custom builder.
2087        class EchoTarget:
2088            def close(self):
2089                return ET.Element("element") # simulate root
2090        parser = ET.XMLParser(target=EchoTarget())
2091        parser.feed("<element>some text</element>")
2092        self.assertEqual(parser.close().tag, 'element')
2093
2094    def test_bug_200709_default_namespace(self):
2095        e = ET.Element("{default}elem")
2096        s = ET.SubElement(e, "{default}elem")
2097        self.assertEqual(serialize(e, default_namespace="default"), # 1
2098                '<elem xmlns="default"><elem /></elem>')
2099
2100        e = ET.Element("{default}elem")
2101        s = ET.SubElement(e, "{default}elem")
2102        s = ET.SubElement(e, "{not-default}elem")
2103        self.assertEqual(serialize(e, default_namespace="default"), # 2
2104            '<elem xmlns="default" xmlns:ns1="not-default">'
2105            '<elem />'
2106            '<ns1:elem />'
2107            '</elem>')
2108
2109        e = ET.Element("{default}elem")
2110        s = ET.SubElement(e, "{default}elem")
2111        s = ET.SubElement(e, "elem") # unprefixed name
2112        with self.assertRaises(ValueError) as cm:
2113            serialize(e, default_namespace="default") # 3
2114        self.assertEqual(str(cm.exception),
2115                'cannot use non-qualified names with default_namespace option')
2116
2117    def test_bug_200709_register_namespace(self):
2118        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
2119        self.assertEqual(ET.tostring(e),
2120            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
2121        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
2122        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
2123        self.assertEqual(ET.tostring(e),
2124            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')
2125
2126        # And the Dublin Core namespace is in the default list:
2127
2128        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
2129        self.assertEqual(ET.tostring(e),
2130            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
2131
2132    def test_bug_200709_element_comment(self):
2133        # Not sure if this can be fixed, really (since the serializer needs
2134        # ET.Comment, not cET.comment).
2135
2136        a = ET.Element('a')
2137        a.append(ET.Comment('foo'))
2138        self.assertEqual(a[0].tag, ET.Comment)
2139
2140        a = ET.Element('a')
2141        a.append(ET.PI('foo'))
2142        self.assertEqual(a[0].tag, ET.PI)
2143
2144    def test_bug_200709_element_insert(self):
2145        a = ET.Element('a')
2146        b = ET.SubElement(a, 'b')
2147        c = ET.SubElement(a, 'c')
2148        d = ET.Element('d')
2149        a.insert(0, d)
2150        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
2151        a.insert(-1, d)
2152        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
2153
2154    def test_bug_200709_iter_comment(self):
2155        a = ET.Element('a')
2156        b = ET.SubElement(a, 'b')
2157        comment_b = ET.Comment("TEST-b")
2158        b.append(comment_b)
2159        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
2160
2161    # --------------------------------------------------------------------
2162    # reported on bugs.python.org
2163
2164    def test_bug_1534630(self):
2165        bob = ET.TreeBuilder()
2166        e = bob.data("data")
2167        e = bob.start("tag", {})
2168        e = bob.end("tag")
2169        e = bob.close()
2170        self.assertEqual(serialize(e), '<tag />')
2171
2172    def test_issue6233(self):
2173        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
2174                   b'<body>t\xc3\xa3g</body>')
2175        self.assertEqual(ET.tostring(e, 'ascii'),
2176                b"<?xml version='1.0' encoding='ascii'?>\n"
2177                b'<body>t&#227;g</body>')
2178        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
2179                   b'<body>t\xe3g</body>')
2180        self.assertEqual(ET.tostring(e, 'ascii'),
2181                b"<?xml version='1.0' encoding='ascii'?>\n"
2182                b'<body>t&#227;g</body>')
2183
2184    def test_issue6565(self):
2185        elem = ET.XML("<body><tag/></body>")
2186        self.assertEqual(summarize_list(elem), ['tag'])
2187        newelem = ET.XML(SAMPLE_XML)
2188        elem[:] = newelem[:]
2189        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])
2190
2191    def test_issue10777(self):
2192        # Registering a namespace twice caused a "dictionary changed size during
2193        # iteration" bug.
2194
2195        ET.register_namespace('test10777', 'http://myuri/')
2196        ET.register_namespace('test10777', 'http://myuri/')
2197
2198    def test_lost_text(self):
2199        # Issue #25902: Borrowed text can disappear
2200        class Text:
2201            def __bool__(self):
2202                e.text = 'changed'
2203                return True
2204
2205        e = ET.Element('tag')
2206        e.text = Text()
2207        i = e.itertext()
2208        t = next(i)
2209        self.assertIsInstance(t, Text)
2210        self.assertIsInstance(e.text, str)
2211        self.assertEqual(e.text, 'changed')
2212
2213    def test_lost_tail(self):
2214        # Issue #25902: Borrowed tail can disappear
2215        class Text:
2216            def __bool__(self):
2217                e[0].tail = 'changed'
2218                return True
2219
2220        e = ET.Element('root')
2221        e.append(ET.Element('tag'))
2222        e[0].tail = Text()
2223        i = e.itertext()
2224        t = next(i)
2225        self.assertIsInstance(t, Text)
2226        self.assertIsInstance(e[0].tail, str)
2227        self.assertEqual(e[0].tail, 'changed')
2228
    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        class Tag:
            # Comparing this tag has side effects: it replaces the first
            # child of 'e' and advances the iterator 'i' again before
            # reporting a match.
            def __eq__(self, other):
                e[0] = ET.Element('changed')
                next(i)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        i = e.iter('tag')
        try:
            t = next(i)
        except ValueError:
            # The nested next(i) inside __eq__ raised ValueError, so the
            # scenario cannot be exercised with this iterator.
            self.skipTest('generators are not reentrant')
        # The element handed out still carries the Tag instance, while the
        # tree now holds the replacement element.
        self.assertIsInstance(t.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')
2248
2249    def check_expat224_utf8_bug(self, text):
2250        xml = b'<a b="%s"/>' % text
2251        root = ET.XML(xml)
2252        self.assertEqual(root.get('b'), text.decode('utf-8'))
2253
2254    def test_expat224_utf8_bug(self):
2255        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
2256        # Check that Expat 2.2.4 fixed the bug.
2257        #
2258        # Test buffer bounds at odd and even positions.
2259
2260        text = b'\xc3\xa0' * 1024
2261        self.check_expat224_utf8_bug(text)
2262
2263        text = b'x' + b'\xc3\xa0' * 1024
2264        self.check_expat224_utf8_bug(text)
2265
2266    def test_expat224_utf8_bug_file(self):
2267        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
2268            raw = fp.read()
2269        root = ET.fromstring(raw)
2270        xmlattr = root.get('b')
2271
2272        # "Parse" manually the XML file to extract the value of the 'b'
2273        # attribute of the <a b='xxx' /> XML element
2274        text = raw.decode('utf-8').strip()
2275        text = text.replace('\r\n', ' ')
2276        text = text[6:-4]
2277        self.assertEqual(root.get('b'), text)
2278
2279    def test_39495_treebuilder_start(self):
2280        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag")
2281        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None)
2282
2283
2284
2285# --------------------------------------------------------------------
2286
2287
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Basic Element behaviour: construction, copying (Element.copy(),
    # copy.copy() and copy.deepcopy()), type checking of children,
    # garbage collection, weak references and pickling.

    def _check_copy_basics(self, duplicate, original):
        # Checks shared by the three copy tests: the duplicate is a distinct
        # object with equal tag/text/tail and the same number of children.
        self.assertIsNot(duplicate, original)
        self.assertEqual(duplicate.tag, original.tag)
        self.assertEqual(duplicate.text, original.text)
        self.assertEqual(duplicate.tail, original.tail)
        self.assertEqual(len(duplicate), len(original))

    def test___init__(self):
        tag = "foo"
        attrib = {"zix": "wyp"}

        element_foo = ET.Element(tag, attrib)

        # traits of an element
        self.assertIsInstance(element_foo, ET.Element)
        for name in ("tag", "attrib", "text", "tail"):
            self.assertIn(name, dir(element_foo))

        # string attributes have expected values
        self.assertEqual(element_foo.tag, tag)
        self.assertIsNone(element_foo.text)
        self.assertIsNone(element_foo.tail)

        # attrib is a copy
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertEqual(element_foo.attrib, attrib)

        # attrib isn't linked: changing the caller's dict afterwards must
        # not show up in the element
        attrib["bar"] = "baz"
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertNotEqual(element_foo.attrib, attrib)

    def test_copy(self):
        # Only run this test if Element.copy() is defined.
        if "copy" not in dir(ET.Element):
            self.skipTest("Element.copy() not present")

        element_foo = ET.Element("foo", {"zix": "wyp"})
        element_foo.append(ET.Element("bar", {"baz": "qix"}))

        # Calling Element.copy() must emit a DeprecationWarning.
        with self.assertWarns(DeprecationWarning):
            element_foo2 = element_foo.copy()

        self._check_copy_basics(element_foo2, element_foo)

        # children are the same objects (shallow copy)
        for child1, child2 in itertools.zip_longest(element_foo, element_foo2):
            self.assertIs(child1, child2)

        # attrib compares equal (only equality is checked here)
        self.assertEqual(element_foo2.attrib, element_foo.attrib)

    def test___copy__(self):
        element_foo = ET.Element("foo", {"zix": "wyp"})
        element_foo.append(ET.Element("bar", {"baz": "qix"}))

        element_foo2 = copy.copy(element_foo)

        self._check_copy_basics(element_foo2, element_foo)

        # children are the same objects (shallow copy)
        for child1, child2 in itertools.zip_longest(element_foo, element_foo2):
            self.assertIs(child1, child2)

        # attrib compares equal (only equality is checked here)
        self.assertEqual(element_foo2.attrib, element_foo.attrib)

    def test___deepcopy__(self):
        element_foo = ET.Element("foo", {"zix": "wyp"})
        element_foo.append(ET.Element("bar", {"baz": "qix"}))

        element_foo2 = copy.deepcopy(element_foo)

        self._check_copy_basics(element_foo2, element_foo)

        # children are not the same
        for child1, child2 in itertools.zip_longest(element_foo, element_foo2):
            self.assertIsNot(child1, child2)

        # attrib is a copy
        self.assertIsNot(element_foo2.attrib, element_foo.attrib)
        self.assertEqual(element_foo2.attrib, element_foo.attrib)

        # attrib isn't linked
        element_foo.attrib["bar"] = "baz"
        self.assertIsNot(element_foo2.attrib, element_foo.attrib)
        self.assertNotEqual(element_foo2.attrib, element_foo.attrib)

    def test_augmentation_type_errors(self):
        # Every way of adding a child must reject objects that are not
        # Elements with TypeError.
        e = ET.Element('joe')
        self.assertRaises(TypeError, e.append, 'b')
        self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
        self.assertRaises(TypeError, e.insert, 0, 'foo')
        e[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            e[0] = 'foo'
        with self.assertRaises(TypeError):
            e[:] = [ET.Element('bar'), 'foo']

        # The remaining checks only apply when the implementation under
        # test provides the corresponding special method.
        if hasattr(e, '__setstate__'):
            state = {
                'tag': 'tag',
                '_children': [None],  # non-Element
                'attrib': 'attr',
                'tail': 'tail',
                'text': 'text',
            }
            self.assertRaises(TypeError, e.__setstate__, state)

        if hasattr(e, '__deepcopy__'):
            class E(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            e[:] = [E('bar')]
            self.assertRaises(TypeError, copy.deepcopy, e)

    def test_cyclic_gc(self):
        # Reference cycles that go through an Element must be collectable;
        # each scenario checks that a weak reference is dead after
        # gc_collect().
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e3.append(e1)
        e2.append(e3)
        e1.append(e2)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        # An Element can be weakly referenced; the callback fires and the
        # reference is cleared once the element is gone.
        flag = False
        def wref_cb(w):
            nonlocal flag
            flag = True
        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        gc_collect()  # For PyPy or other GCs.
        self.assertTrue(flag)
        self.assertIsNone(wref())

    def test_get_keyword_args(self):
        # Element.get() accepts 'default' as a keyword argument.
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        # Every ordered pair of modules in self.modules is tried as
        # (dumper, loader), for each pickle protocol from 2 upwards.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                with self.subTest(proto=proto, dumper=dumper, loader=loader):
                    e = dumper.Element('foo', bar=42)
                    e.text = "text goes here"
                    e.tail = "opposite of head"
                    dumper.SubElement(e, 'child').append(
                        dumper.Element('grandchild'))
                    e.append(dumper.Element('child'))
                    e.findall('.//grandchild')[0].set('attr', 'other value')

                    e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                              dumper, loader, proto)

                    self.assertEqual(e2.tag, 'foo')
                    self.assertEqual(e2.attrib['bar'], 42)
                    self.assertEqual(len(e2), 2)
                    self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        # Same protocol/module matrix as test_pickle, but starting from an
        # element produced by the parser rather than built by hand.
        XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                with self.subTest(proto=proto, dumper=dumper, loader=loader):
                    e1 = dumper.fromstring(XMLTEXT)
                    self.assertEqual(e1.__getstate__()['tag'], 'group')
                    e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                              dumper, loader, proto)
                    self.assertEqual(e2.tag, 'group')
                    self.assertEqual(e2[0].tag, 'dogs')
2512
2513
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Regression tests that hand Element and TreeBuilder hostile objects:
    # objects whose special methods mutate the container being operated on,
    # or factories that return something that is not an Element.  Several
    # of these tests make no assertion on the result; they only need to run
    # to completion.

    def test_extend_mutable_list(self):
        # X reports ET.Element as its __class__ and, as a side effect of
        # that lookup, replaces the contents of the list L that is being
        # passed to extend().
        class X:
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        # A TypeError is tolerated here, but not required.
        try:
            e.extend(L)
        except TypeError:
            pass

        # Same trick with a real Element subclass; this call must succeed.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        # Like test_extend_mutable_list, but the __class__ lookup empties
        # the list L instead of replacing its contents.
        class X:
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        # A TypeError is tolerated here, but not required.
        try:
            e.extend(L)
        except TypeError:
            pass

        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        # X.__eq__ removes all children of e and then reports "not equal",
        # so remove() can never find a match and must raise ValueError.
        class X(ET.Element):
            def __eq__(self, o):
                del e[:]
                return False
        # Case 1: the mutating object is the existing child.
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # Case 2: the mutating object is the argument passed to remove().
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        # The element's tag is temporarily set to the element itself, so
        # repr() would have to recurse; RuntimeError is expected.
        e = ET.Element('foo')
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        # X is a str whose finalizer reads elem.text.  'elem' is only bound
        # further down (by b.close()), so a NameError raised inside the
        # finalizer before that point is swallowed.
        class X(str):
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        # Feed the text in three pieces, the middle one being an X.
        b = ET.TreeBuilder()
        b.start('tag', {})
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        elem = b.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        # Same as test_element_get_text, but the pieces are fed after the
        # inner tag has ended, so they end up in that child's tail, and the
        # finalizer reads elem[0].tail.
        class X(str):
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_subscr(self):
        # Issue #27863
        # X.__index__ deletes all children of e while e is being sliced
        # with X() as a slice bound or step.
        class X:
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        # X.__index__ empties e while an extended-slice assignment that
        # uses X() as the step is in progress.
        class X:
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The element factory returns a plain list instead of an Element.
        # After some data has been buffered, starting another tag is
        # expected to fail with AttributeError.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        # Drop the builder and force a collection so that its teardown
        # happens inside this test.
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # Same setup as test_treebuilder_start, but the failure is
        # triggered by ending the open tag.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.end, 'tag')
        # Drop the builder and force a collection so that its teardown
        # happens inside this test.
        del b
        gc_collect()
2663
2664
class MutatingElementPath(str):
    # A path string that remembers a container and empties it whenever the
    # path is compared for equality; the comparison itself always succeeds.

    # Defining __eq__ would otherwise make the class unhashable, so the
    # str hash is kept explicitly.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        # 'elem' is stored on the instance; the remaining arguments are
        # handed to the str constructor unchanged.
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        # Side effect first: remove every item of the remembered container.
        del self.elem[:]
        return True
2674
class BadElementPath(str):
    # A path string whose equality comparison always fails with
    # ZeroDivisionError.

    # Defining __eq__ would otherwise make the class unhashable, so the
    # str hash is kept explicitly.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # Raise the intended exception directly instead of relying on the
        # side effect of evaluating 1/0 inside a raise statement.
        raise ZeroDivisionError("division by zero")
2679
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() called with path objects whose
    # __eq__ misbehaves (MutatingElementPath, BadElementPath), plus
    # findtext() on elements whose text is falsey or None.

    def setUp(self):
        super().setUp()
        # Swap in an empty ElementPath cache for the duration of the test;
        # the saved one is put back in tearDown().
        from xml.etree import ElementPath
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def _foo_with_bar(self):
        # Build the fixture shared by the hostile-path tests: a 'foo'
        # element holding a single 'bar' child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._foo_with_bar()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._foo_with_bar()
        # ZeroDivisionError is tolerated, but not required.
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._foo_with_bar()
        # ZeroDivisionError is tolerated, but not required.
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_falsey_text_attribute(self):
        # A falsey text value other than None is returned unchanged.
        root_elem = ET.Element('foo')
        sub_elem = ET.SubElement(root_elem, 'bar')
        for val in ("", 0, False, [], (), {}):
            sub_elem.text = val
            self.assertEqual(root_elem.findtext('./bar'), val)

    def test_findtext_with_none_text_attribute(self):
        # A text of None is reported as the empty string.
        root_elem = ET.Element('foo')
        ET.SubElement(root_elem, 'bar').text = None
        self.assertEqual(root_elem.findtext('./bar'), '')

    def test_findall_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._foo_with_bar()
        # ZeroDivisionError is tolerated, but not required.
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2744
2745
class ElementTreeTypeTest(unittest.TestCase):
    # The public ElementTree classes are real types, and Element can be
    # subclassed.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for klass in public_classes:
            self.assertIsInstance(klass, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        for expected_type in (ET.Element, MyElement):
            self.assertIsInstance(instance, expected_type)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        # A subclass may override __init__ and forward to the base class,
        # here with a suffix added to the tag.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(instance.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        self.assertEqual(MyElement('joe').newmethod(), 'joe')

    def test_Element_subclass_find(self):
        # Instances of an Element subclass are found like ordinary children.
        class MyElement(ET.Element):
            pass

        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        found = list(parent.findall('bar'))
        self.assertEqual(len(found), 1, found)
        self.assertEqual(found[0].tag, 'bar')
2800
2801
class ElementFindTest(unittest.TestCase):
    # find(), findtext() and findall() on Element and ElementTree, using
    # the sample documents defined at module level.

    def test_find_simple(self):
        body = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(body.find(path).tag, 'tag')

        # Replace the third child with the larger sample section.
        body[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(body.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(body.findtext('./tag'), 'text')
        self.assertEqual(body.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(body.findtext('section/nexttag'), '')
        self.assertEqual(body.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(body.findtext('tog'))
        self.assertEqual(body.findtext('tog', 'default'), 'default')

        # Issue #16922
        doc = ET.XML('<tag><empty /></tag>')
        self.assertEqual(doc.findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        body = ET.XML(LINEAR_XML)

        def class_of(path):
            # 'class' attribute of the first element matching path.
            return body.find(path).attrib['class']

        # Test for numeric indexing and last()
        self.assertEqual(class_of('./tag[1]'), 'a')
        self.assertEqual(class_of('./tag[2]'), 'b')
        self.assertEqual(class_of('./tag[last()]'), 'd')
        self.assertEqual(class_of('./tag[last()-1]'), 'c')
        self.assertEqual(class_of('./tag[last()-2]'), 'b')

        # These index expressions are rejected with a SyntaxError that
        # mentions XPath.
        rejected = (
            './tag[0]',
            './tag[-1]',
            './tag[last()-0]',
            './tag[last()+1]',
        )
        for bad_path in rejected:
            with self.assertRaisesRegex(SyntaxError, 'XPath'):
                body.find(bad_path)

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)

        def found(path):
            # Summary of the elements matched by path, in document order.
            return summarize_list(e.findall(path))

        four_below_section = ['tag', 'nexttag', 'nextsection', 'tag']
        three_below_section = ['tag', 'nexttag', 'nextsection']

        # Plain paths and wildcards.
        self.assertEqual(found('.'), ['body'])
        self.assertEqual(found('tag'), ['tag', 'tag'])
        self.assertEqual(found('tog'), [])
        self.assertEqual(found('tog/foo'), [])
        self.assertEqual(found('*'), ['tag', 'tag', 'section'])
        self.assertEqual(found('.//tag'), ['tag'] * 4)
        self.assertEqual(found('section/tag'), ['tag'])
        self.assertEqual(found('section//tag'), ['tag'] * 2)
        self.assertEqual(found('section/*'), three_below_section)
        self.assertEqual(found('section//*'), four_below_section)
        self.assertEqual(found('section/.//*'), four_below_section)
        self.assertEqual(found('*/*'), three_below_section)
        self.assertEqual(found('*//*'), four_below_section)
        self.assertEqual(found('*/tag'), ['tag'])
        self.assertEqual(found('*/./tag'), ['tag'])
        self.assertEqual(found('./tag'), ['tag'] * 2)
        self.assertEqual(found('././tag'), ['tag'] * 2)

        # Attribute and child predicates, and parent steps.
        self.assertEqual(found('.//tag[@class]'), ['tag'] * 3)
        self.assertEqual(found('.//tag[@class="a"]'), ['tag'])
        self.assertEqual(found('.//tag[@class!="a"]'), ['tag'] * 2)
        self.assertEqual(found('.//tag[@class="b"]'), ['tag'] * 2)
        self.assertEqual(found('.//tag[@class!="b"]'), ['tag'])
        self.assertEqual(found('.//tag[@id]'), ['tag'])
        self.assertEqual(found('.//section[tag]'), ['section'])
        self.assertEqual(found('.//section[element]'), [])
        self.assertEqual(found('../tag'), [])
        self.assertEqual(found('section/../tag'), ['tag'] * 2)
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

        # Child-text predicates, with every spacing variant around '='.
        for path in (".//section[tag='subtext']",
                     ".//section[tag ='subtext']",
                     ".//section[tag= 'subtext']",
                     ".//section[tag = 'subtext']",
                     ".//section[ tag = 'subtext' ]"):
            self.assertEqual(found(path), ['section'])

        # Negations of above tests. They match nothing because the sole section
        # tag has subtext.
        for path in (".//section[tag!='subtext']",
                     ".//section[tag !='subtext']",
                     ".//section[tag!= 'subtext']",
                     ".//section[tag != 'subtext']",
                     ".//section[ tag != 'subtext' ]"):
            self.assertEqual(found(path), [])

        # Own-text predicates, with every spacing variant around '='.
        for path in (".//tag[.='subtext']",
                     ".//tag[. ='subtext']",
                     './/tag[.= "subtext"]',
                     './/tag[ . = "subtext" ]',
                     ".//tag[. = 'subtext']"):
            self.assertEqual(found(path), ['tag'])
        # Whitespace inside the quoted value is significant.
        for path in (".//tag[. = 'subtext ']",
                     ".//tag[.= ' subtext']"):
            self.assertEqual(found(path), [])

        # Negations of above tests.
        #   Matches everything but the tag containing subtext
        for path in (".//tag[.!='subtext']",
                     ".//tag[. !='subtext']",
                     './/tag[.!= "subtext"]',
                     './/tag[ . != "subtext" ]',
                     ".//tag[. != 'subtext']"):
            self.assertEqual(found(path), ['tag'] * 3)
        # Matches all tags.
        for path in (".//tag[. != 'subtext ']",
                     ".//tag[.!= ' subtext']"):
            self.assertEqual(found(path), ['tag'] * 4)

        # duplicate section => 2x tag matches
        e[1] = e[2]
        self.assertEqual(found(".//section[tag = 'subtext']"),
                         ['section', 'section'])
        self.assertEqual(found(".//tag[. = 'subtext']"),
                         ['tag', 'tag'])

    def test_test_find_with_ns(self):
        e = ET.XML(SAMPLE_XML_NS)
        qualified_tag = '{http://effbot.org/ns}tag'

        # The unqualified name matches nothing in a namespaced document.
        self.assertEqual(summarize_list(e.findall('tag')), [])

        direct = e.findall("{http://effbot.org/ns}tag")
        self.assertEqual(summarize_list(direct), [qualified_tag] * 2)

        nested = e.findall(".//{http://effbot.org/ns}tag")
        self.assertEqual(summarize_list(nested), [qualified_tag] * 3)

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # (namespace map, matches for ".//xx:b", matches for ".//b")
        cases = [
            ({'xx': 'X'}, 2, 2),
            ({'xx': 'Y'}, 1, 2),
            ({'xx': 'X', '': 'Y'}, 2, 1),
        ]
        for nsmap, prefixed_count, plain_count in cases:
            prefixed = root.findall(".//xx:b", namespaces=nsmap)
            self.assertEqual(len(prefixed), prefixed_count)
            plain = root.findall(".//b", namespaces=nsmap)
            self.assertEqual(len(plain), plain_count)

    def test_findall_wildcard(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        root.append(ET.Comment('test'))

        def found(path):
            # Summary of the elements matched by path, in document order.
            return summarize_list(root.findall(path))

        # Direct children.
        self.assertEqual(found("{*}b"), ['{X}b', 'b', '{Y}b'])
        self.assertEqual(found("{*}c"), ['c'])
        self.assertEqual(found("{X}*"), ['{X}b'])
        self.assertEqual(found("{Y}*"), ['{Y}b'])
        self.assertEqual(found("{}*"), ['b', 'c'])
        self.assertEqual(found("{}b"), ['b'])  # only for consistency
        self.assertEqual(found("{}b"), found("b"))
        self.assertEqual(found("{*}*"), ['{X}b', 'b', 'c', '{Y}b'])
        # This is an unfortunate difference, but that's how find('*') works.
        self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
                         found("*"))

        # All descendants.
        self.assertEqual(found(".//{*}b"), ['{X}b', 'b', '{X}b', 'b', '{Y}b'])
        self.assertEqual(found(".//{*}c"), ['c', 'c'])
        self.assertEqual(found(".//{X}*"), ['{X}b', '{X}b'])
        self.assertEqual(found(".//{Y}*"), ['{Y}b'])
        self.assertEqual(found(".//{}*"), ['c', 'b', 'c', 'b'])
        self.assertEqual(found(".//{}b"), ['b', 'b'])  # only for consistency
        self.assertEqual(found(".//{}b"), found(".//b"))

    def test_bad_find(self):
        # An absolute path is not allowed on an element.
        e = ET.XML(SAMPLE_XML)
        self.assertRaisesRegex(SyntaxError, 'cannot use absolute path',
                               e.findall, '/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        direct = ET.ElementTree(e).findall('tag')
        self.assertEqual(summarize_list(direct), ['tag'] * 2)
        # this produces a warning
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            it = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(it), ['tag'] * 3)
3049
3050
class ElementIterTest(unittest.TestCase):
    # Element.iter() and Element.itertext(), plus the fact that the
    # iterator returned by iter() can be neither copied nor pickled.

    def _ilist(self, elem, tag=None):
        # Summary of everything elem.iter(tag) yields.
        iterator = elem.iter(tag)
        return summarize_list(iterator)

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = doc.find('body')
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(doc.find('body').itertext()),
                         'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(sourcefile))
        self.assertEqual(first_event[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        first_event = next(ET.iterparse(sourcefile, parser=parser))
        self.assertEqual(first_event[0], 'end')

        # A tree without a root element cannot be iterated.
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        a = ET.Element('a')
        self.assertEqual(self._ilist(a), ['a'])

        # one child
        b = ET.SubElement(a, 'b')
        self.assertEqual(self._ilist(a), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(b, 'c')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(a, 'd')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])

        # replace first child by second
        a[0] = a[1]
        del a[1]
        self.assertEqual(self._ilist(a), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        rooms = doc.iter(tag='room')
        self.assertEqual(summarize_list(rooms), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    def test_copy(self):
        # The iterator returned by iter() refuses to be copied.
        it = ET.Element('a').iter()
        self.assertRaises(TypeError, copy.copy, it)

    def test_pickle(self):
        # The iterator returned by iter() refuses to be pickled, whatever
        # the protocol.
        it = ET.Element('a').iter()
        unpicklable = (TypeError, pickle.PicklingError)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises(unpicklable):
                pickle.dumps(it, proto)
3145
3146
class TreeBuilderTest(unittest.TestCase):
    # Tests for ET.TreeBuilder and for arbitrary objects used as the
    # 'target' of an ET.XMLParser.

    # Document with a DOCTYPE, leading text, one child element and a tail.
    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    # Minimal single-element document.
    sample2 = '''<toplevel>sometext</toplevel>'''

    def _feed_and_close(self, target, data):
        """Feed *data* to a new ET.XMLParser using *target*; return close()."""
        parser = ET.XMLParser(target=target)
        parser.feed(data)
        return parser.close()

    def _check_sample1_element(self, e):
        """Assert that *e* is the element tree expected for ``sample1``."""
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertIsNone(e.tail)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        child = children[0]
        self.assertEqual(child.tag, 'div')
        self.assertEqual(child.text, 'subtext')
        self.assertEqual(child.tail, 'tail')
        self.assertEqual(child.attrib, {})

    def test_dummy_builder(self):
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        # The parser's close() hands back whatever the target's close()
        # returns, whether or not the target defines event handlers.
        for target in (DummyBuilder(), BaseDummyBuilder()):
            self.assertEqual(self._feed_and_close(target, self.sample1), 42)

        # A target without close() makes the parser's close() return None.
        self.assertIsNone(self._feed_and_close(object(), self.sample1))

    def test_treebuilder_comment(self):
        b = ET.TreeBuilder()
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        b = ET.TreeBuilder(comment_factory=ET.Comment)
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        # Any callable may serve as the factory; its result is returned as is.
        b = ET.TreeBuilder(comment_factory=len)
        self.assertEqual(b.comment('ctext'), len('ctext'))

    def test_treebuilder_pi(self):
        b = ET.TreeBuilder()
        self.assertEqual(b.pi('target', None).tag, ET.PI)
        self.assertEqual(b.pi('target', None).text, 'target')

        b = ET.TreeBuilder(pi_factory=ET.PI)
        self.assertEqual(b.pi('target').tag, ET.PI)
        self.assertEqual(b.pi('target').text, "target")
        self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
        self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget  text ")

        # A custom factory receives (target, text); text is None when omitted.
        b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
        self.assertEqual(b.pi('target'), (len('target'), None))
        self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))

    def test_late_tail(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        # Same expectation for an ignored comment and an ignored PI, both
        # through ET.fromstring() and through a TreeBuilder subclass.
        for xml in ("<a>text<!-- comment -->tail</a>",
                    "<a>text<?pi data?>tail</a>"):
            a = ET.fromstring(xml)
            self.assertEqual(a.text, "texttail")

            a = self._feed_and_close(TreeBuilderSubclass(), xml)
            self.assertEqual(a.text, "texttail")

    def test_late_tail_mix_pi_comments(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        # Test appending tails to comments/pis.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        builder_classes = (ET.TreeBuilder, TreeBuilderSubclass)

        # Comments are inserted, PIs are ignored.
        xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
        for builder_class in builder_classes:
            a = self._feed_and_close(builder_class(insert_comments=True), xml)
            self.assertEqual(a[0].text, ' comment ')
            self.assertEqual(a[0].tail, '\ntail')
            self.assertEqual(a.text, "text ")

        # PIs are inserted, comments are ignored.
        xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
        for builder_class in builder_classes:
            a = self._feed_and_close(builder_class(insert_pis=True), xml)
            self.assertEqual(a[0].text, 'pi data')
            self.assertEqual(a[0].tail, 'tail')
            self.assertEqual(a.text, "text\n")

    def test_treebuilder_elementfactory_none(self):
        builder = ET.TreeBuilder(element_factory=None)
        e = self._feed_and_close(builder, self.sample1)
        self._check_sample1_element(e)

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder()
        self.assertEqual(tb.foobar(10), 20)

        e = self._feed_and_close(tb, self.sample1)
        self._check_sample1_element(e)

    def test_subclass_comment_pi(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
        self.assertEqual(tb.foobar(10), 20)

        # Two feed() calls are needed here, so the helper is not used.
        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        parser.feed('<!-- a comment--><?and a pi?>')

        e = parser.close()
        self._check_sample1_element(e)

    def test_element_factory(self):
        lst = []
        def myfactory(tag, attrib):
            # Record every tag the builder asks the factory to create.
            lst.append(tag)
            return ET.Element(tag, attrib)

        tb = ET.TreeBuilder(element_factory=myfactory)
        self._feed_and_close(tb, self.sample2)

        self.assertEqual(lst, ['toplevel'])

    def _check_element_factory_class(self, cls):
        """Parse ``sample1`` with *cls* as element factory and check the result."""
        tb = ET.TreeBuilder(element_factory=cls)

        e = self._feed_and_close(tb, self.sample1)
        self.assertIsInstance(e, cls)
        self._check_sample1_element(e)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass
        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        base = ET._Element_Py
        # Not from a C extension
        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(base, ValueError):
            pass
        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        # The target's doctype() receives the name, public id and system id.
        self.assertEqual(self._feed_and_close(DoctypeParser(), self.sample1),
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_builder_lookup_errors(self):
        class RaisingBuilder:
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                # Fail the lookup of one chosen attribute; every other
                # missing attribute resolves to a do-nothing handler.
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                def handle(*args):
                    pass
                return handle

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # An AttributeError during lookup does not propagate out of the
        # constructor; parsing still completes.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            target = RaisingBuilder(event, what=AttributeError)
            self.assertIsNone(self._feed_and_close(target, self.sample1))
3392
3393
class XMLParserTest(unittest.TestCase):
    # Behaviour of ET.XMLParser itself: constructor arguments, subclassing,
    # doctype handling and feeding str input.

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        """Assert that *e* is the element tree expected for ``sample1``."""
        self.assertEqual(e.tag, 'file')
        line = e[0]
        self.assertEqual(line.tag, 'line')
        self.assertEqual(line.text, '22')

    def test_constructor_args(self):
        builder = ET.TreeBuilder()
        parser = ET.XMLParser(target=builder, encoding='utf-8')
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        subclassed = MyParser()
        subclassed.feed(self.sample1)
        root = subclassed.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a DOCTYPE with a plain parser must not raise a
        # DeprecationWarning (turned into an error here).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            plain = ET.XMLParser()
            plain.feed(self.sample2)
            plain.close()

    def test_subclass_doctype(self):
        # Last doctype() call seen by the parser subclass and by the target.
        seen = {'parser': None, 'target': None}

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                seen['parser'] = (args, kwargs)

        # doctype() on the parser subclass is not called; feeding a DOCTYPE
        # produces a RuntimeWarning mentioning it instead.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(seen['parser'])

        seen.update(parser=None, target=None)
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    seen['target'] = (name, pubid, system)

            # With a target that defines doctype(), the target receives the
            # declaration and no warning is raised.
            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(seen['parser'])
            expected = (
                'html',
                '-//W3C//DTD XHTML 1.0 Transitional//EN',
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
            )
            self.assertEqual(seen['target'], expected)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            inheriting = MyParserWithoutDoctype()
            inheriting.feed(self.sample2)
            inheriting.close()

    def test_parse_string(self):
        # sample3 is a str (not bytes) whose declaration names iso-8859-1.
        expected = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        money = parser.close()
        self.assertEqual(money.tag, 'money')
        self.assertEqual(money.attrib['value'], expected)
        self.assertEqual(money.text, expected)
3476
3477
class NamespaceParseTest(unittest.TestCase):
    def test_find_with_namespace(self):
        # findall() with '{uri}tag' paths, with a prefix map passed along.
        prefixes = {'h': 'hello', 'f': 'foo'}
        root = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        expected_hits = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expected_hits:
            self.assertEqual(len(root.findall(path, prefixes)), count)
3486
3487
class ElementSlicingTest(unittest.TestCase):
    # Indexing, slicing, slice assignment and slice deletion on the
    # children of an Element.

    def _elem_tags(self, elemlist):
        """Return the tags of the elements in *elemlist*, in order."""
        tags = []
        for elem in elemlist:
            tags.append(elem.tag)
        return tags

    def _subelem_tags(self, elem):
        """Return the tags of the direct children of *elem*, in order."""
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Build an element tagged 'a' that has *numchildren* children.

        The children are tagged 'a0', 'a1', ... in insertion order.
        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a%s' % index)
        return parent

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        for index, tag in ((1, 'a1'), (-2, 'a8')):
            self.assertEqual(e[index].tag, tag)

        # indexes beyond either end raise IndexError
        for index in (12, -12):
            self.assertRaises(IndexError, lambda: e[index])

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)

        cases = [
            (e[3:], ['a3', 'a4', 'a5']),
            (e[3:6], ['a3', 'a4', 'a5']),
            (e[3:16], ['a3', 'a4', 'a5']),
            (e[3:5], ['a3', 'a4']),
            (e[3:-1], ['a3', 'a4']),
            (e[:2], ['a0', 'a1']),
        ]
        for sliced, expected in cases:
            self.assertEqual(self._elem_tags(sliced), expected)

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)
        # a step far larger than anything that fits in a machine word
        huge = sys.maxsize << 64

        cases = [
            (e[8:10:1], ['a8', 'a9']),
            (e[::3], ['a0', 'a3', 'a6', 'a9']),
            (e[::8], ['a0', 'a8']),
            (e[1::8], ['a1', 'a9']),
            (e[3::sys.maxsize], ['a3']),
            (e[3::huge], ['a3']),
        ]
        for sliced, expected in cases:
            self.assertEqual(self._elem_tags(sliced), expected)

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        huge = sys.maxsize << 64

        cases = [
            (e[::-1], ['a3', 'a2', 'a1', 'a0']),
            (e[::-2], ['a3', 'a1']),
            (e[3::-sys.maxsize], ['a3']),
            (e[3::-sys.maxsize - 1], ['a3']),
            (e[3::-huge], ['a3']),
        ]
        for sliced, expected in cases:
            self.assertEqual(self._elem_tags(sliced), expected)

    def test_delslice(self):
        # (number of children, slice to delete, tags left afterwards)
        cases = [
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        ]
        for count, doomed, remaining in cases:
            e = self._make_elem_with_children(count)
            del e[doomed]
            self.assertEqual(self._subelem_tags(e), remaining)

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        settled = ['a0', 'b', 'c', 'a3']
        self.assertEqual(self._subelem_tags(e), settled)

        # out-of-range indexes are rejected and leave the children alone
        for bad_index in (5, -5):
            with self.assertRaises(IndexError):
                e[bad_index] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), settled)

    def test_setslice_range(self):
        # replacing a two-element slice by as many, fewer and more elements
        cases = [
            (['b0', 'b1'], ['a0', 'b0', 'b1', 'a3']),
            (['b'], ['a0', 'b', 'a3']),
            (['b0', 'b1', 'b2'], ['a0', 'b0', 'b1', 'b2', 'a3']),
        ]
        for new_tags, expected in cases:
            e = self._make_elem_with_children(4)
            e[1:3] = [ET.Element(tag) for tag in new_tags]
            self.assertEqual(self._subelem_tags(e), expected)

    def test_setslice_steps(self):
        e = self._make_elem_with_children(6)
        e[1:5:2] = [ET.Element(tag) for tag in ('b0', 'b1')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # an extended slice only accepts exactly as many new elements as
        # it selects; failed attempts leave the children untouched
        e = self._make_elem_with_children(6)
        for new_tags in (['b'], ['b0', 'b1', 'b2'], []):
            with self.assertRaises(ValueError):
                e[1:5:2] = [ET.Element(tag) for tag in new_tags]
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        e = self._make_elem_with_children(4)
        huge = sys.maxsize << 64
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::huge] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        e[2:0:-1] = [ET.Element(tag) for tag in ('b0', 'b1')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        # size mismatches are rejected for negative steps as well
        e = self._make_elem_with_children(4)
        for new_tags in (['b'], ['b0', 'b1', 'b2'], []):
            with self.assertRaises(ValueError):
                e[2:0:-1] = [ET.Element(tag) for tag in new_tags]
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        e = self._make_elem_with_children(4)
        huge = sys.maxsize << 64
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-huge] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
3636
3637
class IOTest(unittest.TestCase):
    # Serialization and parsing through file names, real files, in-memory
    # streams and minimal user-defined reader/writer objects.

    def test_encoding(self):
        # Test encoding issues.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        # UTF-8 and US-ASCII output is expected without an XML declaration.
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        b'<tag>abc</tag>')
                self.assertEqual(serialize(elem, encoding=enc.upper()),
                        b'<tag>abc</tag>')
        # Other encodings are expected to add a declaration that repeats
        # the encoding name exactly as it was requested.
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % enc).encode(enc))
                upper = enc.upper()
                self.assertEqual(serialize(elem, encoding=upper),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % upper).encode(enc))

        # Markup characters in text: '<', '&' and '>' are escaped,
        # quotes are left alone.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        # Markup characters in an attribute value: the double quote is
        # escaped as well.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))

        # Non-ASCII text: US-ASCII output falls back to character references.
        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))

        # Non-ASCII attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            self.assertEqual(serialize(elem, encoding=enc),
                    ("<?xml version='1.0' encoding='%s'?>\n"
                     "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))

    def test_write_to_filename(self):
        # Without an explicit encoding the non-ASCII character is
        # expected as a character reference.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>&#248;</site>''')

    def test_write_to_filename_with_encoding(self):
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        tree.write(TESTFN, encoding='utf-8')
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')

        tree.write(TESTFN, encoding='ISO-8859-1')
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), convlinesep(
                             b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
                             b'''<site>\xf8</site>'''))

    def test_write_to_filename_as_unicode(self):
        # encoding='unicode' combined with a file name: the file is
        # expected to contain UTF-8 bytes and no XML declaration.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        tree.write(TESTFN, encoding='unicode')
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b"<site>\xc3\xb8</site>")

    def test_write_to_text_file(self):
        # With encoding='unicode' the bytes on disk are decided by the
        # text file's own encoding and error handler; write() must not
        # close the file it was given.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')

        with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f:
            tree.write(f, encoding='unicode')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),  b'''<site>&#248;</site>''')

        with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
            tree.write(f, encoding='unicode')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>\xf8</site>''')

    def test_write_to_binary_file(self):
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>&#248;</site>''')

    def test_write_to_binary_file_with_encoding(self):
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-8')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''')

        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='ISO-8859-1')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                             b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
                             b'''<site>\xf8</site>''')

    def test_write_to_binary_file_with_bom(self):
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site>\xf8</site>'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site>\xf8</site>'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')

    # Bare attribute holder: the tests below attach only the methods they
    # need (read, write, seekable, tell) to an instance of it.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''')

    def test_read_from_user_binary_reader(self):
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site>&#248;</site>''')

    def test_write_to_user_binary_writer_with_bom(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        # This writer additionally exposes seekable() and tell().
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))

    def test_tostringlist_invariant(self):
        # Joining the pieces from tostringlist() must give tostring()'s result.
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            ''.join(ET.tostringlist(root, 'unicode')))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))

    def test_short_empty_elements(self):
        # The short '<x />' form is the default; short_empty_elements=False
        # switches to explicit start and end tags.
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            '<tag>a<x></x>b<y></y>c</tag>')
3898
3899
class ParseErrorTest(unittest.TestCase):
    # Checks the type of the exception raised for malformed XML and the
    # position/code details it carries.

    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        # Parse *s*, which is expected to be malformed, and return the
        # ParseError that was raised.
        try:
            ET.fromstring(s)
        except ET.ParseError as e:
            return e
        # Fail right here instead of returning None: a None result would
        # only surface later as an unrelated AttributeError in the caller.
        self.fail(f'ParseError not raised for {s!r}')

    def test_error_position(self):
        # position is a (line, column) pair; per the expected values below,
        # lines count from 1 and columns from 0.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        # The code attribute must be the numeric expat code that
        # corresponds to the "syntax error" message.
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3919
3920
class KeywordArgsTest(unittest.TestCase):
    # The ET.Element constructor, ET.SubElement and the Element find*
    # methods must accept their arguments both positionally and by
    # keyword, with identical results.
    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")
        self.assertEqual(root.find('a', None),
                         root.find(path='a', namespaces=None))
        self.assertEqual(root.findtext('a', None, None),
                         root.findtext(path='a', default=None,
                                       namespaces=None))
        self.assertEqual(root.findall('a', None),
                         root.findall(path='a', namespaces=None))
        self.assertEqual(list(root.iterfind('a', None)),
                         list(root.iterfind(path='a', namespaces=None)))

        # No attributes given at all: attrib is an empty dict.
        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of spelling the same two attributes must produce the
        # same element.
        expected_attrib = {'href': '#', 'id': 'foo'}
        variants = (
            ET.Element('a', dict(expected_attrib)),
            ET.Element('a', attrib=dict(expected_attrib)),
            ET.Element('a', {'href': '#'}, id="foo"),
            ET.Element('a', **expected_attrib),
            ET.Element('a', dict(expected_attrib), **expected_attrib),
        )
        for element in variants:
            self.assertEqual(element.tag, 'a')
            self.assertEqual(element.attrib, expected_attrib)

        child = ET.SubElement(variants[0], 'foobar',
                              attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally and by keyword.
        not_a_dict = "I'm not a dict"
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', not_a_dict)
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', attrib=not_a_dict)
3954
3955# --------------------------------------------------------------------
3956
class NoAcceleratorTest(unittest.TestCase):
    def setUp(self):
        # pyET is only bound while test_main() is running.
        if not pyET:
            self.skipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # Methods written in Python are plain functions
        # (types.FunctionType); the ones provided by _elementtree are
        # <class 'wrapper_descriptor'> objects instead.
        for python_class in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(python_class.__init__, types.FunctionType)
3969
3970
3971# --------------------------------------------------------------------
3972
def c14n_roundtrip(xml, **options):
    """Return the canonical (C14N) form of the XML string *xml*.

    All keyword *options* are forwarded unchanged to pyET.canonicalize().
    """
    canonical_form = pyET.canonicalize(xml, **options)
    return canonical_form
3975
3976
class C14NTest(unittest.TestCase):
    # Tests for canonical XML output: small inline documents checked
    # through c14n_roundtrip(), plus a data-driven run over the fixture
    # files in xmltestdata/c14n-20.

    # Do not truncate diffs in assertion failure messages.
    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Each case canonicalizes a literal document and compares the
        # result with a literal expected string.

        # Basics
        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
                '<doc xmlns="uri"></doc>')
        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
            '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
            '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
            '<elem></elem>')

        # C14N spec
        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
            '<doc>Hello, world!</doc>')
        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
            '<value>2</value>')
        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
            '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
            '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
            '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
            '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
            '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

        # Namespace issues
        # These documents are already in canonical form, so they must come
        # back unchanged.
        xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)
        xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)
        xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>'
        self.assertEqual(c14n_roundtrip(xml), xml)

    def test_c14n_exclusion(self):
        # Exercises the exclude_attrs and exclude_tags options, alone and
        # combined, with and without strip_text.  Names are given either
        # as plain local names or in '{namespace-uri}local' form.  As the
        # expected strings show, an excluded element disappears together
        # with its content, while the whitespace around it is kept unless
        # strip_text is set.
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
                           exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
            '<root>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
            '<root>\n'
            '    \n'
            '    \n'
            '    <c>\n'
            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
            '    </c>\n'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>\n'
            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
            '        \n'
            '    </a>\n'
            '    \n'
            '    <c>\n'
            '        \n'
            '    </c>\n'
            '</root>')

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        # Data-driven test: every 'out_<input>_<config>.xml' fixture is
        # compared with the result of canonicalizing '<input>.xml' under
        # the options described by '<config>.xml'.
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (the 4-character '.xml' suffix removed) of all XML
        # files in the data directory, in sorted order.
        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
                 if filename.endswith('.xml')]
        # Documents to canonicalize: the files whose name starts with 'in'.
        input_files = [
            filename for filename in files
            if filename.startswith('in')
        ]
        # For each 'c14n*' configuration file, map the option name (the
        # child element's tag without its namespace) to a pair of
        # (stripped text value, the option element itself).
        configs = {
            filename: {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in ET.parse(full_path(filename) + ".xml").getroot()
            }
            for filename in files
            if filename.startswith('c14n')
        }

        # For each input file, the list of (output file, config) pairs to
        # check.  The config name is whatever follows the last '_' in the
        # output file name; outputs naming an unknown config are left out.
        tests = {
            input_file: [
                (filename, configs[filename.rsplit('_', 1)[-1]])
                for filename in files
                if filename.startswith(f'out_{input_file}_')
                and filename.rsplit('_', 1)[-1] in configs
            ]
            for input_file in input_files
        }

        # Make sure we found all test cases.
        self.assertEqual(30, len([
            output_file for output_files in tests.values()
            for output_file in output_files]))

        def get_option(config, option_name, default=None):
            # Return the text value of *option_name*, or *default* when the
            # config does not define that option.
            return config.get(option_name, (default, ()))[0]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                # Translate the configuration into canonicalize() keyword
                # arguments.
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'
                if 'QNameAware' in config:
                    # Collect the qualified names, in '{NS}Name' form, of
                    # the attributes and elements listed under QNameAware.
                    qattrs = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
                    ]
                    qtags = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}Element')
                    ]
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                # Options without a text value are described by the names
                # of their child elements instead.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    # Skip the combinations that are not supported.
                    if input_file == 'inNsRedecl' and not rewrite_prefixes:
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if 'QNameAware' in config and config['QNameAware'][1].find(
                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    # f starts out as the input file's path; for inC14N5 it
                    # is replaced below by an in-memory copy of the file.
                    f = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        # The '&ent2;' reference is replaced textually by
                        # the content of world.txt before parsing.
                        with open(full_path('world.txt'), 'rb') as entity_file:
                            with open(f, 'rb') as f:
                                f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=f,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
                        expected = f.read()
                        if input_file == 'inC14N3':
                            # FIXME: cET resolves default attributes but ET does not!
                            # Drop the defaulted attribute from both sides
                            # so that either behaviour compares equal.
                            expected = expected.replace(' attr="default"', '')
                            text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
4202
4203# --------------------------------------------------------------------
4204
4205
def test_main(module=None):
    # Run the whole suite against *module*.  Without a module, the freshly
    # imported pure-Python ElementTree (pyET) is the one under test.
    global pyET, ET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        KeywordArgsTest,
        C14NTest,
        ]

    # NoAcceleratorTest checks that pyET itself was imported without
    # _elementtree.  It is added only when the module under test is not
    # pyET.  We can't use skipUnless here, because pyET is filled in only
    # after the module is loaded.
    if pyET is not ET:
        test_classes.append(NoAcceleratorTest)

    # Snapshot the global state that tests may modify: the namespace
    # mapping and the ElementPath cache (should be empty).  The tests run
    # on a copy of the cache; the mapping itself is restored afterwards.
    from xml.etree import ElementPath
    namespace_map = ET.register_namespace._namespace_map
    saved_namespace_map = namespace_map.copy()
    saved_path_cache = ElementPath._cache
    ElementPath._cache = saved_path_cache.copy()
    # Align the Comment/PI factories.
    old_factories = (ET._set_factories(ET.Comment, ET.PI)
                     if hasattr(ET, '_set_factories') else None)

    try:
        support.run_unittest(*test_classes)
    finally:
        from xml.etree import ElementPath
        # Put the mapping, the path cache and the factories back.
        namespace_map.clear()
        namespace_map.update(saved_namespace_map)
        ElementPath._cache = saved_path_cache
        if old_factories is not None:
            ET._set_factories(*old_factories)
        # don't interfere with subsequent tests
        ET = pyET = None
4273
4274
# Running this file as a script calls test_main() without a module, which
# runs the tests against the pure-Python implementation.
if __name__ == '__main__':
    test_main()
4277