1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order 2# to ensure consistency between the C implementation and the Python 3# implementation. 4# 5# For this purpose, the module-level "ET" symbol is temporarily 6# monkey-patched when running the "test_xml_etree_c" test suite. 7 8import copy 9import functools 10import html 11import io 12import itertools 13import operator 14import os 15import pickle 16import sys 17import textwrap 18import types 19import unittest 20import warnings 21import weakref 22 23from functools import partial 24from itertools import product, islice 25from test import support 26from test.support import os_helper 27from test.support import warnings_helper 28from test.support import findfile, gc_collect, swap_attr, swap_item 29from test.support.import_helper import import_fresh_module 30from test.support.os_helper import TESTFN 31 32 33# pyET is the pure-Python implementation. 34# 35# ET is pyET in test_xml_etree and is the C accelerated version in 36# test_xml_etree_c. 37pyET = None 38ET = None 39 40SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 41try: 42 SIMPLE_XMLFILE.encode("utf-8") 43except UnicodeEncodeError: 44 raise unittest.SkipTest("filename is not encodable to utf8") 45SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 46UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") 47 48SAMPLE_XML = """\ 49<body> 50 <tag class='a'>text</tag> 51 <tag class='b' /> 52 <section> 53 <tag class='b' id='inner'>subtext</tag> 54 </section> 55</body> 56""" 57 58SAMPLE_SECTION = """\ 59<section> 60 <tag class='b' id='inner'>subtext</tag> 61 <nexttag /> 62 <nextsection> 63 <tag /> 64 </nextsection> 65</section> 66""" 67 68SAMPLE_XML_NS = """ 69<body xmlns="http://effbot.org/ns"> 70 <tag>text</tag> 71 <tag /> 72 <section> 73 <tag>subtext</tag> 74 </section> 75</body> 76""" 77 78SAMPLE_XML_NS_ELEMS = """ 79<root> 80<h:table xmlns:h="hello"> 81 <h:tr> 82 <h:td>Apples</h:td> 83 <h:td>Bananas</h:td> 84 </h:tr> 85</h:table> 86 87<f:table xmlns:f="foo"> 88 <f:name>African Coffee Table</f:name> 89 <f:width>80</f:width> 90 <f:length>120</f:length> 91</f:table> 92</root> 93""" 94 95ENTITY_XML = """\ 96<!DOCTYPE points [ 97<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 98%user-entities; 99]> 100<document>&entity;</document> 101""" 102 103EXTERNAL_ENTITY_XML = """\ 104<!DOCTYPE points [ 105<!ENTITY entity SYSTEM "file:///non-existing-file.xml"> 106]> 107<document>&entity;</document> 108""" 109 110ATTLIST_XML = """\ 111<?xml version="1.0" encoding="UTF-8"?> 112<!DOCTYPE Foo [ 113<!ELEMENT foo (bar*)> 114<!ELEMENT bar (#PCDATA)*> 115<!ATTLIST bar xml:lang CDATA "eng"> 116<!ENTITY qux "quux"> 117]> 118<foo> 119<bar>&qux;</bar> 120</foo> 121""" 122 123def checkwarnings(*filters, quiet=False): 124 def decorator(test): 125 def newtest(*args, **kwargs): 126 with warnings_helper.check_warnings(*filters, quiet=quiet): 127 test(*args, **kwargs) 128 functools.update_wrapper(newtest, test) 129 return newtest 130 return decorator 131 132def convlinesep(data): 133 return data.replace(b'\n', os.linesep.encode()) 134 135 136class ModuleTest(unittest.TestCase): 137 def test_sanity(self): 138 # Import sanity. 139 140 from xml.etree import ElementTree 141 from xml.etree import ElementInclude 142 from xml.etree import ElementPath 143 144 def test_all(self): 145 names = ("xml.etree.ElementTree", "_elementtree") 146 support.check__all__(self, ET, names, not_exported=("HTML_EMPTY",)) 147 148 149def serialize(elem, to_string=True, encoding='unicode', **options): 150 if encoding != 'unicode': 151 file = io.BytesIO() 152 else: 153 file = io.StringIO() 154 tree = ET.ElementTree(elem) 155 tree.write(file, encoding=encoding, **options) 156 if to_string: 157 return file.getvalue() 158 else: 159 file.seek(0) 160 return file 161 162def summarize_list(seq): 163 return [elem.tag for elem in seq] 164 165 166class ElementTestCase: 167 @classmethod 168 def setUpClass(cls): 169 cls.modules = {pyET, ET} 170 171 def pickleRoundTrip(self, obj, name, dumper, loader, proto): 172 try: 173 with swap_item(sys.modules, name, dumper): 174 temp = pickle.dumps(obj, proto) 175 with swap_item(sys.modules, name, loader): 176 result = pickle.loads(temp) 177 except pickle.PicklingError as pe: 178 # pyET must be second, because pyET may be (equal to) ET. 179 human = dict([(ET, "cET"), (pyET, "pyET")]) 180 raise support.TestFailed("Failed to round-trip %r from %r to %r" 181 % (obj, 182 human.get(dumper, dumper), 183 human.get(loader, loader))) from pe 184 return result 185 186 def assertEqualElements(self, alice, bob): 187 self.assertIsInstance(alice, (ET.Element, pyET.Element)) 188 self.assertIsInstance(bob, (ET.Element, pyET.Element)) 189 self.assertEqual(len(list(alice)), len(list(bob))) 190 for x, y in zip(alice, bob): 191 self.assertEqualElements(x, y) 192 properties = operator.attrgetter('tag', 'tail', 'text', 'attrib') 193 self.assertEqual(properties(alice), properties(bob)) 194 195# -------------------------------------------------------------------- 196# element tree tests 197 198class ElementTreeTest(unittest.TestCase): 199 200 def serialize_check(self, elem, expected): 201 self.assertEqual(serialize(elem), expected) 202 203 def test_interface(self): 204 # Test element tree interface. 205 206 def check_element(element): 207 self.assertTrue(ET.iselement(element), msg="not an element") 208 direlem = dir(element) 209 for attr in 'tag', 'attrib', 'text', 'tail': 210 self.assertTrue(hasattr(element, attr), 211 msg='no %s member' % attr) 212 self.assertIn(attr, direlem, 213 msg='no %s visible by dir' % attr) 214 215 self.assertIsInstance(element.tag, str) 216 self.assertIsInstance(element.attrib, dict) 217 if element.text is not None: 218 self.assertIsInstance(element.text, str) 219 if element.tail is not None: 220 self.assertIsInstance(element.tail, str) 221 for elem in element: 222 check_element(elem) 223 224 element = ET.Element("tag") 225 check_element(element) 226 tree = ET.ElementTree(element) 227 check_element(tree.getroot()) 228 element = ET.Element("t\xe4g", key="value") 229 tree = ET.ElementTree(element) 230 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$") 231 element = ET.Element("tag", key="value") 232 233 # Make sure all standard element methods exist. 234 235 def check_method(method): 236 self.assertTrue(hasattr(method, '__call__'), 237 msg="%s not callable" % method) 238 239 check_method(element.append) 240 check_method(element.extend) 241 check_method(element.insert) 242 check_method(element.remove) 243 check_method(element.find) 244 check_method(element.iterfind) 245 check_method(element.findall) 246 check_method(element.findtext) 247 check_method(element.clear) 248 check_method(element.get) 249 check_method(element.set) 250 check_method(element.keys) 251 check_method(element.items) 252 check_method(element.iter) 253 check_method(element.itertext) 254 255 # These methods return an iterable. See bug 6472. 256 257 def check_iter(it): 258 check_method(it.__next__) 259 260 check_iter(element.iterfind("tag")) 261 check_iter(element.iterfind("*")) 262 check_iter(tree.iterfind("tag")) 263 check_iter(tree.iterfind("*")) 264 265 # These aliases are provided: 266 267 self.assertEqual(ET.XML, ET.fromstring) 268 self.assertEqual(ET.PI, ET.ProcessingInstruction) 269 270 def test_set_attribute(self): 271 element = ET.Element('tag') 272 273 self.assertEqual(element.tag, 'tag') 274 element.tag = 'Tag' 275 self.assertEqual(element.tag, 'Tag') 276 element.tag = 'TAG' 277 self.assertEqual(element.tag, 'TAG') 278 279 self.assertIsNone(element.text) 280 element.text = 'Text' 281 self.assertEqual(element.text, 'Text') 282 element.text = 'TEXT' 283 self.assertEqual(element.text, 'TEXT') 284 285 self.assertIsNone(element.tail) 286 element.tail = 'Tail' 287 self.assertEqual(element.tail, 'Tail') 288 element.tail = 'TAIL' 289 self.assertEqual(element.tail, 'TAIL') 290 291 self.assertEqual(element.attrib, {}) 292 element.attrib = {'a': 'b', 'c': 'd'} 293 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'}) 294 element.attrib = {'A': 'B', 'C': 'D'} 295 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) 296 297 def test_simpleops(self): 298 # Basic method sanity checks. 299 300 elem = ET.XML("<body><tag/></body>") 301 self.serialize_check(elem, '<body><tag /></body>') 302 e = ET.Element("tag2") 303 elem.append(e) 304 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 305 elem.remove(e) 306 self.serialize_check(elem, '<body><tag /></body>') 307 elem.insert(0, e) 308 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 309 elem.remove(e) 310 elem.extend([e]) 311 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 312 elem.remove(e) 313 elem.extend(iter([e])) 314 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 315 elem.remove(e) 316 317 element = ET.Element("tag", key="value") 318 self.serialize_check(element, '<tag key="value" />') # 1 319 subelement = ET.Element("subtag") 320 element.append(subelement) 321 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 322 element.insert(0, subelement) 323 self.serialize_check(element, 324 '<tag key="value"><subtag /><subtag /></tag>') # 3 325 element.remove(subelement) 326 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 327 element.remove(subelement) 328 self.serialize_check(element, '<tag key="value" />') # 5 329 with self.assertRaises(ValueError) as cm: 330 element.remove(subelement) 331 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 332 self.serialize_check(element, '<tag key="value" />') # 6 333 element[0:0] = [subelement, subelement, subelement] 334 self.serialize_check(element[1], '<subtag />') 335 self.assertEqual(element[1:9], [element[1], element[2]]) 336 self.assertEqual(element[:9:2], [element[0], element[2]]) 337 del element[1:2] 338 self.serialize_check(element, 339 '<tag key="value"><subtag /><subtag /></tag>') 340 341 def test_cdata(self): 342 # Test CDATA handling (etc). 343 344 self.serialize_check(ET.XML("<tag>hello</tag>"), 345 '<tag>hello</tag>') 346 self.serialize_check(ET.XML("<tag>hello</tag>"), 347 '<tag>hello</tag>') 348 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"), 349 '<tag>hello</tag>') 350 351 def test_file_init(self): 352 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8")) 353 tree = ET.ElementTree(file=stringfile) 354 self.assertEqual(tree.find("tag").tag, 'tag') 355 self.assertEqual(tree.find("section/tag").tag, 'tag') 356 357 tree = ET.ElementTree(file=SIMPLE_XMLFILE) 358 self.assertEqual(tree.find("element").tag, 'element') 359 self.assertEqual(tree.find("element/../empty-element").tag, 360 'empty-element') 361 362 def test_path_cache(self): 363 # Check that the path cache behaves sanely. 364 365 from xml.etree import ElementPath 366 367 elem = ET.XML(SAMPLE_XML) 368 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 369 cache_len_10 = len(ElementPath._cache) 370 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 371 self.assertEqual(len(ElementPath._cache), cache_len_10) 372 for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 373 self.assertGreater(len(ElementPath._cache), cache_len_10) 374 for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 375 self.assertLess(len(ElementPath._cache), 500) 376 377 def test_copy(self): 378 # Test copy handling (etc). 379 380 import copy 381 e1 = ET.XML("<tag>hello<foo/></tag>") 382 e2 = copy.copy(e1) 383 e3 = copy.deepcopy(e1) 384 e1.find("foo").tag = "bar" 385 self.serialize_check(e1, '<tag>hello<bar /></tag>') 386 self.serialize_check(e2, '<tag>hello<bar /></tag>') 387 self.serialize_check(e3, '<tag>hello<foo /></tag>') 388 389 def test_attrib(self): 390 # Test attribute handling. 391 392 elem = ET.Element("tag") 393 elem.get("key") # 1.1 394 self.assertEqual(elem.get("key", "default"), 'default') # 1.2 395 396 elem.set("key", "value") 397 self.assertEqual(elem.get("key"), 'value') # 1.3 398 399 elem = ET.Element("tag", key="value") 400 self.assertEqual(elem.get("key"), 'value') # 2.1 401 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2 402 403 attrib = {"key": "value"} 404 elem = ET.Element("tag", attrib) 405 attrib.clear() # check for aliasing issues 406 self.assertEqual(elem.get("key"), 'value') # 3.1 407 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2 408 409 attrib = {"key": "value"} 410 elem = ET.Element("tag", **attrib) 411 attrib.clear() # check for aliasing issues 412 self.assertEqual(elem.get("key"), 'value') # 4.1 413 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2 414 415 elem = ET.Element("tag", {"key": "other"}, key="value") 416 self.assertEqual(elem.get("key"), 'value') # 5.1 417 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2 418 419 elem = ET.Element('test') 420 elem.text = "aa" 421 elem.set('testa', 'testval') 422 elem.set('testb', 'test2') 423 self.assertEqual(ET.tostring(elem), 424 b'<test testa="testval" testb="test2">aa</test>') 425 self.assertEqual(sorted(elem.keys()), ['testa', 'testb']) 426 self.assertEqual(sorted(elem.items()), 427 [('testa', 'testval'), ('testb', 'test2')]) 428 self.assertEqual(elem.attrib['testb'], 'test2') 429 elem.attrib['testb'] = 'test1' 430 elem.attrib['testc'] = 'test2' 431 self.assertEqual(ET.tostring(elem), 432 b'<test testa="testval" testb="test1" testc="test2">aa</test>') 433 434 # Test preserving white space chars in attributes 435 elem = ET.Element('test') 436 elem.set('a', '\r') 437 elem.set('b', '\r\n') 438 elem.set('c', '\t\n\r ') 439 elem.set('d', '\n\n\r\r\t\t ') 440 self.assertEqual(ET.tostring(elem), 441 b'<test a=" " b=" " c="	 " d=" 		 " />') 442 443 def test_makeelement(self): 444 # Test makeelement handling. 445 446 elem = ET.Element("tag") 447 attrib = {"key": "value"} 448 subelem = elem.makeelement("subtag", attrib) 449 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 450 elem.append(subelem) 451 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 452 453 elem.clear() 454 self.serialize_check(elem, '<tag />') 455 elem.append(subelem) 456 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 457 elem.extend([subelem, subelem]) 458 self.serialize_check(elem, 459 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 460 elem[:] = [subelem] 461 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 462 elem[:] = tuple([subelem]) 463 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 464 465 def test_parsefile(self): 466 # Test parsing from file. 467 468 tree = ET.parse(SIMPLE_XMLFILE) 469 stream = io.StringIO() 470 tree.write(stream, encoding='unicode') 471 self.assertEqual(stream.getvalue(), 472 '<root>\n' 473 ' <element key="value">text</element>\n' 474 ' <element>text</element>tail\n' 475 ' <empty-element />\n' 476 '</root>') 477 tree = ET.parse(SIMPLE_NS_XMLFILE) 478 stream = io.StringIO() 479 tree.write(stream, encoding='unicode') 480 self.assertEqual(stream.getvalue(), 481 '<ns0:root xmlns:ns0="namespace">\n' 482 ' <ns0:element key="value">text</ns0:element>\n' 483 ' <ns0:element>text</ns0:element>tail\n' 484 ' <ns0:empty-element />\n' 485 '</ns0:root>') 486 487 with open(SIMPLE_XMLFILE) as f: 488 data = f.read() 489 490 parser = ET.XMLParser() 491 self.assertRegex(parser.version, r'^Expat ') 492 parser.feed(data) 493 self.serialize_check(parser.close(), 494 '<root>\n' 495 ' <element key="value">text</element>\n' 496 ' <element>text</element>tail\n' 497 ' <empty-element />\n' 498 '</root>') 499 500 target = ET.TreeBuilder() 501 parser = ET.XMLParser(target=target) 502 parser.feed(data) 503 self.serialize_check(parser.close(), 504 '<root>\n' 505 ' <element key="value">text</element>\n' 506 ' <element>text</element>tail\n' 507 ' <empty-element />\n' 508 '</root>') 509 510 def test_parseliteral(self): 511 element = ET.XML("<html><body>text</body></html>") 512 self.assertEqual(ET.tostring(element, encoding='unicode'), 513 '<html><body>text</body></html>') 514 element = ET.fromstring("<html><body>text</body></html>") 515 self.assertEqual(ET.tostring(element, encoding='unicode'), 516 '<html><body>text</body></html>') 517 sequence = ["<html><body>", "text</bo", "dy></html>"] 518 element = ET.fromstringlist(sequence) 519 self.assertEqual(ET.tostring(element), 520 b'<html><body>text</body></html>') 521 self.assertEqual(b"".join(ET.tostringlist(element)), 522 b'<html><body>text</body></html>') 523 self.assertEqual(ET.tostring(element, "ascii"), 524 b"<?xml version='1.0' encoding='ascii'?>\n" 525 b"<html><body>text</body></html>") 526 _, ids = ET.XMLID("<html><body>text</body></html>") 527 self.assertEqual(len(ids), 0) 528 _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 529 self.assertEqual(len(ids), 1) 530 self.assertEqual(ids["body"].tag, 'body') 531 532 def test_iterparse(self): 533 # Test iterparse interface. 534 535 iterparse = ET.iterparse 536 537 context = iterparse(SIMPLE_XMLFILE) 538 action, elem = next(context) 539 self.assertEqual((action, elem.tag), ('end', 'element')) 540 self.assertEqual([(action, elem.tag) for action, elem in context], [ 541 ('end', 'element'), 542 ('end', 'empty-element'), 543 ('end', 'root'), 544 ]) 545 self.assertEqual(context.root.tag, 'root') 546 547 context = iterparse(SIMPLE_NS_XMLFILE) 548 self.assertEqual([(action, elem.tag) for action, elem in context], [ 549 ('end', '{namespace}element'), 550 ('end', '{namespace}element'), 551 ('end', '{namespace}empty-element'), 552 ('end', '{namespace}root'), 553 ]) 554 555 events = () 556 context = iterparse(SIMPLE_XMLFILE, events) 557 self.assertEqual([(action, elem.tag) for action, elem in context], []) 558 559 events = () 560 context = iterparse(SIMPLE_XMLFILE, events=events) 561 self.assertEqual([(action, elem.tag) for action, elem in context], []) 562 563 events = ("start", "end") 564 context = iterparse(SIMPLE_XMLFILE, events) 565 self.assertEqual([(action, elem.tag) for action, elem in context], [ 566 ('start', 'root'), 567 ('start', 'element'), 568 ('end', 'element'), 569 ('start', 'element'), 570 ('end', 'element'), 571 ('start', 'empty-element'), 572 ('end', 'empty-element'), 573 ('end', 'root'), 574 ]) 575 576 events = ("start", "end", "start-ns", "end-ns") 577 context = iterparse(SIMPLE_NS_XMLFILE, events) 578 self.assertEqual([(action, elem.tag) if action in ("start", "end") 579 else (action, elem) 580 for action, elem in context], [ 581 ('start-ns', ('', 'namespace')), 582 ('start', '{namespace}root'), 583 ('start', '{namespace}element'), 584 ('end', '{namespace}element'), 585 ('start', '{namespace}element'), 586 ('end', '{namespace}element'), 587 ('start', '{namespace}empty-element'), 588 ('end', '{namespace}empty-element'), 589 ('end', '{namespace}root'), 590 ('end-ns', None), 591 ]) 592 593 events = ('start-ns', 'end-ns') 594 context = iterparse(io.StringIO(r"<root xmlns=''/>"), events) 595 res = [action for action, elem in context] 596 self.assertEqual(res, ['start-ns', 'end-ns']) 597 598 events = ("start", "end", "bogus") 599 with open(SIMPLE_XMLFILE, "rb") as f: 600 with self.assertRaises(ValueError) as cm: 601 iterparse(f, events) 602 self.assertFalse(f.closed) 603 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 604 605 with warnings_helper.check_no_resource_warning(self): 606 with self.assertRaises(ValueError) as cm: 607 iterparse(SIMPLE_XMLFILE, events) 608 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 609 del cm 610 611 source = io.BytesIO( 612 b"<?xml version='1.0' encoding='iso-8859-1'?>\n" 613 b"<body xmlns='http://éffbot.org/ns'\n" 614 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") 615 events = ("start-ns",) 616 context = iterparse(source, events) 617 self.assertEqual([(action, elem) for action, elem in context], [ 618 ('start-ns', ('', 'http://\xe9ffbot.org/ns')), 619 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), 620 ]) 621 622 source = io.StringIO("<document />junk") 623 it = iterparse(source) 624 action, elem = next(it) 625 self.assertEqual((action, elem.tag), ('end', 'document')) 626 with self.assertRaises(ET.ParseError) as cm: 627 next(it) 628 self.assertEqual(str(cm.exception), 629 'junk after document element: line 1, column 12') 630 631 self.addCleanup(os_helper.unlink, TESTFN) 632 with open(TESTFN, "wb") as f: 633 f.write(b"<document />junk") 634 it = iterparse(TESTFN) 635 action, elem = next(it) 636 self.assertEqual((action, elem.tag), ('end', 'document')) 637 with warnings_helper.check_no_resource_warning(self): 638 with self.assertRaises(ET.ParseError) as cm: 639 next(it) 640 self.assertEqual(str(cm.exception), 641 'junk after document element: line 1, column 12') 642 del cm, it 643 644 # Not exhausting the iterator still closes the resource (bpo-43292) 645 with warnings_helper.check_no_resource_warning(self): 646 it = iterparse(TESTFN) 647 del it 648 649 with self.assertRaises(FileNotFoundError): 650 iterparse("nonexistent") 651 652 def test_writefile(self): 653 elem = ET.Element("tag") 654 elem.text = "text" 655 self.serialize_check(elem, '<tag>text</tag>') 656 ET.SubElement(elem, "subtag").text = "subtext" 657 self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>') 658 659 # Test tag suppression 660 elem.tag = None 661 self.serialize_check(elem, 'text<subtag>subtext</subtag>') 662 elem.insert(0, ET.Comment("comment")) 663 self.serialize_check(elem, 664 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3 665 666 elem[0] = ET.PI("key", "value") 667 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>') 668 669 def test_custom_builder(self): 670 # Test parser w. custom builder. 671 672 with open(SIMPLE_XMLFILE) as f: 673 data = f.read() 674 class Builder(list): 675 def start(self, tag, attrib): 676 self.append(("start", tag)) 677 def end(self, tag): 678 self.append(("end", tag)) 679 def data(self, text): 680 pass 681 builder = Builder() 682 parser = ET.XMLParser(target=builder) 683 parser.feed(data) 684 self.assertEqual(builder, [ 685 ('start', 'root'), 686 ('start', 'element'), 687 ('end', 'element'), 688 ('start', 'element'), 689 ('end', 'element'), 690 ('start', 'empty-element'), 691 ('end', 'empty-element'), 692 ('end', 'root'), 693 ]) 694 695 with open(SIMPLE_NS_XMLFILE) as f: 696 data = f.read() 697 class Builder(list): 698 def start(self, tag, attrib): 699 self.append(("start", tag)) 700 def end(self, tag): 701 self.append(("end", tag)) 702 def data(self, text): 703 pass 704 def pi(self, target, data): 705 self.append(("pi", target, data)) 706 def comment(self, data): 707 self.append(("comment", data)) 708 def start_ns(self, prefix, uri): 709 self.append(("start-ns", prefix, uri)) 710 def end_ns(self, prefix): 711 self.append(("end-ns", prefix)) 712 builder = Builder() 713 parser = ET.XMLParser(target=builder) 714 parser.feed(data) 715 self.assertEqual(builder, [ 716 ('pi', 'pi', 'data'), 717 ('comment', ' comment '), 718 ('start-ns', '', 'namespace'), 719 ('start', '{namespace}root'), 720 ('start', '{namespace}element'), 721 ('end', '{namespace}element'), 722 ('start', '{namespace}element'), 723 ('end', '{namespace}element'), 724 ('start', '{namespace}empty-element'), 725 ('end', '{namespace}empty-element'), 726 ('end', '{namespace}root'), 727 ('end-ns', ''), 728 ]) 729 730 def test_custom_builder_only_end_ns(self): 731 class Builder(list): 732 def end_ns(self, prefix): 733 self.append(("end-ns", prefix)) 734 735 builder = Builder() 736 parser = ET.XMLParser(target=builder) 737 parser.feed(textwrap.dedent("""\ 738 <?pi data?> 739 <!-- comment --> 740 <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'> 741 <a:element key='value'>text</a:element> 742 <p:element>text</p:element>tail 743 <empty-element/> 744 </root> 745 """)) 746 self.assertEqual(builder, [ 747 ('end-ns', 'a'), 748 ('end-ns', 'p'), 749 ('end-ns', ''), 750 ]) 751 752 def test_initialize_parser_without_target(self): 753 # Explicit None 754 parser = ET.XMLParser(target=None) 755 self.assertIsInstance(parser.target, ET.TreeBuilder) 756 757 # Implicit None 758 parser2 = ET.XMLParser() 759 self.assertIsInstance(parser2.target, ET.TreeBuilder) 760 761 def test_children(self): 762 # Test Element children iteration 763 764 with open(SIMPLE_XMLFILE, "rb") as f: 765 tree = ET.parse(f) 766 self.assertEqual([summarize_list(elem) 767 for elem in tree.getroot().iter()], [ 768 ['element', 'element', 'empty-element'], 769 [], 770 [], 771 [], 772 ]) 773 self.assertEqual([summarize_list(elem) 774 for elem in tree.iter()], [ 775 ['element', 'element', 'empty-element'], 776 [], 777 [], 778 [], 779 ]) 780 781 elem = ET.XML(SAMPLE_XML) 782 self.assertEqual(len(list(elem)), 3) 783 self.assertEqual(len(list(elem[2])), 1) 784 self.assertEqual(elem[:], list(elem)) 785 child1 = elem[0] 786 child2 = elem[2] 787 del elem[1:2] 788 self.assertEqual(len(list(elem)), 2) 789 self.assertEqual(child1, elem[0]) 790 self.assertEqual(child2, elem[1]) 791 elem[0:2] = [child2, child1] 792 self.assertEqual(child2, elem[0]) 793 self.assertEqual(child1, elem[1]) 794 self.assertNotEqual(child1, elem[0]) 795 elem.clear() 796 self.assertEqual(list(elem), []) 797 798 def test_writestring(self): 799 elem = ET.XML("<html><body>text</body></html>") 800 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 801 elem = ET.fromstring("<html><body>text</body></html>") 802 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 803 804 def test_indent(self): 805 elem = ET.XML("<root></root>") 806 ET.indent(elem) 807 self.assertEqual(ET.tostring(elem), b'<root />') 808 809 elem = ET.XML("<html><body>text</body></html>") 810 ET.indent(elem) 811 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') 812 813 elem = ET.XML("<html> <body>text</body> </html>") 814 ET.indent(elem) 815 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') 816 817 elem = ET.XML("<html><body>text</body>tail</html>") 818 ET.indent(elem) 819 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>') 820 821 elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>") 822 ET.indent(elem) 823 self.assertEqual( 824 ET.tostring(elem), 825 b'<html>\n' 826 b' <body>\n' 827 b' <p>par</p>\n' 828 b' <p>text</p>\n' 829 b' <p>\n' 830 b' <br />\n' 831 b' </p>\n' 832 b' </body>\n' 833 b'</html>' 834 ) 835 836 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 837 ET.indent(elem) 838 self.assertEqual( 839 ET.tostring(elem), 840 b'<html>\n' 841 b' <body>\n' 842 b' <p>pre<br />post</p>\n' 843 b' <p>text</p>\n' 844 b' </body>\n' 845 b'</html>' 846 ) 847 848 def test_indent_space(self): 849 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 850 ET.indent(elem, space='\t') 851 self.assertEqual( 852 ET.tostring(elem), 853 b'<html>\n' 854 b'\t<body>\n' 855 b'\t\t<p>pre<br />post</p>\n' 856 b'\t\t<p>text</p>\n' 857 b'\t</body>\n' 858 b'</html>' 859 ) 860 861 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 862 ET.indent(elem, space='') 863 self.assertEqual( 864 ET.tostring(elem), 865 b'<html>\n' 866 b'<body>\n' 867 b'<p>pre<br />post</p>\n' 868 b'<p>text</p>\n' 869 b'</body>\n' 870 b'</html>' 871 ) 872 873 def test_indent_space_caching(self): 874 elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>") 875 ET.indent(elem) 876 self.assertEqual( 877 {el.tail for el in elem.iter()}, 878 {None, "\n", "\n ", "\n "} 879 ) 880 self.assertEqual( 881 {el.text for el in elem.iter()}, 882 {None, "\n ", "\n ", "\n ", "par", "text"} 883 ) 884 self.assertEqual( 885 len({el.tail for el in elem.iter()}), 886 len({id(el.tail) for el in elem.iter()}), 887 ) 888 889 def test_indent_level(self): 890 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 891 with self.assertRaises(ValueError): 892 ET.indent(elem, level=-1) 893 self.assertEqual( 894 ET.tostring(elem), 895 b"<html><body><p>pre<br />post</p><p>text</p></body></html>" 896 ) 897 898 ET.indent(elem, level=2) 899 self.assertEqual( 900 ET.tostring(elem), 901 b'<html>\n' 902 b' <body>\n' 903 b' <p>pre<br />post</p>\n' 904 b' <p>text</p>\n' 905 b' </body>\n' 906 b' </html>' 907 ) 908 909 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 910 ET.indent(elem, level=1, space=' ') 911 self.assertEqual( 912 ET.tostring(elem), 913 b'<html>\n' 914 b' <body>\n' 915 b' <p>pre<br />post</p>\n' 916 b' <p>text</p>\n' 917 b' </body>\n' 918 b' </html>' 919 ) 920 921 def test_tostring_default_namespace(self): 922 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 923 self.assertEqual( 924 ET.tostring(elem, encoding='unicode'), 925 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 926 ) 927 self.assertEqual( 928 ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'), 929 '<body xmlns="http://effbot.org/ns"><tag /></body>' 930 ) 931 932 def test_tostring_default_namespace_different_namespace(self): 933 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 934 self.assertEqual( 935 ET.tostring(elem, encoding='unicode', default_namespace='foobar'), 936 '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>' 937 ) 938 939 def test_tostring_default_namespace_original_no_namespace(self): 940 elem = ET.XML('<body><tag/></body>') 941 EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$' 942 with self.assertRaisesRegex(ValueError, EXPECTED_MSG): 943 ET.tostring(elem, encoding='unicode', default_namespace='foobar') 944 945 def test_tostring_no_xml_declaration(self): 946 elem = ET.XML('<body><tag/></body>') 947 self.assertEqual( 948 ET.tostring(elem, encoding='unicode'), 949 '<body><tag /></body>' 950 ) 951 952 def test_tostring_xml_declaration(self): 953 elem = ET.XML('<body><tag/></body>') 954 self.assertEqual( 955 ET.tostring(elem, encoding='utf8', xml_declaration=True), 956 b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>" 957 ) 958 959 def test_tostring_xml_declaration_unicode_encoding(self): 960 elem = ET.XML('<body><tag/></body>') 961 self.assertEqual( 962 ET.tostring(elem, encoding='unicode', xml_declaration=True), 963 "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>" 964 ) 965 966 def test_tostring_xml_declaration_cases(self): 967 elem = ET.XML('<body><tag>ø</tag></body>') 968 TESTCASES = [ 969 # (expected_retval, encoding, xml_declaration) 970 # ... xml_declaration = None 971 (b'<body><tag>ø</tag></body>', None, None), 972 (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None), 973 (b'<body><tag>ø</tag></body>', 'US-ASCII', None), 974 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 975 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None), 976 ('<body><tag>ø</tag></body>', 'unicode', None), 977 978 # ... xml_declaration = False 979 (b"<body><tag>ø</tag></body>", None, False), 980 (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False), 981 (b"<body><tag>ø</tag></body>", 'US-ASCII', False), 982 (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False), 983 ("<body><tag>ø</tag></body>", 'unicode', False), 984 985 # ... xml_declaration = True 986 (b"<?xml version='1.0' encoding='us-ascii'?>\n" 987 b"<body><tag>ø</tag></body>", None, True), 988 (b"<?xml version='1.0' encoding='UTF-8'?>\n" 989 b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True), 990 (b"<?xml version='1.0' encoding='US-ASCII'?>\n" 991 b"<body><tag>ø</tag></body>", 'US-ASCII', True), 992 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 993 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True), 994 ("<?xml version='1.0' encoding='utf-8'?>\n" 995 "<body><tag>ø</tag></body>", 'unicode', True), 996 997 ] 998 for expected_retval, encoding, xml_declaration in TESTCASES: 999 with self.subTest(f'encoding={encoding} ' 1000 f'xml_declaration={xml_declaration}'): 1001 self.assertEqual( 1002 ET.tostring( 1003 elem, 1004 encoding=encoding, 1005 xml_declaration=xml_declaration 1006 ), 1007 expected_retval 1008 ) 1009 1010 def test_tostringlist_default_namespace(self): 1011 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 1012 self.assertEqual( 1013 ''.join(ET.tostringlist(elem, encoding='unicode')), 1014 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 1015 ) 1016 self.assertEqual( 1017 ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')), 1018 '<body xmlns="http://effbot.org/ns"><tag /></body>' 1019 ) 1020 1021 def test_tostringlist_xml_declaration(self): 1022 elem = ET.XML('<body><tag/></body>') 1023 self.assertEqual( 1024 ''.join(ET.tostringlist(elem, encoding='unicode')), 1025 '<body><tag /></body>' 1026 ) 1027 self.assertEqual( 1028 b''.join(ET.tostringlist(elem, xml_declaration=True)), 1029 b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>" 1030 ) 1031 1032 stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) 1033 self.assertEqual( 1034 ''.join(stringlist), 1035 "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>" 1036 ) 1037 self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>") 1038 self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:]) 1039 1040 def test_encoding(self): 1041 def check(encoding, body=''): 1042 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 1043 (encoding, body)) 1044 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 1045 self.assertEqual(ET.XML(xml).text, body) 1046 check("ascii", 'a') 1047 check("us-ascii", 'a') 1048 check("iso-8859-1", '\xbd') 1049 check("iso-8859-15", '\u20ac') 1050 check("cp437", '\u221a') 1051 check("mac-roman", '\u02da') 1052 1053 def xml(encoding): 1054 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 1055 def bxml(encoding): 1056 return xml(encoding).encode(encoding) 1057 supported_encodings = [ 1058 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 1059 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 1060 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 1061 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 1062 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 1063 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 1064 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 1065 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 1066 'cp1256', 'cp1257', 'cp1258', 1067 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 1068 'mac-roman', 'mac-turkish', 1069 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 1070 'iso2022-jp-3', 'iso2022-jp-ext', 1071 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 1072 'hz', 'ptcp154', 1073 ] 1074 for encoding in supported_encodings: 1075 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 1076 1077 unsupported_ascii_compatible_encodings = [ 1078 'big5', 'big5hkscs', 1079 'cp932', 'cp949', 'cp950', 1080 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 1081 'gb2312', 'gbk', 'gb18030', 1082 'iso2022-kr', 'johab', 1083 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 1084 'utf-7', 1085 ] 1086 for encoding in unsupported_ascii_compatible_encodings: 1087 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 1088 1089 unsupported_ascii_incompatible_encodings = [ 1090 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 1091 'utf_32', 'utf_32_be', 'utf_32_le', 1092 ] 1093 for encoding in unsupported_ascii_incompatible_encodings: 1094 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 1095 1096 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 1097 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 1098 1099 def test_methods(self): 1100 # Test serialization methods. 1101 1102 e = ET.XML("<html><link/><script>1 < 2</script></html>") 1103 e.tail = "\n" 1104 self.assertEqual(serialize(e), 1105 '<html><link /><script>1 < 2</script></html>\n') 1106 self.assertEqual(serialize(e, method=None), 1107 '<html><link /><script>1 < 2</script></html>\n') 1108 self.assertEqual(serialize(e, method="xml"), 1109 '<html><link /><script>1 < 2</script></html>\n') 1110 self.assertEqual(serialize(e, method="html"), 1111 '<html><link><script>1 < 2</script></html>\n') 1112 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 1113 1114 def test_issue18347(self): 1115 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 1116 self.assertEqual(serialize(e), 1117 '<html><CamelCase>text</CamelCase></html>') 1118 self.assertEqual(serialize(e, method="html"), 1119 '<html><CamelCase>text</CamelCase></html>') 1120 1121 def test_entity(self): 1122 # Test entity handling. 1123 1124 # 1) good entities 1125 1126 e = ET.XML("<document title='舰'>test</document>") 1127 self.assertEqual(serialize(e, encoding="us-ascii"), 1128 b'<document title="舰">test</document>') 1129 self.serialize_check(e, '<document title="\u8230">test</document>') 1130 1131 # 2) bad entities 1132 1133 with self.assertRaises(ET.ParseError) as cm: 1134 ET.XML("<document>&entity;</document>") 1135 self.assertEqual(str(cm.exception), 1136 'undefined entity: line 1, column 10') 1137 1138 with self.assertRaises(ET.ParseError) as cm: 1139 ET.XML(ENTITY_XML) 1140 self.assertEqual(str(cm.exception), 1141 'undefined entity &entity;: line 5, column 10') 1142 1143 # 3) custom entity 1144 1145 parser = ET.XMLParser() 1146 parser.entity["entity"] = "text" 1147 parser.feed(ENTITY_XML) 1148 root = parser.close() 1149 self.serialize_check(root, '<document>text</document>') 1150 1151 # 4) external (SYSTEM) entity 1152 1153 with self.assertRaises(ET.ParseError) as cm: 1154 ET.XML(EXTERNAL_ENTITY_XML) 1155 self.assertEqual(str(cm.exception), 1156 'undefined entity &entity;: line 4, column 10') 1157 1158 def test_namespace(self): 1159 # Test namespace issues. 1160 1161 # 1) xml namespace 1162 1163 elem = ET.XML("<tag xml:lang='en' />") 1164 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 1165 1166 # 2) other "well-known" namespaces 1167 1168 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1169 self.serialize_check(elem, 1170 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 1171 1172 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1173 self.serialize_check(elem, 1174 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 1175 1176 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1177 self.serialize_check(elem, 1178 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 1179 1180 # 3) unknown namespaces 1181 elem = ET.XML(SAMPLE_XML_NS) 1182 self.serialize_check(elem, 1183 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 1184 ' <ns0:tag>text</ns0:tag>\n' 1185 ' <ns0:tag />\n' 1186 ' <ns0:section>\n' 1187 ' <ns0:tag>subtext</ns0:tag>\n' 1188 ' </ns0:section>\n' 1189 '</ns0:body>') 1190 1191 def test_qname(self): 1192 # Test QName handling. 1193 1194 # 1) decorated tags 1195 1196 elem = ET.Element("{uri}tag") 1197 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 1198 elem = ET.Element(ET.QName("{uri}tag")) 1199 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 1200 elem = ET.Element(ET.QName("uri", "tag")) 1201 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 1202 elem = ET.Element(ET.QName("uri", "tag")) 1203 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1204 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1205 self.serialize_check(elem, 1206 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 1207 1208 # 2) decorated attributes 1209 1210 elem.clear() 1211 elem.attrib["{uri}key"] = "value" 1212 self.serialize_check(elem, 1213 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 1214 1215 elem.clear() 1216 elem.attrib[ET.QName("{uri}key")] = "value" 1217 self.serialize_check(elem, 1218 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 1219 1220 # 3) decorated values are not converted by default, but the 1221 # QName wrapper can be used for values 1222 1223 elem.clear() 1224 elem.attrib["{uri}key"] = "{uri}value" 1225 self.serialize_check(elem, 1226 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 1227 1228 elem.clear() 1229 elem.attrib["{uri}key"] = ET.QName("{uri}value") 1230 self.serialize_check(elem, 1231 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 1232 1233 elem.clear() 1234 subelem = ET.Element("tag") 1235 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1236 elem.append(subelem) 1237 elem.append(subelem) 1238 self.serialize_check(elem, 1239 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 1240 '<tag ns1:key="ns2:value" />' 1241 '<tag ns1:key="ns2:value" />' 1242 '</ns0:tag>') # 3.3 1243 1244 # 4) Direct QName tests 1245 1246 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 1247 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 1248 q1 = ET.QName('ns', 'tag') 1249 q2 = ET.QName('ns', 'tag') 1250 self.assertEqual(q1, q2) 1251 q2 = ET.QName('ns', 'other-tag') 1252 self.assertNotEqual(q1, q2) 1253 self.assertNotEqual(q1, 'ns:tag') 1254 self.assertEqual(q1, '{ns}tag') 1255 1256 def test_doctype_public(self): 1257 # Test PUBLIC doctype. 1258 1259 elem = ET.XML('<!DOCTYPE html PUBLIC' 1260 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 1261 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 1262 '<html>text</html>') 1263 1264 def test_xpath_tokenizer(self): 1265 # Test the XPath tokenizer. 1266 from xml.etree import ElementPath 1267 def check(p, expected, namespaces=None): 1268 self.assertEqual([op or tag 1269 for op, tag in ElementPath.xpath_tokenizer(p, namespaces)], 1270 expected) 1271 1272 # tests from the xml specification 1273 check("*", ['*']) 1274 check("text()", ['text', '()']) 1275 check("@name", ['@', 'name']) 1276 check("@*", ['@', '*']) 1277 check("para[1]", ['para', '[', '1', ']']) 1278 check("para[last()]", ['para', '[', 'last', '()', ']']) 1279 check("*/para", ['*', '/', 'para']) 1280 check("/doc/chapter[5]/section[2]", 1281 ['/', 'doc', '/', 'chapter', '[', '5', ']', 1282 '/', 'section', '[', '2', ']']) 1283 check("chapter//para", ['chapter', '//', 'para']) 1284 check("//para", ['//', 'para']) 1285 check("//olist/item", ['//', 'olist', '/', 'item']) 1286 check(".", ['.']) 1287 check(".//para", ['.', '//', 'para']) 1288 check("..", ['..']) 1289 check("../@lang", ['..', '/', '@', 'lang']) 1290 check("chapter[title]", ['chapter', '[', 'title', ']']) 1291 check("employee[@secretary and @assistant]", ['employee', 1292 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 1293 1294 # additional tests 1295 check("@{ns}attr", ['@', '{ns}attr']) 1296 check("{http://spam}egg", ['{http://spam}egg']) 1297 check("./spam.egg", ['.', '/', 'spam.egg']) 1298 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 1299 1300 # wildcard tags 1301 check("{ns}*", ['{ns}*']) 1302 check("{}*", ['{}*']) 1303 check("{*}tag", ['{*}tag']) 1304 check("{*}*", ['{*}*']) 1305 check(".//{*}tag", ['.', '//', '{*}tag']) 1306 1307 # namespace prefix resolution 1308 check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'], 1309 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1310 check("type", ['{http://www.w3.org/2001/XMLSchema}type'], 1311 {'': 'http://www.w3.org/2001/XMLSchema'}) 1312 check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'], 1313 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1314 check("@type", ['@', 'type'], 1315 {'': 'http://www.w3.org/2001/XMLSchema'}) 1316 check("@{*}type", ['@', '{*}type'], 1317 {'': 'http://www.w3.org/2001/XMLSchema'}) 1318 check("@{ns}attr", ['@', '{ns}attr'], 1319 {'': 'http://www.w3.org/2001/XMLSchema', 1320 'ns': 'http://www.w3.org/2001/XMLSchema'}) 1321 1322 def test_processinginstruction(self): 1323 # Test ProcessingInstruction directly 1324 1325 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 1326 b'<?test instruction?>') 1327 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')), 1328 b'<?test instruction?>') 1329 1330 # Issue #2746 1331 1332 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), 1333 b'<?test <testing&>?>') 1334 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'), 1335 b"<?xml version='1.0' encoding='latin-1'?>\n" 1336 b"<?test <testing&>\xe3?>") 1337 1338 def test_html_empty_elems_serialization(self): 1339 # issue 15970 1340 # from http://www.w3.org/TR/html401/index/elements.html 1341 for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME', 1342 'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM', 1343 'SOURCE', 'TRACK', 'WBR']: 1344 for elem in [element, element.lower()]: 1345 expected = '<%s>' % elem 1346 serialized = serialize(ET.XML('<%s />' % elem), method='html') 1347 self.assertEqual(serialized, expected) 1348 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)), 1349 method='html') 1350 self.assertEqual(serialized, expected) 1351 1352 def test_dump_attribute_order(self): 1353 # See BPO 34160 1354 e = ET.Element('cirriculum', status='public', company='example') 1355 with support.captured_stdout() as stdout: 1356 ET.dump(e) 1357 self.assertEqual(stdout.getvalue(), 1358 '<cirriculum status="public" company="example" />\n') 1359 1360 def test_tree_write_attribute_order(self): 1361 # See BPO 34160 1362 root = ET.Element('cirriculum', status='public', company='example') 1363 self.assertEqual(serialize(root), 1364 '<cirriculum status="public" company="example" />') 1365 self.assertEqual(serialize(root, method='html'), 1366 '<cirriculum status="public" company="example"></cirriculum>') 1367 1368 def test_attlist_default(self): 1369 # Test default attribute values; See BPO 42151. 1370 root = ET.fromstring(ATTLIST_XML) 1371 self.assertEqual(root[0].attrib, 1372 {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) 1373 1374 1375class XMLPullParserTest(unittest.TestCase): 1376 1377 def _feed(self, parser, data, chunk_size=None): 1378 if chunk_size is None: 1379 parser.feed(data) 1380 else: 1381 for i in range(0, len(data), chunk_size): 1382 parser.feed(data[i:i+chunk_size]) 1383 1384 def assert_events(self, parser, expected, max_events=None): 1385 self.assertEqual( 1386 [(event, (elem.tag, elem.text)) 1387 for event, elem in islice(parser.read_events(), max_events)], 1388 expected) 1389 1390 def assert_event_tuples(self, parser, expected, max_events=None): 1391 self.assertEqual( 1392 list(islice(parser.read_events(), max_events)), 1393 expected) 1394 1395 def assert_event_tags(self, parser, expected, max_events=None): 1396 events = islice(parser.read_events(), max_events) 1397 self.assertEqual([(action, elem.tag) for action, elem in events], 1398 expected) 1399 1400 def test_simple_xml(self): 1401 for chunk_size in (None, 1, 5): 1402 with self.subTest(chunk_size=chunk_size): 1403 parser = ET.XMLPullParser() 1404 self.assert_event_tags(parser, []) 1405 self._feed(parser, "<!-- comment -->\n", chunk_size) 1406 self.assert_event_tags(parser, []) 1407 self._feed(parser, 1408 "<root>\n <element key='value'>text</element", 1409 chunk_size) 1410 self.assert_event_tags(parser, []) 1411 self._feed(parser, ">\n", chunk_size) 1412 self.assert_event_tags(parser, [('end', 'element')]) 1413 self._feed(parser, "<element>text</element>tail\n", chunk_size) 1414 self._feed(parser, "<empty-element/>\n", chunk_size) 1415 self.assert_event_tags(parser, [ 1416 ('end', 'element'), 1417 ('end', 'empty-element'), 1418 ]) 1419 self._feed(parser, "</root>\n", chunk_size) 1420 self.assert_event_tags(parser, [('end', 'root')]) 1421 self.assertIsNone(parser.close()) 1422 1423 def test_feed_while_iterating(self): 1424 parser = ET.XMLPullParser() 1425 it = parser.read_events() 1426 self._feed(parser, "<root>\n <element key='value'>text</element>\n") 1427 action, elem = next(it) 1428 self.assertEqual((action, elem.tag), ('end', 'element')) 1429 self._feed(parser, "</root>\n") 1430 action, elem = next(it) 1431 self.assertEqual((action, elem.tag), ('end', 'root')) 1432 with self.assertRaises(StopIteration): 1433 next(it) 1434 1435 def test_simple_xml_with_ns(self): 1436 parser = ET.XMLPullParser() 1437 self.assert_event_tags(parser, []) 1438 self._feed(parser, "<!-- comment -->\n") 1439 self.assert_event_tags(parser, []) 1440 self._feed(parser, "<root xmlns='namespace'>\n") 1441 self.assert_event_tags(parser, []) 1442 self._feed(parser, "<element key='value'>text</element") 1443 self.assert_event_tags(parser, []) 1444 self._feed(parser, ">\n") 1445 self.assert_event_tags(parser, [('end', '{namespace}element')]) 1446 self._feed(parser, "<element>text</element>tail\n") 1447 self._feed(parser, "<empty-element/>\n") 1448 self.assert_event_tags(parser, [ 1449 ('end', '{namespace}element'), 1450 ('end', '{namespace}empty-element'), 1451 ]) 1452 self._feed(parser, "</root>\n") 1453 self.assert_event_tags(parser, [('end', '{namespace}root')]) 1454 self.assertIsNone(parser.close()) 1455 1456 def test_ns_events(self): 1457 parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) 1458 self._feed(parser, "<!-- comment -->\n") 1459 self._feed(parser, "<root xmlns='namespace'>\n") 1460 self.assertEqual( 1461 list(parser.read_events()), 1462 [('start-ns', ('', 'namespace'))]) 1463 self._feed(parser, "<element key='value'>text</element") 1464 self._feed(parser, ">\n") 1465 self._feed(parser, "<element>text</element>tail\n") 1466 self._feed(parser, "<empty-element/>\n") 1467 self._feed(parser, "</root>\n") 1468 self.assertEqual(list(parser.read_events()), [('end-ns', None)]) 1469 self.assertIsNone(parser.close()) 1470 1471 def test_ns_events_start(self): 1472 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end')) 1473 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") 1474 self.assert_event_tuples(parser, [ 1475 ('start-ns', ('', 'abc')), 1476 ('start-ns', ('p', 'xyz')), 1477 ], max_events=2) 1478 self.assert_event_tags(parser, [ 1479 ('start', '{abc}tag'), 1480 ], max_events=1) 1481 1482 self._feed(parser, "<child />\n") 1483 self.assert_event_tags(parser, [ 1484 ('start', '{abc}child'), 1485 ('end', '{abc}child'), 1486 ]) 1487 1488 self._feed(parser, "</tag>\n") 1489 parser.close() 1490 self.assert_event_tags(parser, [ 1491 ('end', '{abc}tag'), 1492 ]) 1493 1494 def test_ns_events_start_end(self): 1495 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns')) 1496 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") 1497 self.assert_event_tuples(parser, [ 1498 ('start-ns', ('', 'abc')), 1499 ('start-ns', ('p', 'xyz')), 1500 ], max_events=2) 1501 self.assert_event_tags(parser, [ 1502 ('start', '{abc}tag'), 1503 ], max_events=1) 1504 1505 self._feed(parser, "<child />\n") 1506 self.assert_event_tags(parser, [ 1507 ('start', '{abc}child'), 1508 ('end', '{abc}child'), 1509 ]) 1510 1511 self._feed(parser, "</tag>\n") 1512 parser.close() 1513 self.assert_event_tags(parser, [ 1514 ('end', '{abc}tag'), 1515 ], max_events=1) 1516 self.assert_event_tuples(parser, [ 1517 ('end-ns', None), 1518 ('end-ns', None), 1519 ]) 1520 1521 def test_events(self): 1522 parser = ET.XMLPullParser(events=()) 1523 self._feed(parser, "<root/>\n") 1524 self.assert_event_tags(parser, []) 1525 1526 parser = ET.XMLPullParser(events=('start', 'end')) 1527 self._feed(parser, "<!-- text here -->\n") 1528 self.assert_events(parser, []) 1529 1530 parser = ET.XMLPullParser(events=('start', 'end')) 1531 self._feed(parser, "<root>\n") 1532 self.assert_event_tags(parser, [('start', 'root')]) 1533 self._feed(parser, "<element key='value'>text</element") 1534 self.assert_event_tags(parser, [('start', 'element')]) 1535 self._feed(parser, ">\n") 1536 self.assert_event_tags(parser, [('end', 'element')]) 1537 self._feed(parser, 1538 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1539 self.assert_event_tags(parser, [ 1540 ('start', '{foo}element'), 1541 ('start', '{foo}empty-element'), 1542 ('end', '{foo}empty-element'), 1543 ('end', '{foo}element'), 1544 ]) 1545 self._feed(parser, "</root>") 1546 self.assertIsNone(parser.close()) 1547 self.assert_event_tags(parser, [('end', 'root')]) 1548 1549 parser = ET.XMLPullParser(events=('start',)) 1550 self._feed(parser, "<!-- comment -->\n") 1551 self.assert_event_tags(parser, []) 1552 self._feed(parser, "<root>\n") 1553 self.assert_event_tags(parser, [('start', 'root')]) 1554 self._feed(parser, "<element key='value'>text</element") 1555 self.assert_event_tags(parser, [('start', 'element')]) 1556 self._feed(parser, ">\n") 1557 self.assert_event_tags(parser, []) 1558 self._feed(parser, 1559 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1560 self.assert_event_tags(parser, [ 1561 ('start', '{foo}element'), 1562 ('start', '{foo}empty-element'), 1563 ]) 1564 self._feed(parser, "</root>") 1565 self.assertIsNone(parser.close()) 1566 1567 def test_events_comment(self): 1568 parser = ET.XMLPullParser(events=('start', 'comment', 'end')) 1569 self._feed(parser, "<!-- text here -->\n") 1570 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) 1571 self._feed(parser, "<!-- more text here -->\n") 1572 self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))]) 1573 self._feed(parser, "<root-tag>text") 1574 self.assert_event_tags(parser, [('start', 'root-tag')]) 1575 self._feed(parser, "<!-- inner comment-->\n") 1576 self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))]) 1577 self._feed(parser, "</root-tag>\n") 1578 self.assert_event_tags(parser, [('end', 'root-tag')]) 1579 self._feed(parser, "<!-- outer comment -->\n") 1580 self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))]) 1581 1582 parser = ET.XMLPullParser(events=('comment',)) 1583 self._feed(parser, "<!-- text here -->\n") 1584 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) 1585 1586 def test_events_pi(self): 1587 parser = ET.XMLPullParser(events=('start', 'pi', 'end')) 1588 self._feed(parser, "<?pitarget?>\n") 1589 self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))]) 1590 parser = ET.XMLPullParser(events=('pi',)) 1591 self._feed(parser, "<?pitarget some text ?>\n") 1592 self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))]) 1593 1594 def test_events_sequence(self): 1595 # Test that events can be some sequence that's not just a tuple or list 1596 eventset = {'end', 'start'} 1597 parser = ET.XMLPullParser(events=eventset) 1598 self._feed(parser, "<foo>bar</foo>") 1599 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1600 1601 class DummyIter: 1602 def __init__(self): 1603 self.events = iter(['start', 'end', 'start-ns']) 1604 def __iter__(self): 1605 return self 1606 def __next__(self): 1607 return next(self.events) 1608 1609 parser = ET.XMLPullParser(events=DummyIter()) 1610 self._feed(parser, "<foo>bar</foo>") 1611 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1612 1613 def test_unknown_event(self): 1614 with self.assertRaises(ValueError): 1615 ET.XMLPullParser(events=('start', 'end', 'bogus')) 1616 1617 1618# 1619# xinclude tests (samples from appendix C of the xinclude specification) 1620 1621XINCLUDE = {} 1622 1623XINCLUDE["C1.xml"] = """\ 1624<?xml version='1.0'?> 1625<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1626 <p>120 Mz is adequate for an average home user.</p> 1627 <xi:include href="disclaimer.xml"/> 1628</document> 1629""" 1630 1631XINCLUDE["disclaimer.xml"] = """\ 1632<?xml version='1.0'?> 1633<disclaimer> 1634 <p>The opinions represented herein represent those of the individual 1635 and should not be interpreted as official policy endorsed by this 1636 organization.</p> 1637</disclaimer> 1638""" 1639 1640XINCLUDE["C2.xml"] = """\ 1641<?xml version='1.0'?> 1642<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1643 <p>This document has been accessed 1644 <xi:include href="count.txt" parse="text"/> times.</p> 1645</document> 1646""" 1647 1648XINCLUDE["count.txt"] = "324387" 1649 1650XINCLUDE["C2b.xml"] = """\ 1651<?xml version='1.0'?> 1652<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1653 <p>This document has been <em>accessed</em> 1654 <xi:include href="count.txt" parse="text"/> times.</p> 1655</document> 1656""" 1657 1658XINCLUDE["C3.xml"] = """\ 1659<?xml version='1.0'?> 1660<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1661 <p>The following is the source of the "data.xml" resource:</p> 1662 <example><xi:include href="data.xml" parse="text"/></example> 1663</document> 1664""" 1665 1666XINCLUDE["data.xml"] = """\ 1667<?xml version='1.0'?> 1668<data> 1669 <item><![CDATA[Brooks & Shields]]></item> 1670</data> 1671""" 1672 1673XINCLUDE["C5.xml"] = """\ 1674<?xml version='1.0'?> 1675<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1676 <xi:include href="example.txt" parse="text"> 1677 <xi:fallback> 1678 <xi:include href="fallback-example.txt" parse="text"> 1679 <xi:fallback><a href="mailto:[email protected]">Report error</a></xi:fallback> 1680 </xi:include> 1681 </xi:fallback> 1682 </xi:include> 1683</div> 1684""" 1685 1686XINCLUDE["default.xml"] = """\ 1687<?xml version='1.0'?> 1688<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1689 <p>Example.</p> 1690 <xi:include href="{}"/> 1691</document> 1692""".format(html.escape(SIMPLE_XMLFILE, True)) 1693 1694XINCLUDE["include_c1_repeated.xml"] = """\ 1695<?xml version='1.0'?> 1696<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1697 <p>The following is the source code of Recursive1.xml:</p> 1698 <xi:include href="C1.xml"/> 1699 <xi:include href="C1.xml"/> 1700 <xi:include href="C1.xml"/> 1701 <xi:include href="C1.xml"/> 1702</document> 1703""" 1704 1705# 1706# badly formatted xi:include tags 1707 1708XINCLUDE_BAD = {} 1709 1710XINCLUDE_BAD["B1.xml"] = """\ 1711<?xml version='1.0'?> 1712<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1713 <p>120 Mz is adequate for an average home user.</p> 1714 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1715</document> 1716""" 1717 1718XINCLUDE_BAD["B2.xml"] = """\ 1719<?xml version='1.0'?> 1720<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1721 <xi:fallback></xi:fallback> 1722</div> 1723""" 1724 1725XINCLUDE["Recursive1.xml"] = """\ 1726<?xml version='1.0'?> 1727<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1728 <p>The following is the source code of Recursive2.xml:</p> 1729 <xi:include href="Recursive2.xml"/> 1730</document> 1731""" 1732 1733XINCLUDE["Recursive2.xml"] = """\ 1734<?xml version='1.0'?> 1735<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1736 <p>The following is the source code of Recursive3.xml:</p> 1737 <xi:include href="Recursive3.xml"/> 1738</document> 1739""" 1740 1741XINCLUDE["Recursive3.xml"] = """\ 1742<?xml version='1.0'?> 1743<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1744 <p>The following is the source code of Recursive1.xml:</p> 1745 <xi:include href="Recursive1.xml"/> 1746</document> 1747""" 1748 1749 1750class XIncludeTest(unittest.TestCase): 1751 1752 def xinclude_loader(self, href, parse="xml", encoding=None): 1753 try: 1754 data = XINCLUDE[href] 1755 except KeyError: 1756 raise OSError("resource not found") 1757 if parse == "xml": 1758 data = ET.XML(data) 1759 return data 1760 1761 def none_loader(self, href, parser, encoding=None): 1762 return None 1763 1764 def _my_loader(self, href, parse): 1765 # Used to avoid a test-dependency problem where the default loader 1766 # of ElementInclude uses the pyET parser for cET tests. 1767 if parse == 'xml': 1768 with open(href, 'rb') as f: 1769 return ET.parse(f).getroot() 1770 else: 1771 return None 1772 1773 def test_xinclude_default(self): 1774 from xml.etree import ElementInclude 1775 doc = self.xinclude_loader('default.xml') 1776 ElementInclude.include(doc, self._my_loader) 1777 self.assertEqual(serialize(doc), 1778 '<document>\n' 1779 ' <p>Example.</p>\n' 1780 ' <root>\n' 1781 ' <element key="value">text</element>\n' 1782 ' <element>text</element>tail\n' 1783 ' <empty-element />\n' 1784 '</root>\n' 1785 '</document>') 1786 1787 def test_xinclude(self): 1788 from xml.etree import ElementInclude 1789 1790 # Basic inclusion example (XInclude C.1) 1791 document = self.xinclude_loader("C1.xml") 1792 ElementInclude.include(document, self.xinclude_loader) 1793 self.assertEqual(serialize(document), 1794 '<document>\n' 1795 ' <p>120 Mz is adequate for an average home user.</p>\n' 1796 ' <disclaimer>\n' 1797 ' <p>The opinions represented herein represent those of the individual\n' 1798 ' and should not be interpreted as official policy endorsed by this\n' 1799 ' organization.</p>\n' 1800 '</disclaimer>\n' 1801 '</document>') # C1 1802 1803 # Textual inclusion example (XInclude C.2) 1804 document = self.xinclude_loader("C2.xml") 1805 ElementInclude.include(document, self.xinclude_loader) 1806 self.assertEqual(serialize(document), 1807 '<document>\n' 1808 ' <p>This document has been accessed\n' 1809 ' 324387 times.</p>\n' 1810 '</document>') # C2 1811 1812 # Textual inclusion after sibling element (based on modified XInclude C.2) 1813 document = self.xinclude_loader("C2b.xml") 1814 ElementInclude.include(document, self.xinclude_loader) 1815 self.assertEqual(serialize(document), 1816 '<document>\n' 1817 ' <p>This document has been <em>accessed</em>\n' 1818 ' 324387 times.</p>\n' 1819 '</document>') # C2b 1820 1821 # Textual inclusion of XML example (XInclude C.3) 1822 document = self.xinclude_loader("C3.xml") 1823 ElementInclude.include(document, self.xinclude_loader) 1824 self.assertEqual(serialize(document), 1825 '<document>\n' 1826 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1827 " <example><?xml version='1.0'?>\n" 1828 '<data>\n' 1829 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1830 '</data>\n' 1831 '</example>\n' 1832 '</document>') # C3 1833 1834 # Fallback example (XInclude C.5) 1835 # Note! Fallback support is not yet implemented 1836 document = self.xinclude_loader("C5.xml") 1837 with self.assertRaises(OSError) as cm: 1838 ElementInclude.include(document, self.xinclude_loader) 1839 self.assertEqual(str(cm.exception), 'resource not found') 1840 self.assertEqual(serialize(document), 1841 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1842 ' <ns0:include href="example.txt" parse="text">\n' 1843 ' <ns0:fallback>\n' 1844 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1845 ' <ns0:fallback><a href="mailto:[email protected]">Report error</a></ns0:fallback>\n' 1846 ' </ns0:include>\n' 1847 ' </ns0:fallback>\n' 1848 ' </ns0:include>\n' 1849 '</div>') # C5 1850 1851 def test_xinclude_repeated(self): 1852 from xml.etree import ElementInclude 1853 1854 document = self.xinclude_loader("include_c1_repeated.xml") 1855 ElementInclude.include(document, self.xinclude_loader) 1856 self.assertEqual(1+4*2, len(document.findall(".//p"))) 1857 1858 def test_xinclude_failures(self): 1859 from xml.etree import ElementInclude 1860 1861 # Test failure to locate included XML file. 1862 document = ET.XML(XINCLUDE["C1.xml"]) 1863 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1864 ElementInclude.include(document, loader=self.none_loader) 1865 self.assertEqual(str(cm.exception), 1866 "cannot load 'disclaimer.xml' as 'xml'") 1867 1868 # Test failure to locate included text file. 1869 document = ET.XML(XINCLUDE["C2.xml"]) 1870 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1871 ElementInclude.include(document, loader=self.none_loader) 1872 self.assertEqual(str(cm.exception), 1873 "cannot load 'count.txt' as 'text'") 1874 1875 # Test bad parse type. 1876 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1877 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1878 ElementInclude.include(document, loader=self.none_loader) 1879 self.assertEqual(str(cm.exception), 1880 "unknown parse type in xi:include tag ('BAD_TYPE')") 1881 1882 # Test xi:fallback outside xi:include. 1883 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1884 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1885 ElementInclude.include(document, loader=self.none_loader) 1886 self.assertEqual(str(cm.exception), 1887 "xi:fallback tag must be child of xi:include " 1888 "('{http://www.w3.org/2001/XInclude}fallback')") 1889 1890 # Test infinitely recursive includes. 1891 document = self.xinclude_loader("Recursive1.xml") 1892 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1893 ElementInclude.include(document, self.xinclude_loader) 1894 self.assertEqual(str(cm.exception), 1895 "recursive include of Recursive2.xml") 1896 1897 # Test 'max_depth' limitation. 1898 document = self.xinclude_loader("Recursive1.xml") 1899 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1900 ElementInclude.include(document, self.xinclude_loader, max_depth=None) 1901 self.assertEqual(str(cm.exception), 1902 "recursive include of Recursive2.xml") 1903 1904 document = self.xinclude_loader("Recursive1.xml") 1905 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1906 ElementInclude.include(document, self.xinclude_loader, max_depth=0) 1907 self.assertEqual(str(cm.exception), 1908 "maximum xinclude depth reached when including file Recursive2.xml") 1909 1910 document = self.xinclude_loader("Recursive1.xml") 1911 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1912 ElementInclude.include(document, self.xinclude_loader, max_depth=1) 1913 self.assertEqual(str(cm.exception), 1914 "maximum xinclude depth reached when including file Recursive3.xml") 1915 1916 document = self.xinclude_loader("Recursive1.xml") 1917 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1918 ElementInclude.include(document, self.xinclude_loader, max_depth=2) 1919 self.assertEqual(str(cm.exception), 1920 "maximum xinclude depth reached when including file Recursive1.xml") 1921 1922 document = self.xinclude_loader("Recursive1.xml") 1923 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1924 ElementInclude.include(document, self.xinclude_loader, max_depth=3) 1925 self.assertEqual(str(cm.exception), 1926 "recursive include of Recursive2.xml") 1927 1928 1929# -------------------------------------------------------------------- 1930# reported bugs 1931 1932class BugsTest(unittest.TestCase): 1933 1934 def test_bug_xmltoolkit21(self): 1935 # marshaller gives obscure errors for non-string values 1936 1937 def check(elem): 1938 with self.assertRaises(TypeError) as cm: 1939 serialize(elem) 1940 self.assertEqual(str(cm.exception), 1941 'cannot serialize 123 (type int)') 1942 1943 elem = ET.Element(123) 1944 check(elem) # tag 1945 1946 elem = ET.Element("elem") 1947 elem.text = 123 1948 check(elem) # text 1949 1950 elem = ET.Element("elem") 1951 elem.tail = 123 1952 check(elem) # tail 1953 1954 elem = ET.Element("elem") 1955 elem.set(123, "123") 1956 check(elem) # attribute key 1957 1958 elem = ET.Element("elem") 1959 elem.set("123", 123) 1960 check(elem) # attribute value 1961 1962 def test_bug_xmltoolkit25(self): 1963 # typo in ElementTree.findtext 1964 1965 elem = ET.XML(SAMPLE_XML) 1966 tree = ET.ElementTree(elem) 1967 self.assertEqual(tree.findtext("tag"), 'text') 1968 self.assertEqual(tree.findtext("section/tag"), 'subtext') 1969 1970 def test_bug_xmltoolkit28(self): 1971 # .//tag causes exceptions 1972 1973 tree = ET.XML("<doc><table><tbody/></table></doc>") 1974 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 1975 self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1976 1977 def test_bug_xmltoolkitX1(self): 1978 # dump() doesn't flush the output buffer 1979 1980 tree = ET.XML("<doc><table><tbody/></table></doc>") 1981 with support.captured_stdout() as stdout: 1982 ET.dump(tree) 1983 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1984 1985 def test_bug_xmltoolkit39(self): 1986 # non-ascii element and attribute names doesn't work 1987 1988 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1989 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1990 1991 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1992 b"<tag \xe4ttr='välue' />") 1993 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 1994 self.assertEqual(ET.tostring(tree, "utf-8"), 1995 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1996 1997 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1998 b'<t\xe4g>text</t\xe4g>') 1999 self.assertEqual(ET.tostring(tree, "utf-8"), 2000 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 2001 2002 tree = ET.Element("t\u00e4g") 2003 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 2004 2005 tree = ET.Element("tag") 2006 tree.set("\u00e4ttr", "v\u00e4lue") 2007 self.assertEqual(ET.tostring(tree, "utf-8"), 2008 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 2009 2010 def test_bug_xmltoolkit54(self): 2011 # problems handling internally defined entities 2012 2013 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 2014 '<doc>&ldots;</doc>') 2015 self.assertEqual(serialize(e, encoding="us-ascii"), 2016 b'<doc>舰</doc>') 2017 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 2018 2019 def test_bug_xmltoolkit55(self): 2020 # make sure we're reporting the first error, not the last 2021 2022 with self.assertRaises(ET.ParseError) as cm: 2023 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 2024 b'<doc>&ldots;&ndots;&rdots;</doc>') 2025 self.assertEqual(str(cm.exception), 2026 'undefined entity &ldots;: line 1, column 36') 2027 2028 def test_bug_xmltoolkit60(self): 2029 # Handle crash in stream source. 2030 2031 class ExceptionFile: 2032 def read(self, x): 2033 raise OSError 2034 2035 self.assertRaises(OSError, ET.parse, ExceptionFile()) 2036 2037 def test_bug_xmltoolkit62(self): 2038 # Don't crash when using custom entities. 2039 2040 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 2041 parser = ET.XMLParser() 2042 parser.entity.update(ENTITIES) 2043 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 2044<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 2045<patent-application-publication> 2046<subdoc-abstract> 2047<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 2048</subdoc-abstract> 2049</patent-application-publication>""") 2050 t = parser.close() 2051 self.assertEqual(t.find('.//paragraph').text, 2052 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 2053 2054 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 2055 def test_bug_xmltoolkit63(self): 2056 # Check reference leak. 2057 def xmltoolkit63(): 2058 tree = ET.TreeBuilder() 2059 tree.start("tag", {}) 2060 tree.data("text") 2061 tree.end("tag") 2062 2063 xmltoolkit63() 2064 count = sys.getrefcount(None) 2065 for i in range(1000): 2066 xmltoolkit63() 2067 self.assertEqual(sys.getrefcount(None), count) 2068 2069 def test_bug_200708_newline(self): 2070 # Preserve newlines in attributes. 2071 2072 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 2073 self.assertEqual(ET.tostring(e), 2074 b'<SomeTag text="def _f(): return 3 " />') 2075 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 2076 'def _f():\n return 3\n') 2077 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 2078 b'<SomeTag text="def _f(): return 3 " />') 2079 2080 def test_bug_200708_close(self): 2081 # Test default builder. 2082 parser = ET.XMLParser() # default 2083 parser.feed("<element>some text</element>") 2084 self.assertEqual(parser.close().tag, 'element') 2085 2086 # Test custom builder. 2087 class EchoTarget: 2088 def close(self): 2089 return ET.Element("element") # simulate root 2090 parser = ET.XMLParser(target=EchoTarget()) 2091 parser.feed("<element>some text</element>") 2092 self.assertEqual(parser.close().tag, 'element') 2093 2094 def test_bug_200709_default_namespace(self): 2095 e = ET.Element("{default}elem") 2096 s = ET.SubElement(e, "{default}elem") 2097 self.assertEqual(serialize(e, default_namespace="default"), # 1 2098 '<elem xmlns="default"><elem /></elem>') 2099 2100 e = ET.Element("{default}elem") 2101 s = ET.SubElement(e, "{default}elem") 2102 s = ET.SubElement(e, "{not-default}elem") 2103 self.assertEqual(serialize(e, default_namespace="default"), # 2 2104 '<elem xmlns="default" xmlns:ns1="not-default">' 2105 '<elem />' 2106 '<ns1:elem />' 2107 '</elem>') 2108 2109 e = ET.Element("{default}elem") 2110 s = ET.SubElement(e, "{default}elem") 2111 s = ET.SubElement(e, "elem") # unprefixed name 2112 with self.assertRaises(ValueError) as cm: 2113 serialize(e, default_namespace="default") # 3 2114 self.assertEqual(str(cm.exception), 2115 'cannot use non-qualified names with default_namespace option') 2116 2117 def test_bug_200709_register_namespace(self): 2118 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2119 self.assertEqual(ET.tostring(e), 2120 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 2121 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 2122 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2123 self.assertEqual(ET.tostring(e), 2124 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 2125 2126 # And the Dublin Core namespace is in the default list: 2127 2128 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 2129 self.assertEqual(ET.tostring(e), 2130 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 2131 2132 def test_bug_200709_element_comment(self): 2133 # Not sure if this can be fixed, really (since the serializer needs 2134 # ET.Comment, not cET.comment). 2135 2136 a = ET.Element('a') 2137 a.append(ET.Comment('foo')) 2138 self.assertEqual(a[0].tag, ET.Comment) 2139 2140 a = ET.Element('a') 2141 a.append(ET.PI('foo')) 2142 self.assertEqual(a[0].tag, ET.PI) 2143 2144 def test_bug_200709_element_insert(self): 2145 a = ET.Element('a') 2146 b = ET.SubElement(a, 'b') 2147 c = ET.SubElement(a, 'c') 2148 d = ET.Element('d') 2149 a.insert(0, d) 2150 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 2151 a.insert(-1, d) 2152 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 2153 2154 def test_bug_200709_iter_comment(self): 2155 a = ET.Element('a') 2156 b = ET.SubElement(a, 'b') 2157 comment_b = ET.Comment("TEST-b") 2158 b.append(comment_b) 2159 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 2160 2161 # -------------------------------------------------------------------- 2162 # reported on bugs.python.org 2163 2164 def test_bug_1534630(self): 2165 bob = ET.TreeBuilder() 2166 e = bob.data("data") 2167 e = bob.start("tag", {}) 2168 e = bob.end("tag") 2169 e = bob.close() 2170 self.assertEqual(serialize(e), '<tag />') 2171 2172 def test_issue6233(self): 2173 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 2174 b'<body>t\xc3\xa3g</body>') 2175 self.assertEqual(ET.tostring(e, 'ascii'), 2176 b"<?xml version='1.0' encoding='ascii'?>\n" 2177 b'<body>tãg</body>') 2178 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2179 b'<body>t\xe3g</body>') 2180 self.assertEqual(ET.tostring(e, 'ascii'), 2181 b"<?xml version='1.0' encoding='ascii'?>\n" 2182 b'<body>tãg</body>') 2183 2184 def test_issue6565(self): 2185 elem = ET.XML("<body><tag/></body>") 2186 self.assertEqual(summarize_list(elem), ['tag']) 2187 newelem = ET.XML(SAMPLE_XML) 2188 elem[:] = newelem[:] 2189 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 2190 2191 def test_issue10777(self): 2192 # Registering a namespace twice caused a "dictionary changed size during 2193 # iteration" bug. 2194 2195 ET.register_namespace('test10777', 'http://myuri/') 2196 ET.register_namespace('test10777', 'http://myuri/') 2197 2198 def test_lost_text(self): 2199 # Issue #25902: Borrowed text can disappear 2200 class Text: 2201 def __bool__(self): 2202 e.text = 'changed' 2203 return True 2204 2205 e = ET.Element('tag') 2206 e.text = Text() 2207 i = e.itertext() 2208 t = next(i) 2209 self.assertIsInstance(t, Text) 2210 self.assertIsInstance(e.text, str) 2211 self.assertEqual(e.text, 'changed') 2212 2213 def test_lost_tail(self): 2214 # Issue #25902: Borrowed tail can disappear 2215 class Text: 2216 def __bool__(self): 2217 e[0].tail = 'changed' 2218 return True 2219 2220 e = ET.Element('root') 2221 e.append(ET.Element('tag')) 2222 e[0].tail = Text() 2223 i = e.itertext() 2224 t = next(i) 2225 self.assertIsInstance(t, Text) 2226 self.assertIsInstance(e[0].tail, str) 2227 self.assertEqual(e[0].tail, 'changed') 2228 2229 def test_lost_elem(self): 2230 # Issue #25902: Borrowed element can disappear 2231 class Tag: 2232 def __eq__(self, other): 2233 e[0] = ET.Element('changed') 2234 next(i) 2235 return True 2236 2237 e = ET.Element('root') 2238 e.append(ET.Element(Tag())) 2239 e.append(ET.Element('tag')) 2240 i = e.iter('tag') 2241 try: 2242 t = next(i) 2243 except ValueError: 2244 self.skipTest('generators are not reentrant') 2245 self.assertIsInstance(t.tag, Tag) 2246 self.assertIsInstance(e[0].tag, str) 2247 self.assertEqual(e[0].tag, 'changed') 2248 2249 def check_expat224_utf8_bug(self, text): 2250 xml = b'<a b="%s"/>' % text 2251 root = ET.XML(xml) 2252 self.assertEqual(root.get('b'), text.decode('utf-8')) 2253 2254 def test_expat224_utf8_bug(self): 2255 # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. 2256 # Check that Expat 2.2.4 fixed the bug. 2257 # 2258 # Test buffer bounds at odd and even positions. 2259 2260 text = b'\xc3\xa0' * 1024 2261 self.check_expat224_utf8_bug(text) 2262 2263 text = b'x' + b'\xc3\xa0' * 1024 2264 self.check_expat224_utf8_bug(text) 2265 2266 def test_expat224_utf8_bug_file(self): 2267 with open(UTF8_BUG_XMLFILE, 'rb') as fp: 2268 raw = fp.read() 2269 root = ET.fromstring(raw) 2270 xmlattr = root.get('b') 2271 2272 # "Parse" manually the XML file to extract the value of the 'b' 2273 # attribute of the <a b='xxx' /> XML element 2274 text = raw.decode('utf-8').strip() 2275 text = text.replace('\r\n', ' ') 2276 text = text[6:-4] 2277 self.assertEqual(root.get('b'), text) 2278 2279 def test_39495_treebuilder_start(self): 2280 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag") 2281 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None) 2282 2283 2284 2285# -------------------------------------------------------------------- 2286 2287 2288class BasicElementTest(ElementTestCase, unittest.TestCase): 2289 2290 def test___init__(self): 2291 tag = "foo" 2292 attrib = { "zix": "wyp" } 2293 2294 element_foo = ET.Element(tag, attrib) 2295 2296 # traits of an element 2297 self.assertIsInstance(element_foo, ET.Element) 2298 self.assertIn("tag", dir(element_foo)) 2299 self.assertIn("attrib", dir(element_foo)) 2300 self.assertIn("text", dir(element_foo)) 2301 self.assertIn("tail", dir(element_foo)) 2302 2303 # string attributes have expected values 2304 self.assertEqual(element_foo.tag, tag) 2305 self.assertIsNone(element_foo.text) 2306 self.assertIsNone(element_foo.tail) 2307 2308 # attrib is a copy 2309 self.assertIsNot(element_foo.attrib, attrib) 2310 self.assertEqual(element_foo.attrib, attrib) 2311 2312 # attrib isn't linked 2313 attrib["bar"] = "baz" 2314 self.assertIsNot(element_foo.attrib, attrib) 2315 self.assertNotEqual(element_foo.attrib, attrib) 2316 2317 def test_copy(self): 2318 # Only run this test if Element.copy() is defined. 2319 if "copy" not in dir(ET.Element): 2320 raise unittest.SkipTest("Element.copy() not present") 2321 2322 element_foo = ET.Element("foo", { "zix": "wyp" }) 2323 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2324 2325 with self.assertWarns(DeprecationWarning): 2326 element_foo2 = element_foo.copy() 2327 2328 # elements are not the same 2329 self.assertIsNot(element_foo2, element_foo) 2330 2331 # string attributes are equal 2332 self.assertEqual(element_foo2.tag, element_foo.tag) 2333 self.assertEqual(element_foo2.text, element_foo.text) 2334 self.assertEqual(element_foo2.tail, element_foo.tail) 2335 2336 # number of children is the same 2337 self.assertEqual(len(element_foo2), len(element_foo)) 2338 2339 # children are the same 2340 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2341 self.assertIs(child1, child2) 2342 2343 # attrib is a copy 2344 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2345 2346 def test___copy__(self): 2347 element_foo = ET.Element("foo", { "zix": "wyp" }) 2348 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2349 2350 element_foo2 = copy.copy(element_foo) 2351 2352 # elements are not the same 2353 self.assertIsNot(element_foo2, element_foo) 2354 2355 # string attributes are equal 2356 self.assertEqual(element_foo2.tag, element_foo.tag) 2357 self.assertEqual(element_foo2.text, element_foo.text) 2358 self.assertEqual(element_foo2.tail, element_foo.tail) 2359 2360 # number of children is the same 2361 self.assertEqual(len(element_foo2), len(element_foo)) 2362 2363 # children are the same 2364 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2365 self.assertIs(child1, child2) 2366 2367 # attrib is a copy 2368 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2369 2370 def test___deepcopy__(self): 2371 element_foo = ET.Element("foo", { "zix": "wyp" }) 2372 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2373 2374 element_foo2 = copy.deepcopy(element_foo) 2375 2376 # elements are not the same 2377 self.assertIsNot(element_foo2, element_foo) 2378 2379 # string attributes are equal 2380 self.assertEqual(element_foo2.tag, element_foo.tag) 2381 self.assertEqual(element_foo2.text, element_foo.text) 2382 self.assertEqual(element_foo2.tail, element_foo.tail) 2383 2384 # number of children is the same 2385 self.assertEqual(len(element_foo2), len(element_foo)) 2386 2387 # children are not the same 2388 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2389 self.assertIsNot(child1, child2) 2390 2391 # attrib is a copy 2392 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2393 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2394 2395 # attrib isn't linked 2396 element_foo.attrib["bar"] = "baz" 2397 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2398 self.assertNotEqual(element_foo2.attrib, element_foo.attrib) 2399 2400 def test_augmentation_type_errors(self): 2401 e = ET.Element('joe') 2402 self.assertRaises(TypeError, e.append, 'b') 2403 self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo']) 2404 self.assertRaises(TypeError, e.insert, 0, 'foo') 2405 e[:] = [ET.Element('bar')] 2406 with self.assertRaises(TypeError): 2407 e[0] = 'foo' 2408 with self.assertRaises(TypeError): 2409 e[:] = [ET.Element('bar'), 'foo'] 2410 2411 if hasattr(e, '__setstate__'): 2412 state = { 2413 'tag': 'tag', 2414 '_children': [None], # non-Element 2415 'attrib': 'attr', 2416 'tail': 'tail', 2417 'text': 'text', 2418 } 2419 self.assertRaises(TypeError, e.__setstate__, state) 2420 2421 if hasattr(e, '__deepcopy__'): 2422 class E(ET.Element): 2423 def __deepcopy__(self, memo): 2424 return None # non-Element 2425 e[:] = [E('bar')] 2426 self.assertRaises(TypeError, copy.deepcopy, e) 2427 2428 def test_cyclic_gc(self): 2429 class Dummy: 2430 pass 2431 2432 # Test the shortest cycle: d->element->d 2433 d = Dummy() 2434 d.dummyref = ET.Element('joe', attr=d) 2435 wref = weakref.ref(d) 2436 del d 2437 gc_collect() 2438 self.assertIsNone(wref()) 2439 2440 # A longer cycle: d->e->e2->d 2441 e = ET.Element('joe') 2442 d = Dummy() 2443 d.dummyref = e 2444 wref = weakref.ref(d) 2445 e2 = ET.SubElement(e, 'foo', attr=d) 2446 del d, e, e2 2447 gc_collect() 2448 self.assertIsNone(wref()) 2449 2450 # A cycle between Element objects as children of one another 2451 # e1->e2->e3->e1 2452 e1 = ET.Element('e1') 2453 e2 = ET.Element('e2') 2454 e3 = ET.Element('e3') 2455 e3.append(e1) 2456 e2.append(e3) 2457 e1.append(e2) 2458 wref = weakref.ref(e1) 2459 del e1, e2, e3 2460 gc_collect() 2461 self.assertIsNone(wref()) 2462 2463 def test_weakref(self): 2464 flag = False 2465 def wref_cb(w): 2466 nonlocal flag 2467 flag = True 2468 e = ET.Element('e') 2469 wref = weakref.ref(e, wref_cb) 2470 self.assertEqual(wref().tag, 'e') 2471 del e 2472 gc_collect() # For PyPy or other GCs. 2473 self.assertEqual(flag, True) 2474 self.assertEqual(wref(), None) 2475 2476 def test_get_keyword_args(self): 2477 e1 = ET.Element('foo' , x=1, y=2, z=3) 2478 self.assertEqual(e1.get('x', default=7), 1) 2479 self.assertEqual(e1.get('w', default=7), 7) 2480 2481 def test_pickle(self): 2482 # issue #16076: the C implementation wasn't pickleable. 2483 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2484 for dumper, loader in product(self.modules, repeat=2): 2485 e = dumper.Element('foo', bar=42) 2486 e.text = "text goes here" 2487 e.tail = "opposite of head" 2488 dumper.SubElement(e, 'child').append(dumper.Element('grandchild')) 2489 e.append(dumper.Element('child')) 2490 e.findall('.//grandchild')[0].set('attr', 'other value') 2491 2492 e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree', 2493 dumper, loader, proto) 2494 2495 self.assertEqual(e2.tag, 'foo') 2496 self.assertEqual(e2.attrib['bar'], 42) 2497 self.assertEqual(len(e2), 2) 2498 self.assertEqualElements(e, e2) 2499 2500 def test_pickle_issue18997(self): 2501 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2502 for dumper, loader in product(self.modules, repeat=2): 2503 XMLTEXT = """<?xml version="1.0"?> 2504 <group><dogs>4</dogs> 2505 </group>""" 2506 e1 = dumper.fromstring(XMLTEXT) 2507 self.assertEqual(e1.__getstate__()['tag'], 'group') 2508 e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', 2509 dumper, loader, proto) 2510 self.assertEqual(e2.tag, 'group') 2511 self.assertEqual(e2[0].tag, 'dogs') 2512 2513 2514class BadElementTest(ElementTestCase, unittest.TestCase): 2515 def test_extend_mutable_list(self): 2516 class X: 2517 @property 2518 def __class__(self): 2519 L[:] = [ET.Element('baz')] 2520 return ET.Element 2521 L = [X()] 2522 e = ET.Element('foo') 2523 try: 2524 e.extend(L) 2525 except TypeError: 2526 pass 2527 2528 class Y(X, ET.Element): 2529 pass 2530 L = [Y('x')] 2531 e = ET.Element('foo') 2532 e.extend(L) 2533 2534 def test_extend_mutable_list2(self): 2535 class X: 2536 @property 2537 def __class__(self): 2538 del L[:] 2539 return ET.Element 2540 L = [X(), ET.Element('baz')] 2541 e = ET.Element('foo') 2542 try: 2543 e.extend(L) 2544 except TypeError: 2545 pass 2546 2547 class Y(X, ET.Element): 2548 pass 2549 L = [Y('bar'), ET.Element('baz')] 2550 e = ET.Element('foo') 2551 e.extend(L) 2552 2553 def test_remove_with_mutating(self): 2554 class X(ET.Element): 2555 def __eq__(self, o): 2556 del e[:] 2557 return False 2558 e = ET.Element('foo') 2559 e.extend([X('bar')]) 2560 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 2561 2562 e = ET.Element('foo') 2563 e.extend([ET.Element('bar')]) 2564 self.assertRaises(ValueError, e.remove, X('baz')) 2565 2566 def test_recursive_repr(self): 2567 # Issue #25455 2568 e = ET.Element('foo') 2569 with swap_attr(e, 'tag', e): 2570 with self.assertRaises(RuntimeError): 2571 repr(e) # Should not crash 2572 2573 def test_element_get_text(self): 2574 # Issue #27863 2575 class X(str): 2576 def __del__(self): 2577 try: 2578 elem.text 2579 except NameError: 2580 pass 2581 2582 b = ET.TreeBuilder() 2583 b.start('tag', {}) 2584 b.data('ABCD') 2585 b.data(X('EFGH')) 2586 b.data('IJKL') 2587 b.end('tag') 2588 2589 elem = b.close() 2590 self.assertEqual(elem.text, 'ABCDEFGHIJKL') 2591 2592 def test_element_get_tail(self): 2593 # Issue #27863 2594 class X(str): 2595 def __del__(self): 2596 try: 2597 elem[0].tail 2598 except NameError: 2599 pass 2600 2601 b = ET.TreeBuilder() 2602 b.start('root', {}) 2603 b.start('tag', {}) 2604 b.end('tag') 2605 b.data('ABCD') 2606 b.data(X('EFGH')) 2607 b.data('IJKL') 2608 b.end('root') 2609 2610 elem = b.close() 2611 self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') 2612 2613 def test_subscr(self): 2614 # Issue #27863 2615 class X: 2616 def __index__(self): 2617 del e[:] 2618 return 1 2619 2620 e = ET.Element('elem') 2621 e.append(ET.Element('child')) 2622 e[:X()] # shouldn't crash 2623 2624 e.append(ET.Element('child')) 2625 e[0:10:X()] # shouldn't crash 2626 2627 def test_ass_subscr(self): 2628 # Issue #27863 2629 class X: 2630 def __index__(self): 2631 e[:] = [] 2632 return 1 2633 2634 e = ET.Element('elem') 2635 for _ in range(10): 2636 e.insert(0, ET.Element('child')) 2637 2638 e[0:10:X()] = [] # shouldn't crash 2639 2640 def test_treebuilder_start(self): 2641 # Issue #27863 2642 def element_factory(x, y): 2643 return [] 2644 b = ET.TreeBuilder(element_factory=element_factory) 2645 2646 b.start('tag', {}) 2647 b.data('ABCD') 2648 self.assertRaises(AttributeError, b.start, 'tag2', {}) 2649 del b 2650 gc_collect() 2651 2652 def test_treebuilder_end(self): 2653 # Issue #27863 2654 def element_factory(x, y): 2655 return [] 2656 b = ET.TreeBuilder(element_factory=element_factory) 2657 2658 b.start('tag', {}) 2659 b.data('ABCD') 2660 self.assertRaises(AttributeError, b.end, 'tag') 2661 del b 2662 gc_collect() 2663 2664 2665class MutatingElementPath(str): 2666 def __new__(cls, elem, *args): 2667 self = str.__new__(cls, *args) 2668 self.elem = elem 2669 return self 2670 def __eq__(self, o): 2671 del self.elem[:] 2672 return True 2673MutatingElementPath.__hash__ = str.__hash__ 2674 2675class BadElementPath(str): 2676 def __eq__(self, o): 2677 raise 1/0 2678BadElementPath.__hash__ = str.__hash__ 2679 2680class BadElementPathTest(ElementTestCase, unittest.TestCase): 2681 def setUp(self): 2682 super().setUp() 2683 from xml.etree import ElementPath 2684 self.path_cache = ElementPath._cache 2685 ElementPath._cache = {} 2686 2687 def tearDown(self): 2688 from xml.etree import ElementPath 2689 ElementPath._cache = self.path_cache 2690 super().tearDown() 2691 2692 def test_find_with_mutating(self): 2693 e = ET.Element('foo') 2694 e.extend([ET.Element('bar')]) 2695 e.find(MutatingElementPath(e, 'x')) 2696 2697 def test_find_with_error(self): 2698 e = ET.Element('foo') 2699 e.extend([ET.Element('bar')]) 2700 try: 2701 e.find(BadElementPath('x')) 2702 except ZeroDivisionError: 2703 pass 2704 2705 def test_findtext_with_mutating(self): 2706 e = ET.Element('foo') 2707 e.extend([ET.Element('bar')]) 2708 e.findtext(MutatingElementPath(e, 'x')) 2709 2710 def test_findtext_with_error(self): 2711 e = ET.Element('foo') 2712 e.extend([ET.Element('bar')]) 2713 try: 2714 e.findtext(BadElementPath('x')) 2715 except ZeroDivisionError: 2716 pass 2717 2718 def test_findtext_with_falsey_text_attribute(self): 2719 root_elem = ET.Element('foo') 2720 sub_elem = ET.SubElement(root_elem, 'bar') 2721 falsey = ["", 0, False, [], (), {}] 2722 for val in falsey: 2723 sub_elem.text = val 2724 self.assertEqual(root_elem.findtext('./bar'), val) 2725 2726 def test_findtext_with_none_text_attribute(self): 2727 root_elem = ET.Element('foo') 2728 sub_elem = ET.SubElement(root_elem, 'bar') 2729 sub_elem.text = None 2730 self.assertEqual(root_elem.findtext('./bar'), '') 2731 2732 def test_findall_with_mutating(self): 2733 e = ET.Element('foo') 2734 e.extend([ET.Element('bar')]) 2735 e.findall(MutatingElementPath(e, 'x')) 2736 2737 def test_findall_with_error(self): 2738 e = ET.Element('foo') 2739 e.extend([ET.Element('bar')]) 2740 try: 2741 e.findall(BadElementPath('x')) 2742 except ZeroDivisionError: 2743 pass 2744 2745 2746class ElementTreeTypeTest(unittest.TestCase): 2747 def test_istype(self): 2748 self.assertIsInstance(ET.ParseError, type) 2749 self.assertIsInstance(ET.QName, type) 2750 self.assertIsInstance(ET.ElementTree, type) 2751 self.assertIsInstance(ET.Element, type) 2752 self.assertIsInstance(ET.TreeBuilder, type) 2753 self.assertIsInstance(ET.XMLParser, type) 2754 2755 def test_Element_subclass_trivial(self): 2756 class MyElement(ET.Element): 2757 pass 2758 2759 mye = MyElement('foo') 2760 self.assertIsInstance(mye, ET.Element) 2761 self.assertIsInstance(mye, MyElement) 2762 self.assertEqual(mye.tag, 'foo') 2763 2764 # test that attribute assignment works (issue 14849) 2765 mye.text = "joe" 2766 self.assertEqual(mye.text, "joe") 2767 2768 def test_Element_subclass_constructor(self): 2769 class MyElement(ET.Element): 2770 def __init__(self, tag, attrib={}, **extra): 2771 super(MyElement, self).__init__(tag + '__', attrib, **extra) 2772 2773 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 2774 self.assertEqual(mye.tag, 'foo__') 2775 self.assertEqual(sorted(mye.items()), 2776 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 2777 2778 def test_Element_subclass_new_method(self): 2779 class MyElement(ET.Element): 2780 def newmethod(self): 2781 return self.tag 2782 2783 mye = MyElement('joe') 2784 self.assertEqual(mye.newmethod(), 'joe') 2785 2786 def test_Element_subclass_find(self): 2787 class MyElement(ET.Element): 2788 pass 2789 2790 e = ET.Element('foo') 2791 e.text = 'text' 2792 sub = MyElement('bar') 2793 sub.text = 'subtext' 2794 e.append(sub) 2795 self.assertEqual(e.findtext('bar'), 'subtext') 2796 self.assertEqual(e.find('bar').tag, 'bar') 2797 found = list(e.findall('bar')) 2798 self.assertEqual(len(found), 1, found) 2799 self.assertEqual(found[0].tag, 'bar') 2800 2801 2802class ElementFindTest(unittest.TestCase): 2803 def test_find_simple(self): 2804 e = ET.XML(SAMPLE_XML) 2805 self.assertEqual(e.find('tag').tag, 'tag') 2806 self.assertEqual(e.find('section/tag').tag, 'tag') 2807 self.assertEqual(e.find('./tag').tag, 'tag') 2808 2809 e[2] = ET.XML(SAMPLE_SECTION) 2810 self.assertEqual(e.find('section/nexttag').tag, 'nexttag') 2811 2812 self.assertEqual(e.findtext('./tag'), 'text') 2813 self.assertEqual(e.findtext('section/tag'), 'subtext') 2814 2815 # section/nexttag is found but has no text 2816 self.assertEqual(e.findtext('section/nexttag'), '') 2817 self.assertEqual(e.findtext('section/nexttag', 'default'), '') 2818 2819 # tog doesn't exist and 'default' kicks in 2820 self.assertIsNone(e.findtext('tog')) 2821 self.assertEqual(e.findtext('tog', 'default'), 'default') 2822 2823 # Issue #16922 2824 self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '') 2825 2826 def test_find_xpath(self): 2827 LINEAR_XML = ''' 2828 <body> 2829 <tag class='a'/> 2830 <tag class='b'/> 2831 <tag class='c'/> 2832 <tag class='d'/> 2833 </body>''' 2834 e = ET.XML(LINEAR_XML) 2835 2836 # Test for numeric indexing and last() 2837 self.assertEqual(e.find('./tag[1]').attrib['class'], 'a') 2838 self.assertEqual(e.find('./tag[2]').attrib['class'], 'b') 2839 self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd') 2840 self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c') 2841 self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b') 2842 2843 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]') 2844 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]') 2845 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') 2846 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') 2847 2848 def test_findall(self): 2849 e = ET.XML(SAMPLE_XML) 2850 e[2] = ET.XML(SAMPLE_SECTION) 2851 self.assertEqual(summarize_list(e.findall('.')), ['body']) 2852 self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag']) 2853 self.assertEqual(summarize_list(e.findall('tog')), []) 2854 self.assertEqual(summarize_list(e.findall('tog/foo')), []) 2855 self.assertEqual(summarize_list(e.findall('*')), 2856 ['tag', 'tag', 'section']) 2857 self.assertEqual(summarize_list(e.findall('.//tag')), 2858 ['tag'] * 4) 2859 self.assertEqual(summarize_list(e.findall('section/tag')), ['tag']) 2860 self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2) 2861 self.assertEqual(summarize_list(e.findall('section/*')), 2862 ['tag', 'nexttag', 'nextsection']) 2863 self.assertEqual(summarize_list(e.findall('section//*')), 2864 ['tag', 'nexttag', 'nextsection', 'tag']) 2865 self.assertEqual(summarize_list(e.findall('section/.//*')), 2866 ['tag', 'nexttag', 'nextsection', 'tag']) 2867 self.assertEqual(summarize_list(e.findall('*/*')), 2868 ['tag', 'nexttag', 'nextsection']) 2869 self.assertEqual(summarize_list(e.findall('*//*')), 2870 ['tag', 'nexttag', 'nextsection', 'tag']) 2871 self.assertEqual(summarize_list(e.findall('*/tag')), ['tag']) 2872 self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag']) 2873 self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2) 2874 self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2) 2875 2876 self.assertEqual(summarize_list(e.findall('.//tag[@class]')), 2877 ['tag'] * 3) 2878 self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), 2879 ['tag']) 2880 self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')), 2881 ['tag'] * 2) 2882 self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), 2883 ['tag'] * 2) 2884 self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')), 2885 ['tag']) 2886 self.assertEqual(summarize_list(e.findall('.//tag[@id]')), 2887 ['tag']) 2888 self.assertEqual(summarize_list(e.findall('.//section[tag]')), 2889 ['section']) 2890 self.assertEqual(summarize_list(e.findall('.//section[element]')), []) 2891 self.assertEqual(summarize_list(e.findall('../tag')), []) 2892 self.assertEqual(summarize_list(e.findall('section/../tag')), 2893 ['tag'] * 2) 2894 self.assertEqual(e.findall('section//'), e.findall('section//*')) 2895 2896 self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")), 2897 ['section']) 2898 self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")), 2899 ['section']) 2900 self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")), 2901 ['section']) 2902 self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), 2903 ['section']) 2904 self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")), 2905 ['section']) 2906 2907 # Negations of above tests. They match nothing because the sole section 2908 # tag has subtext. 2909 self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")), 2910 []) 2911 self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")), 2912 []) 2913 self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")), 2914 []) 2915 self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")), 2916 []) 2917 self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")), 2918 []) 2919 2920 self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")), 2921 ['tag']) 2922 self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")), 2923 ['tag']) 2924 self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')), 2925 ['tag']) 2926 self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')), 2927 ['tag']) 2928 self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")), 2929 ['tag']) 2930 self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")), 2931 []) 2932 self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")), 2933 []) 2934 2935 # Negations of above tests. 2936 # Matches everything but the tag containing subtext 2937 self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")), 2938 ['tag'] * 3) 2939 self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")), 2940 ['tag'] * 3) 2941 self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')), 2942 ['tag'] * 3) 2943 self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')), 2944 ['tag'] * 3) 2945 self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")), 2946 ['tag'] * 3) 2947 # Matches all tags. 2948 self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")), 2949 ['tag'] * 4) 2950 self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")), 2951 ['tag'] * 4) 2952 2953 # duplicate section => 2x tag matches 2954 e[1] = e[2] 2955 self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), 2956 ['section', 'section']) 2957 self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")), 2958 ['tag', 'tag']) 2959 2960 def test_test_find_with_ns(self): 2961 e = ET.XML(SAMPLE_XML_NS) 2962 self.assertEqual(summarize_list(e.findall('tag')), []) 2963 self.assertEqual( 2964 summarize_list(e.findall("{http://effbot.org/ns}tag")), 2965 ['{http://effbot.org/ns}tag'] * 2) 2966 self.assertEqual( 2967 summarize_list(e.findall(".//{http://effbot.org/ns}tag")), 2968 ['{http://effbot.org/ns}tag'] * 3) 2969 2970 def test_findall_different_nsmaps(self): 2971 root = ET.XML(''' 2972 <a xmlns:x="X" xmlns:y="Y"> 2973 <x:b><c/></x:b> 2974 <b/> 2975 <c><x:b/><b/></c><y:b/> 2976 </a>''') 2977 nsmap = {'xx': 'X'} 2978 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) 2979 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2980 nsmap = {'xx': 'Y'} 2981 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) 2982 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2983 nsmap = {'xx': 'X', '': 'Y'} 2984 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) 2985 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1) 2986 2987 def test_findall_wildcard(self): 2988 root = ET.XML(''' 2989 <a xmlns:x="X" xmlns:y="Y"> 2990 <x:b><c/></x:b> 2991 <b/> 2992 <c><x:b/><b/></c><y:b/> 2993 </a>''') 2994 root.append(ET.Comment('test')) 2995 2996 self.assertEqual(summarize_list(root.findall("{*}b")), 2997 ['{X}b', 'b', '{Y}b']) 2998 self.assertEqual(summarize_list(root.findall("{*}c")), 2999 ['c']) 3000 self.assertEqual(summarize_list(root.findall("{X}*")), 3001 ['{X}b']) 3002 self.assertEqual(summarize_list(root.findall("{Y}*")), 3003 ['{Y}b']) 3004 self.assertEqual(summarize_list(root.findall("{}*")), 3005 ['b', 'c']) 3006 self.assertEqual(summarize_list(root.findall("{}b")), # only for consistency 3007 ['b']) 3008 self.assertEqual(summarize_list(root.findall("{}b")), 3009 summarize_list(root.findall("b"))) 3010 self.assertEqual(summarize_list(root.findall("{*}*")), 3011 ['{X}b', 'b', 'c', '{Y}b']) 3012 # This is an unfortunate difference, but that's how find('*') works. 3013 self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]), 3014 summarize_list(root.findall("*"))) 3015 3016 self.assertEqual(summarize_list(root.findall(".//{*}b")), 3017 ['{X}b', 'b', '{X}b', 'b', '{Y}b']) 3018 self.assertEqual(summarize_list(root.findall(".//{*}c")), 3019 ['c', 'c']) 3020 self.assertEqual(summarize_list(root.findall(".//{X}*")), 3021 ['{X}b', '{X}b']) 3022 self.assertEqual(summarize_list(root.findall(".//{Y}*")), 3023 ['{Y}b']) 3024 self.assertEqual(summarize_list(root.findall(".//{}*")), 3025 ['c', 'b', 'c', 'b']) 3026 self.assertEqual(summarize_list(root.findall(".//{}b")), # only for consistency 3027 ['b', 'b']) 3028 self.assertEqual(summarize_list(root.findall(".//{}b")), 3029 summarize_list(root.findall(".//b"))) 3030 3031 def test_bad_find(self): 3032 e = ET.XML(SAMPLE_XML) 3033 with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'): 3034 e.findall('/tag') 3035 3036 def test_find_through_ElementTree(self): 3037 e = ET.XML(SAMPLE_XML) 3038 self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') 3039 self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text') 3040 self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')), 3041 ['tag'] * 2) 3042 # this produces a warning 3043 msg = ("This search is broken in 1.3 and earlier, and will be fixed " 3044 "in a future version. If you rely on the current behaviour, " 3045 "change it to '.+'") 3046 with self.assertWarnsRegex(FutureWarning, msg): 3047 it = ET.ElementTree(e).findall('//tag') 3048 self.assertEqual(summarize_list(it), ['tag'] * 3) 3049 3050 3051class ElementIterTest(unittest.TestCase): 3052 def _ilist(self, elem, tag=None): 3053 return summarize_list(elem.iter(tag)) 3054 3055 def test_basic(self): 3056 doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 3057 self.assertEqual(self._ilist(doc), ['html', 'body', 'i']) 3058 self.assertEqual(self._ilist(doc.find('body')), ['body', 'i']) 3059 self.assertEqual(next(doc.iter()).tag, 'html') 3060 self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...') 3061 self.assertEqual(''.join(doc.find('body').itertext()), 3062 'this is a paragraph.') 3063 self.assertEqual(next(doc.itertext()), 'this is a ') 3064 3065 # iterparse should return an iterator 3066 sourcefile = serialize(doc, to_string=False) 3067 self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end') 3068 3069 # With an explicit parser too (issue #9708) 3070 sourcefile = serialize(doc, to_string=False) 3071 parser = ET.XMLParser(target=ET.TreeBuilder()) 3072 self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 3073 'end') 3074 3075 tree = ET.ElementTree(None) 3076 self.assertRaises(AttributeError, tree.iter) 3077 3078 # Issue #16913 3079 doc = ET.XML("<root>a&<sub>b&</sub>c&</root>") 3080 self.assertEqual(''.join(doc.itertext()), 'a&b&c&') 3081 3082 def test_corners(self): 3083 # single root, no subelements 3084 a = ET.Element('a') 3085 self.assertEqual(self._ilist(a), ['a']) 3086 3087 # one child 3088 b = ET.SubElement(a, 'b') 3089 self.assertEqual(self._ilist(a), ['a', 'b']) 3090 3091 # one child and one grandchild 3092 c = ET.SubElement(b, 'c') 3093 self.assertEqual(self._ilist(a), ['a', 'b', 'c']) 3094 3095 # two children, only first with grandchild 3096 d = ET.SubElement(a, 'd') 3097 self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd']) 3098 3099 # replace first child by second 3100 a[0] = a[1] 3101 del a[1] 3102 self.assertEqual(self._ilist(a), ['a', 'd']) 3103 3104 def test_iter_by_tag(self): 3105 doc = ET.XML(''' 3106 <document> 3107 <house> 3108 <room>bedroom1</room> 3109 <room>bedroom2</room> 3110 </house> 3111 <shed>nothing here 3112 </shed> 3113 <house> 3114 <room>bedroom8</room> 3115 </house> 3116 </document>''') 3117 3118 self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3) 3119 self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2) 3120 3121 # test that iter also accepts 'tag' as a keyword arg 3122 self.assertEqual( 3123 summarize_list(doc.iter(tag='room')), 3124 ['room'] * 3) 3125 3126 # make sure both tag=None and tag='*' return all tags 3127 all_tags = ['document', 'house', 'room', 'room', 3128 'shed', 'house', 'room'] 3129 self.assertEqual(summarize_list(doc.iter()), all_tags) 3130 self.assertEqual(self._ilist(doc), all_tags) 3131 self.assertEqual(self._ilist(doc, '*'), all_tags) 3132 3133 def test_copy(self): 3134 a = ET.Element('a') 3135 it = a.iter() 3136 with self.assertRaises(TypeError): 3137 copy.copy(it) 3138 3139 def test_pickle(self): 3140 a = ET.Element('a') 3141 it = a.iter() 3142 for proto in range(pickle.HIGHEST_PROTOCOL + 1): 3143 with self.assertRaises((TypeError, pickle.PicklingError)): 3144 pickle.dumps(it, proto) 3145 3146 3147class TreeBuilderTest(unittest.TestCase): 3148 sample1 = ('<!DOCTYPE html PUBLIC' 3149 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 3150 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 3151 '<html>text<div>subtext</div>tail</html>') 3152 3153 sample2 = '''<toplevel>sometext</toplevel>''' 3154 3155 def _check_sample1_element(self, e): 3156 self.assertEqual(e.tag, 'html') 3157 self.assertEqual(e.text, 'text') 3158 self.assertEqual(e.tail, None) 3159 self.assertEqual(e.attrib, {}) 3160 children = list(e) 3161 self.assertEqual(len(children), 1) 3162 child = children[0] 3163 self.assertEqual(child.tag, 'div') 3164 self.assertEqual(child.text, 'subtext') 3165 self.assertEqual(child.tail, 'tail') 3166 self.assertEqual(child.attrib, {}) 3167 3168 def test_dummy_builder(self): 3169 class BaseDummyBuilder: 3170 def close(self): 3171 return 42 3172 3173 class DummyBuilder(BaseDummyBuilder): 3174 data = start = end = lambda *a: None 3175 3176 parser = ET.XMLParser(target=DummyBuilder()) 3177 parser.feed(self.sample1) 3178 self.assertEqual(parser.close(), 42) 3179 3180 parser = ET.XMLParser(target=BaseDummyBuilder()) 3181 parser.feed(self.sample1) 3182 self.assertEqual(parser.close(), 42) 3183 3184 parser = ET.XMLParser(target=object()) 3185 parser.feed(self.sample1) 3186 self.assertIsNone(parser.close()) 3187 3188 def test_treebuilder_comment(self): 3189 b = ET.TreeBuilder() 3190 self.assertEqual(b.comment('ctext').tag, ET.Comment) 3191 self.assertEqual(b.comment('ctext').text, 'ctext') 3192 3193 b = ET.TreeBuilder(comment_factory=ET.Comment) 3194 self.assertEqual(b.comment('ctext').tag, ET.Comment) 3195 self.assertEqual(b.comment('ctext').text, 'ctext') 3196 3197 b = ET.TreeBuilder(comment_factory=len) 3198 self.assertEqual(b.comment('ctext'), len('ctext')) 3199 3200 def test_treebuilder_pi(self): 3201 b = ET.TreeBuilder() 3202 self.assertEqual(b.pi('target', None).tag, ET.PI) 3203 self.assertEqual(b.pi('target', None).text, 'target') 3204 3205 b = ET.TreeBuilder(pi_factory=ET.PI) 3206 self.assertEqual(b.pi('target').tag, ET.PI) 3207 self.assertEqual(b.pi('target').text, "target") 3208 self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI) 3209 self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ") 3210 3211 b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text)) 3212 self.assertEqual(b.pi('target'), (len('target'), None)) 3213 self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text ')) 3214 3215 def test_late_tail(self): 3216 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 3217 class TreeBuilderSubclass(ET.TreeBuilder): 3218 pass 3219 3220 xml = "<a>text<!-- comment -->tail</a>" 3221 a = ET.fromstring(xml) 3222 self.assertEqual(a.text, "texttail") 3223 3224 parser = ET.XMLParser(target=TreeBuilderSubclass()) 3225 parser.feed(xml) 3226 a = parser.close() 3227 self.assertEqual(a.text, "texttail") 3228 3229 xml = "<a>text<?pi data?>tail</a>" 3230 a = ET.fromstring(xml) 3231 self.assertEqual(a.text, "texttail") 3232 3233 xml = "<a>text<?pi data?>tail</a>" 3234 parser = ET.XMLParser(target=TreeBuilderSubclass()) 3235 parser.feed(xml) 3236 a = parser.close() 3237 self.assertEqual(a.text, "texttail") 3238 3239 def test_late_tail_mix_pi_comments(self): 3240 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 3241 # Test appending tails to comments/pis. 3242 class TreeBuilderSubclass(ET.TreeBuilder): 3243 pass 3244 3245 xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>" 3246 parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) 3247 parser.feed(xml) 3248 a = parser.close() 3249 self.assertEqual(a[0].text, ' comment ') 3250 self.assertEqual(a[0].tail, '\ntail') 3251 self.assertEqual(a.text, "text ") 3252 3253 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True)) 3254 parser.feed(xml) 3255 a = parser.close() 3256 self.assertEqual(a[0].text, ' comment ') 3257 self.assertEqual(a[0].tail, '\ntail') 3258 self.assertEqual(a.text, "text ") 3259 3260 xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>" 3261 parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True)) 3262 parser.feed(xml) 3263 a = parser.close() 3264 self.assertEqual(a[0].text, 'pi data') 3265 self.assertEqual(a[0].tail, 'tail') 3266 self.assertEqual(a.text, "text\n") 3267 3268 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True)) 3269 parser.feed(xml) 3270 a = parser.close() 3271 self.assertEqual(a[0].text, 'pi data') 3272 self.assertEqual(a[0].tail, 'tail') 3273 self.assertEqual(a.text, "text\n") 3274 3275 def test_treebuilder_elementfactory_none(self): 3276 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 3277 parser.feed(self.sample1) 3278 e = parser.close() 3279 self._check_sample1_element(e) 3280 3281 def test_subclass(self): 3282 class MyTreeBuilder(ET.TreeBuilder): 3283 def foobar(self, x): 3284 return x * 2 3285 3286 tb = MyTreeBuilder() 3287 self.assertEqual(tb.foobar(10), 20) 3288 3289 parser = ET.XMLParser(target=tb) 3290 parser.feed(self.sample1) 3291 3292 e = parser.close() 3293 self._check_sample1_element(e) 3294 3295 def test_subclass_comment_pi(self): 3296 class MyTreeBuilder(ET.TreeBuilder): 3297 def foobar(self, x): 3298 return x * 2 3299 3300 tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI) 3301 self.assertEqual(tb.foobar(10), 20) 3302 3303 parser = ET.XMLParser(target=tb) 3304 parser.feed(self.sample1) 3305 parser.feed('<!-- a comment--><?and a pi?>') 3306 3307 e = parser.close() 3308 self._check_sample1_element(e) 3309 3310 def test_element_factory(self): 3311 lst = [] 3312 def myfactory(tag, attrib): 3313 nonlocal lst 3314 lst.append(tag) 3315 return ET.Element(tag, attrib) 3316 3317 tb = ET.TreeBuilder(element_factory=myfactory) 3318 parser = ET.XMLParser(target=tb) 3319 parser.feed(self.sample2) 3320 parser.close() 3321 3322 self.assertEqual(lst, ['toplevel']) 3323 3324 def _check_element_factory_class(self, cls): 3325 tb = ET.TreeBuilder(element_factory=cls) 3326 3327 parser = ET.XMLParser(target=tb) 3328 parser.feed(self.sample1) 3329 e = parser.close() 3330 self.assertIsInstance(e, cls) 3331 self._check_sample1_element(e) 3332 3333 def test_element_factory_subclass(self): 3334 class MyElement(ET.Element): 3335 pass 3336 self._check_element_factory_class(MyElement) 3337 3338 def test_element_factory_pure_python_subclass(self): 3339 # Mimic SimpleTAL's behaviour (issue #16089): both versions of 3340 # TreeBuilder should be able to cope with a subclass of the 3341 # pure Python Element class. 3342 base = ET._Element_Py 3343 # Not from a C extension 3344 self.assertEqual(base.__module__, 'xml.etree.ElementTree') 3345 # Force some multiple inheritance with a C class to make things 3346 # more interesting. 3347 class MyElement(base, ValueError): 3348 pass 3349 self._check_element_factory_class(MyElement) 3350 3351 def test_doctype(self): 3352 class DoctypeParser: 3353 _doctype = None 3354 3355 def doctype(self, name, pubid, system): 3356 self._doctype = (name, pubid, system) 3357 3358 def close(self): 3359 return self._doctype 3360 3361 parser = ET.XMLParser(target=DoctypeParser()) 3362 parser.feed(self.sample1) 3363 3364 self.assertEqual(parser.close(), 3365 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3366 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3367 3368 def test_builder_lookup_errors(self): 3369 class RaisingBuilder: 3370 def __init__(self, raise_in=None, what=ValueError): 3371 self.raise_in = raise_in 3372 self.what = what 3373 3374 def __getattr__(self, name): 3375 if name == self.raise_in: 3376 raise self.what(self.raise_in) 3377 def handle(*args): 3378 pass 3379 return handle 3380 3381 ET.XMLParser(target=RaisingBuilder()) 3382 # cET also checks for 'close' and 'doctype', PyET does it only at need 3383 for event in ('start', 'data', 'end', 'comment', 'pi'): 3384 with self.assertRaisesRegex(ValueError, event): 3385 ET.XMLParser(target=RaisingBuilder(event)) 3386 3387 ET.XMLParser(target=RaisingBuilder(what=AttributeError)) 3388 for event in ('start', 'data', 'end', 'comment', 'pi'): 3389 parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError)) 3390 parser.feed(self.sample1) 3391 self.assertIsNone(parser.close()) 3392 3393 3394class XMLParserTest(unittest.TestCase): 3395 sample1 = b'<file><line>22</line></file>' 3396 sample2 = (b'<!DOCTYPE html PUBLIC' 3397 b' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 3398 b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 3399 b'<html>text</html>') 3400 sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n' 3401 '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>') 3402 3403 def _check_sample_element(self, e): 3404 self.assertEqual(e.tag, 'file') 3405 self.assertEqual(e[0].tag, 'line') 3406 self.assertEqual(e[0].text, '22') 3407 3408 def test_constructor_args(self): 3409 parser2 = ET.XMLParser(encoding='utf-8', 3410 target=ET.TreeBuilder()) 3411 parser2.feed(self.sample1) 3412 self._check_sample_element(parser2.close()) 3413 3414 def test_subclass(self): 3415 class MyParser(ET.XMLParser): 3416 pass 3417 parser = MyParser() 3418 parser.feed(self.sample1) 3419 self._check_sample_element(parser.close()) 3420 3421 def test_doctype_warning(self): 3422 with warnings.catch_warnings(): 3423 warnings.simplefilter('error', DeprecationWarning) 3424 parser = ET.XMLParser() 3425 parser.feed(self.sample2) 3426 parser.close() 3427 3428 def test_subclass_doctype(self): 3429 _doctype = None 3430 class MyParserWithDoctype(ET.XMLParser): 3431 def doctype(self, *args, **kwargs): 3432 nonlocal _doctype 3433 _doctype = (args, kwargs) 3434 3435 parser = MyParserWithDoctype() 3436 with self.assertWarnsRegex(RuntimeWarning, 'doctype'): 3437 parser.feed(self.sample2) 3438 parser.close() 3439 self.assertIsNone(_doctype) 3440 3441 _doctype = _doctype2 = None 3442 with warnings.catch_warnings(): 3443 warnings.simplefilter('error', DeprecationWarning) 3444 warnings.simplefilter('error', RuntimeWarning) 3445 class DoctypeParser: 3446 def doctype(self, name, pubid, system): 3447 nonlocal _doctype2 3448 _doctype2 = (name, pubid, system) 3449 3450 parser = MyParserWithDoctype(target=DoctypeParser()) 3451 parser.feed(self.sample2) 3452 parser.close() 3453 self.assertIsNone(_doctype) 3454 self.assertEqual(_doctype2, 3455 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3456 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3457 3458 def test_inherited_doctype(self): 3459 '''Ensure that ordinary usage is not deprecated (Issue 19176)''' 3460 with warnings.catch_warnings(): 3461 warnings.simplefilter('error', DeprecationWarning) 3462 warnings.simplefilter('error', RuntimeWarning) 3463 class MyParserWithoutDoctype(ET.XMLParser): 3464 pass 3465 parser = MyParserWithoutDoctype() 3466 parser.feed(self.sample2) 3467 parser.close() 3468 3469 def test_parse_string(self): 3470 parser = ET.XMLParser(target=ET.TreeBuilder()) 3471 parser.feed(self.sample3) 3472 e = parser.close() 3473 self.assertEqual(e.tag, 'money') 3474 self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b') 3475 self.assertEqual(e.text, '$\xa3\u20ac\U0001017b') 3476 3477 3478class NamespaceParseTest(unittest.TestCase): 3479 def test_find_with_namespace(self): 3480 nsmap = {'h': 'hello', 'f': 'foo'} 3481 doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) 3482 3483 self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) 3484 self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) 3485 self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) 3486 3487 3488class ElementSlicingTest(unittest.TestCase): 3489 def _elem_tags(self, elemlist): 3490 return [e.tag for e in elemlist] 3491 3492 def _subelem_tags(self, elem): 3493 return self._elem_tags(list(elem)) 3494 3495 def _make_elem_with_children(self, numchildren): 3496 """Create an Element with a tag 'a', with the given amount of children 3497 named 'a0', 'a1' ... and so on. 3498 3499 """ 3500 e = ET.Element('a') 3501 for i in range(numchildren): 3502 ET.SubElement(e, 'a%s' % i) 3503 return e 3504 3505 def test_getslice_single_index(self): 3506 e = self._make_elem_with_children(10) 3507 3508 self.assertEqual(e[1].tag, 'a1') 3509 self.assertEqual(e[-2].tag, 'a8') 3510 3511 self.assertRaises(IndexError, lambda: e[12]) 3512 self.assertRaises(IndexError, lambda: e[-12]) 3513 3514 def test_getslice_range(self): 3515 e = self._make_elem_with_children(6) 3516 3517 self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5']) 3518 self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5']) 3519 self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5']) 3520 self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4']) 3521 self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4']) 3522 self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1']) 3523 3524 def test_getslice_steps(self): 3525 e = self._make_elem_with_children(10) 3526 3527 self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9']) 3528 self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9']) 3529 self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8']) 3530 self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9']) 3531 self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3']) 3532 self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3']) 3533 3534 def test_getslice_negative_steps(self): 3535 e = self._make_elem_with_children(4) 3536 3537 self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0']) 3538 self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1']) 3539 self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3']) 3540 self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3']) 3541 self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3']) 3542 3543 def test_delslice(self): 3544 e = self._make_elem_with_children(4) 3545 del e[0:2] 3546 self.assertEqual(self._subelem_tags(e), ['a2', 'a3']) 3547 3548 e = self._make_elem_with_children(4) 3549 del e[0:] 3550 self.assertEqual(self._subelem_tags(e), []) 3551 3552 e = self._make_elem_with_children(4) 3553 del e[::-1] 3554 self.assertEqual(self._subelem_tags(e), []) 3555 3556 e = self._make_elem_with_children(4) 3557 del e[::-2] 3558 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3559 3560 e = self._make_elem_with_children(4) 3561 del e[1::2] 3562 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3563 3564 e = self._make_elem_with_children(2) 3565 del e[::2] 3566 self.assertEqual(self._subelem_tags(e), ['a1']) 3567 3568 def test_setslice_single_index(self): 3569 e = self._make_elem_with_children(4) 3570 e[1] = ET.Element('b') 3571 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3572 3573 e[-2] = ET.Element('c') 3574 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3575 3576 with self.assertRaises(IndexError): 3577 e[5] = ET.Element('d') 3578 with self.assertRaises(IndexError): 3579 e[-5] = ET.Element('d') 3580 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3581 3582 def test_setslice_range(self): 3583 e = self._make_elem_with_children(4) 3584 e[1:3] = [ET.Element('b%s' % i) for i in range(2)] 3585 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3']) 3586 3587 e = self._make_elem_with_children(4) 3588 e[1:3] = [ET.Element('b')] 3589 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3']) 3590 3591 e = self._make_elem_with_children(4) 3592 e[1:3] = [ET.Element('b%s' % i) for i in range(3)] 3593 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3']) 3594 3595 def test_setslice_steps(self): 3596 e = self._make_elem_with_children(6) 3597 e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)] 3598 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5']) 3599 3600 e = self._make_elem_with_children(6) 3601 with self.assertRaises(ValueError): 3602 e[1:5:2] = [ET.Element('b')] 3603 with self.assertRaises(ValueError): 3604 e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)] 3605 with self.assertRaises(ValueError): 3606 e[1:5:2] = [] 3607 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5']) 3608 3609 e = self._make_elem_with_children(4) 3610 e[1::sys.maxsize] = [ET.Element('b')] 3611 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3612 e[1::sys.maxsize<<64] = [ET.Element('c')] 3613 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3614 3615 def test_setslice_negative_steps(self): 3616 e = self._make_elem_with_children(4) 3617 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)] 3618 self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3']) 3619 3620 e = self._make_elem_with_children(4) 3621 with self.assertRaises(ValueError): 3622 e[2:0:-1] = [ET.Element('b')] 3623 with self.assertRaises(ValueError): 3624 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)] 3625 with self.assertRaises(ValueError): 3626 e[2:0:-1] = [] 3627 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3']) 3628 3629 e = self._make_elem_with_children(4) 3630 e[1::-sys.maxsize] = [ET.Element('b')] 3631 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3632 e[1::-sys.maxsize-1] = [ET.Element('c')] 3633 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3634 e[1::-sys.maxsize<<64] = [ET.Element('d')] 3635 self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) 3636 3637 3638class IOTest(unittest.TestCase): 3639 def test_encoding(self): 3640 # Test encoding issues. 3641 elem = ET.Element("tag") 3642 elem.text = "abc" 3643 self.assertEqual(serialize(elem), '<tag>abc</tag>') 3644 for enc in ("utf-8", "us-ascii"): 3645 with self.subTest(enc): 3646 self.assertEqual(serialize(elem, encoding=enc), 3647 b'<tag>abc</tag>') 3648 self.assertEqual(serialize(elem, encoding=enc.upper()), 3649 b'<tag>abc</tag>') 3650 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3651 with self.subTest(enc): 3652 self.assertEqual(serialize(elem, encoding=enc), 3653 ("<?xml version='1.0' encoding='%s'?>\n" 3654 "<tag>abc</tag>" % enc).encode(enc)) 3655 upper = enc.upper() 3656 self.assertEqual(serialize(elem, encoding=upper), 3657 ("<?xml version='1.0' encoding='%s'?>\n" 3658 "<tag>abc</tag>" % upper).encode(enc)) 3659 3660 elem = ET.Element("tag") 3661 elem.text = "<&\"\'>" 3662 self.assertEqual(serialize(elem), '<tag><&"\'></tag>') 3663 self.assertEqual(serialize(elem, encoding="utf-8"), 3664 b'<tag><&"\'></tag>') 3665 self.assertEqual(serialize(elem, encoding="us-ascii"), 3666 b'<tag><&"\'></tag>') 3667 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3668 self.assertEqual(serialize(elem, encoding=enc), 3669 ("<?xml version='1.0' encoding='%s'?>\n" 3670 "<tag><&\"'></tag>" % enc).encode(enc)) 3671 3672 elem = ET.Element("tag") 3673 elem.attrib["key"] = "<&\"\'>" 3674 self.assertEqual(serialize(elem), '<tag key="<&"\'>" />') 3675 self.assertEqual(serialize(elem, encoding="utf-8"), 3676 b'<tag key="<&"\'>" />') 3677 self.assertEqual(serialize(elem, encoding="us-ascii"), 3678 b'<tag key="<&"\'>" />') 3679 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3680 self.assertEqual(serialize(elem, encoding=enc), 3681 ("<?xml version='1.0' encoding='%s'?>\n" 3682 "<tag key=\"<&"'>\" />" % enc).encode(enc)) 3683 3684 elem = ET.Element("tag") 3685 elem.text = '\xe5\xf6\xf6<>' 3686 self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>') 3687 self.assertEqual(serialize(elem, encoding="utf-8"), 3688 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 3689 self.assertEqual(serialize(elem, encoding="us-ascii"), 3690 b'<tag>åöö<></tag>') 3691 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3692 self.assertEqual(serialize(elem, encoding=enc), 3693 ("<?xml version='1.0' encoding='%s'?>\n" 3694 "<tag>åöö<></tag>" % enc).encode(enc)) 3695 3696 elem = ET.Element("tag") 3697 elem.attrib["key"] = '\xe5\xf6\xf6<>' 3698 self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />') 3699 self.assertEqual(serialize(elem, encoding="utf-8"), 3700 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 3701 self.assertEqual(serialize(elem, encoding="us-ascii"), 3702 b'<tag key="åöö<>" />') 3703 for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"): 3704 self.assertEqual(serialize(elem, encoding=enc), 3705 ("<?xml version='1.0' encoding='%s'?>\n" 3706 "<tag key=\"åöö<>\" />" % enc).encode(enc)) 3707 3708 def test_write_to_filename(self): 3709 self.addCleanup(os_helper.unlink, TESTFN) 3710 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3711 tree.write(TESTFN) 3712 with open(TESTFN, 'rb') as f: 3713 self.assertEqual(f.read(), b'''<site>ø</site>''') 3714 3715 def test_write_to_filename_with_encoding(self): 3716 self.addCleanup(os_helper.unlink, TESTFN) 3717 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3718 tree.write(TESTFN, encoding='utf-8') 3719 with open(TESTFN, 'rb') as f: 3720 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3721 3722 tree.write(TESTFN, encoding='ISO-8859-1') 3723 with open(TESTFN, 'rb') as f: 3724 self.assertEqual(f.read(), convlinesep( 3725 b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' 3726 b'''<site>\xf8</site>''')) 3727 3728 def test_write_to_filename_as_unicode(self): 3729 self.addCleanup(os_helper.unlink, TESTFN) 3730 with open(TESTFN, 'w') as f: 3731 encoding = f.encoding 3732 os_helper.unlink(TESTFN) 3733 3734 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3735 tree.write(TESTFN, encoding='unicode') 3736 with open(TESTFN, 'rb') as f: 3737 self.assertEqual(f.read(), b"<site>\xc3\xb8</site>") 3738 3739 def test_write_to_text_file(self): 3740 self.addCleanup(os_helper.unlink, TESTFN) 3741 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3742 with open(TESTFN, 'w', encoding='utf-8') as f: 3743 tree.write(f, encoding='unicode') 3744 self.assertFalse(f.closed) 3745 with open(TESTFN, 'rb') as f: 3746 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3747 3748 with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f: 3749 tree.write(f, encoding='unicode') 3750 self.assertFalse(f.closed) 3751 with open(TESTFN, 'rb') as f: 3752 self.assertEqual(f.read(), b'''<site>ø</site>''') 3753 3754 with open(TESTFN, 'w', encoding='ISO-8859-1') as f: 3755 tree.write(f, encoding='unicode') 3756 self.assertFalse(f.closed) 3757 with open(TESTFN, 'rb') as f: 3758 self.assertEqual(f.read(), b'''<site>\xf8</site>''') 3759 3760 def test_write_to_binary_file(self): 3761 self.addCleanup(os_helper.unlink, TESTFN) 3762 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3763 with open(TESTFN, 'wb') as f: 3764 tree.write(f) 3765 self.assertFalse(f.closed) 3766 with open(TESTFN, 'rb') as f: 3767 self.assertEqual(f.read(), b'''<site>ø</site>''') 3768 3769 def test_write_to_binary_file_with_encoding(self): 3770 self.addCleanup(os_helper.unlink, TESTFN) 3771 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3772 with open(TESTFN, 'wb') as f: 3773 tree.write(f, encoding='utf-8') 3774 self.assertFalse(f.closed) 3775 with open(TESTFN, 'rb') as f: 3776 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3777 3778 with open(TESTFN, 'wb') as f: 3779 tree.write(f, encoding='ISO-8859-1') 3780 self.assertFalse(f.closed) 3781 with open(TESTFN, 'rb') as f: 3782 self.assertEqual(f.read(), 3783 b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' 3784 b'''<site>\xf8</site>''') 3785 3786 def test_write_to_binary_file_with_bom(self): 3787 self.addCleanup(os_helper.unlink, TESTFN) 3788 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3789 # test BOM writing to buffered file 3790 with open(TESTFN, 'wb') as f: 3791 tree.write(f, encoding='utf-16') 3792 self.assertFalse(f.closed) 3793 with open(TESTFN, 'rb') as f: 3794 self.assertEqual(f.read(), 3795 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3796 '''<site>\xf8</site>'''.encode("utf-16")) 3797 # test BOM writing to non-buffered file 3798 with open(TESTFN, 'wb', buffering=0) as f: 3799 tree.write(f, encoding='utf-16') 3800 self.assertFalse(f.closed) 3801 with open(TESTFN, 'rb') as f: 3802 self.assertEqual(f.read(), 3803 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3804 '''<site>\xf8</site>'''.encode("utf-16")) 3805 3806 def test_read_from_stringio(self): 3807 tree = ET.ElementTree() 3808 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3809 tree.parse(stream) 3810 self.assertEqual(tree.getroot().tag, 'site') 3811 3812 def test_write_to_stringio(self): 3813 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3814 stream = io.StringIO() 3815 tree.write(stream, encoding='unicode') 3816 self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''') 3817 3818 def test_read_from_bytesio(self): 3819 tree = ET.ElementTree() 3820 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3821 tree.parse(raw) 3822 self.assertEqual(tree.getroot().tag, 'site') 3823 3824 def test_write_to_bytesio(self): 3825 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3826 raw = io.BytesIO() 3827 tree.write(raw) 3828 self.assertEqual(raw.getvalue(), b'''<site>ø</site>''') 3829 3830 class dummy: 3831 pass 3832 3833 def test_read_from_user_text_reader(self): 3834 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3835 reader = self.dummy() 3836 reader.read = stream.read 3837 tree = ET.ElementTree() 3838 tree.parse(reader) 3839 self.assertEqual(tree.getroot().tag, 'site') 3840 3841 def test_write_to_user_text_writer(self): 3842 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3843 stream = io.StringIO() 3844 writer = self.dummy() 3845 writer.write = stream.write 3846 tree.write(writer, encoding='unicode') 3847 self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''') 3848 3849 def test_read_from_user_binary_reader(self): 3850 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3851 reader = self.dummy() 3852 reader.read = raw.read 3853 tree = ET.ElementTree() 3854 tree.parse(reader) 3855 self.assertEqual(tree.getroot().tag, 'site') 3856 tree = ET.ElementTree() 3857 3858 def test_write_to_user_binary_writer(self): 3859 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3860 raw = io.BytesIO() 3861 writer = self.dummy() 3862 writer.write = raw.write 3863 tree.write(writer) 3864 self.assertEqual(raw.getvalue(), b'''<site>ø</site>''') 3865 3866 def test_write_to_user_binary_writer_with_bom(self): 3867 tree = ET.ElementTree(ET.XML('''<site />''')) 3868 raw = io.BytesIO() 3869 writer = self.dummy() 3870 writer.write = raw.write 3871 writer.seekable = lambda: True 3872 writer.tell = raw.tell 3873 tree.write(writer, encoding="utf-16") 3874 self.assertEqual(raw.getvalue(), 3875 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3876 '''<site />'''.encode("utf-16")) 3877 3878 def test_tostringlist_invariant(self): 3879 root = ET.fromstring('<tag>foo</tag>') 3880 self.assertEqual( 3881 ET.tostring(root, 'unicode'), 3882 ''.join(ET.tostringlist(root, 'unicode'))) 3883 self.assertEqual( 3884 ET.tostring(root, 'utf-16'), 3885 b''.join(ET.tostringlist(root, 'utf-16'))) 3886 3887 def test_short_empty_elements(self): 3888 root = ET.fromstring('<tag>a<x />b<y></y>c</tag>') 3889 self.assertEqual( 3890 ET.tostring(root, 'unicode'), 3891 '<tag>a<x />b<y />c</tag>') 3892 self.assertEqual( 3893 ET.tostring(root, 'unicode', short_empty_elements=True), 3894 '<tag>a<x />b<y />c</tag>') 3895 self.assertEqual( 3896 ET.tostring(root, 'unicode', short_empty_elements=False), 3897 '<tag>a<x></x>b<y></y>c</tag>') 3898 3899 3900class ParseErrorTest(unittest.TestCase): 3901 def test_subclass(self): 3902 self.assertIsInstance(ET.ParseError(), SyntaxError) 3903 3904 def _get_error(self, s): 3905 try: 3906 ET.fromstring(s) 3907 except ET.ParseError as e: 3908 return e 3909 3910 def test_error_position(self): 3911 self.assertEqual(self._get_error('foo').position, (1, 0)) 3912 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 3913 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 3914 3915 def test_error_code(self): 3916 import xml.parsers.expat.errors as ERRORS 3917 self.assertEqual(self._get_error('foo').code, 3918 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 3919 3920 3921class KeywordArgsTest(unittest.TestCase): 3922 # Test various issues with keyword arguments passed to ET.Element 3923 # constructor and methods 3924 def test_issue14818(self): 3925 x = ET.XML("<a>foo</a>") 3926 self.assertEqual(x.find('a', None), 3927 x.find(path='a', namespaces=None)) 3928 self.assertEqual(x.findtext('a', None, None), 3929 x.findtext(path='a', default=None, namespaces=None)) 3930 self.assertEqual(x.findall('a', None), 3931 x.findall(path='a', namespaces=None)) 3932 self.assertEqual(list(x.iterfind('a', None)), 3933 list(x.iterfind(path='a', namespaces=None))) 3934 3935 self.assertEqual(ET.Element('a').attrib, {}) 3936 elements = [ 3937 ET.Element('a', dict(href="#", id="foo")), 3938 ET.Element('a', attrib=dict(href="#", id="foo")), 3939 ET.Element('a', dict(href="#"), id="foo"), 3940 ET.Element('a', href="#", id="foo"), 3941 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 3942 ] 3943 for e in elements: 3944 self.assertEqual(e.tag, 'a') 3945 self.assertEqual(e.attrib, dict(href="#", id="foo")) 3946 3947 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 3948 self.assertEqual(e2.attrib['key1'], 'value1') 3949 3950 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3951 ET.Element('a', "I'm not a dict") 3952 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3953 ET.Element('a', attrib="I'm not a dict") 3954 3955# -------------------------------------------------------------------- 3956 3957class NoAcceleratorTest(unittest.TestCase): 3958 def setUp(self): 3959 if not pyET: 3960 raise unittest.SkipTest('only for the Python version') 3961 3962 # Test that the C accelerator was not imported for pyET 3963 def test_correct_import_pyET(self): 3964 # The type of methods defined in Python code is types.FunctionType, 3965 # while the type of methods defined inside _elementtree is 3966 # <class 'wrapper_descriptor'> 3967 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 3968 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 3969 3970 3971# -------------------------------------------------------------------- 3972 3973def c14n_roundtrip(xml, **options): 3974 return pyET.canonicalize(xml, **options) 3975 3976 3977class C14NTest(unittest.TestCase): 3978 maxDiff = None 3979 3980 # 3981 # simple roundtrip tests (from c14n.py) 3982 3983 def test_simple_roundtrip(self): 3984 # Basics 3985 self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>') 3986 self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME 3987 '<doc xmlns="uri"></doc>') 3988 self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"), 3989 '<prefix:doc xmlns:prefix="uri"></prefix:doc>') 3990 self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"), 3991 '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>') 3992 self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"), 3993 '<elem></elem>') 3994 3995 # C14N spec 3996 self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"), 3997 '<doc>Hello, world!</doc>') 3998 self.assertEqual(c14n_roundtrip("<value>2</value>"), 3999 '<value>2</value>') 4000 self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'), 4001 '<compute>value>"0" && value<"10" ?"valid":"error"</compute>') 4002 self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" && value<"10" ?"valid":"error"'>valid</compute>'''), 4003 '<compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute>') 4004 self.assertEqual(c14n_roundtrip("<norm attr=' '   
	 ' '/>"), 4005 '<norm attr=" \' 
	 \' "></norm>') 4006 self.assertEqual(c14n_roundtrip("<normNames attr=' A   
	 B '/>"), 4007 '<normNames attr=" A 
	 B "></normNames>') 4008 self.assertEqual(c14n_roundtrip("<normId id=' '   
	 ' '/>"), 4009 '<normId id=" \' 
	 \' "></normId>') 4010 4011 # fragments from PJ's tests 4012 #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"), 4013 #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>') 4014 4015 # Namespace issues 4016 xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>' 4017 self.assertEqual(c14n_roundtrip(xml), xml) 4018 xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>' 4019 self.assertEqual(c14n_roundtrip(xml), xml) 4020 xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>' 4021 self.assertEqual(c14n_roundtrip(xml), xml) 4022 4023 def test_c14n_exclusion(self): 4024 xml = textwrap.dedent("""\ 4025 <root xmlns:x="http://example.com/x"> 4026 <a x:attr="attrx"> 4027 <b>abtext</b> 4028 </a> 4029 <b>btext</b> 4030 <c> 4031 <x:d>dtext</x:d> 4032 </c> 4033 </root> 4034 """) 4035 self.assertEqual( 4036 c14n_roundtrip(xml, strip_text=True), 4037 '<root>' 4038 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 4039 '<b>btext</b>' 4040 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4041 '</root>') 4042 self.assertEqual( 4043 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']), 4044 '<root>' 4045 '<a><b>abtext</b></a>' 4046 '<b>btext</b>' 4047 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4048 '</root>') 4049 self.assertEqual( 4050 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']), 4051 '<root>' 4052 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 4053 '<b>btext</b>' 4054 '<c></c>' 4055 '</root>') 4056 self.assertEqual( 4057 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'], 4058 exclude_tags=['{http://example.com/x}d']), 4059 '<root>' 4060 '<a><b>abtext</b></a>' 4061 '<b>btext</b>' 4062 '<c></c>' 4063 '</root>') 4064 self.assertEqual( 4065 c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']), 4066 '<root>' 4067 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4068 '</root>') 4069 self.assertEqual( 4070 c14n_roundtrip(xml, exclude_tags=['a', 'b']), 4071 '<root>\n' 4072 ' \n' 4073 ' \n' 4074 ' <c>\n' 4075 ' <x:d xmlns:x="http://example.com/x">dtext</x:d>\n' 4076 ' </c>\n' 4077 '</root>') 4078 self.assertEqual( 4079 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']), 4080 '<root>' 4081 '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>' 4082 '<c></c>' 4083 '</root>') 4084 self.assertEqual( 4085 c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']), 4086 '<root>\n' 4087 ' <a xmlns:x="http://example.com/x" x:attr="attrx">\n' 4088 ' \n' 4089 ' </a>\n' 4090 ' \n' 4091 ' <c>\n' 4092 ' \n' 4093 ' </c>\n' 4094 '</root>') 4095 4096 # 4097 # basic method=c14n tests from the c14n 2.0 specification. uses 4098 # test files under xmltestdata/c14n-20. 4099 4100 # note that this uses generated C14N versions of the standard ET.write 4101 # output, not roundtripped C14N (see above). 4102 4103 def test_xml_c14n2(self): 4104 datadir = findfile("c14n-20", subdir="xmltestdata") 4105 full_path = partial(os.path.join, datadir) 4106 4107 files = [filename[:-4] for filename in sorted(os.listdir(datadir)) 4108 if filename.endswith('.xml')] 4109 input_files = [ 4110 filename for filename in files 4111 if filename.startswith('in') 4112 ] 4113 configs = { 4114 filename: { 4115 # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> 4116 option.tag.split('}')[-1]: ((option.text or '').strip(), option) 4117 for option in ET.parse(full_path(filename) + ".xml").getroot() 4118 } 4119 for filename in files 4120 if filename.startswith('c14n') 4121 } 4122 4123 tests = { 4124 input_file: [ 4125 (filename, configs[filename.rsplit('_', 1)[-1]]) 4126 for filename in files 4127 if filename.startswith(f'out_{input_file}_') 4128 and filename.rsplit('_', 1)[-1] in configs 4129 ] 4130 for input_file in input_files 4131 } 4132 4133 # Make sure we found all test cases. 4134 self.assertEqual(30, len([ 4135 output_file for output_files in tests.values() 4136 for output_file in output_files])) 4137 4138 def get_option(config, option_name, default=None): 4139 return config.get(option_name, (default, ()))[0] 4140 4141 for input_file, output_files in tests.items(): 4142 for output_file, config in output_files: 4143 keep_comments = get_option( 4144 config, 'IgnoreComments') == 'true' # no, it's right :) 4145 strip_text = get_option( 4146 config, 'TrimTextNodes') == 'true' 4147 rewrite_prefixes = get_option( 4148 config, 'PrefixRewrite') == 'sequential' 4149 if 'QNameAware' in config: 4150 qattrs = [ 4151 f"{{{el.get('NS')}}}{el.get('Name')}" 4152 for el in config['QNameAware'][1].findall( 4153 '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') 4154 ] 4155 qtags = [ 4156 f"{{{el.get('NS')}}}{el.get('Name')}" 4157 for el in config['QNameAware'][1].findall( 4158 '{http://www.w3.org/2010/xml-c14n2}Element') 4159 ] 4160 else: 4161 qtags = qattrs = None 4162 4163 # Build subtest description from config. 4164 config_descr = ','.join( 4165 f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}" 4166 for name, (value, children) in sorted(config.items()) 4167 ) 4168 4169 with self.subTest(f"{output_file}({config_descr})"): 4170 if input_file == 'inNsRedecl' and not rewrite_prefixes: 4171 self.skipTest( 4172 f"Redeclared namespace handling is not supported in {output_file}") 4173 if input_file == 'inNsSuperfluous' and not rewrite_prefixes: 4174 self.skipTest( 4175 f"Redeclared namespace handling is not supported in {output_file}") 4176 if 'QNameAware' in config and config['QNameAware'][1].find( 4177 '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: 4178 self.skipTest( 4179 f"QName rewriting in XPath text is not supported in {output_file}") 4180 4181 f = full_path(input_file + ".xml") 4182 if input_file == 'inC14N5': 4183 # Hack: avoid setting up external entity resolution in the parser. 4184 with open(full_path('world.txt'), 'rb') as entity_file: 4185 with open(f, 'rb') as f: 4186 f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read())) 4187 4188 text = ET.canonicalize( 4189 from_file=f, 4190 with_comments=keep_comments, 4191 strip_text=strip_text, 4192 rewrite_prefixes=rewrite_prefixes, 4193 qname_aware_tags=qtags, qname_aware_attrs=qattrs) 4194 4195 with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: 4196 expected = f.read() 4197 if input_file == 'inC14N3': 4198 # FIXME: cET resolves default attributes but ET does not! 4199 expected = expected.replace(' attr="default"', '') 4200 text = text.replace(' attr="default"', '') 4201 self.assertEqual(expected, text) 4202 4203# -------------------------------------------------------------------- 4204 4205 4206def test_main(module=None): 4207 # When invoked without a module, runs the Python ET tests by loading pyET. 4208 # Otherwise, uses the given module as the ET. 4209 global pyET 4210 pyET = import_fresh_module('xml.etree.ElementTree', 4211 blocked=['_elementtree']) 4212 if module is None: 4213 module = pyET 4214 4215 global ET 4216 ET = module 4217 4218 test_classes = [ 4219 ModuleTest, 4220 ElementSlicingTest, 4221 BasicElementTest, 4222 BadElementTest, 4223 BadElementPathTest, 4224 ElementTreeTest, 4225 IOTest, 4226 ParseErrorTest, 4227 XIncludeTest, 4228 ElementTreeTypeTest, 4229 ElementFindTest, 4230 ElementIterTest, 4231 TreeBuilderTest, 4232 XMLParserTest, 4233 XMLPullParserTest, 4234 BugsTest, 4235 KeywordArgsTest, 4236 C14NTest, 4237 ] 4238 4239 # These tests will only run for the pure-Python version that doesn't import 4240 # _elementtree. We can't use skipUnless here, because pyET is filled in only 4241 # after the module is loaded. 4242 if pyET is not ET: 4243 test_classes.extend([ 4244 NoAcceleratorTest, 4245 ]) 4246 4247 # Provide default namespace mapping and path cache. 4248 from xml.etree import ElementPath 4249 nsmap = ET.register_namespace._namespace_map 4250 # Copy the default namespace mapping 4251 nsmap_copy = nsmap.copy() 4252 # Copy the path cache (should be empty) 4253 path_cache = ElementPath._cache 4254 ElementPath._cache = path_cache.copy() 4255 # Align the Comment/PI factories. 4256 if hasattr(ET, '_set_factories'): 4257 old_factories = ET._set_factories(ET.Comment, ET.PI) 4258 else: 4259 old_factories = None 4260 4261 try: 4262 support.run_unittest(*test_classes) 4263 finally: 4264 from xml.etree import ElementPath 4265 # Restore mapping and path cache 4266 nsmap.clear() 4267 nsmap.update(nsmap_copy) 4268 ElementPath._cache = path_cache 4269 if old_factories is not None: 4270 ET._set_factories(*old_factories) 4271 # don't interfere with subsequent tests 4272 ET = pyET = None 4273 4274 4275if __name__ == '__main__': 4276 test_main() 4277