1# -*- coding: utf-8 -*- 2# IMPORTANT: the same tests are run from "test_xml_etree_c" in order 3# to ensure consistency between the C implementation and the Python 4# implementation. 5# 6# For this purpose, the module-level "ET" symbol is temporarily 7# monkey-patched when running the "test_xml_etree_c" test suite. 8 9import cgi 10import copy 11import functools 12import io 13import pickle 14import StringIO 15import sys 16import types 17import unittest 18import warnings 19import weakref 20 21from test import test_support as support 22from test.test_support import TESTFN, findfile, gc_collect, swap_attr 23 24# pyET is the pure-Python implementation. 25# 26# ET is pyET in test_xml_etree and is the C accelerated version in 27# test_xml_etree_c. 28from xml.etree import ElementTree as pyET 29ET = None 30 31SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 32SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 33UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") 34 35SAMPLE_XML = """\ 36<body> 37 <tag class='a'>text</tag> 38 <tag class='b' /> 39 <section> 40 <tag class='b' id='inner'>subtext</tag> 41 </section> 42</body> 43""" 44 45SAMPLE_SECTION = """\ 46<section> 47 <tag class='b' id='inner'>subtext</tag> 48 <nexttag /> 49 <nextsection> 50 <tag /> 51 </nextsection> 52</section> 53""" 54 55SAMPLE_XML_NS = """ 56<body xmlns="http://effbot.org/ns"> 57 <tag>text</tag> 58 <tag /> 59 <section> 60 <tag>subtext</tag> 61 </section> 62</body> 63""" 64 65SAMPLE_XML_NS_ELEMS = """ 66<root> 67<h:table xmlns:h="hello"> 68 <h:tr> 69 <h:td>Apples</h:td> 70 <h:td>Bananas</h:td> 71 </h:tr> 72</h:table> 73 74<f:table xmlns:f="foo"> 75 <f:name>African Coffee Table</f:name> 76 <f:width>80</f:width> 77 <f:length>120</f:length> 78</f:table> 79</root> 80""" 81 82ENTITY_XML = """\ 83<!DOCTYPE points [ 84<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 85%user-entities; 86]> 87<document>&entity;</document> 88""" 89 90 91def checkwarnings(*filters): 92 def decorator(test): 93 def newtest(*args, **kwargs): 94 with support.check_warnings(*filters): 95 test(*args, **kwargs) 96 functools.update_wrapper(newtest, test) 97 return newtest 98 return decorator 99 100 101class ModuleTest(unittest.TestCase): 102 # TODO: this should be removed once we get rid of the global module vars 103 104 def test_sanity(self): 105 # Import sanity. 106 107 from xml.etree import ElementTree 108 from xml.etree import ElementInclude 109 from xml.etree import ElementPath 110 111 112def serialize(elem, to_string=True, **options): 113 file = StringIO.StringIO() 114 tree = ET.ElementTree(elem) 115 tree.write(file, **options) 116 if to_string: 117 return file.getvalue() 118 else: 119 file.seek(0) 120 return file 121 122def summarize_list(seq): 123 return [elem.tag for elem in seq] 124 125def normalize_crlf(tree): 126 for elem in tree.iter(): 127 if elem.text: 128 elem.text = elem.text.replace("\r\n", "\n") 129 if elem.tail: 130 elem.tail = elem.tail.replace("\r\n", "\n") 131 132def python_only(test): 133 def wrapper(*args): 134 if ET is not pyET: 135 raise unittest.SkipTest('only for the Python version') 136 return test(*args) 137 return wrapper 138 139def cet_only(test): 140 def wrapper(*args): 141 if ET is pyET: 142 raise unittest.SkipTest('only for the C version') 143 return test(*args) 144 return wrapper 145 146# -------------------------------------------------------------------- 147# element tree tests 148 149class ElementTreeTest(unittest.TestCase): 150 151 def serialize_check(self, elem, expected): 152 self.assertEqual(serialize(elem), expected) 153 154 def test_interface(self): 155 # Test element tree interface. 156 157 def check_string(string): 158 len(string) 159 for char in string: 160 self.assertEqual(len(char), 1, 161 msg="expected one-character string, got %r" % char) 162 new_string = string + "" 163 new_string = string + " " 164 string[:0] 165 166 def check_mapping(mapping): 167 len(mapping) 168 keys = mapping.keys() 169 items = mapping.items() 170 for key in keys: 171 item = mapping[key] 172 mapping["key"] = "value" 173 self.assertEqual(mapping["key"], "value", 174 msg="expected value string, got %r" % mapping["key"]) 175 176 def check_element(element): 177 self.assertTrue(ET.iselement(element), msg="not an element") 178 self.assertTrue(hasattr(element, "tag"), msg="no tag member") 179 self.assertTrue(hasattr(element, "attrib"), msg="no attrib member") 180 self.assertTrue(hasattr(element, "text"), msg="no text member") 181 self.assertTrue(hasattr(element, "tail"), msg="no tail member") 182 183 check_string(element.tag) 184 check_mapping(element.attrib) 185 if element.text is not None: 186 check_string(element.text) 187 if element.tail is not None: 188 check_string(element.tail) 189 for elem in element: 190 check_element(elem) 191 192 element = ET.Element("tag") 193 check_element(element) 194 tree = ET.ElementTree(element) 195 check_element(tree.getroot()) 196 element = ET.Element("t\xe4g", key="value") 197 tree = ET.ElementTree(element) 198 self.assertRegexpMatches(repr(element), r"^<Element 't\\xe4g' at 0x.*>$") 199 element = ET.Element("tag", key="value") 200 201 # Make sure all standard element methods exist. 202 203 def check_method(method): 204 self.assertTrue(hasattr(method, '__call__'), 205 msg="%s not callable" % method) 206 207 check_method(element.append) 208 check_method(element.extend) 209 check_method(element.insert) 210 check_method(element.remove) 211 check_method(element.getchildren) 212 check_method(element.find) 213 check_method(element.iterfind) 214 check_method(element.findall) 215 check_method(element.findtext) 216 check_method(element.clear) 217 check_method(element.get) 218 check_method(element.set) 219 check_method(element.keys) 220 check_method(element.items) 221 check_method(element.iter) 222 check_method(element.itertext) 223 check_method(element.getiterator) 224 225 # These methods return an iterable. See bug 6472. 226 227 def check_iter(it): 228 check_method(it.next) 229 230 check_iter(element.iter("tag")) 231 check_iter(element.iterfind("tag")) 232 check_iter(element.iterfind("*")) 233 check_iter(tree.iter("tag")) 234 check_iter(tree.iterfind("tag")) 235 check_iter(tree.iterfind("*")) 236 237 # These aliases are provided: 238 239 self.assertEqual(ET.XML, ET.fromstring) 240 self.assertEqual(ET.PI, ET.ProcessingInstruction) 241 self.assertEqual(ET.XMLParser, ET.XMLTreeBuilder) 242 243 def test_set_attribute(self): 244 element = ET.Element('tag') 245 246 self.assertEqual(element.tag, 'tag') 247 element.tag = 'Tag' 248 self.assertEqual(element.tag, 'Tag') 249 element.tag = 'TAG' 250 self.assertEqual(element.tag, 'TAG') 251 252 self.assertIsNone(element.text) 253 element.text = 'Text' 254 self.assertEqual(element.text, 'Text') 255 element.text = 'TEXT' 256 self.assertEqual(element.text, 'TEXT') 257 258 self.assertIsNone(element.tail) 259 element.tail = 'Tail' 260 self.assertEqual(element.tail, 'Tail') 261 element.tail = 'TAIL' 262 self.assertEqual(element.tail, 'TAIL') 263 264 self.assertEqual(element.attrib, {}) 265 element.attrib = {'a': 'b', 'c': 'd'} 266 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'}) 267 element.attrib = {'A': 'B', 'C': 'D'} 268 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) 269 270 def test_simpleops(self): 271 # Basic method sanity checks. 272 273 elem = ET.XML("<body><tag/></body>") 274 self.serialize_check(elem, '<body><tag /></body>') 275 e = ET.Element("tag2") 276 elem.append(e) 277 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 278 elem.remove(e) 279 self.serialize_check(elem, '<body><tag /></body>') 280 elem.insert(0, e) 281 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 282 elem.remove(e) 283 elem.extend([e]) 284 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 285 elem.remove(e) 286 287 element = ET.Element("tag", key="value") 288 self.serialize_check(element, '<tag key="value" />') # 1 289 subelement = ET.Element("subtag") 290 element.append(subelement) 291 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 292 element.insert(0, subelement) 293 self.serialize_check(element, 294 '<tag key="value"><subtag /><subtag /></tag>') # 3 295 element.remove(subelement) 296 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 297 element.remove(subelement) 298 self.serialize_check(element, '<tag key="value" />') # 5 299 with self.assertRaises(ValueError) as cm: 300 element.remove(subelement) 301 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 302 self.serialize_check(element, '<tag key="value" />') # 6 303 element[0:0] = [subelement, subelement, subelement] 304 self.serialize_check(element[1], '<subtag />') 305 self.assertEqual(element[1:9], [element[1], element[2]]) 306 self.assertEqual(element[:9:2], [element[0], element[2]]) 307 del element[1:2] 308 self.serialize_check(element, 309 '<tag key="value"><subtag /><subtag /></tag>') 310 311 def test_cdata(self): 312 # Test CDATA handling (etc). 313 314 self.serialize_check(ET.XML("<tag>hello</tag>"), 315 '<tag>hello</tag>') 316 self.serialize_check(ET.XML("<tag>hello</tag>"), 317 '<tag>hello</tag>') 318 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"), 319 '<tag>hello</tag>') 320 321 def test_file_init(self): 322 stringfile = StringIO.StringIO(SAMPLE_XML.encode("utf-8")) 323 tree = ET.ElementTree(file=stringfile) 324 self.assertEqual(tree.find("tag").tag, 'tag') 325 self.assertEqual(tree.find("section/tag").tag, 'tag') 326 327 tree = ET.ElementTree(file=SIMPLE_XMLFILE) 328 self.assertEqual(tree.find("element").tag, 'element') 329 self.assertEqual(tree.find("element/../empty-element").tag, 330 'empty-element') 331 332 def test_path_cache(self): 333 # Check that the path cache behaves sanely. 334 335 from xml.etree import ElementPath 336 337 elem = ET.XML(SAMPLE_XML) 338 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 339 cache_len_10 = len(ElementPath._cache) 340 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 341 self.assertEqual(len(ElementPath._cache), cache_len_10) 342 for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 343 self.assertGreater(len(ElementPath._cache), cache_len_10) 344 for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 345 self.assertLess(len(ElementPath._cache), 500) 346 347 def test_copy(self): 348 # Test copy handling (etc). 349 350 import copy 351 e1 = ET.XML("<tag>hello<foo/></tag>") 352 e2 = copy.copy(e1) 353 e3 = copy.deepcopy(e1) 354 e1.find("foo").tag = "bar" 355 self.serialize_check(e1, '<tag>hello<bar /></tag>') 356 self.serialize_check(e2, '<tag>hello<bar /></tag>') 357 self.serialize_check(e3, '<tag>hello<foo /></tag>') 358 359 def test_attrib(self): 360 # Test attribute handling. 361 362 elem = ET.Element("tag") 363 elem.get("key") # 1.1 364 self.assertEqual(elem.get("key", "default"), 'default') # 1.2 365 366 elem.set("key", "value") 367 self.assertEqual(elem.get("key"), 'value') # 1.3 368 369 elem = ET.Element("tag", key="value") 370 self.assertEqual(elem.get("key"), 'value') # 2.1 371 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2 372 373 attrib = {"key": "value"} 374 elem = ET.Element("tag", attrib) 375 attrib.clear() # check for aliasing issues 376 self.assertEqual(elem.get("key"), 'value') # 3.1 377 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2 378 379 attrib = {"key": "value"} 380 elem = ET.Element("tag", **attrib) 381 attrib.clear() # check for aliasing issues 382 self.assertEqual(elem.get("key"), 'value') # 4.1 383 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2 384 385 elem = ET.Element("tag", {"key": "other"}, key="value") 386 self.assertEqual(elem.get("key"), 'value') # 5.1 387 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2 388 389 elem = ET.Element('test') 390 elem.text = "aa" 391 elem.set('testa', 'testval') 392 elem.set('testb', 'test2') 393 self.assertEqual(ET.tostring(elem), 394 b'<test testa="testval" testb="test2">aa</test>') 395 self.assertEqual(sorted(elem.keys()), ['testa', 'testb']) 396 self.assertEqual(sorted(elem.items()), 397 [('testa', 'testval'), ('testb', 'test2')]) 398 self.assertEqual(elem.attrib['testb'], 'test2') 399 elem.attrib['testb'] = 'test1' 400 elem.attrib['testc'] = 'test2' 401 self.assertEqual(ET.tostring(elem), 402 b'<test testa="testval" testb="test1" testc="test2">aa</test>') 403 404 elem = ET.Element('test') 405 elem.set('a', '\r') 406 elem.set('b', '\r\n') 407 elem.set('c', '\t\n\r ') 408 elem.set('d', '\n\n') 409 self.assertEqual(ET.tostring(elem), 410 b'<test a="\r" b="\r " c="\t \r " d=" " />') 411 412 def test_makeelement(self): 413 # Test makeelement handling. 414 415 elem = ET.Element("tag") 416 attrib = {"key": "value"} 417 subelem = elem.makeelement("subtag", attrib) 418 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 419 elem.append(subelem) 420 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 421 422 elem.clear() 423 self.serialize_check(elem, '<tag />') 424 elem.append(subelem) 425 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 426 elem.extend([subelem, subelem]) 427 self.serialize_check(elem, 428 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 429 elem[:] = [subelem] 430 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 431 elem[:] = tuple([subelem]) 432 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 433 434 def test_parsefile(self): 435 # Test parsing from file. 436 437 tree = ET.parse(SIMPLE_XMLFILE) 438 normalize_crlf(tree) 439 stream = StringIO.StringIO() 440 tree.write(stream) 441 self.assertEqual(stream.getvalue(), 442 '<root>\n' 443 ' <element key="value">text</element>\n' 444 ' <element>text</element>tail\n' 445 ' <empty-element />\n' 446 '</root>') 447 tree = ET.parse(SIMPLE_NS_XMLFILE) 448 normalize_crlf(tree) 449 stream = StringIO.StringIO() 450 tree.write(stream) 451 self.assertEqual(stream.getvalue(), 452 '<ns0:root xmlns:ns0="namespace">\n' 453 ' <ns0:element key="value">text</ns0:element>\n' 454 ' <ns0:element>text</ns0:element>tail\n' 455 ' <ns0:empty-element />\n' 456 '</ns0:root>') 457 458 with open(SIMPLE_XMLFILE) as f: 459 data = f.read() 460 461 parser = ET.XMLParser() 462 self.assertRegexpMatches(parser.version, r'^Expat ') 463 parser.feed(data) 464 self.serialize_check(parser.close(), 465 '<root>\n' 466 ' <element key="value">text</element>\n' 467 ' <element>text</element>tail\n' 468 ' <empty-element />\n' 469 '</root>') 470 471 parser = ET.XMLTreeBuilder() # 1.2 compatibility 472 parser.feed(data) 473 self.serialize_check(parser.close(), 474 '<root>\n' 475 ' <element key="value">text</element>\n' 476 ' <element>text</element>tail\n' 477 ' <empty-element />\n' 478 '</root>') 479 480 target = ET.TreeBuilder() 481 parser = ET.XMLParser(target=target) 482 parser.feed(data) 483 self.serialize_check(parser.close(), 484 '<root>\n' 485 ' <element key="value">text</element>\n' 486 ' <element>text</element>tail\n' 487 ' <empty-element />\n' 488 '</root>') 489 490 def test_parseliteral(self): 491 element = ET.XML("<html><body>text</body></html>") 492 self.assertEqual(ET.tostring(element), 493 '<html><body>text</body></html>') 494 element = ET.fromstring("<html><body>text</body></html>") 495 self.assertEqual(ET.tostring(element), 496 '<html><body>text</body></html>') 497 sequence = ["<html><body>", "text</bo", "dy></html>"] 498 element = ET.fromstringlist(sequence) 499 self.assertEqual(ET.tostring(element), 500 '<html><body>text</body></html>') 501 self.assertEqual("".join(ET.tostringlist(element)), 502 '<html><body>text</body></html>') 503 self.assertEqual(ET.tostring(element, "ascii"), 504 "<?xml version='1.0' encoding='ascii'?>\n" 505 "<html><body>text</body></html>") 506 _, ids = ET.XMLID("<html><body>text</body></html>") 507 self.assertEqual(len(ids), 0) 508 _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 509 self.assertEqual(len(ids), 1) 510 self.assertEqual(ids["body"].tag, 'body') 511 512 def test_iterparse(self): 513 # Test iterparse interface. 514 515 iterparse = ET.iterparse 516 517 context = iterparse(SIMPLE_XMLFILE) 518 action, elem = next(context) 519 self.assertEqual((action, elem.tag), ('end', 'element')) 520 self.assertEqual([(action, elem.tag) for action, elem in context], [ 521 ('end', 'element'), 522 ('end', 'empty-element'), 523 ('end', 'root'), 524 ]) 525 self.assertEqual(context.root.tag, 'root') 526 527 context = iterparse(SIMPLE_NS_XMLFILE) 528 self.assertEqual([(action, elem.tag) for action, elem in context], [ 529 ('end', '{namespace}element'), 530 ('end', '{namespace}element'), 531 ('end', '{namespace}empty-element'), 532 ('end', '{namespace}root'), 533 ]) 534 535 events = () 536 context = iterparse(SIMPLE_XMLFILE, events) 537 self.assertEqual([(action, elem.tag) for action, elem in context], []) 538 539 events = () 540 context = iterparse(SIMPLE_XMLFILE, events=events) 541 self.assertEqual([(action, elem.tag) for action, elem in context], []) 542 543 events = ("start", "end") 544 context = iterparse(SIMPLE_XMLFILE, events) 545 self.assertEqual([(action, elem.tag) for action, elem in context], [ 546 ('start', 'root'), 547 ('start', 'element'), 548 ('end', 'element'), 549 ('start', 'element'), 550 ('end', 'element'), 551 ('start', 'empty-element'), 552 ('end', 'empty-element'), 553 ('end', 'root'), 554 ]) 555 556 events = ("start", "end", "start-ns", "end-ns") 557 context = iterparse(SIMPLE_NS_XMLFILE, events) 558 self.assertEqual([(action, elem.tag) if action in ("start", "end") 559 else (action, elem) 560 for action, elem in context], [ 561 ('start-ns', ('', 'namespace')), 562 ('start', '{namespace}root'), 563 ('start', '{namespace}element'), 564 ('end', '{namespace}element'), 565 ('start', '{namespace}element'), 566 ('end', '{namespace}element'), 567 ('start', '{namespace}empty-element'), 568 ('end', '{namespace}empty-element'), 569 ('end', '{namespace}root'), 570 ('end-ns', None), 571 ]) 572 573 events = ('start-ns', 'end-ns') 574 context = iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events) 575 res = [(action, elem) for action, elem in context] 576 self.assertEqual(res, [('start-ns', ('', '')), ('end-ns', None)]) 577 578 events = ("start", "end", "bogus") 579 with open(SIMPLE_XMLFILE, "rb") as f: 580 with self.assertRaises(ValueError) as cm: 581 iterparse(f, events) 582 self.assertFalse(f.closed) 583 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 584 585 source = StringIO.StringIO( 586 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 587 "<body xmlns='http://éffbot.org/ns'\n" 588 " xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") 589 events = ("start-ns",) 590 context = iterparse(source, events) 591 self.assertEqual([(action, elem) for action, elem in context], [ 592 ('start-ns', ('', u'http://\xe9ffbot.org/ns')), 593 ('start-ns', (u'cl\xe9', 'http://effbot.org/ns')), 594 ]) 595 596 source = StringIO.StringIO("<document />junk") 597 it = iterparse(source) 598 action, elem = next(it) 599 self.assertEqual((action, elem.tag), ('end', 'document')) 600 with self.assertRaises(ET.ParseError) as cm: 601 next(it) 602 self.assertEqual(str(cm.exception), 603 'junk after document element: line 1, column 12') 604 605 def test_writefile(self): 606 elem = ET.Element("tag") 607 elem.text = "text" 608 self.serialize_check(elem, '<tag>text</tag>') 609 ET.SubElement(elem, "subtag").text = "subtext" 610 self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>') 611 612 # Test tag suppression 613 elem.tag = None 614 self.serialize_check(elem, 'text<subtag>subtext</subtag>') 615 elem.insert(0, ET.Comment("comment")) 616 self.serialize_check(elem, 617 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3 618 619 elem[0] = ET.PI("key", "value") 620 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>') 621 622 def test_custom_builder(self): 623 # Test parser w. custom builder. 624 625 with open(SIMPLE_XMLFILE) as f: 626 data = f.read() 627 class Builder(list): 628 def start(self, tag, attrib): 629 self.append(("start", tag)) 630 def end(self, tag): 631 self.append(("end", tag)) 632 def data(self, text): 633 pass 634 builder = Builder() 635 parser = ET.XMLParser(target=builder) 636 parser.feed(data) 637 self.assertEqual(builder, [ 638 ('start', 'root'), 639 ('start', 'element'), 640 ('end', 'element'), 641 ('start', 'element'), 642 ('end', 'element'), 643 ('start', 'empty-element'), 644 ('end', 'empty-element'), 645 ('end', 'root'), 646 ]) 647 648 with open(SIMPLE_NS_XMLFILE) as f: 649 data = f.read() 650 class Builder(list): 651 def start(self, tag, attrib): 652 self.append(("start", tag)) 653 def end(self, tag): 654 self.append(("end", tag)) 655 def data(self, text): 656 pass 657 def pi(self, target, data): 658 self.append(("pi", target, data)) 659 def comment(self, data): 660 self.append(("comment", data)) 661 builder = Builder() 662 parser = ET.XMLParser(target=builder) 663 parser.feed(data) 664 self.assertEqual(builder, [ 665 ('pi', 'pi', 'data'), 666 ('comment', ' comment '), 667 ('start', '{namespace}root'), 668 ('start', '{namespace}element'), 669 ('end', '{namespace}element'), 670 ('start', '{namespace}element'), 671 ('end', '{namespace}element'), 672 ('start', '{namespace}empty-element'), 673 ('end', '{namespace}empty-element'), 674 ('end', '{namespace}root'), 675 ]) 676 677 678 # Element.getchildren() and ElementTree.getiterator() are deprecated. 679 @checkwarnings(("This method will be removed in future versions. " 680 "Use .+ instead.", 681 (DeprecationWarning, PendingDeprecationWarning))) 682 def test_getchildren(self): 683 # Test Element.getchildren() 684 685 with open(SIMPLE_XMLFILE, "r") as f: 686 tree = ET.parse(f) 687 self.assertEqual([summarize_list(elem.getchildren()) 688 for elem in tree.getroot().iter()], [ 689 ['element', 'element', 'empty-element'], 690 [], 691 [], 692 [], 693 ]) 694 self.assertEqual([summarize_list(elem.getchildren()) 695 for elem in tree.getiterator()], [ 696 ['element', 'element', 'empty-element'], 697 [], 698 [], 699 [], 700 ]) 701 702 elem = ET.XML(SAMPLE_XML) 703 self.assertEqual(len(elem.getchildren()), 3) 704 self.assertEqual(len(elem[2].getchildren()), 1) 705 self.assertEqual(elem[:], elem.getchildren()) 706 child1 = elem[0] 707 child2 = elem[2] 708 del elem[1:2] 709 self.assertEqual(len(elem.getchildren()), 2) 710 self.assertEqual(child1, elem[0]) 711 self.assertEqual(child2, elem[1]) 712 elem[0:2] = [child2, child1] 713 self.assertEqual(child2, elem[0]) 714 self.assertEqual(child1, elem[1]) 715 self.assertNotEqual(child1, elem[0]) 716 elem.clear() 717 self.assertEqual(elem.getchildren(), []) 718 719 def test_writestring(self): 720 elem = ET.XML("<html><body>text</body></html>") 721 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 722 elem = ET.fromstring("<html><body>text</body></html>") 723 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 724 725 def test_encoding(self): 726 def check(encoding, body=''): 727 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 728 (encoding, body)) 729 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 730 check("ascii", 'a') 731 check("us-ascii", 'a') 732 check("iso-8859-1", u'\xbd') 733 check("iso-8859-15", u'\u20ac') 734 check("cp437", u'\u221a') 735 check("mac-roman", u'\u02da') 736 737 def xml(encoding): 738 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 739 def bxml(encoding): 740 return xml(encoding).encode(encoding) 741 supported_encodings = [ 742 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 743 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 744 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 745 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 746 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 747 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 748 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 749 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 750 'cp1256', 'cp1257', 'cp1258', 751 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 752 'mac-roman', 'mac-turkish', 753 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 754 'iso2022-jp-3', 'iso2022-jp-ext', 755 'koi8-r', 'koi8-u', 756 'ptcp154', 757 ] 758 for encoding in supported_encodings: 759 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 760 761 unsupported_ascii_compatible_encodings = [ 762 'big5', 'big5hkscs', 763 'cp932', 'cp949', 'cp950', 764 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 765 'gb2312', 'gbk', 'gb18030', 766 'iso2022-kr', 'johab', 'hz', 767 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 768 'utf-7', 769 ] 770 for encoding in unsupported_ascii_compatible_encodings: 771 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 772 773 unsupported_ascii_incompatible_encodings = [ 774 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 775 'utf_32', 'utf_32_be', 'utf_32_le', 776 ] 777 for encoding in unsupported_ascii_incompatible_encodings: 778 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 779 780 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 781 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 782 783 def test_methods(self): 784 # Test serialization methods. 785 786 e = ET.XML("<html><link/><script>1 < 2</script></html>") 787 e.tail = "\n" 788 self.assertEqual(serialize(e), 789 '<html><link /><script>1 < 2</script></html>\n') 790 self.assertEqual(serialize(e, method=None), 791 '<html><link /><script>1 < 2</script></html>\n') 792 self.assertEqual(serialize(e, method="xml"), 793 '<html><link /><script>1 < 2</script></html>\n') 794 self.assertEqual(serialize(e, method="html"), 795 '<html><link><script>1 < 2</script></html>\n') 796 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 797 798 def test_issue18347(self): 799 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 800 self.assertEqual(serialize(e), 801 '<html><CamelCase>text</CamelCase></html>') 802 self.assertEqual(serialize(e, method="html"), 803 '<html><CamelCase>text</CamelCase></html>') 804 805 def test_entity(self): 806 # Test entity handling. 807 808 # 1) good entities 809 810 e = ET.XML("<document title='舰'>test</document>") 811 self.assertEqual(serialize(e, encoding="us-ascii"), 812 '<document title="舰">test</document>') 813 self.serialize_check(e, '<document title="舰">test</document>') 814 815 # 2) bad entities 816 817 with self.assertRaises(ET.ParseError) as cm: 818 ET.XML("<document>&entity;</document>") 819 self.assertEqual(str(cm.exception), 820 'undefined entity: line 1, column 10') 821 822 with self.assertRaises(ET.ParseError) as cm: 823 ET.XML(ENTITY_XML) 824 self.assertEqual(str(cm.exception), 825 'undefined entity &entity;: line 5, column 10') 826 827 # 3) custom entity 828 829 parser = ET.XMLParser() 830 parser.entity["entity"] = "text" 831 parser.feed(ENTITY_XML) 832 root = parser.close() 833 self.serialize_check(root, '<document>text</document>') 834 835 def test_namespace(self): 836 # Test namespace issues. 837 838 # 1) xml namespace 839 840 elem = ET.XML("<tag xml:lang='en' />") 841 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 842 843 # 2) other "well-known" namespaces 844 845 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 846 self.serialize_check(elem, 847 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 848 849 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 850 self.serialize_check(elem, 851 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 852 853 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 854 self.serialize_check(elem, 855 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 856 857 # 3) unknown namespaces 858 elem = ET.XML(SAMPLE_XML_NS) 859 self.serialize_check(elem, 860 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 861 ' <ns0:tag>text</ns0:tag>\n' 862 ' <ns0:tag />\n' 863 ' <ns0:section>\n' 864 ' <ns0:tag>subtext</ns0:tag>\n' 865 ' </ns0:section>\n' 866 '</ns0:body>') 867 868 def test_qname(self): 869 # Test QName handling. 870 871 # 1) decorated tags 872 873 elem = ET.Element("{uri}tag") 874 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 875 elem = ET.Element(ET.QName("{uri}tag")) 876 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 877 elem = ET.Element(ET.QName("uri", "tag")) 878 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 879 elem = ET.Element(ET.QName("uri", "tag")) 880 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 881 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 882 self.serialize_check(elem, 883 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 884 885 # 2) decorated attributes 886 887 elem.clear() 888 elem.attrib["{uri}key"] = "value" 889 self.serialize_check(elem, 890 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 891 892 elem.clear() 893 elem.attrib[ET.QName("{uri}key")] = "value" 894 self.serialize_check(elem, 895 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 896 897 # 3) decorated values are not converted by default, but the 898 # QName wrapper can be used for values 899 900 elem.clear() 901 elem.attrib["{uri}key"] = "{uri}value" 902 self.serialize_check(elem, 903 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 904 905 elem.clear() 906 elem.attrib["{uri}key"] = ET.QName("{uri}value") 907 self.serialize_check(elem, 908 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 909 910 elem.clear() 911 subelem = ET.Element("tag") 912 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 913 elem.append(subelem) 914 elem.append(subelem) 915 self.serialize_check(elem, 916 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 917 '<tag ns1:key="ns2:value" />' 918 '<tag ns1:key="ns2:value" />' 919 '</ns0:tag>') # 3.3 920 921 # 4) Direct QName tests 922 923 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 924 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 925 q1 = ET.QName('ns', 'tag') 926 q2 = ET.QName('ns', 'tag') 927 self.assertEqual(q1, q2) 928 q2 = ET.QName('ns', 'other-tag') 929 self.assertNotEqual(q1, q2) 930 self.assertNotEqual(q1, 'ns:tag') 931 self.assertEqual(q1, '{ns}tag') 932 933 def test_doctype_public(self): 934 # Test PUBLIC doctype. 935 936 elem = ET.XML('<!DOCTYPE html PUBLIC' 937 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 938 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 939 '<html>text</html>') 940 941 def test_xpath_tokenizer(self): 942 # Test the XPath tokenizer. 943 from xml.etree import ElementPath 944 def check(p, expected): 945 self.assertEqual([op or tag 946 for op, tag in ElementPath.xpath_tokenizer(p)], 947 expected) 948 949 # tests from the xml specification 950 check("*", ['*']) 951 check("text()", ['text', '()']) 952 check("@name", ['@', 'name']) 953 check("@*", ['@', '*']) 954 check("para[1]", ['para', '[', '1', ']']) 955 check("para[last()]", ['para', '[', 'last', '()', ']']) 956 check("*/para", ['*', '/', 'para']) 957 check("/doc/chapter[5]/section[2]", 958 ['/', 'doc', '/', 'chapter', '[', '5', ']', 959 '/', 'section', '[', '2', ']']) 960 check("chapter//para", ['chapter', '//', 'para']) 961 check("//para", ['//', 'para']) 962 check("//olist/item", ['//', 'olist', '/', 'item']) 963 check(".", ['.']) 964 check(".//para", ['.', '//', 'para']) 965 check("..", ['..']) 966 check("../@lang", ['..', '/', '@', 'lang']) 967 check("chapter[title]", ['chapter', '[', 'title', ']']) 968 check("employee[@secretary and @assistant]", ['employee', 969 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 970 971 # additional tests 972 check("{http://spam}egg", ['{http://spam}egg']) 973 check("./spam.egg", ['.', '/', 'spam.egg']) 974 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 975 976 def test_processinginstruction(self): 977 # Test ProcessingInstruction directly 978 979 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 980 '<?test instruction?>') 981 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')), 982 '<?test instruction?>') 983 984 # Issue #2746 985 986 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), 987 '<?test <testing&>?>') 988 self.assertEqual(ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1'), 989 "<?xml version='1.0' encoding='latin1'?>\n" 990 "<?test <testing&>\xe3?>") 991 992 def test_html_empty_elems_serialization(self): 993 # issue 15970 994 # from http://www.w3.org/TR/html401/index/elements.html 995 for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', 996 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']: 997 for elem in [element, element.lower()]: 998 expected = '<%s>' % elem 999 serialized = serialize(ET.XML('<%s />' % elem), method='html') 1000 self.assertEqual(serialized, expected) 1001 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)), 1002 method='html') 1003 self.assertEqual(serialized, expected) 1004 1005 1006# 1007# xinclude tests (samples from appendix C of the xinclude specification) 1008 1009XINCLUDE = {} 1010 1011XINCLUDE["C1.xml"] = """\ 1012<?xml version='1.0'?> 1013<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1014 <p>120 Mz is adequate for an average home user.</p> 1015 <xi:include href="disclaimer.xml"/> 1016</document> 1017""" 1018 1019XINCLUDE["disclaimer.xml"] = """\ 1020<?xml version='1.0'?> 1021<disclaimer> 1022 <p>The opinions represented herein represent those of the individual 1023 and should not be interpreted as official policy endorsed by this 1024 organization.</p> 1025</disclaimer> 1026""" 1027 1028XINCLUDE["C2.xml"] = """\ 1029<?xml version='1.0'?> 1030<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1031 <p>This document has been accessed 1032 <xi:include href="count.txt" parse="text"/> times.</p> 1033</document> 1034""" 1035 1036XINCLUDE["count.txt"] = "324387" 1037 1038XINCLUDE["C2b.xml"] = """\ 1039<?xml version='1.0'?> 1040<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1041 <p>This document has been <em>accessed</em> 1042 <xi:include href="count.txt" parse="text"/> times.</p> 1043</document> 1044""" 1045 1046XINCLUDE["C3.xml"] = """\ 1047<?xml version='1.0'?> 1048<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1049 <p>The following is the source of the "data.xml" resource:</p> 1050 <example><xi:include href="data.xml" parse="text"/></example> 1051</document> 1052""" 1053 1054XINCLUDE["data.xml"] = """\ 1055<?xml version='1.0'?> 1056<data> 1057 <item><![CDATA[Brooks & Shields]]></item> 1058</data> 1059""" 1060 1061XINCLUDE["C5.xml"] = """\ 1062<?xml version='1.0'?> 1063<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1064 <xi:include href="example.txt" parse="text"> 1065 <xi:fallback> 1066 <xi:include href="fallback-example.txt" parse="text"> 1067 <xi:fallback><a href="mailto:[email protected]">Report error</a></xi:fallback> 1068 </xi:include> 1069 </xi:fallback> 1070 </xi:include> 1071</div> 1072""" 1073 1074XINCLUDE["default.xml"] = """\ 1075<?xml version='1.0'?> 1076<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1077 <p>Example.</p> 1078 <xi:include href="{}"/> 1079</document> 1080""".format(cgi.escape(SIMPLE_XMLFILE, True)) 1081 1082# 1083# badly formatted xi:include tags 1084 1085XINCLUDE_BAD = {} 1086 1087XINCLUDE_BAD["B1.xml"] = """\ 1088<?xml version='1.0'?> 1089<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1090 <p>120 Mz is adequate for an average home user.</p> 1091 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1092</document> 1093""" 1094 1095XINCLUDE_BAD["B2.xml"] = """\ 1096<?xml version='1.0'?> 1097<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1098 <xi:fallback></xi:fallback> 1099</div> 1100""" 1101 1102class XIncludeTest(unittest.TestCase): 1103 1104 def xinclude_loader(self, href, parse="xml", encoding=None): 1105 try: 1106 data = XINCLUDE[href] 1107 except KeyError: 1108 raise IOError("resource not found") 1109 if parse == "xml": 1110 data = ET.XML(data) 1111 return data 1112 1113 def none_loader(self, href, parser, encoding=None): 1114 return None 1115 1116 def test_xinclude_default(self): 1117 from xml.etree import ElementInclude 1118 doc = self.xinclude_loader('default.xml') 1119 ElementInclude.include(doc) 1120 self.assertEqual(serialize(doc), 1121 '<document>\n' 1122 ' <p>Example.</p>\n' 1123 ' <root>\n' 1124 ' <element key="value">text</element>\n' 1125 ' <element>text</element>tail\n' 1126 ' <empty-element />\n' 1127 '</root>\n' 1128 '</document>') 1129 1130 def test_xinclude(self): 1131 from xml.etree import ElementInclude 1132 1133 # Basic inclusion example (XInclude C.1) 1134 document = self.xinclude_loader("C1.xml") 1135 ElementInclude.include(document, self.xinclude_loader) 1136 self.assertEqual(serialize(document), 1137 '<document>\n' 1138 ' <p>120 Mz is adequate for an average home user.</p>\n' 1139 ' <disclaimer>\n' 1140 ' <p>The opinions represented herein represent those of the individual\n' 1141 ' and should not be interpreted as official policy endorsed by this\n' 1142 ' organization.</p>\n' 1143 '</disclaimer>\n' 1144 '</document>') # C1 1145 1146 # Textual inclusion example (XInclude C.2) 1147 document = self.xinclude_loader("C2.xml") 1148 ElementInclude.include(document, self.xinclude_loader) 1149 self.assertEqual(serialize(document), 1150 '<document>\n' 1151 ' <p>This document has been accessed\n' 1152 ' 324387 times.</p>\n' 1153 '</document>') # C2 1154 1155 # Textual inclusion after sibling element (based on modified XInclude C.2) 1156 document = self.xinclude_loader("C2b.xml") 1157 ElementInclude.include(document, self.xinclude_loader) 1158 self.assertEqual(serialize(document), 1159 '<document>\n' 1160 ' <p>This document has been <em>accessed</em>\n' 1161 ' 324387 times.</p>\n' 1162 '</document>') # C2b 1163 1164 # Textual inclusion of XML example (XInclude C.3) 1165 document = self.xinclude_loader("C3.xml") 1166 ElementInclude.include(document, self.xinclude_loader) 1167 self.assertEqual(serialize(document), 1168 '<document>\n' 1169 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1170 " <example><?xml version='1.0'?>\n" 1171 '<data>\n' 1172 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1173 '</data>\n' 1174 '</example>\n' 1175 '</document>') # C3 1176 1177 # Fallback example (XInclude C.5) 1178 # Note! Fallback support is not yet implemented 1179 document = self.xinclude_loader("C5.xml") 1180 with self.assertRaises(IOError) as cm: 1181 ElementInclude.include(document, self.xinclude_loader) 1182 self.assertEqual(str(cm.exception), 'resource not found') 1183 self.assertEqual(serialize(document), 1184 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1185 ' <ns0:include href="example.txt" parse="text">\n' 1186 ' <ns0:fallback>\n' 1187 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1188 ' <ns0:fallback><a href="mailto:[email protected]">Report error</a></ns0:fallback>\n' 1189 ' </ns0:include>\n' 1190 ' </ns0:fallback>\n' 1191 ' </ns0:include>\n' 1192 '</div>') # C5 1193 1194 def test_xinclude_failures(self): 1195 from xml.etree import ElementInclude 1196 1197 # Test failure to locate included XML file. 1198 document = ET.XML(XINCLUDE["C1.xml"]) 1199 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1200 ElementInclude.include(document, loader=self.none_loader) 1201 self.assertEqual(str(cm.exception), 1202 "cannot load 'disclaimer.xml' as 'xml'") 1203 1204 # Test failure to locate included text file. 1205 document = ET.XML(XINCLUDE["C2.xml"]) 1206 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1207 ElementInclude.include(document, loader=self.none_loader) 1208 self.assertEqual(str(cm.exception), 1209 "cannot load 'count.txt' as 'text'") 1210 1211 # Test bad parse type. 1212 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1213 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1214 ElementInclude.include(document, loader=self.none_loader) 1215 self.assertEqual(str(cm.exception), 1216 "unknown parse type in xi:include tag ('BAD_TYPE')") 1217 1218 # Test xi:fallback outside xi:include. 1219 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1220 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1221 ElementInclude.include(document, loader=self.none_loader) 1222 self.assertEqual(str(cm.exception), 1223 "xi:fallback tag must be child of xi:include " 1224 "('{http://www.w3.org/2001/XInclude}fallback')") 1225 1226# -------------------------------------------------------------------- 1227# reported bugs 1228 1229class BugsTest(unittest.TestCase): 1230 1231 def test_bug_xmltoolkit21(self): 1232 # marshaller gives obscure errors for non-string values 1233 1234 def check(elem): 1235 with self.assertRaises(TypeError) as cm: 1236 serialize(elem) 1237 self.assertEqual(str(cm.exception), 1238 'cannot serialize 123 (type int)') 1239 1240 elem = ET.Element(123) 1241 check(elem) # tag 1242 1243 elem = ET.Element("elem") 1244 elem.text = 123 1245 check(elem) # text 1246 1247 elem = ET.Element("elem") 1248 elem.tail = 123 1249 check(elem) # tail 1250 1251 elem = ET.Element("elem") 1252 elem.set(123, "123") 1253 check(elem) # attribute key 1254 1255 elem = ET.Element("elem") 1256 elem.set("123", 123) 1257 check(elem) # attribute value 1258 1259 def test_bug_xmltoolkit25(self): 1260 # typo in ElementTree.findtext 1261 1262 elem = ET.XML(SAMPLE_XML) 1263 tree = ET.ElementTree(elem) 1264 self.assertEqual(tree.findtext("tag"), 'text') 1265 self.assertEqual(tree.findtext("section/tag"), 'subtext') 1266 1267 def test_bug_xmltoolkit28(self): 1268 # .//tag causes exceptions 1269 1270 tree = ET.XML("<doc><table><tbody/></table></doc>") 1271 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 1272 self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1273 1274 def test_bug_xmltoolkitX1(self): 1275 # dump() doesn't flush the output buffer 1276 1277 tree = ET.XML("<doc><table><tbody/></table></doc>") 1278 with support.captured_stdout() as stdout: 1279 ET.dump(tree) 1280 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1281 1282 def test_bug_xmltoolkit39(self): 1283 # non-ascii element and attribute names doesn't work 1284 1285 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1286 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1287 1288 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1289 b"<tag \xe4ttr='välue' />") 1290 self.assertEqual(tree.attrib, {u'\xe4ttr': u'v\xe4lue'}) 1291 self.assertEqual(ET.tostring(tree, "utf-8"), 1292 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1293 1294 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1295 b'<t\xe4g>text</t\xe4g>') 1296 self.assertEqual(ET.tostring(tree, "utf-8"), 1297 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 1298 1299 tree = ET.Element(u"t\u00e4g") 1300 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1301 1302 tree = ET.Element("tag") 1303 tree.set(u"\u00e4ttr", u"v\u00e4lue") 1304 self.assertEqual(ET.tostring(tree, "utf-8"), 1305 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1306 1307 def test_bug_xmltoolkit54(self): 1308 # problems handling internally defined entities 1309 1310 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 1311 '<doc>&ldots;</doc>') 1312 self.assertEqual(serialize(e), '<doc>舰</doc>') 1313 1314 def test_bug_xmltoolkit55(self): 1315 # make sure we're reporting the first error, not the last 1316 1317 with self.assertRaises(ET.ParseError) as cm: 1318 ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'>" 1319 '<doc>&ldots;&ndots;&rdots;</doc>') 1320 self.assertEqual(str(cm.exception), 1321 'undefined entity &ldots;: line 1, column 36') 1322 1323 def test_bug_xmltoolkit60(self): 1324 # Handle crash in stream source. 1325 1326 class ExceptionFile: 1327 def read(self, x): 1328 raise IOError 1329 1330 self.assertRaises(IOError, ET.parse, ExceptionFile()) 1331 1332 def test_bug_xmltoolkit62(self): 1333 # Don't crash when using custom entities. 1334 1335 ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'} 1336 parser = ET.XMLTreeBuilder() 1337 parser.entity.update(ENTITIES) 1338 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 1339<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 1340<patent-application-publication> 1341<subdoc-abstract> 1342<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 1343</subdoc-abstract> 1344</patent-application-publication>""") 1345 t = parser.close() 1346 self.assertEqual(t.find('.//paragraph').text, 1347 u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 1348 1349 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 1350 def test_bug_xmltoolkit63(self): 1351 # Check reference leak. 1352 def xmltoolkit63(): 1353 tree = ET.TreeBuilder() 1354 tree.start("tag", {}) 1355 tree.data("text") 1356 tree.end("tag") 1357 1358 xmltoolkit63() 1359 count = sys.getrefcount(None) 1360 for i in range(1000): 1361 xmltoolkit63() 1362 self.assertEqual(sys.getrefcount(None), count) 1363 1364 def test_bug_200708_newline(self): 1365 # Preserve newlines in attributes. 1366 1367 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 1368 self.assertEqual(ET.tostring(e), 1369 b'<SomeTag text="def _f(): return 3 " />') 1370 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 1371 'def _f():\n return 3\n') 1372 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 1373 b'<SomeTag text="def _f(): return 3 " />') 1374 1375 def test_bug_200708_close(self): 1376 # Test default builder. 1377 parser = ET.XMLParser() # default 1378 parser.feed("<element>some text</element>") 1379 self.assertEqual(parser.close().tag, 'element') 1380 1381 # Test custom builder. 1382 class EchoTarget: 1383 def start(self, tag, attrib): 1384 pass 1385 def end(self, tag): 1386 pass 1387 def data(self, text): 1388 pass 1389 def close(self): 1390 return ET.Element("element") # simulate root 1391 parser = ET.XMLParser(target=EchoTarget()) 1392 parser.feed("<element>some text</element>") 1393 self.assertEqual(parser.close().tag, 'element') 1394 1395 def test_bug_200709_default_namespace(self): 1396 e = ET.Element("{default}elem") 1397 s = ET.SubElement(e, "{default}elem") 1398 self.assertEqual(serialize(e, default_namespace="default"), # 1 1399 '<elem xmlns="default"><elem /></elem>') 1400 1401 e = ET.Element("{default}elem") 1402 s = ET.SubElement(e, "{default}elem") 1403 s = ET.SubElement(e, "{not-default}elem") 1404 self.assertEqual(serialize(e, default_namespace="default"), # 2 1405 '<elem xmlns="default" xmlns:ns1="not-default">' 1406 '<elem />' 1407 '<ns1:elem />' 1408 '</elem>') 1409 1410 e = ET.Element("{default}elem") 1411 s = ET.SubElement(e, "{default}elem") 1412 s = ET.SubElement(e, "elem") # unprefixed name 1413 with self.assertRaises(ValueError) as cm: 1414 serialize(e, default_namespace="default") # 3 1415 self.assertEqual(str(cm.exception), 1416 'cannot use non-qualified names with default_namespace option') 1417 1418 def test_bug_200709_register_namespace(self): 1419 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1420 self.assertEqual(ET.tostring(e), 1421 '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 1422 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 1423 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1424 self.assertEqual(ET.tostring(e), 1425 '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 1426 1427 # And the Dublin Core namespace is in the default list: 1428 1429 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 1430 self.assertEqual(ET.tostring(e), 1431 '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 1432 1433 def test_bug_200709_element_comment(self): 1434 # Not sure if this can be fixed, really (since the serializer needs 1435 # ET.Comment, not cET.comment). 1436 1437 a = ET.Element('a') 1438 a.append(ET.Comment('foo')) 1439 self.assertEqual(a[0].tag, ET.Comment) 1440 1441 a = ET.Element('a') 1442 a.append(ET.PI('foo')) 1443 self.assertEqual(a[0].tag, ET.PI) 1444 1445 def test_bug_200709_element_insert(self): 1446 a = ET.Element('a') 1447 b = ET.SubElement(a, 'b') 1448 c = ET.SubElement(a, 'c') 1449 d = ET.Element('d') 1450 a.insert(0, d) 1451 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 1452 a.insert(-1, d) 1453 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 1454 1455 def test_bug_200709_iter_comment(self): 1456 a = ET.Element('a') 1457 b = ET.SubElement(a, 'b') 1458 comment_b = ET.Comment("TEST-b") 1459 b.append(comment_b) 1460 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 1461 1462 # -------------------------------------------------------------------- 1463 # reported on bugs.python.org 1464 1465 def test_bug_1534630(self): 1466 bob = ET.TreeBuilder() 1467 e = bob.data("data") 1468 e = bob.start("tag", {}) 1469 e = bob.end("tag") 1470 e = bob.close() 1471 self.assertEqual(serialize(e), '<tag />') 1472 1473 def test_issue6233(self): 1474 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 1475 b'<body>t\xc3\xa3g</body>') 1476 self.assertEqual(ET.tostring(e, 'ascii'), 1477 b"<?xml version='1.0' encoding='ascii'?>\n" 1478 b'<body>tãg</body>') 1479 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1480 b'<body>t\xe3g</body>') 1481 self.assertEqual(ET.tostring(e, 'ascii'), 1482 b"<?xml version='1.0' encoding='ascii'?>\n" 1483 b'<body>tãg</body>') 1484 1485 def test_issue3151(self): 1486 e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1487 self.assertEqual(e.tag, '{${stuff}}localname') 1488 t = ET.ElementTree(e) 1489 self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />') 1490 1491 def test_issue6565(self): 1492 elem = ET.XML("<body><tag/></body>") 1493 self.assertEqual(summarize_list(elem), ['tag']) 1494 newelem = ET.XML(SAMPLE_XML) 1495 elem[:] = newelem[:] 1496 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 1497 1498 def test_issue10777(self): 1499 # Registering a namespace twice caused a "dictionary changed size during 1500 # iteration" bug. 1501 1502 ET.register_namespace('test10777', 'http://myuri/') 1503 ET.register_namespace('test10777', 'http://myuri/') 1504 1505 def check_expat224_utf8_bug(self, text): 1506 xml = b'<a b="%s"/>' % text 1507 root = ET.XML(xml) 1508 self.assertEqual(root.get('b'), text.decode('utf-8')) 1509 1510 def test_expat224_utf8_bug(self): 1511 # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. 1512 # Check that Expat 2.2.4 fixed the bug. 1513 # 1514 # Test buffer bounds at odd and even positions. 1515 1516 text = b'\xc3\xa0' * 1024 1517 self.check_expat224_utf8_bug(text) 1518 1519 text = b'x' + b'\xc3\xa0' * 1024 1520 self.check_expat224_utf8_bug(text) 1521 1522 def test_expat224_utf8_bug_file(self): 1523 with open(UTF8_BUG_XMLFILE, 'rb') as fp: 1524 raw = fp.read() 1525 root = ET.fromstring(raw) 1526 xmlattr = root.get('b') 1527 1528 # "Parse" manually the XML file to extract the value of the 'b' 1529 # attribute of the <a b='xxx' /> XML element 1530 text = raw.decode('utf-8').strip() 1531 text = text.replace('\r\n', ' ') 1532 text = text[6:-4] 1533 self.assertEqual(root.get('b'), text) 1534 1535 1536# -------------------------------------------------------------------- 1537 1538 1539class BasicElementTest(unittest.TestCase): 1540 @python_only 1541 def test_cyclic_gc(self): 1542 class Dummy: 1543 pass 1544 1545 # Test the shortest cycle: d->element->d 1546 d = Dummy() 1547 d.dummyref = ET.Element('joe', attr=d) 1548 wref = weakref.ref(d) 1549 del d 1550 gc_collect() 1551 self.assertIsNone(wref()) 1552 1553 # A longer cycle: d->e->e2->d 1554 e = ET.Element('joe') 1555 d = Dummy() 1556 d.dummyref = e 1557 wref = weakref.ref(d) 1558 e2 = ET.SubElement(e, 'foo', attr=d) 1559 del d, e, e2 1560 gc_collect() 1561 self.assertIsNone(wref()) 1562 1563 # A cycle between Element objects as children of one another 1564 # e1->e2->e3->e1 1565 e1 = ET.Element('e1') 1566 e2 = ET.Element('e2') 1567 e3 = ET.Element('e3') 1568 e1.append(e2) 1569 e2.append(e2) 1570 e3.append(e1) 1571 wref = weakref.ref(e1) 1572 del e1, e2, e3 1573 gc_collect() 1574 self.assertIsNone(wref()) 1575 1576 @python_only 1577 def test_weakref(self): 1578 flag = [] 1579 def wref_cb(w): 1580 flag.append(True) 1581 e = ET.Element('e') 1582 wref = weakref.ref(e, wref_cb) 1583 self.assertEqual(wref().tag, 'e') 1584 del e 1585 self.assertEqual(flag, [True]) 1586 self.assertEqual(wref(), None) 1587 1588 @python_only 1589 def test_get_keyword_args(self): 1590 e1 = ET.Element('foo' , x=1, y=2, z=3) 1591 self.assertEqual(e1.get('x', default=7), 1) 1592 self.assertEqual(e1.get('w', default=7), 7) 1593 1594 1595class BadElementTest(unittest.TestCase): 1596 def test_extend_mutable_list(self): 1597 class X(object): 1598 @property 1599 def __class__(self): 1600 L[:] = [ET.Element('baz')] 1601 return ET.Element 1602 L = [X()] 1603 e = ET.Element('foo') 1604 try: 1605 e.extend(L) 1606 except TypeError: 1607 pass 1608 1609 if ET is pyET: 1610 class Y(X, ET.Element): 1611 pass 1612 L = [Y('x')] 1613 e = ET.Element('foo') 1614 e.extend(L) 1615 1616 def test_extend_mutable_list2(self): 1617 class X(object): 1618 @property 1619 def __class__(self): 1620 del L[:] 1621 return ET.Element 1622 L = [X(), ET.Element('baz')] 1623 e = ET.Element('foo') 1624 try: 1625 e.extend(L) 1626 except TypeError: 1627 pass 1628 1629 if ET is pyET: 1630 class Y(X, ET.Element): 1631 pass 1632 L = [Y('bar'), ET.Element('baz')] 1633 e = ET.Element('foo') 1634 e.extend(L) 1635 1636 @python_only 1637 def test_remove_with_mutating(self): 1638 class X(ET.Element): 1639 def __eq__(self, o): 1640 del e[:] 1641 return False 1642 __hash__ = object.__hash__ 1643 e = ET.Element('foo') 1644 e.extend([X('bar')]) 1645 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 1646 1647 e = ET.Element('foo') 1648 e.extend([ET.Element('bar')]) 1649 self.assertRaises(ValueError, e.remove, X('baz')) 1650 1651 def test_recursive_repr(self): 1652 # Issue #25455 1653 e = ET.Element('foo') 1654 with swap_attr(e, 'tag', e): 1655 with self.assertRaises(RuntimeError): 1656 repr(e) # Should not crash 1657 1658 def test_element_get_text(self): 1659 # Issue #27863 1660 class X(str): 1661 def __del__(self): 1662 try: 1663 elem.text 1664 except NameError: 1665 pass 1666 1667 b = ET.TreeBuilder() 1668 b.start('tag', {}) 1669 b.data('ABCD') 1670 b.data(X('EFGH')) 1671 b.data('IJKL') 1672 b.end('tag') 1673 1674 elem = b.close() 1675 self.assertEqual(elem.text, 'ABCDEFGHIJKL') 1676 1677 def test_element_get_tail(self): 1678 # Issue #27863 1679 class X(str): 1680 def __del__(self): 1681 try: 1682 elem[0].tail 1683 except NameError: 1684 pass 1685 1686 b = ET.TreeBuilder() 1687 b.start('root', {}) 1688 b.start('tag', {}) 1689 b.end('tag') 1690 b.data('ABCD') 1691 b.data(X('EFGH')) 1692 b.data('IJKL') 1693 b.end('root') 1694 1695 elem = b.close() 1696 self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') 1697 1698 def test_element_iter(self): 1699 # Issue #27863 1700 e = ET.Element('tag') 1701 e.extend([None]) # non-Element 1702 1703 it = e.iter() 1704 self.assertIs(next(it), e) 1705 self.assertRaises((AttributeError, TypeError), list, it) 1706 1707 def test_subscr(self): 1708 # Issue #27863 1709 class X: 1710 def __index__(self): 1711 del e[:] 1712 return 1 1713 1714 e = ET.Element('elem') 1715 e.append(ET.Element('child')) 1716 e[:X()] # shouldn't crash 1717 1718 e.append(ET.Element('child')) 1719 e[0:10:X()] # shouldn't crash 1720 1721 def test_ass_subscr(self): 1722 # Issue #27863 1723 class X: 1724 def __index__(self): 1725 e[:] = [] 1726 return 1 1727 1728 e = ET.Element('elem') 1729 for _ in range(10): 1730 e.insert(0, ET.Element('child')) 1731 1732 e[0:10:X()] = [] # shouldn't crash 1733 1734 1735class MutatingElementPath(str): 1736 def __new__(cls, elem, *args): 1737 self = str.__new__(cls, *args) 1738 self.elem = elem 1739 return self 1740 def __eq__(self, o): 1741 del self.elem[:] 1742 return True 1743 __hash__ = str.__hash__ 1744 1745class BadElementPath(str): 1746 def __eq__(self, o): 1747 raise 1.0/0.0 1748 __hash__ = str.__hash__ 1749 1750class BadElementPathTest(unittest.TestCase): 1751 def setUp(self): 1752 super(BadElementPathTest, self).setUp() 1753 from xml.etree import ElementPath 1754 self.path_cache = ElementPath._cache 1755 ElementPath._cache = {} 1756 1757 def tearDown(self): 1758 from xml.etree import ElementPath 1759 ElementPath._cache = self.path_cache 1760 super(BadElementPathTest, self).tearDown() 1761 1762 def test_find_with_mutating(self): 1763 e = ET.Element('foo') 1764 e.extend([ET.Element('bar')]) 1765 e.find(MutatingElementPath(e, 'x')) 1766 1767 def test_find_with_error(self): 1768 e = ET.Element('foo') 1769 e.extend([ET.Element('bar')]) 1770 try: 1771 e.find(BadElementPath('x')) 1772 except ZeroDivisionError: 1773 pass 1774 1775 def test_findtext_with_mutating(self): 1776 e = ET.Element('foo') 1777 e.extend([ET.Element('bar')]) 1778 e.findtext(MutatingElementPath(e, 'x')) 1779 1780 def test_findtext_with_error(self): 1781 e = ET.Element('foo') 1782 e.extend([ET.Element('bar')]) 1783 try: 1784 e.findtext(BadElementPath('x')) 1785 except ZeroDivisionError: 1786 pass 1787 1788 def test_findall_with_mutating(self): 1789 e = ET.Element('foo') 1790 e.extend([ET.Element('bar')]) 1791 e.findall(MutatingElementPath(e, 'x')) 1792 1793 def test_findall_with_error(self): 1794 e = ET.Element('foo') 1795 e.extend([ET.Element('bar')]) 1796 try: 1797 e.findall(BadElementPath('x')) 1798 except ZeroDivisionError: 1799 pass 1800 1801 1802class ElementTreeTypeTest(unittest.TestCase): 1803 def test_istype(self): 1804 self.assertIsInstance(ET.ParseError, type) 1805 self.assertIsInstance(ET.QName, type) 1806 self.assertIsInstance(ET.ElementTree, type) 1807 if ET is pyET: 1808 self.assertIsInstance(ET.Element, type) 1809 self.assertIsInstance(ET.TreeBuilder, type) 1810 self.assertIsInstance(ET.XMLParser, type) 1811 1812 @python_only 1813 def test_Element_subclass_trivial(self): 1814 class MyElement(ET.Element): 1815 pass 1816 1817 mye = MyElement('foo') 1818 self.assertIsInstance(mye, ET.Element) 1819 self.assertIsInstance(mye, MyElement) 1820 self.assertEqual(mye.tag, 'foo') 1821 1822 # test that attribute assignment works (issue 14849) 1823 mye.text = "joe" 1824 self.assertEqual(mye.text, "joe") 1825 1826 @python_only 1827 def test_Element_subclass_constructor(self): 1828 class MyElement(ET.Element): 1829 def __init__(self, tag, attrib={}, **extra): 1830 super(MyElement, self).__init__(tag + '__', attrib, **extra) 1831 1832 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 1833 self.assertEqual(mye.tag, 'foo__') 1834 self.assertEqual(sorted(mye.items()), 1835 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 1836 1837 @python_only 1838 def test_Element_subclass_new_method(self): 1839 class MyElement(ET.Element): 1840 def newmethod(self): 1841 return self.tag 1842 1843 mye = MyElement('joe') 1844 self.assertEqual(mye.newmethod(), 'joe') 1845 1846 1847class ElementFindTest(unittest.TestCase): 1848 @python_only 1849 def test_simplefind(self): 1850 ET.ElementPath 1851 with swap_attr(ET, 'ElementPath', ET._SimpleElementPath()): 1852 e = ET.XML(SAMPLE_XML) 1853 self.assertEqual(e.find('tag').tag, 'tag') 1854 self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') 1855 self.assertEqual(e.findtext('tag'), 'text') 1856 self.assertIsNone(e.findtext('tog')) 1857 self.assertEqual(e.findtext('tog', 'default'), 'default') 1858 self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text') 1859 self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag']) 1860 self.assertEqual(summarize_list(e.findall('.//tag')), ['tag', 'tag', 'tag']) 1861 1862 # Path syntax doesn't work in this case. 1863 self.assertIsNone(e.find('section/tag')) 1864 self.assertIsNone(e.findtext('section/tag')) 1865 self.assertEqual(summarize_list(e.findall('section/tag')), []) 1866 1867 def test_find_simple(self): 1868 e = ET.XML(SAMPLE_XML) 1869 self.assertEqual(e.find('tag').tag, 'tag') 1870 self.assertEqual(e.find('section/tag').tag, 'tag') 1871 self.assertEqual(e.find('./tag').tag, 'tag') 1872 1873 e[2] = ET.XML(SAMPLE_SECTION) 1874 self.assertEqual(e.find('section/nexttag').tag, 'nexttag') 1875 1876 self.assertEqual(e.findtext('./tag'), 'text') 1877 self.assertEqual(e.findtext('section/tag'), 'subtext') 1878 1879 # section/nexttag is found but has no text 1880 self.assertEqual(e.findtext('section/nexttag'), '') 1881 self.assertEqual(e.findtext('section/nexttag', 'default'), '') 1882 1883 # tog doesn't exist and 'default' kicks in 1884 self.assertIsNone(e.findtext('tog')) 1885 self.assertEqual(e.findtext('tog', 'default'), 'default') 1886 1887 # Issue #16922 1888 self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '') 1889 1890 def test_find_xpath(self): 1891 LINEAR_XML = ''' 1892 <body> 1893 <tag class='a'/> 1894 <tag class='b'/> 1895 <tag class='c'/> 1896 <tag class='d'/> 1897 </body>''' 1898 e = ET.XML(LINEAR_XML) 1899 1900 # Test for numeric indexing and last() 1901 self.assertEqual(e.find('./tag[1]').attrib['class'], 'a') 1902 self.assertEqual(e.find('./tag[2]').attrib['class'], 'b') 1903 self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd') 1904 self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c') 1905 self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b') 1906 1907 def test_findall(self): 1908 e = ET.XML(SAMPLE_XML) 1909 e[2] = ET.XML(SAMPLE_SECTION) 1910 self.assertEqual(summarize_list(e.findall('.')), ['body']) 1911 self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag']) 1912 self.assertEqual(summarize_list(e.findall('tog')), []) 1913 self.assertEqual(summarize_list(e.findall('tog/foo')), []) 1914 self.assertEqual(summarize_list(e.findall('*')), 1915 ['tag', 'tag', 'section']) 1916 self.assertEqual(summarize_list(e.findall('.//tag')), 1917 ['tag'] * 4) 1918 self.assertEqual(summarize_list(e.findall('section/tag')), ['tag']) 1919 self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2) 1920 self.assertEqual(summarize_list(e.findall('section/*')), 1921 ['tag', 'nexttag', 'nextsection']) 1922 self.assertEqual(summarize_list(e.findall('section//*')), 1923 ['tag', 'nexttag', 'nextsection', 'tag']) 1924 self.assertEqual(summarize_list(e.findall('section/.//*')), 1925 ['tag', 'nexttag', 'nextsection', 'tag']) 1926 self.assertEqual(summarize_list(e.findall('*/*')), 1927 ['tag', 'nexttag', 'nextsection']) 1928 self.assertEqual(summarize_list(e.findall('*//*')), 1929 ['tag', 'nexttag', 'nextsection', 'tag']) 1930 self.assertEqual(summarize_list(e.findall('*/tag')), ['tag']) 1931 self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag']) 1932 self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2) 1933 self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2) 1934 1935 self.assertEqual(summarize_list(e.findall('.//tag[@class]')), 1936 ['tag'] * 3) 1937 self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), 1938 ['tag']) 1939 self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), 1940 ['tag'] * 2) 1941 self.assertEqual(summarize_list(e.findall('.//tag[@id]')), 1942 ['tag']) 1943 self.assertEqual(summarize_list(e.findall('.//section[tag]')), 1944 ['section']) 1945 self.assertEqual(summarize_list(e.findall('.//section[element]')), []) 1946 self.assertEqual(summarize_list(e.findall('../tag')), []) 1947 self.assertEqual(summarize_list(e.findall('section/../tag')), 1948 ['tag'] * 2) 1949 self.assertEqual(e.findall('section//'), e.findall('section//*')) 1950 1951 def test_test_find_with_ns(self): 1952 e = ET.XML(SAMPLE_XML_NS) 1953 self.assertEqual(summarize_list(e.findall('tag')), []) 1954 self.assertEqual( 1955 summarize_list(e.findall("{http://effbot.org/ns}tag")), 1956 ['{http://effbot.org/ns}tag'] * 2) 1957 self.assertEqual( 1958 summarize_list(e.findall(".//{http://effbot.org/ns}tag")), 1959 ['{http://effbot.org/ns}tag'] * 3) 1960 1961 def test_bad_find(self): 1962 e = ET.XML(SAMPLE_XML) 1963 with self.assertRaisesRegexp(SyntaxError, 1964 'cannot use absolute path on element'): 1965 e.findall('/tag') 1966 1967 def test_find_through_ElementTree(self): 1968 e = ET.XML(SAMPLE_XML) 1969 self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') 1970 self.assertEqual(ET.ElementTree(e).find('./tag').tag, 'tag') 1971 # this produces a warning 1972 msg = ("This search is broken in 1.3 and earlier, and will be fixed " 1973 "in a future version. If you rely on the current behaviour, " 1974 "change it to '.+'") 1975 with support.check_warnings((msg, FutureWarning)): 1976 self.assertEqual(ET.ElementTree(e).find('/tag').tag, 'tag') 1977 e[2] = ET.XML(SAMPLE_SECTION) 1978 self.assertEqual(ET.ElementTree(e).find('section/tag').tag, 'tag') 1979 self.assertIsNone(ET.ElementTree(e).find('tog')) 1980 self.assertIsNone(ET.ElementTree(e).find('tog/foo')) 1981 1982 self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text') 1983 self.assertIsNone(ET.ElementTree(e).findtext('tog/foo')) 1984 self.assertEqual(ET.ElementTree(e).findtext('tog/foo', 'default'), 1985 'default') 1986 self.assertEqual(ET.ElementTree(e).findtext('./tag'), 'text') 1987 with support.check_warnings((msg, FutureWarning)): 1988 self.assertEqual(ET.ElementTree(e).findtext('/tag'), 'text') 1989 self.assertEqual(ET.ElementTree(e).findtext('section/tag'), 'subtext') 1990 1991 self.assertEqual(summarize_list(ET.ElementTree(e).findall('./tag')), 1992 ['tag'] * 2) 1993 with support.check_warnings((msg, FutureWarning)): 1994 it = ET.ElementTree(e).findall('/tag') 1995 self.assertEqual(summarize_list(it), ['tag'] * 2) 1996 1997 1998class ElementIterTest(unittest.TestCase): 1999 def _ilist(self, elem, tag=None): 2000 return summarize_list(elem.iter(tag)) 2001 2002 def test_basic(self): 2003 doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 2004 self.assertEqual(self._ilist(doc), ['html', 'body', 'i']) 2005 self.assertEqual(self._ilist(doc.find('body')), ['body', 'i']) 2006 self.assertEqual(next(doc.iter()).tag, 'html') 2007 self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...') 2008 self.assertEqual(''.join(doc.find('body').itertext()), 2009 'this is a paragraph.') 2010 self.assertEqual(next(doc.itertext()), 'this is a ') 2011 2012 # Method iterparse should return an iterator. See bug 6472. 2013 sourcefile = serialize(doc, to_string=False) 2014 self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end') 2015 2016 if ET is pyET: 2017 # With an explitit parser too (issue #9708) 2018 sourcefile = serialize(doc, to_string=False) 2019 parser = ET.XMLParser(target=ET.TreeBuilder()) 2020 self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 2021 'end') 2022 2023 tree = ET.ElementTree(None) 2024 self.assertRaises(AttributeError, tree.iter) 2025 2026 # Issue #16913 2027 doc = ET.XML("<root>a&<sub>b&</sub>c&</root>") 2028 self.assertEqual(''.join(doc.itertext()), 'a&b&c&') 2029 2030 def test_corners(self): 2031 # single root, no subelements 2032 a = ET.Element('a') 2033 self.assertEqual(self._ilist(a), ['a']) 2034 2035 # one child 2036 b = ET.SubElement(a, 'b') 2037 self.assertEqual(self._ilist(a), ['a', 'b']) 2038 2039 # one child and one grandchild 2040 c = ET.SubElement(b, 'c') 2041 self.assertEqual(self._ilist(a), ['a', 'b', 'c']) 2042 2043 # two children, only first with grandchild 2044 d = ET.SubElement(a, 'd') 2045 self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd']) 2046 2047 # replace first child by second 2048 a[0] = a[1] 2049 del a[1] 2050 self.assertEqual(self._ilist(a), ['a', 'd']) 2051 2052 def test_iter_by_tag(self): 2053 doc = ET.XML(''' 2054 <document> 2055 <house> 2056 <room>bedroom1</room> 2057 <room>bedroom2</room> 2058 </house> 2059 <shed>nothing here 2060 </shed> 2061 <house> 2062 <room>bedroom8</room> 2063 </house> 2064 </document>''') 2065 2066 self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3) 2067 self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2) 2068 2069 if ET is pyET: 2070 # test that iter also accepts 'tag' as a keyword arg 2071 self.assertEqual( 2072 summarize_list(doc.iter(tag='room')), 2073 ['room'] * 3) 2074 2075 # make sure both tag=None and tag='*' return all tags 2076 all_tags = ['document', 'house', 'room', 'room', 2077 'shed', 'house', 'room'] 2078 self.assertEqual(summarize_list(doc.iter()), all_tags) 2079 self.assertEqual(self._ilist(doc), all_tags) 2080 self.assertEqual(self._ilist(doc, '*'), all_tags) 2081 2082 def test_getiterator(self): 2083 # Element.getiterator() is deprecated. 2084 if sys.py3kwarning or ET is pyET: 2085 with support.check_warnings(("This method will be removed in future versions. " 2086 "Use .+ instead.", PendingDeprecationWarning)): 2087 self._test_getiterator() 2088 else: 2089 self._test_getiterator() 2090 2091 def _test_getiterator(self): 2092 doc = ET.XML(''' 2093 <document> 2094 <house> 2095 <room>bedroom1</room> 2096 <room>bedroom2</room> 2097 </house> 2098 <shed>nothing here 2099 </shed> 2100 <house> 2101 <room>bedroom8</room> 2102 </house> 2103 </document>''') 2104 2105 self.assertEqual(summarize_list(doc.getiterator('room')), 2106 ['room'] * 3) 2107 self.assertEqual(summarize_list(doc.getiterator('house')), 2108 ['house'] * 2) 2109 2110 if ET is pyET: 2111 # test that getiterator also accepts 'tag' as a keyword arg 2112 self.assertEqual( 2113 summarize_list(doc.getiterator(tag='room')), 2114 ['room'] * 3) 2115 2116 # make sure both tag=None and tag='*' return all tags 2117 all_tags = ['document', 'house', 'room', 'room', 2118 'shed', 'house', 'room'] 2119 self.assertEqual(summarize_list(doc.getiterator()), all_tags) 2120 self.assertEqual(summarize_list(doc.getiterator(None)), all_tags) 2121 self.assertEqual(summarize_list(doc.getiterator('*')), all_tags) 2122 2123 def test_copy(self): 2124 a = ET.Element('a') 2125 it = a.iter() 2126 with self.assertRaises(TypeError): 2127 copy.copy(it) 2128 2129 def test_pickle(self): 2130 a = ET.Element('a') 2131 it = a.iter() 2132 for proto in range(pickle.HIGHEST_PROTOCOL + 1): 2133 with self.assertRaises((TypeError, pickle.PicklingError)): 2134 pickle.dumps(it, proto) 2135 2136 2137class TreeBuilderTest(unittest.TestCase): 2138 sample1 = ('<!DOCTYPE html PUBLIC' 2139 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 2140 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 2141 '<html>text<div>subtext</div>tail</html>') 2142 2143 sample2 = '''<toplevel>sometext</toplevel>''' 2144 2145 def _check_sample1_element(self, e): 2146 self.assertEqual(e.tag, 'html') 2147 self.assertEqual(e.text, 'text') 2148 self.assertEqual(e.tail, None) 2149 self.assertEqual(e.attrib, {}) 2150 children = list(e) 2151 self.assertEqual(len(children), 1) 2152 child = children[0] 2153 self.assertEqual(child.tag, 'div') 2154 self.assertEqual(child.text, 'subtext') 2155 self.assertEqual(child.tail, 'tail') 2156 self.assertEqual(child.attrib, {}) 2157 2158 def test_dummy_builder(self): 2159 class DummyBuilder: 2160 data = start = end = lambda *a: None 2161 2162 def close(self): 2163 return 42 2164 2165 parser = ET.XMLParser(target=DummyBuilder()) 2166 parser.feed(self.sample1) 2167 self.assertEqual(parser.close(), 42) 2168 2169 @python_only 2170 def test_treebuilder_elementfactory_none(self): 2171 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 2172 parser.feed(self.sample1) 2173 e = parser.close() 2174 self._check_sample1_element(e) 2175 2176 @python_only 2177 def test_subclass(self): 2178 class MyTreeBuilder(ET.TreeBuilder): 2179 def foobar(self, x): 2180 return x * 2 2181 2182 tb = MyTreeBuilder() 2183 self.assertEqual(tb.foobar(10), 20) 2184 2185 parser = ET.XMLParser(target=tb) 2186 parser.feed(self.sample1) 2187 2188 e = parser.close() 2189 self._check_sample1_element(e) 2190 2191 @python_only 2192 def test_element_factory(self): 2193 lst = [] 2194 def myfactory(tag, attrib): 2195 lst.append(tag) 2196 return ET.Element(tag, attrib) 2197 2198 tb = ET.TreeBuilder(element_factory=myfactory) 2199 parser = ET.XMLParser(target=tb) 2200 parser.feed(self.sample2) 2201 parser.close() 2202 2203 self.assertEqual(lst, ['toplevel']) 2204 2205 @python_only 2206 def test_element_factory_subclass(self): 2207 class MyElement(ET.Element): 2208 pass 2209 2210 tb = ET.TreeBuilder(element_factory=MyElement) 2211 2212 parser = ET.XMLParser(target=tb) 2213 parser.feed(self.sample1) 2214 e = parser.close() 2215 self.assertIsInstance(e, MyElement) 2216 self._check_sample1_element(e) 2217 2218 2219 @python_only 2220 def test_doctype(self): 2221 class DoctypeParser: 2222 _doctype = None 2223 2224 def doctype(self, name, pubid, system): 2225 self._doctype = (name, pubid, system) 2226 2227 data = start = end = lambda *a: None 2228 2229 def close(self): 2230 return self._doctype 2231 2232 parser = ET.XMLParser(target=DoctypeParser()) 2233 parser.feed(self.sample1) 2234 2235 self.assertEqual(parser.close(), 2236 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2237 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 2238 2239 @cet_only # PyET does not look up the attributes in XMLParser().__init__() 2240 def test_builder_lookup_errors(self): 2241 class RaisingBuilder(object): 2242 def __init__(self, raise_in=None, what=ValueError): 2243 self.raise_in = raise_in 2244 self.what = what 2245 2246 def __getattr__(self, name): 2247 if name == self.raise_in: 2248 raise self.what(self.raise_in) 2249 def handle(*args): 2250 pass 2251 return handle 2252 2253 ET.XMLParser(target=RaisingBuilder()) 2254 # cET also checks for 'close' and 'doctype', PyET does it only at need 2255 for event in ('start', 'data', 'end', 'comment', 'pi'): 2256 with self.assertRaises(ValueError): 2257 ET.XMLParser(target=RaisingBuilder(event)) 2258 2259 ET.XMLParser(target=RaisingBuilder(what=AttributeError)) 2260 for event in ('start', 'data', 'end', 'comment', 'pi'): 2261 parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError)) 2262 parser.feed(self.sample1) 2263 self.assertIsNone(parser.close()) 2264 2265 2266class XMLParserTest(unittest.TestCase): 2267 sample1 = b'<file><line>22</line></file>' 2268 sample2 = (b'<!DOCTYPE html PUBLIC' 2269 b' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 2270 b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 2271 b'<html>text</html>') 2272 2273 def _check_sample_element(self, e): 2274 self.assertEqual(e.tag, 'file') 2275 self.assertEqual(e[0].tag, 'line') 2276 self.assertEqual(e[0].text, '22') 2277 2278 @python_only 2279 def test_constructor_args(self): 2280 # Positional args. The first (html) is not supported, but should be 2281 # nevertheless correctly accepted. 2282 with support.check_py3k_warnings((r'.*\bhtml\b', DeprecationWarning)): 2283 parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8') 2284 parser.feed(self.sample1) 2285 self._check_sample_element(parser.close()) 2286 2287 # Now as keyword args. 2288 parser2 = ET.XMLParser(encoding='utf-8', 2289 target=ET.TreeBuilder()) 2290 parser2.feed(self.sample1) 2291 self._check_sample_element(parser2.close()) 2292 2293 @python_only 2294 def test_subclass(self): 2295 class MyParser(ET.XMLParser): 2296 pass 2297 parser = MyParser() 2298 parser.feed(self.sample1) 2299 self._check_sample_element(parser.close()) 2300 2301 @python_only 2302 def test_doctype_warning(self): 2303 parser = ET.XMLParser() 2304 with support.check_warnings(('', DeprecationWarning)): 2305 parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2306 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') 2307 parser.feed('<html/>') 2308 parser.close() 2309 2310 @python_only 2311 def test_subclass_doctype(self): 2312 _doctype = [] 2313 class MyParserWithDoctype(ET.XMLParser): 2314 def doctype(self, name, pubid, system): 2315 _doctype.append((name, pubid, system)) 2316 2317 parser = MyParserWithDoctype() 2318 with support.check_warnings(('', DeprecationWarning)): 2319 parser.feed(self.sample2) 2320 parser.close() 2321 self.assertEqual(_doctype, 2322 [('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2323 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')]) 2324 2325 _doctype = [] 2326 _doctype2 = [] 2327 with warnings.catch_warnings(): 2328 warnings.simplefilter('error', DeprecationWarning) 2329 class DoctypeParser: 2330 data = start = end = close = lambda *a: None 2331 2332 def doctype(self, name, pubid, system): 2333 _doctype2.append((name, pubid, system)) 2334 2335 parser = MyParserWithDoctype(target=DoctypeParser()) 2336 parser.feed(self.sample2) 2337 parser.close() 2338 self.assertEqual(_doctype, []) 2339 self.assertEqual(_doctype2, 2340 [('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 2341 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')]) 2342 2343 2344class NamespaceParseTest(unittest.TestCase): 2345 def test_find_with_namespace(self): 2346 nsmap = {'h': 'hello', 'f': 'foo'} 2347 doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) 2348 2349 self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) 2350 self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) 2351 self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) 2352 2353 2354class ElementSlicingTest(unittest.TestCase): 2355 def _elem_tags(self, elemlist): 2356 return [e.tag for e in elemlist] 2357 2358 def _subelem_tags(self, elem): 2359 return self._elem_tags(list(elem)) 2360 2361 def _make_elem_with_children(self, numchildren): 2362 """Create an Element with a tag 'a', with the given amount of children 2363 named 'a0', 'a1' ... and so on. 2364 2365 """ 2366 e = ET.Element('a') 2367 for i in range(numchildren): 2368 ET.SubElement(e, 'a%s' % i) 2369 return e 2370 2371 def test_getslice_single_index(self): 2372 e = self._make_elem_with_children(10) 2373 2374 self.assertEqual(e[1].tag, 'a1') 2375 self.assertEqual(e[-2].tag, 'a8') 2376 2377 self.assertRaises(IndexError, lambda: e[12]) 2378 self.assertRaises(IndexError, lambda: e[-12]) 2379 2380 def test_getslice_range(self): 2381 e = self._make_elem_with_children(6) 2382 2383 self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5']) 2384 self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5']) 2385 self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5']) 2386 self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4']) 2387 self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4']) 2388 self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1']) 2389 2390 def test_getslice_steps(self): 2391 e = self._make_elem_with_children(10) 2392 2393 self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9']) 2394 self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9']) 2395 self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8']) 2396 self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9']) 2397 self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3']) 2398 self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3']) 2399 2400 def test_getslice_negative_steps(self): 2401 e = self._make_elem_with_children(4) 2402 2403 self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0']) 2404 self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1']) 2405 self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3']) 2406 self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3']) 2407 self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3']) 2408 2409 def test_delslice(self): 2410 e = self._make_elem_with_children(4) 2411 del e[0:2] 2412 self.assertEqual(self._subelem_tags(e), ['a2', 'a3']) 2413 2414 e = self._make_elem_with_children(4) 2415 del e[0:] 2416 self.assertEqual(self._subelem_tags(e), []) 2417 2418 if ET is pyET: 2419 e = self._make_elem_with_children(4) 2420 del e[::-1] 2421 self.assertEqual(self._subelem_tags(e), []) 2422 2423 e = self._make_elem_with_children(4) 2424 del e[::-2] 2425 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 2426 2427 e = self._make_elem_with_children(4) 2428 del e[1::2] 2429 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 2430 2431 e = self._make_elem_with_children(2) 2432 del e[::2] 2433 self.assertEqual(self._subelem_tags(e), ['a1']) 2434 2435 def test_setslice_single_index(self): 2436 e = self._make_elem_with_children(4) 2437 e[1] = ET.Element('b') 2438 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2439 2440 e[-2] = ET.Element('c') 2441 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 2442 2443 with self.assertRaises(IndexError): 2444 e[5] = ET.Element('d') 2445 with self.assertRaises(IndexError): 2446 e[-5] = ET.Element('d') 2447 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 2448 2449 def test_setslice_range(self): 2450 e = self._make_elem_with_children(4) 2451 e[1:3] = [ET.Element('b%s' % i) for i in range(2)] 2452 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3']) 2453 2454 e = self._make_elem_with_children(4) 2455 e[1:3] = [ET.Element('b')] 2456 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3']) 2457 2458 e = self._make_elem_with_children(4) 2459 e[1:3] = [ET.Element('b%s' % i) for i in range(3)] 2460 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3']) 2461 2462 def test_setslice_steps(self): 2463 e = self._make_elem_with_children(6) 2464 e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)] 2465 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5']) 2466 2467 e = self._make_elem_with_children(6) 2468 with self.assertRaises(ValueError): 2469 e[1:5:2] = [ET.Element('b')] 2470 with self.assertRaises(ValueError): 2471 e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)] 2472 with self.assertRaises(ValueError): 2473 e[1:5:2] = [] 2474 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5']) 2475 2476 e = self._make_elem_with_children(4) 2477 e[1::sys.maxsize] = [ET.Element('b')] 2478 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2479 e[1::sys.maxsize<<64] = [ET.Element('c')] 2480 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 2481 2482 def test_setslice_negative_steps(self): 2483 e = self._make_elem_with_children(4) 2484 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)] 2485 self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3']) 2486 2487 e = self._make_elem_with_children(4) 2488 with self.assertRaises(ValueError): 2489 e[2:0:-1] = [ET.Element('b')] 2490 with self.assertRaises(ValueError): 2491 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)] 2492 with self.assertRaises(ValueError): 2493 e[2:0:-1] = [] 2494 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3']) 2495 2496 e = self._make_elem_with_children(4) 2497 e[1::-sys.maxsize] = [ET.Element('b')] 2498 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 2499 e[1::-sys.maxsize-1] = [ET.Element('c')] 2500 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 2501 e[1::-sys.maxsize<<64] = [ET.Element('d')] 2502 self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) 2503 2504 2505class IOTest(unittest.TestCase): 2506 def tearDown(self): 2507 support.unlink(TESTFN) 2508 2509 def test_encoding(self): 2510 # Test encoding issues. 2511 elem = ET.Element("tag") 2512 elem.text = u"abc" 2513 self.assertEqual(serialize(elem), '<tag>abc</tag>') 2514 self.assertEqual(serialize(elem, encoding="utf-8"), 2515 '<tag>abc</tag>') 2516 self.assertEqual(serialize(elem, encoding="us-ascii"), 2517 '<tag>abc</tag>') 2518 self.assertEqual(serialize(elem, encoding="iso-8859-1"), 2519 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 2520 "<tag>abc</tag>") 2521 2522 elem = ET.Element("tag") 2523 elem.text = "<&\"\'>" 2524 self.assertEqual(serialize(elem), '<tag><&"\'></tag>') 2525 self.assertEqual(serialize(elem, encoding="utf-8"), 2526 b'<tag><&"\'></tag>') 2527 self.assertEqual(serialize(elem, encoding="us-ascii"), 2528 b'<tag><&"\'></tag>') 2529 self.assertEqual(serialize(elem, encoding="iso-8859-1"), 2530 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 2531 "<tag><&\"'></tag>") 2532 2533 elem = ET.Element("tag") 2534 elem.attrib["key"] = "<&\"\'>" 2535 self.assertEqual(serialize(elem), '<tag key="<&"\'>" />') 2536 self.assertEqual(serialize(elem, encoding="utf-8"), 2537 b'<tag key="<&"\'>" />') 2538 self.assertEqual(serialize(elem, encoding="us-ascii"), 2539 b'<tag key="<&"\'>" />') 2540 self.assertEqual(serialize(elem, encoding="iso-8859-1"), 2541 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 2542 "<tag key=\"<&"'>\" />") 2543 2544 elem = ET.Element("tag") 2545 elem.text = u'\xe5\xf6\xf6<>' 2546 self.assertEqual(serialize(elem), 2547 '<tag>åöö<></tag>') 2548 self.assertEqual(serialize(elem, encoding="utf-8"), 2549 '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 2550 self.assertEqual(serialize(elem, encoding="us-ascii"), 2551 '<tag>åöö<></tag>') 2552 self.assertEqual(serialize(elem, encoding="iso-8859-1"), 2553 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 2554 "<tag>\xe5\xf6\xf6<></tag>") 2555 2556 elem = ET.Element("tag") 2557 elem.attrib["key"] = u'\xe5\xf6\xf6<>' 2558 self.assertEqual(serialize(elem), 2559 '<tag key="åöö<>" />') 2560 self.assertEqual(serialize(elem, encoding="utf-8"), 2561 '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 2562 self.assertEqual(serialize(elem, encoding="us-ascii"), 2563 '<tag key="åöö<>" />') 2564 self.assertEqual(serialize(elem, encoding="iso-8859-1"), 2565 "<?xml version='1.0' encoding='iso-8859-1'?>\n" 2566 "<tag key=\"\xe5\xf6\xf6<>\" />") 2567 2568 def test_write_to_filename(self): 2569 tree = ET.ElementTree(ET.XML('''<site />''')) 2570 tree.write(TESTFN) 2571 with open(TESTFN, 'rb') as f: 2572 self.assertEqual(f.read(), b'''<site />''') 2573 2574 def test_write_to_file(self): 2575 tree = ET.ElementTree(ET.XML('''<site />''')) 2576 with open(TESTFN, 'wb') as f: 2577 tree.write(f) 2578 self.assertFalse(f.closed) 2579 with open(TESTFN, 'rb') as f: 2580 self.assertEqual(f.read(), b'''<site />''') 2581 2582 def test_read_from_stringio(self): 2583 tree = ET.ElementTree() 2584 stream = StringIO.StringIO('''<?xml version="1.0"?><site></site>''') 2585 tree.parse(stream) 2586 self.assertEqual(tree.getroot().tag, 'site') 2587 2588 def test_write_to_stringio(self): 2589 tree = ET.ElementTree(ET.XML('''<site />''')) 2590 stream = StringIO.StringIO() 2591 tree.write(stream) 2592 self.assertEqual(stream.getvalue(), '''<site />''') 2593 2594 class dummy: 2595 pass 2596 2597 def test_read_from_user_reader(self): 2598 stream = StringIO.StringIO('''<?xml version="1.0"?><site></site>''') 2599 reader = self.dummy() 2600 reader.read = stream.read 2601 tree = ET.ElementTree() 2602 tree.parse(reader) 2603 self.assertEqual(tree.getroot().tag, 'site') 2604 2605 def test_write_to_user_writer(self): 2606 tree = ET.ElementTree(ET.XML('''<site />''')) 2607 stream = StringIO.StringIO() 2608 writer = self.dummy() 2609 writer.write = stream.write 2610 tree.write(writer) 2611 self.assertEqual(stream.getvalue(), '''<site />''') 2612 2613 def test_tostringlist_invariant(self): 2614 root = ET.fromstring('<tag>foo</tag>') 2615 self.assertEqual( 2616 ET.tostring(root), 2617 ''.join(ET.tostringlist(root))) 2618 self.assertEqual( 2619 ET.tostring(root, 'utf-16'), 2620 b''.join(ET.tostringlist(root, 'utf-16'))) 2621 2622 2623class ParseErrorTest(unittest.TestCase): 2624 def test_subclass(self): 2625 self.assertIsInstance(ET.ParseError(), SyntaxError) 2626 2627 def _get_error(self, s): 2628 try: 2629 ET.fromstring(s) 2630 except ET.ParseError as e: 2631 return e 2632 2633 def test_error_position(self): 2634 self.assertEqual(self._get_error('foo').position, (1, 0)) 2635 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 2636 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 2637 2638 @python_only 2639 def test_error_code(self): 2640 from xml.parsers import expat 2641 self.assertEqual(expat.ErrorString(self._get_error('foo').code), 2642 expat.errors.XML_ERROR_SYNTAX) 2643 2644 2645class KeywordArgsTest(unittest.TestCase): 2646 # Test various issues with keyword arguments passed to ET.Element 2647 # constructor and methods 2648 def test_issue14818(self): 2649 x = ET.XML("<a>foo</a>") 2650 self.assertEqual(x.find('a', None), 2651 x.find(path='a', namespaces=None)) 2652 self.assertEqual(x.findtext('a', None, None), 2653 x.findtext(path='a', default=None, namespaces=None)) 2654 self.assertEqual(x.findall('a', None), 2655 x.findall(path='a', namespaces=None)) 2656 self.assertEqual(list(x.iterfind('a', None)), 2657 list(x.iterfind(path='a', namespaces=None))) 2658 2659 self.assertEqual(ET.Element('a').attrib, {}) 2660 elements = [ 2661 ET.Element('a', dict(href="#", id="foo")), 2662 ET.Element('a', attrib=dict(href="#", id="foo")), 2663 ET.Element('a', dict(href="#"), id="foo"), 2664 ET.Element('a', href="#", id="foo"), 2665 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 2666 ] 2667 for e in elements: 2668 self.assertEqual(e.tag, 'a') 2669 self.assertEqual(e.attrib, dict(href="#", id="foo")) 2670 2671 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 2672 self.assertEqual(e2.attrib['key1'], 'value1') 2673 2674 with self.assertRaisesRegexp(TypeError, 'must be dict, not str'): 2675 ET.Element('a', "I'm not a dict") 2676 with self.assertRaisesRegexp(TypeError, 'must be dict, not str'): 2677 ET.Element('a', attrib="I'm not a dict") 2678 2679# -------------------------------------------------------------------- 2680 2681class NoAcceleratorTest(unittest.TestCase): 2682 def setUp(self): 2683 if ET is not pyET: 2684 raise unittest.SkipTest('only for the Python version') 2685 2686 # Test that the C accelerator was not imported for pyET 2687 def test_correct_import_pyET(self): 2688 # The type of methods defined in Python code is types.FunctionType, 2689 # while the type of methods defined inside _elementtree is 2690 # <class 'wrapper_descriptor'> 2691 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 2692 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 2693 2694# -------------------------------------------------------------------- 2695 2696 2697def test_main(module=None): 2698 # When invoked without a module, runs the Python ET tests by loading pyET. 2699 # Otherwise, uses the given module as the ET. 2700 if module is None: 2701 module = pyET 2702 2703 global ET 2704 ET = module 2705 2706 test_classes = [ 2707 ModuleTest, 2708 ElementSlicingTest, 2709 BasicElementTest, 2710 BadElementTest, 2711 BadElementPathTest, 2712 ElementTreeTest, 2713 IOTest, 2714 ParseErrorTest, 2715 XIncludeTest, 2716 ElementTreeTypeTest, 2717 ElementFindTest, 2718 ElementIterTest, 2719 TreeBuilderTest, 2720 XMLParserTest, 2721 BugsTest, 2722 ] 2723 2724 # These tests will only run for the pure-Python version that doesn't import 2725 # _elementtree. We can't use skipUnless here, because pyET is filled in only 2726 # after the module is loaded. 2727 if pyET is not ET: 2728 test_classes.extend([ 2729 NoAcceleratorTest, 2730 ]) 2731 2732 # Provide default namespace mapping and path cache. 2733 from xml.etree import ElementPath 2734 nsmap = pyET._namespace_map 2735 # Copy the default namespace mapping 2736 nsmap_copy = nsmap.copy() 2737 # Copy the path cache (should be empty) 2738 path_cache = ElementPath._cache 2739 ElementPath._cache = path_cache.copy() 2740 try: 2741 support.run_unittest(*test_classes) 2742 finally: 2743 from xml.etree import ElementPath 2744 # Restore mapping and path cache 2745 nsmap.clear() 2746 nsmap.update(nsmap_copy) 2747 ElementPath._cache = path_cache 2748 # don't interfere with subsequent tests 2749 ET = None 2750 2751 2752if __name__ == '__main__': 2753 test_main() 2754