1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# 4# this tests the DTD validation with the XmlTextReader interface 5# 6import sys 7import glob 8import os 9import setup_test 10import libxml2 11try: 12 import StringIO 13 str_io = StringIO.StringIO 14except: 15 import io 16 str_io = io.StringIO 17 18# Memory debug specific 19libxml2.debugMemory(1) 20 21err = "" 22basedir = os.path.dirname(os.path.realpath(__file__)) 23dir_prefix = os.path.realpath(os.path.join(basedir, "..", "..", "test", "valid")) 24 25# This dictionary reflects the contents of the files 26# ../../test/valid/*.xml.err that are not empty, except that 27# the file paths in the messages start with ../../test/ 28 29expect = { 30 '766956': 31"""{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';' 32%ä%ent; 33 ^ 34{0}/dtds/766956.dtd:2: parser error : Content error in the external subset 35%ä%ent; 36 ^ 37Entity: line 1: 38value 39^ 40""".format(dir_prefix), 41 '781333': 42"""{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got 43<a/> 44 ^ 45{0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more children 46 47^ 48""".format(dir_prefix), 49 'cond_sect2': 50"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity 51 %ent; 52 ^ 53Entity: line 1: 54]]> 55^ 56{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset 57 58^ 59""".format(dir_prefix), 60 'rss': 61"""{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version 62</rss> 63 ^ 64""".format(dir_prefix), 65 't8': 66"""{0}/t8.xml:6: parser error : Content error in the internal subset 67%defroot; %defmiddle; %deftest; 68 ^ 69Entity: line 1: 70<!ELEMENT root (middle) > 71^ 72""".format(dir_prefix), 73 't8a': 74"""{0}/t8a.xml:6: parser error : Content error in the internal subset 75%defroot;%defmiddle;%deftest; 76 ^ 77Entity: line 1: 78<!ELEMENT root (middle) > 79^ 80""".format(dir_prefix), 81 'xlink': 82"""{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined 83 <p><termdef id="dt-arc" term="Arc">An <ter 84 ^ 85validity error : attribute def line 199 references an unknown ID "dt-xlg" 86""".format(dir_prefix), 87} 88 89# Add prefix_dir and extension to the keys 90expect = {os.path.join(dir_prefix, key + ".xml"): val for key, val in expect.items()} 91 92def callback(ctx, str): 93 global err 94 err = err + "%s" % (str) 95libxml2.registerErrorHandler(callback, "") 96 97parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"] 98expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files] 99 100valid_files = glob.glob(os.path.join(dir_prefix, "*.x*")) 101assert valid_files, "found no valid files in '{}'".format(dir_prefix) 102valid_files.sort() 103failures = 0 104for file in valid_files: 105 err = "" 106 reader = libxml2.newTextReaderFilename(file) 107 #print "%s:" % (file) 108 reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) 109 ret = reader.Read() 110 while ret == 1: 111 ret = reader.Read() 112 if ret != 0 and file not in expect_parsing_error: 113 print("Error parsing and validating %s" % (file)) 114 #sys.exit(1) 115 if file in expect and err != expect[file]: 116 failures += 1 117 print("Error: ", err) 118 if file in expect: 119 print("Expected: ", expect[file]) 120 121if failures: 122 print("Failed %d tests" % failures) 123 sys.exit(1) 124 125# 126# another separate test based on Stephane Bidoul one 127# 128s = """ 129<!DOCTYPE test [ 130<!ELEMENT test (x,b)> 131<!ELEMENT x (c)> 132<!ELEMENT b (#PCDATA)> 133<!ELEMENT c (#PCDATA)> 134<!ENTITY x "<x><c>xxx</c></x>"> 135]> 136<test> 137 &x; 138 <b>bbb</b> 139</test> 140""" 141expect="""10,test 1421,test 14314,#text 1441,x 1451,c 1463,#text 14715,c 14815,x 14914,#text 1501,b 1513,#text 15215,b 15314,#text 15415,test 155""" 156res="" 157err="" 158 159input = libxml2.inputBuffer(str_io(s)) 160reader = input.newTextReader("test2") 161reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 162reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 163reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 164reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 165while reader.Read() == 1: 166 res = res + "%s,%s\n" % (reader.NodeType(),reader.Name()) 167 168if res != expect: 169 print("test2 failed: unexpected output") 170 print(res) 171 sys.exit(1) 172if err != "": 173 print("test2 failed: validation error found") 174 print(err) 175 sys.exit(1) 176 177# 178# Another test for external entity parsing and validation 179# 180 181s = """<!DOCTYPE test [ 182<!ELEMENT test (x)> 183<!ELEMENT x (#PCDATA)> 184<!ENTITY e SYSTEM "tst.ent"> 185]> 186<test> 187 &e; 188</test> 189""" 190tst_ent = """<x>hello</x>""" 191expect="""10 test 1921 test 19314 #text 1941 x 1953 #text 19615 x 19714 #text 19815 test 199""" 200res="" 201 202def myResolver(URL, ID, ctxt): 203 if URL == "tst.ent": 204 return(str_io(tst_ent)) 205 return None 206 207libxml2.setEntityLoader(myResolver) 208 209input = libxml2.inputBuffer(str_io(s)) 210reader = input.newTextReader("test3") 211reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 212reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 213reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 214reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 215while reader.Read() == 1: 216 res = res + "%s %s\n" % (reader.NodeType(),reader.Name()) 217 218if res != expect: 219 print("test3 failed: unexpected output") 220 print(res) 221 sys.exit(1) 222if err != "": 223 print("test3 failed: validation error found") 224 print(err) 225 sys.exit(1) 226 227# 228# Another test for recursive entity parsing, validation, and replacement of 229# entities, making sure the entity ref node doesn't show up in that case 230# 231 232s = """<!DOCTYPE test [ 233<!ELEMENT test (x, x)> 234<!ELEMENT x (y)> 235<!ELEMENT y (#PCDATA)> 236<!ENTITY x "<x>&y;</x>"> 237<!ENTITY y "<y>yyy</y>"> 238]> 239<test> 240 &x; 241 &x; 242</test>""" 243expect="""10 test 0 2441 test 0 24514 #text 1 2461 x 1 2471 y 2 2483 #text 3 24915 y 2 25015 x 1 25114 #text 1 2521 x 1 2531 y 2 2543 #text 3 25515 y 2 25615 x 1 25714 #text 1 25815 test 0 259""" 260res="" 261err="" 262 263input = libxml2.inputBuffer(str_io(s)) 264reader = input.newTextReader("test4") 265reader.SetParserProp(libxml2.PARSER_LOADDTD,1) 266reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1) 267reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1) 268reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 269while reader.Read() == 1: 270 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 271 272if res != expect: 273 print("test4 failed: unexpected output") 274 print(res) 275 sys.exit(1) 276if err != "": 277 print("test4 failed: validation error found") 278 print(err) 279 sys.exit(1) 280 281# 282# The same test but without entity substitution this time 283# 284 285s = """<!DOCTYPE test [ 286<!ELEMENT test (x, x)> 287<!ELEMENT x (y)> 288<!ELEMENT y (#PCDATA)> 289<!ENTITY x "<x>&y;</x>"> 290<!ENTITY y "<y>yyy</y>"> 291]> 292<test> 293 &x; 294 &x; 295</test>""" 296expect="""10 test 0 2971 test 0 29814 #text 1 2995 x 1 30014 #text 1 3015 x 1 30214 #text 1 30315 test 0 304""" 305res="" 306err="" 307 308input = libxml2.inputBuffer(str_io(s)) 309reader = input.newTextReader("test5") 310reader.SetParserProp(libxml2.PARSER_VALIDATE,1) 311while reader.Read() == 1: 312 res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth()) 313 314if res != expect: 315 print("test5 failed: unexpected output") 316 print(res) 317 sys.exit(1) 318if err != "": 319 print("test5 failed: validation error found") 320 print(err) 321 sys.exit(1) 322 323# 324# cleanup 325# 326del input 327del reader 328 329# Memory debug specific 330libxml2.cleanupParser() 331if libxml2.debugMemory(1) == 0: 332 print("OK") 333else: 334 print("Memory leak %d bytes" % (libxml2.debugMemory(1))) 335