xref: /aosp_15_r20/external/libxml2/python/tests/reader2.py (revision 7c5688314b92172186c154356a6374bf7684c3ca)
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# this tests the DTD validation with the XmlTextReader interface
5#
6import sys
7import glob
8import os
9import setup_test
10import libxml2
# Pick a text-buffer class that works on both interpreter generations:
# Python 2 provides a top-level StringIO module, Python 3 only io.StringIO.
# Only a failed import should trigger the fallback; anything else (for
# example KeyboardInterrupt) must propagate instead of being swallowed.
try:
    import StringIO
    str_io = StringIO.StringIO
except ImportError:
    import io
    str_io = io.StringIO
17
# Memory debug specific
libxml2.debugMemory(1)

# Buffer that collects the text handed to the libxml2 error handler.
err = ""

# Directory containing this script, with symlinks resolved.
basedir = os.path.dirname(os.path.realpath(__file__))

# Canonical path of <basedir>/../../test/valid; it is used both to locate
# the input documents and to fill in the "{0}" placeholder of the expected
# error messages.
_valid_dir = os.path.join(basedir, os.pardir, os.pardir, "test", "valid")
dir_prefix = os.path.realpath(_valid_dir)
24
# Expected error output for the documents that are known to produce
# diagnostics, keyed by test-case name (the file stem under dir_prefix).
# According to the original author's note these mirror the non-empty
# ../../test/valid/*.xml.err files; "{0}" stands for the test directory
# and is substituted with dir_prefix below.
_expected_templates = {
    '766956':
"""{0}/dtds/766956.dtd:2: parser error : PEReference: expecting ';'
%ä%ent;
   ^
{0}/dtds/766956.dtd:2: parser error : Content error in the external subset
%ä%ent;
        ^
Entity: line 1:
value
^
""",
    '781333':
"""{0}/781333.xml:4: element a: validity error : Element a content does not follow the DTD, expecting ( ..., got
<a/>
    ^
{0}/781333.xml:5: element a: validity error : Element a content does not follow the DTD, Expecting more children

^
""",
    'cond_sect2':
"""{0}/dtds/cond_sect2.dtd:15: parser error : All markup of the conditional section is not in the same entity
    %ent;
         ^
Entity: line 1:
]]>
^
{0}/dtds/cond_sect2.dtd:17: parser error : Content error in the external subset

^
""",
    'rss':
"""{0}/rss.xml:177: element rss: validity error : Element rss does not carry attribute version
</rss>
      ^
""",
    't8':
"""{0}/t8.xml:6: parser error : Content error in the internal subset
%defroot; %defmiddle; %deftest;
         ^
Entity: line 1:
&lt;!ELEMENT root (middle) >
^
""",
    't8a':
"""{0}/t8a.xml:6: parser error : Content error in the internal subset
%defroot;%defmiddle;%deftest;
         ^
Entity: line 1:
&lt;!ELEMENT root (middle) >
^
""",
    'xlink':
"""{0}/xlink.xml:450: element termdef: validity error : ID dt-arc already defined
	<p><termdef id="dt-arc" term="Arc">An <ter
	                                  ^
validity error : attribute def line 199 references an unknown ID "dt-xlg"
""",
}

# Fill in the directory placeholder and key every message by the full path
# of its .xml document, which is the form the validation loop looks up.
expect = {
    os.path.join(dir_prefix, name + ".xml"): template.format(dir_prefix)
    for name, template in _expected_templates.items()
}
91
def callback(ctx, str):
    """Error handler: append the reported message to the global ``err``.

    ``ctx`` is the opaque value supplied at registration time and is not
    used.  ``str`` is the message; it is formatted with ``%s`` before being
    appended, so the buffer always stays a string.  (The parameter name
    shadows the builtin but is kept because it is part of the signature.)
    """
    global err
    err += "%s" % (str)
# Route every libxml2 error message into the module-level ``err`` buffer.
libxml2.registerErrorHandler(callback, "")

# Documents for which a non-zero final Read() status is tolerated.
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"]
expect_parsing_error = [
    os.path.join(dir_prefix, stem + ".xml") for stem in parsing_error_files
]

valid_files = sorted(glob.glob(os.path.join(dir_prefix, "*.x*")))
assert valid_files, "found no valid files in '{}'".format(dir_prefix)
failures = 0
for xml_path in valid_files:
    # Start each document with an empty error buffer.
    err = ""
    # NOTE: the name ``reader`` is reused on purpose; later sections rebind
    # and finally delete it before the memory-leak check.
    reader = libxml2.newTextReaderFilename(xml_path)
    reader.SetParserProp(libxml2.PARSER_VALIDATE, 1)

    # Drain the document.  The loop continues while Read() yields 1; the
    # first other value is kept as the final status (0 is treated as a
    # clean end, anything else as a parse failure).
    while True:
        status = reader.Read()
        if status != 1:
            break

    if status != 0 and xml_path not in expect_parsing_error:
        # Reported only: an unexpected parse failure is deliberately not
        # counted as a test failure here.
        print("Error parsing and validating %s" % (xml_path))

    # Only documents listed in ``expect`` have their diagnostics compared.
    if xml_path in expect and err != expect[xml_path]:
        failures += 1
        print("Error: ", err)
        print("Expected: ", expect[xml_path])

if failures:
    print("Failed %d tests" % failures)
    sys.exit(1)
124
125#
126# another separate test based on Stephane Bidoul one
127#
128s = """
129<!DOCTYPE test [
130<!ELEMENT test (x,b)>
131<!ELEMENT x (c)>
132<!ELEMENT b (#PCDATA)>
133<!ELEMENT c (#PCDATA)>
134<!ENTITY x "<x><c>xxx</c></x>">
135]>
136<test>
137    &x;
138    <b>bbb</b>
139</test>
140"""
141expect="""10,test
1421,test
14314,#text
1441,x
1451,c
1463,#text
14715,c
14815,x
14914,#text
1501,b
1513,#text
15215,b
15314,#text
15415,test
155"""
156res=""
157err=""
158
159input = libxml2.inputBuffer(str_io(s))
160reader = input.newTextReader("test2")
161reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
162reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
163reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
164reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
165while reader.Read() == 1:
166    res = res + "%s,%s\n" % (reader.NodeType(),reader.Name())
167
168if res != expect:
169    print("test2 failed: unexpected output")
170    print(res)
171    sys.exit(1)
172if err != "":
173    print("test2 failed: validation error found")
174    print(err)
175    sys.exit(1)
176
177#
178# Another test for external entity parsing and validation
179#
180
181s = """<!DOCTYPE test [
182<!ELEMENT test (x)>
183<!ELEMENT x (#PCDATA)>
184<!ENTITY e SYSTEM "tst.ent">
185]>
186<test>
187  &e;
188</test>
189"""
190tst_ent = """<x>hello</x>"""
191expect="""10 test
1921 test
19314 #text
1941 x
1953 #text
19615 x
19714 #text
19815 test
199"""
200res=""
201
def myResolver(URL, ID, ctxt):
    """Entity loader serving the in-memory ``tst.ent`` entity.

    Returns a fresh ``str_io`` stream over ``tst_ent`` when ``URL`` is
    exactly "tst.ent", and ``None`` for every other URL.  ``ID`` and
    ``ctxt`` are accepted but not used.
    """
    if URL != "tst.ent":
        return None
    return str_io(tst_ent)
206
# Let libxml2 fetch external entities through myResolver.
libxml2.setEntityLoader(myResolver)

# ``input`` and ``reader`` keep their names: rebinding them drops the
# previous objects, and the cleanup section deletes exactly these names.
input = libxml2.inputBuffer(str_io(s))
reader = input.newTextReader("test3")
for parser_prop in (
    libxml2.PARSER_LOADDTD,
    libxml2.PARSER_DEFAULTATTRS,
    libxml2.PARSER_SUBST_ENTITIES,
    libxml2.PARSER_VALIDATE,
):
    reader.SetParserProp(parser_prop, 1)

# Append one "<NodeType> <Name>" row per node to the trace buffer.
while reader.Read() == 1:
    res += "%s %s\n" % (reader.NodeType(), reader.Name())

if res != expect:
    print("test3 failed: unexpected output")
    print(res)
    sys.exit(1)
# ``err`` is not reset for this test; it is still empty here because the
# previous test exits the script whenever it is not.
if err != "":
    print("test3 failed: validation error found")
    print(err)
    sys.exit(1)
226
227#
228# Another test for recursive entity parsing, validation, and replacement of
229# entities, making sure the entity ref node doesn't show up in that case
230#
231
232s = """<!DOCTYPE test [
233<!ELEMENT test (x, x)>
234<!ELEMENT x (y)>
235<!ELEMENT y (#PCDATA)>
236<!ENTITY x "<x>&y;</x>">
237<!ENTITY y "<y>yyy</y>">
238]>
239<test>
240  &x;
241  &x;
242</test>"""
243expect="""10 test 0
2441 test 0
24514 #text 1
2461 x 1
2471 y 2
2483 #text 3
24915 y 2
25015 x 1
25114 #text 1
2521 x 1
2531 y 2
2543 #text 3
25515 y 2
25615 x 1
25714 #text 1
25815 test 0
259"""
260res=""
261err=""
262
263input = libxml2.inputBuffer(str_io(s))
264reader = input.newTextReader("test4")
265reader.SetParserProp(libxml2.PARSER_LOADDTD,1)
266reader.SetParserProp(libxml2.PARSER_DEFAULTATTRS,1)
267reader.SetParserProp(libxml2.PARSER_SUBST_ENTITIES,1)
268reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
269while reader.Read() == 1:
270    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
271
272if res != expect:
273    print("test4 failed: unexpected output")
274    print(res)
275    sys.exit(1)
276if err != "":
277    print("test4 failed: validation error found")
278    print(err)
279    sys.exit(1)
280
281#
282# The same test but without entity substitution this time
283#
284
285s = """<!DOCTYPE test [
286<!ELEMENT test (x, x)>
287<!ELEMENT x (y)>
288<!ELEMENT y (#PCDATA)>
289<!ENTITY x "<x>&y;</x>">
290<!ENTITY y "<y>yyy</y>">
291]>
292<test>
293  &x;
294  &x;
295</test>"""
296expect="""10 test 0
2971 test 0
29814 #text 1
2995 x 1
30014 #text 1
3015 x 1
30214 #text 1
30315 test 0
304"""
305res=""
306err=""
307
308input = libxml2.inputBuffer(str_io(s))
309reader = input.newTextReader("test5")
310reader.SetParserProp(libxml2.PARSER_VALIDATE,1)
311while reader.Read() == 1:
312    res = res + "%s %s %d\n" % (reader.NodeType(),reader.Name(),reader.Depth())
313
314if res != expect:
315    print("test5 failed: unexpected output")
316    print(res)
317    sys.exit(1)
318if err != "":
319    print("test5 failed: validation error found")
320    print(err)
321    sys.exit(1)
322
#
# cleanup
#
# Drop the last input buffer and reader (in that order) before the parser
# is cleaned up and the memory counter is inspected.
del input, reader

# Memory debug specific
libxml2.cleanupParser()
# A non-zero value from debugMemory(1) is reported as leaked bytes; the
# script still ends normally in that case.
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1)))
else:
    print("OK")
335