1import sys
2import unicodedata
3import unittest
4import urllib.parse
5
6RFC1808_BASE = "http://a/b/c/d;p?q#f"
7RFC2396_BASE = "http://a/b/c/d;p?q"
8RFC3986_BASE = 'http://a/b/c/d;p?q'
9SIMPLE_BASE  = 'http://a/b/c/d'
10
11# Each parse_qsl testcase is a two-tuple that contains
12# a string with the query and a list with the expected result.
13
14parse_qsl_test_cases = [
15    ("", []),
16    ("&", []),
17    ("&&", []),
18    ("=", [('', '')]),
19    ("=a", [('', 'a')]),
20    ("a", [('a', '')]),
21    ("a=", [('a', '')]),
22    ("&a=b", [('a', 'b')]),
23    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
24    ("a=1&a=2", [('a', '1'), ('a', '2')]),
25    (b"", []),
26    (b"&", []),
27    (b"&&", []),
28    (b"=", [(b'', b'')]),
29    (b"=a", [(b'', b'a')]),
30    (b"a", [(b'a', b'')]),
31    (b"a=", [(b'a', b'')]),
32    (b"&a=b", [(b'a', b'b')]),
33    (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
34    (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
35    (";a=b", [(';a', 'b')]),
36    ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
37    (b";a=b", [(b';a', b'b')]),
38    (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
39]
40
41# Each parse_qs testcase is a two-tuple that contains
42# a string with the query and a dictionary with the expected result.
43
44parse_qs_test_cases = [
45    ("", {}),
46    ("&", {}),
47    ("&&", {}),
48    ("=", {'': ['']}),
49    ("=a", {'': ['a']}),
50    ("a", {'a': ['']}),
51    ("a=", {'a': ['']}),
52    ("&a=b", {'a': ['b']}),
53    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
54    ("a=1&a=2", {'a': ['1', '2']}),
55    (b"", {}),
56    (b"&", {}),
57    (b"&&", {}),
58    (b"=", {b'': [b'']}),
59    (b"=a", {b'': [b'a']}),
60    (b"a", {b'a': [b'']}),
61    (b"a=", {b'a': [b'']}),
62    (b"&a=b", {b'a': [b'b']}),
63    (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
64    (b"a=1&a=2", {b'a': [b'1', b'2']}),
65    (";a=b", {';a': ['b']}),
66    ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
67    (b";a=b", {b';a': [b'b']}),
68    (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
69]
70
71class UrlParseTestCase(unittest.TestCase):
72
73    def checkRoundtrips(self, url, parsed, split):
74        result = urllib.parse.urlparse(url)
75        self.assertEqual(result, parsed)
76        t = (result.scheme, result.netloc, result.path,
77             result.params, result.query, result.fragment)
78        self.assertEqual(t, parsed)
79        # put it back together and it should be the same
80        result2 = urllib.parse.urlunparse(result)
81        self.assertEqual(result2, url)
82        self.assertEqual(result2, result.geturl())
83
84        # the result of geturl() is a fixpoint; we can always parse it
85        # again to get the same result:
86        result3 = urllib.parse.urlparse(result.geturl())
87        self.assertEqual(result3.geturl(), result.geturl())
88        self.assertEqual(result3,          result)
89        self.assertEqual(result3.scheme,   result.scheme)
90        self.assertEqual(result3.netloc,   result.netloc)
91        self.assertEqual(result3.path,     result.path)
92        self.assertEqual(result3.params,   result.params)
93        self.assertEqual(result3.query,    result.query)
94        self.assertEqual(result3.fragment, result.fragment)
95        self.assertEqual(result3.username, result.username)
96        self.assertEqual(result3.password, result.password)
97        self.assertEqual(result3.hostname, result.hostname)
98        self.assertEqual(result3.port,     result.port)
99
100        # check the roundtrip using urlsplit() as well
101        result = urllib.parse.urlsplit(url)
102        self.assertEqual(result, split)
103        t = (result.scheme, result.netloc, result.path,
104             result.query, result.fragment)
105        self.assertEqual(t, split)
106        result2 = urllib.parse.urlunsplit(result)
107        self.assertEqual(result2, url)
108        self.assertEqual(result2, result.geturl())
109
110        # check the fixpoint property of re-parsing the result of geturl()
111        result3 = urllib.parse.urlsplit(result.geturl())
112        self.assertEqual(result3.geturl(), result.geturl())
113        self.assertEqual(result3,          result)
114        self.assertEqual(result3.scheme,   result.scheme)
115        self.assertEqual(result3.netloc,   result.netloc)
116        self.assertEqual(result3.path,     result.path)
117        self.assertEqual(result3.query,    result.query)
118        self.assertEqual(result3.fragment, result.fragment)
119        self.assertEqual(result3.username, result.username)
120        self.assertEqual(result3.password, result.password)
121        self.assertEqual(result3.hostname, result.hostname)
122        self.assertEqual(result3.port,     result.port)
123
124    def test_qsl(self):
125        for orig, expect in parse_qsl_test_cases:
126            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
127            self.assertEqual(result, expect, "Error parsing %r" % orig)
128            expect_without_blanks = [v for v in expect if len(v[1])]
129            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
130            self.assertEqual(result, expect_without_blanks,
131                            "Error parsing %r" % orig)
132
133    def test_qs(self):
134        for orig, expect in parse_qs_test_cases:
135            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
136            self.assertEqual(result, expect, "Error parsing %r" % orig)
137            expect_without_blanks = {v: expect[v]
138                                     for v in expect if len(expect[v][0])}
139            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
140            self.assertEqual(result, expect_without_blanks,
141                            "Error parsing %r" % orig)
142
143    def test_roundtrips(self):
144        str_cases = [
145            ('file:///tmp/junk.txt',
146             ('file', '', '/tmp/junk.txt', '', '', ''),
147             ('file', '', '/tmp/junk.txt', '', '')),
148            ('imap://mail.python.org/mbox1',
149             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
150             ('imap', 'mail.python.org', '/mbox1', '', '')),
151            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
152             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
153              '', '', ''),
154             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
155              '', '')),
156            ('nfs://server/path/to/file.txt',
157             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
158             ('nfs', 'server', '/path/to/file.txt', '', '')),
159            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
160             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
161              '', '', ''),
162             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
163              '', '')),
164            ('git+ssh://[email protected]/user/project.git',
165            ('git+ssh', '[email protected]','/user/project.git',
166             '','',''),
167            ('git+ssh', '[email protected]','/user/project.git',
168             '', '')),
169            ]
170        def _encode(t):
171            return (t[0].encode('ascii'),
172                    tuple(x.encode('ascii') for x in t[1]),
173                    tuple(x.encode('ascii') for x in t[2]))
174        bytes_cases = [_encode(x) for x in str_cases]
175        for url, parsed, split in str_cases + bytes_cases:
176            self.checkRoundtrips(url, parsed, split)
177
178    def test_http_roundtrips(self):
179        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
180        # so we test both 'http:' and 'https:' in all the following.
181        # Three cheers for white box knowledge!
182        str_cases = [
183            ('://www.python.org',
184             ('www.python.org', '', '', '', ''),
185             ('www.python.org', '', '', '')),
186            ('://www.python.org#abc',
187             ('www.python.org', '', '', '', 'abc'),
188             ('www.python.org', '', '', 'abc')),
189            ('://www.python.org?q=abc',
190             ('www.python.org', '', '', 'q=abc', ''),
191             ('www.python.org', '', 'q=abc', '')),
192            ('://www.python.org/#abc',
193             ('www.python.org', '/', '', '', 'abc'),
194             ('www.python.org', '/', '', 'abc')),
195            ('://a/b/c/d;p?q#f',
196             ('a', '/b/c/d', 'p', 'q', 'f'),
197             ('a', '/b/c/d;p', 'q', 'f')),
198            ]
199        def _encode(t):
200            return (t[0].encode('ascii'),
201                    tuple(x.encode('ascii') for x in t[1]),
202                    tuple(x.encode('ascii') for x in t[2]))
203        bytes_cases = [_encode(x) for x in str_cases]
204        str_schemes = ('http', 'https')
205        bytes_schemes = (b'http', b'https')
206        str_tests = str_schemes, str_cases
207        bytes_tests = bytes_schemes, bytes_cases
208        for schemes, test_cases in (str_tests, bytes_tests):
209            for scheme in schemes:
210                for url, parsed, split in test_cases:
211                    url = scheme + url
212                    parsed = (scheme,) + parsed
213                    split = (scheme,) + split
214                    self.checkRoundtrips(url, parsed, split)
215
216    def checkJoin(self, base, relurl, expected):
217        str_components = (base, relurl, expected)
218        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
219        bytes_components = baseb, relurlb, expectedb = [
220                            x.encode('ascii') for x in str_components]
221        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
222
223    def test_unparse_parse(self):
224        str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
225        bytes_cases = [x.encode('ascii') for x in str_cases]
226        for u in str_cases + bytes_cases:
227            self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
228            self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
229
230    def test_RFC1808(self):
231        # "normal" cases from RFC 1808:
232        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
233        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
234        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
235        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
236        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
237        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
238        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
239        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
240        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
241        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
242        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
243        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
244        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
245        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
246        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
247        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
248        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
249        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
250        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
251        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
252        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
253        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
254
255        # "abnormal" cases from RFC 1808:
256        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
257        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
258        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
259        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
260        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
261        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
262        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
263        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
264        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
265
266        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
267        # so we'll not actually run these tests (which expect 1808 behavior).
268        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
269        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
270
271        # XXX: The following tests are no longer compatible with RFC3986
272        # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
273        # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
274        # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
275        # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
276
277
278    def test_RFC2368(self):
279        # Issue 11467: path that starts with a number is not parsed correctly
280        self.assertEqual(urllib.parse.urlparse('mailto:[email protected]'),
281                ('mailto', '', '[email protected]', '', '', ''))
282
283    def test_RFC2396(self):
284        # cases from RFC 2396
285
286        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
287        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
288        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
289        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
290        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
291        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
292        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
293        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
294        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
295        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
296        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
297        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
298        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
299        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
300        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
301        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
302        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
303        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
304        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
305        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
306        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
307        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
308        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
309        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
310        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
311        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
312        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
313        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
314        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
315        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
316        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
317        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
318        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
319        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
320        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
321
322        # XXX: The following tests are no longer compatible with RFC3986
323        # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
324        # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
325        # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
326        # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
327
328    def test_RFC3986(self):
329        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
330        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
331        self.checkJoin(RFC3986_BASE, 'g:h','g:h')
332        self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
333        self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
334        self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
335        self.checkJoin(RFC3986_BASE, '/g','http://a/g')
336        self.checkJoin(RFC3986_BASE, '//g','http://g')
337        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
338        self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
339        self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
340        self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
341        self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
342        self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
343        self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
344        self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
345        self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
346        self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
347        self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
348        self.checkJoin(RFC3986_BASE, '..','http://a/b/')
349        self.checkJoin(RFC3986_BASE, '../','http://a/b/')
350        self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
351        self.checkJoin(RFC3986_BASE, '../..','http://a/')
352        self.checkJoin(RFC3986_BASE, '../../','http://a/')
353        self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
354        self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
355
356        # Abnormal Examples
357
358        # The 'abnormal scenarios' are incompatible with RFC2986 parsing
359        # Tests are here for reference.
360
361        self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
362        self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
363        self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
364        self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
365        self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
366        self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
367        self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
368        self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
369        self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
370        self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
371        self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
372        self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
373        self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
374        self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
375        self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
376        self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
377        self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
378        self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
379        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
380        self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
381
382        # Test for issue9721
383        self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
384
385    def test_urljoins(self):
386        self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
387        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
388        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
389        self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
390        self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
391        self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
392        self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
393        self.checkJoin(SIMPLE_BASE, '//g','http://g')
394        self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
395        self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
396        self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
397        self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
398        self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
399        self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
400        self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
401        self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
402        self.checkJoin(SIMPLE_BASE, '../..','http://a/')
403        self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
404        self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
405        self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
406        self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
407        self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
408        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
409        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
410        self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
411        self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
412        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
413        self.checkJoin('http:///', '..','http:///')
414        self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
415        self.checkJoin('', 'http://a/./g', 'http://a/./g')
416        self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
417        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
418        self.checkJoin('ws://a/b','g','ws://a/g')
419        self.checkJoin('wss://a/b','g','wss://a/g')
420
421        # XXX: The following tests are no longer compatible with RFC3986
422        # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
423        # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
424
425        # test for issue22118 duplicate slashes
426        self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
427
428        # Non-RFC-defined tests, covering variations of base and trailing
429        # slashes
430        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
431        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
432        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
433        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
434        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
435        self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
436
437        # issue 23703: don't duplicate filename
438        self.checkJoin('a', 'b', 'b')
439
440    def test_RFC2732(self):
441        str_cases = [
442            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
443            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
444            ('http://[::1]:5432/foo/', '::1', 5432),
445            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
446            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
447            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
448             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
449            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
450            ('http://[::ffff:12.34.56.78]:5432/foo/',
451             '::ffff:12.34.56.78', 5432),
452            ('http://Test.python.org/foo/', 'test.python.org', None),
453            ('http://12.34.56.78/foo/', '12.34.56.78', None),
454            ('http://[::1]/foo/', '::1', None),
455            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
456            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
457            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
458             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
459            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
460            ('http://[::ffff:12.34.56.78]/foo/',
461             '::ffff:12.34.56.78', None),
462            ('http://Test.python.org:/foo/', 'test.python.org', None),
463            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
464            ('http://[::1]:/foo/', '::1', None),
465            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
466            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
467            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
468             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
469            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
470            ('http://[::ffff:12.34.56.78]:/foo/',
471             '::ffff:12.34.56.78', None),
472            ]
473        def _encode(t):
474            return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
475        bytes_cases = [_encode(x) for x in str_cases]
476        for url, hostname, port in str_cases + bytes_cases:
477            urlparsed = urllib.parse.urlparse(url)
478            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
479
480        str_cases = [
481                'http://::12.34.56.78]/',
482                'http://[::1/foo/',
483                'ftp://[::1/foo/bad]/bad',
484                'http://[::1/foo/bad]/bad',
485                'http://[::ffff:12.34.56.78']
486        bytes_cases = [x.encode('ascii') for x in str_cases]
487        for invalid_url in str_cases + bytes_cases:
488            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
489
490    def test_urldefrag(self):
491        str_cases = [
492            ('http://python.org#frag', 'http://python.org', 'frag'),
493            ('http://python.org', 'http://python.org', ''),
494            ('http://python.org/#frag', 'http://python.org/', 'frag'),
495            ('http://python.org/', 'http://python.org/', ''),
496            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
497            ('http://python.org/?q', 'http://python.org/?q', ''),
498            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
499            ('http://python.org/p?q', 'http://python.org/p?q', ''),
500            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
501            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
502        ]
503        def _encode(t):
504            return type(t)(x.encode('ascii') for x in t)
505        bytes_cases = [_encode(x) for x in str_cases]
506        for url, defrag, frag in str_cases + bytes_cases:
507            result = urllib.parse.urldefrag(url)
508            self.assertEqual(result.geturl(), url)
509            self.assertEqual(result, (defrag, frag))
510            self.assertEqual(result.url, defrag)
511            self.assertEqual(result.fragment, frag)
512
513    def test_urlsplit_scoped_IPv6(self):
514        p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
515        self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
516        self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')
517
518        p = urllib.parse.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
519        self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
520        self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
521
522    def test_urlsplit_attributes(self):
523        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
524        p = urllib.parse.urlsplit(url)
525        self.assertEqual(p.scheme, "http")
526        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
527        self.assertEqual(p.path, "/doc/")
528        self.assertEqual(p.query, "")
529        self.assertEqual(p.fragment, "frag")
530        self.assertEqual(p.username, None)
531        self.assertEqual(p.password, None)
532        self.assertEqual(p.hostname, "www.python.org")
533        self.assertEqual(p.port, None)
534        # geturl() won't return exactly the original URL in this case
535        # since the scheme is always case-normalized
536        # We handle this by ignoring the first 4 characters of the URL
537        self.assertEqual(p.geturl()[4:], url[4:])
538
539        url = "http://User:[email protected]:080/doc/?query=yes#frag"
540        p = urllib.parse.urlsplit(url)
541        self.assertEqual(p.scheme, "http")
542        self.assertEqual(p.netloc, "User:[email protected]:080")
543        self.assertEqual(p.path, "/doc/")
544        self.assertEqual(p.query, "query=yes")
545        self.assertEqual(p.fragment, "frag")
546        self.assertEqual(p.username, "User")
547        self.assertEqual(p.password, "Pass")
548        self.assertEqual(p.hostname, "www.python.org")
549        self.assertEqual(p.port, 80)
550        self.assertEqual(p.geturl(), url)
551
552        # Addressing issue1698, which suggests Username can contain
553        # "@" characters.  Though not RFC compliant, many ftp sites allow
554        # and request email addresses as usernames.
555
556        url = "http://[email protected]:[email protected]:080/doc/?query=yes#frag"
557        p = urllib.parse.urlsplit(url)
558        self.assertEqual(p.scheme, "http")
559        self.assertEqual(p.netloc, "[email protected]:[email protected]:080")
560        self.assertEqual(p.path, "/doc/")
561        self.assertEqual(p.query, "query=yes")
562        self.assertEqual(p.fragment, "frag")
563        self.assertEqual(p.username, "[email protected]")
564        self.assertEqual(p.password, "Pass")
565        self.assertEqual(p.hostname, "www.python.org")
566        self.assertEqual(p.port, 80)
567        self.assertEqual(p.geturl(), url)
568
569        # And check them all again, only with bytes this time
570        url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
571        p = urllib.parse.urlsplit(url)
572        self.assertEqual(p.scheme, b"http")
573        self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
574        self.assertEqual(p.path, b"/doc/")
575        self.assertEqual(p.query, b"")
576        self.assertEqual(p.fragment, b"frag")
577        self.assertEqual(p.username, None)
578        self.assertEqual(p.password, None)
579        self.assertEqual(p.hostname, b"www.python.org")
580        self.assertEqual(p.port, None)
581        self.assertEqual(p.geturl()[4:], url[4:])
582
583        url = b"http://User:[email protected]:080/doc/?query=yes#frag"
584        p = urllib.parse.urlsplit(url)
585        self.assertEqual(p.scheme, b"http")
586        self.assertEqual(p.netloc, b"User:[email protected]:080")
587        self.assertEqual(p.path, b"/doc/")
588        self.assertEqual(p.query, b"query=yes")
589        self.assertEqual(p.fragment, b"frag")
590        self.assertEqual(p.username, b"User")
591        self.assertEqual(p.password, b"Pass")
592        self.assertEqual(p.hostname, b"www.python.org")
593        self.assertEqual(p.port, 80)
594        self.assertEqual(p.geturl(), url)
595
596        url = b"http://[email protected]:[email protected]:080/doc/?query=yes#frag"
597        p = urllib.parse.urlsplit(url)
598        self.assertEqual(p.scheme, b"http")
599        self.assertEqual(p.netloc, b"[email protected]:[email protected]:080")
600        self.assertEqual(p.path, b"/doc/")
601        self.assertEqual(p.query, b"query=yes")
602        self.assertEqual(p.fragment, b"frag")
603        self.assertEqual(p.username, b"[email protected]")
604        self.assertEqual(p.password, b"Pass")
605        self.assertEqual(p.hostname, b"www.python.org")
606        self.assertEqual(p.port, 80)
607        self.assertEqual(p.geturl(), url)
608
609        # Verify an illegal port raises ValueError
610        url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
611        p = urllib.parse.urlsplit(url)
612        with self.assertRaisesRegex(ValueError, "out of range"):
613            p.port
614
615    def test_urlsplit_remove_unsafe_bytes(self):
616        # Remove ASCII tabs and newlines from input
617        url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
618        p = urllib.parse.urlsplit(url)
619        self.assertEqual(p.scheme, "http")
620        self.assertEqual(p.netloc, "www.python.org")
621        self.assertEqual(p.path, "/javascript:alert('msg')/")
622        self.assertEqual(p.query, "query=something")
623        self.assertEqual(p.fragment, "fragment")
624        self.assertEqual(p.username, None)
625        self.assertEqual(p.password, None)
626        self.assertEqual(p.hostname, "www.python.org")
627        self.assertEqual(p.port, None)
628        self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
629
630        # Remove ASCII tabs and newlines from input as bytes.
631        url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
632        p = urllib.parse.urlsplit(url)
633        self.assertEqual(p.scheme, b"http")
634        self.assertEqual(p.netloc, b"www.python.org")
635        self.assertEqual(p.path, b"/javascript:alert('msg')/")
636        self.assertEqual(p.query, b"query=something")
637        self.assertEqual(p.fragment, b"fragment")
638        self.assertEqual(p.username, None)
639        self.assertEqual(p.password, None)
640        self.assertEqual(p.hostname, b"www.python.org")
641        self.assertEqual(p.port, None)
642        self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
643
644        # with scheme as cache-key
645        url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
646        scheme = "ht\ntp"
647        for _ in range(2):
648            p = urllib.parse.urlsplit(url, scheme=scheme)
649            self.assertEqual(p.scheme, "http")
650            self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
651
652    def test_urlsplit_strip_url(self):
653        noise = bytes(range(0, 0x20 + 1))
654        base_url = "http://User:[email protected]:080/doc/?query=yes#frag"
655
656        url = noise.decode("utf-8") + base_url
657        p = urllib.parse.urlsplit(url)
658        self.assertEqual(p.scheme, "http")
659        self.assertEqual(p.netloc, "User:[email protected]:080")
660        self.assertEqual(p.path, "/doc/")
661        self.assertEqual(p.query, "query=yes")
662        self.assertEqual(p.fragment, "frag")
663        self.assertEqual(p.username, "User")
664        self.assertEqual(p.password, "Pass")
665        self.assertEqual(p.hostname, "www.python.org")
666        self.assertEqual(p.port, 80)
667        self.assertEqual(p.geturl(), base_url)
668
669        url = noise + base_url.encode("utf-8")
670        p = urllib.parse.urlsplit(url)
671        self.assertEqual(p.scheme, b"http")
672        self.assertEqual(p.netloc, b"User:[email protected]:080")
673        self.assertEqual(p.path, b"/doc/")
674        self.assertEqual(p.query, b"query=yes")
675        self.assertEqual(p.fragment, b"frag")
676        self.assertEqual(p.username, b"User")
677        self.assertEqual(p.password, b"Pass")
678        self.assertEqual(p.hostname, b"www.python.org")
679        self.assertEqual(p.port, 80)
680        self.assertEqual(p.geturl(), base_url.encode("utf-8"))
681
682        # Test that trailing space is preserved as some applications rely on
683        # this within query strings.
684        query_spaces_url = "https://www.python.org:88/doc/?query=    "
685        p = urllib.parse.urlsplit(noise.decode("utf-8") + query_spaces_url)
686        self.assertEqual(p.scheme, "https")
687        self.assertEqual(p.netloc, "www.python.org:88")
688        self.assertEqual(p.path, "/doc/")
689        self.assertEqual(p.query, "query=    ")
690        self.assertEqual(p.port, 88)
691        self.assertEqual(p.geturl(), query_spaces_url)
692
693        p = urllib.parse.urlsplit("www.pypi.org ")
694        # That "hostname" gets considered a "path" due to the
695        # trailing space and our existing logic...  YUCK...
696        # and re-assembles via geturl aka unurlsplit into the original.
697        # django.core.validators.URLValidator (at least through v3.2) relies on
698        # this, for better or worse, to catch it in a ValidationError via its
699        # regular expressions.
700        # Here we test the basic round trip concept of such a trailing space.
701        self.assertEqual(urllib.parse.urlunsplit(p), "www.pypi.org ")
702
703        # with scheme as cache-key
704        url = "//www.python.org/"
705        scheme = noise.decode("utf-8") + "https" + noise.decode("utf-8")
706        for _ in range(2):
707            p = urllib.parse.urlsplit(url, scheme=scheme)
708            self.assertEqual(p.scheme, "https")
709            self.assertEqual(p.geturl(), "https://www.python.org/")
710
711    def test_attributes_bad_port(self):
712        """Check handling of invalid ports."""
713        for bytes in (False, True):
714            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
715                for port in ("foo", "1.5", "-1", "0x10", "-0", "1_1", " 1", "1 ", "६"):
716                    with self.subTest(bytes=bytes, parse=parse, port=port):
717                        netloc = "www.example.net:" + port
718                        url = "http://" + netloc + "/"
719                        if bytes:
720                            if netloc.isascii() and port.isascii():
721                                netloc = netloc.encode("ascii")
722                                url = url.encode("ascii")
723                            else:
724                                continue
725                        p = parse(url)
726                        self.assertEqual(p.netloc, netloc)
727                        with self.assertRaises(ValueError):
728                            p.port
729
730    def test_attributes_bad_scheme(self):
731        """Check handling of invalid schemes."""
732        for bytes in (False, True):
733            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
734                for scheme in (".", "+", "-", "0", "http&", "६http"):
735                    with self.subTest(bytes=bytes, parse=parse, scheme=scheme):
736                        url = scheme + "://www.example.net"
737                        if bytes:
738                            if url.isascii():
739                                url = url.encode("ascii")
740                            else:
741                                continue
742                        p = parse(url)
743                        if bytes:
744                            self.assertEqual(p.scheme, b"")
745                        else:
746                            self.assertEqual(p.scheme, "")
747
748    def test_attributes_without_netloc(self):
749        # This example is straight from RFC 3261.  It looks like it
750        # should allow the username, hostname, and port to be filled
751        # in, but doesn't.  Since it's a URI and doesn't use the
752        # scheme://netloc syntax, the netloc and related attributes
753        # should be left empty.
754        uri = "sip:[email protected];maddr=239.255.255.1;ttl=15"
755        p = urllib.parse.urlsplit(uri)
756        self.assertEqual(p.netloc, "")
757        self.assertEqual(p.username, None)
758        self.assertEqual(p.password, None)
759        self.assertEqual(p.hostname, None)
760        self.assertEqual(p.port, None)
761        self.assertEqual(p.geturl(), uri)
762
763        p = urllib.parse.urlparse(uri)
764        self.assertEqual(p.netloc, "")
765        self.assertEqual(p.username, None)
766        self.assertEqual(p.password, None)
767        self.assertEqual(p.hostname, None)
768        self.assertEqual(p.port, None)
769        self.assertEqual(p.geturl(), uri)
770
771        # You guessed it, repeating the test with bytes input
772        uri = b"sip:[email protected];maddr=239.255.255.1;ttl=15"
773        p = urllib.parse.urlsplit(uri)
774        self.assertEqual(p.netloc, b"")
775        self.assertEqual(p.username, None)
776        self.assertEqual(p.password, None)
777        self.assertEqual(p.hostname, None)
778        self.assertEqual(p.port, None)
779        self.assertEqual(p.geturl(), uri)
780
781        p = urllib.parse.urlparse(uri)
782        self.assertEqual(p.netloc, b"")
783        self.assertEqual(p.username, None)
784        self.assertEqual(p.password, None)
785        self.assertEqual(p.hostname, None)
786        self.assertEqual(p.port, None)
787        self.assertEqual(p.geturl(), uri)
788
789    def test_noslash(self):
790        # Issue 1637: http://foo.com?query is legal
791        self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
792                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
793        self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
794                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
795
796    def test_withoutscheme(self):
797        # Test urlparse without scheme
798        # Issue 754016: urlparse goes wrong with IP:port without scheme
799        # RFC 1808 specifies that netloc should start with //, urlparse expects
800        # the same, otherwise it classifies the portion of url as path.
801        self.assertEqual(urllib.parse.urlparse("path"),
802                ('','','path','','',''))
803        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
804                ('','www.python.org:80','','','',''))
805        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
806                ('http','www.python.org:80','','','',''))
807        # Repeat for bytes input
808        self.assertEqual(urllib.parse.urlparse(b"path"),
809                (b'',b'',b'path',b'',b'',b''))
810        self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
811                (b'',b'www.python.org:80',b'',b'',b'',b''))
812        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
813                (b'http',b'www.python.org:80',b'',b'',b'',b''))
814
815    def test_portseparator(self):
816        # Issue 754016 makes changes for port separator ':' from scheme separator
817        self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
818        self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
819        self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
820        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
821        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
822        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
823                ('http','www.python.org:80','','','',''))
824        # As usual, need to check bytes input as well
825        self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
826        self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
827        self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
828        self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
829        self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
830        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
831                (b'http',b'www.python.org:80',b'',b'',b'',b''))
832
833    def test_usingsys(self):
834        # Issue 3314: sys module is used in the error
835        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
836
837    def test_anyscheme(self):
838        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
839        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
840                         ('s3', 'foo.com', '/stuff', '', '', ''))
841        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
842                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
843        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
844                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
845        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
846                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
847
848        # And for bytes...
849        self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
850                         (b's3', b'foo.com', b'/stuff', b'', b'', b''))
851        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
852                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
853        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
854                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
855        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
856                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
857
858    def test_default_scheme(self):
859        # Exercise the scheme parameter of urlparse() and urlsplit()
860        for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
861            with self.subTest(function=func):
862                result = func("http://example.net/", "ftp")
863                self.assertEqual(result.scheme, "http")
864                result = func(b"http://example.net/", b"ftp")
865                self.assertEqual(result.scheme, b"http")
866                self.assertEqual(func("path", "ftp").scheme, "ftp")
867                self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
868                self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
869                self.assertEqual(func("path").scheme, "")
870                self.assertEqual(func(b"path").scheme, b"")
871                self.assertEqual(func(b"path", "").scheme, b"")
872
873    def test_parse_fragments(self):
874        # Exercise the allow_fragments parameter of urlparse() and urlsplit()
875        tests = (
876            ("http:#frag", "path", "frag"),
877            ("//example.net#frag", "path", "frag"),
878            ("index.html#frag", "path", "frag"),
879            (";a=b#frag", "params", "frag"),
880            ("?a=b#frag", "query", "frag"),
881            ("#frag", "path", "frag"),
882            ("abc#@frag", "path", "@frag"),
883            ("//abc#@frag", "path", "@frag"),
884            ("//abc:80#@frag", "path", "@frag"),
885            ("//abc#@frag:80", "path", "@frag:80"),
886        )
887        for url, attr, expected_frag in tests:
888            for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
889                if attr == "params" and func is urllib.parse.urlsplit:
890                    attr = "path"
891                with self.subTest(url=url, function=func):
892                    result = func(url, allow_fragments=False)
893                    self.assertEqual(result.fragment, "")
894                    self.assertTrue(
895                            getattr(result, attr).endswith("#" + expected_frag))
896                    self.assertEqual(func(url, "", False).fragment, "")
897
898                    result = func(url, allow_fragments=True)
899                    self.assertEqual(result.fragment, expected_frag)
900                    self.assertFalse(
901                            getattr(result, attr).endswith(expected_frag))
902                    self.assertEqual(func(url, "", True).fragment,
903                                     expected_frag)
904                    self.assertEqual(func(url).fragment, expected_frag)
905
906    def test_mixed_types_rejected(self):
907        # Several functions that process either strings or ASCII encoded bytes
908        # accept multiple arguments. Check they reject mixed type input
909        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
910            urllib.parse.urlparse("www.python.org", b"http")
911        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
912            urllib.parse.urlparse(b"www.python.org", "http")
913        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
914            urllib.parse.urlsplit("www.python.org", b"http")
915        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
916            urllib.parse.urlsplit(b"www.python.org", "http")
917        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
918            urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
919        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
920            urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
921        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
922            urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
923        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
924            urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
925        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
926            urllib.parse.urljoin("http://python.org", b"http://python.org")
927        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
928            urllib.parse.urljoin(b"http://python.org", "http://python.org")
929
930    def _check_result_type(self, str_type):
931        num_args = len(str_type._fields)
932        bytes_type = str_type._encoded_counterpart
933        self.assertIs(bytes_type._decoded_counterpart, str_type)
934        str_args = ('',) * num_args
935        bytes_args = (b'',) * num_args
936        str_result = str_type(*str_args)
937        bytes_result = bytes_type(*bytes_args)
938        encoding = 'ascii'
939        errors = 'strict'
940        self.assertEqual(str_result, str_args)
941        self.assertEqual(bytes_result.decode(), str_args)
942        self.assertEqual(bytes_result.decode(), str_result)
943        self.assertEqual(bytes_result.decode(encoding), str_args)
944        self.assertEqual(bytes_result.decode(encoding), str_result)
945        self.assertEqual(bytes_result.decode(encoding, errors), str_args)
946        self.assertEqual(bytes_result.decode(encoding, errors), str_result)
947        self.assertEqual(bytes_result, bytes_args)
948        self.assertEqual(str_result.encode(), bytes_args)
949        self.assertEqual(str_result.encode(), bytes_result)
950        self.assertEqual(str_result.encode(encoding), bytes_args)
951        self.assertEqual(str_result.encode(encoding), bytes_result)
952        self.assertEqual(str_result.encode(encoding, errors), bytes_args)
953        self.assertEqual(str_result.encode(encoding, errors), bytes_result)
954
955    def test_result_pairs(self):
956        # Check encoding and decoding between result pairs
957        result_types = [
958          urllib.parse.DefragResult,
959          urllib.parse.SplitResult,
960          urllib.parse.ParseResult,
961        ]
962        for result_type in result_types:
963            self._check_result_type(result_type)
964
965    def test_parse_qs_encoding(self):
966        result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
967        self.assertEqual(result, {'key': ['\u0141\xE9']})
968        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
969        self.assertEqual(result, {'key': ['\u0141\xE9']})
970        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
971        self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
972        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
973        self.assertEqual(result, {'key': ['\u0141\ufffd-']})
974        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
975                                                          errors="ignore")
976        self.assertEqual(result, {'key': ['\u0141-']})
977
978    def test_parse_qsl_encoding(self):
979        result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
980        self.assertEqual(result, [('key', '\u0141\xE9')])
981        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
982        self.assertEqual(result, [('key', '\u0141\xE9')])
983        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
984        self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
985        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
986        self.assertEqual(result, [('key', '\u0141\ufffd-')])
987        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
988                                                          errors="ignore")
989        self.assertEqual(result, [('key', '\u0141-')])
990
991    def test_parse_qsl_max_num_fields(self):
992        with self.assertRaises(ValueError):
993            urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
994        urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
995
996    def test_parse_qs_separator(self):
997        parse_qs_semicolon_cases = [
998            (";", {}),
999            (";;", {}),
1000            (";a=b", {'a': ['b']}),
1001            ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
1002            ("a=1;a=2", {'a': ['1', '2']}),
1003            (b";", {}),
1004            (b";;", {}),
1005            (b";a=b", {b'a': [b'b']}),
1006            (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
1007            (b"a=1;a=2", {b'a': [b'1', b'2']}),
1008        ]
1009        for orig, expect in parse_qs_semicolon_cases:
1010            with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1011                result = urllib.parse.parse_qs(orig, separator=';')
1012                self.assertEqual(result, expect, "Error parsing %r" % orig)
1013                result_bytes = urllib.parse.parse_qs(orig, separator=b';')
1014                self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1015
1016
1017    def test_parse_qsl_separator(self):
1018        parse_qsl_semicolon_cases = [
1019            (";", []),
1020            (";;", []),
1021            (";a=b", [('a', 'b')]),
1022            ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
1023            ("a=1;a=2", [('a', '1'), ('a', '2')]),
1024            (b";", []),
1025            (b";;", []),
1026            (b";a=b", [(b'a', b'b')]),
1027            (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
1028            (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
1029        ]
1030        for orig, expect in parse_qsl_semicolon_cases:
1031            with self.subTest(f"Original: {orig!r}, Expected: {expect!r}"):
1032                result = urllib.parse.parse_qsl(orig, separator=';')
1033                self.assertEqual(result, expect, "Error parsing %r" % orig)
1034                result_bytes = urllib.parse.parse_qsl(orig, separator=b';')
1035                self.assertEqual(result_bytes, expect, "Error parsing %r" % orig)
1036
1037
1038    def test_urlencode_sequences(self):
1039        # Other tests incidentally urlencode things; test non-covered cases:
1040        # Sequence and object values.
1041        result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
1042        # we cannot rely on ordering here
1043        assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
1044
1045        class Trivial:
1046            def __str__(self):
1047                return 'trivial'
1048
1049        result = urllib.parse.urlencode({'a': Trivial()}, True)
1050        self.assertEqual(result, 'a=trivial')
1051
1052    def test_urlencode_quote_via(self):
1053        result = urllib.parse.urlencode({'a': 'some value'})
1054        self.assertEqual(result, "a=some+value")
1055        result = urllib.parse.urlencode({'a': 'some value/another'},
1056                                        quote_via=urllib.parse.quote)
1057        self.assertEqual(result, "a=some%20value%2Fanother")
1058        result = urllib.parse.urlencode({'a': 'some value/another'},
1059                                        safe='/', quote_via=urllib.parse.quote)
1060        self.assertEqual(result, "a=some%20value/another")
1061
1062    def test_quote_from_bytes(self):
1063        self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
1064        result = urllib.parse.quote_from_bytes(b'archaeological arcana')
1065        self.assertEqual(result, 'archaeological%20arcana')
1066        result = urllib.parse.quote_from_bytes(b'')
1067        self.assertEqual(result, '')
1068
1069    def test_unquote_to_bytes(self):
1070        result = urllib.parse.unquote_to_bytes('abc%20def')
1071        self.assertEqual(result, b'abc def')
1072        result = urllib.parse.unquote_to_bytes('')
1073        self.assertEqual(result, b'')
1074
1075    def test_quote_errors(self):
1076        self.assertRaises(TypeError, urllib.parse.quote, b'foo',
1077                          encoding='utf-8')
1078        self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
1079
1080    def test_issue14072(self):
1081        p1 = urllib.parse.urlsplit('tel:+31-641044153')
1082        self.assertEqual(p1.scheme, 'tel')
1083        self.assertEqual(p1.path, '+31-641044153')
1084        p2 = urllib.parse.urlsplit('tel:+31641044153')
1085        self.assertEqual(p2.scheme, 'tel')
1086        self.assertEqual(p2.path, '+31641044153')
1087        # assert the behavior for urlparse
1088        p1 = urllib.parse.urlparse('tel:+31-641044153')
1089        self.assertEqual(p1.scheme, 'tel')
1090        self.assertEqual(p1.path, '+31-641044153')
1091        p2 = urllib.parse.urlparse('tel:+31641044153')
1092        self.assertEqual(p2.scheme, 'tel')
1093        self.assertEqual(p2.path, '+31641044153')
1094
1095    def test_invalid_bracketed_hosts(self):
1096        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[192.0.2.146]/Path?Query')
1097        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[important.com:8000]/Path?Query')
1098        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123r.IP]/Path?Query')
1099        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v12ae]/Path?Query')
1100        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v.IP]/Path?Query')
1101        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v123.]/Path?Query')
1102        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[v]/Path?Query')
1103        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
1104        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
1105        self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
1106
1107    def test_splitting_bracketed_hosts(self):
1108        p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
1109        self.assertEqual(p1.hostname, 'v6a.ip')
1110        self.assertEqual(p1.username, 'user')
1111        self.assertEqual(p1.path, '/path')
1112        p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query')
1113        self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test')
1114        self.assertEqual(p2.username, 'user')
1115        self.assertEqual(p2.path, '/path')
1116        p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query')
1117        self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test')
1118        self.assertEqual(p3.username, 'user')
1119        self.assertEqual(p3.path, '/path')
1120
1121    def test_port_casting_failure_message(self):
1122        message = "Port could not be cast to integer value as 'oracle'"
1123        p1 = urllib.parse.urlparse('http://Server=sde; Service=sde:oracle')
1124        with self.assertRaisesRegex(ValueError, message):
1125            p1.port
1126
1127        p2 = urllib.parse.urlsplit('http://Server=sde; Service=sde:oracle')
1128        with self.assertRaisesRegex(ValueError, message):
1129            p2.port
1130
1131    def test_telurl_params(self):
1132        p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
1133        self.assertEqual(p1.scheme, 'tel')
1134        self.assertEqual(p1.path, '123-4')
1135        self.assertEqual(p1.params, 'phone-context=+1-650-516')
1136
1137        p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
1138        self.assertEqual(p1.scheme, 'tel')
1139        self.assertEqual(p1.path, '+1-201-555-0123')
1140        self.assertEqual(p1.params, '')
1141
1142        p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
1143        self.assertEqual(p1.scheme, 'tel')
1144        self.assertEqual(p1.path, '7042')
1145        self.assertEqual(p1.params, 'phone-context=example.com')
1146
1147        p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
1148        self.assertEqual(p1.scheme, 'tel')
1149        self.assertEqual(p1.path, '863-1234')
1150        self.assertEqual(p1.params, 'phone-context=+1-914-555')
1151
1152    def test_Quoter_repr(self):
1153        quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE)
1154        self.assertIn('Quoter', repr(quoter))
1155
1156    def test_clear_cache_for_code_coverage(self):
1157        urllib.parse.clear_cache()
1158
1159    def test_urllib_parse_getattr_failure(self):
1160        """Test that urllib.parse.__getattr__() fails correctly."""
1161        with self.assertRaises(AttributeError):
1162            unused = urllib.parse.this_does_not_exist
1163
1164    def test_all(self):
1165        expected = []
1166        undocumented = {
1167            'splitattr', 'splithost', 'splitnport', 'splitpasswd',
1168            'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
1169            'splitvalue',
1170            'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
1171        }
1172        for name in dir(urllib.parse):
1173            if name.startswith('_') or name in undocumented:
1174                continue
1175            object = getattr(urllib.parse, name)
1176            if getattr(object, '__module__', None) == 'urllib.parse':
1177                expected.append(name)
1178        self.assertCountEqual(urllib.parse.__all__, expected)
1179
1180    def test_urlsplit_normalization(self):
1181        # Certain characters should never occur in the netloc,
1182        # including under normalization.
1183        # Ensure that ALL of them are detected and cause an error
1184        illegal_chars = '/:#?@'
1185        hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
1186        denorm_chars = [
1187            c for c in map(chr, range(128, sys.maxunicode))
1188            if unicodedata.decomposition(c)
1189            and (hex_chars & set(unicodedata.decomposition(c).split()))
1190            and c not in illegal_chars
1191        ]
1192        # Sanity check that we found at least one such character
1193        self.assertIn('\u2100', denorm_chars)
1194        self.assertIn('\uFF03', denorm_chars)
1195
1196        # bpo-36742: Verify port separators are ignored when they
1197        # existed prior to decomposition
1198        urllib.parse.urlsplit('http://\u30d5\u309a:80')
1199        with self.assertRaises(ValueError):
1200            urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
1201
1202        for scheme in ["http", "https", "ftp"]:
1203            for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
1204                for c in denorm_chars:
1205                    url = "{}://{}/path".format(scheme, netloc.format(c))
1206                    with self.subTest(url=url, char='{:04X}'.format(ord(c))):
1207                        with self.assertRaises(ValueError):
1208                            urllib.parse.urlsplit(url)
1209
1210class Utility_Tests(unittest.TestCase):
1211    """Testcase to test the various utility functions in the urllib."""
1212    # In Python 2 this test class was in test_urllib.
1213
1214    def test_splittype(self):
1215        splittype = urllib.parse._splittype
1216        self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring'))
1217        self.assertEqual(splittype('opaquestring'), (None, 'opaquestring'))
1218        self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring'))
1219        self.assertEqual(splittype('type:'), ('type', ''))
1220        self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string'))
1221
1222    def test_splithost(self):
1223        splithost = urllib.parse._splithost
1224        self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'),
1225                         ('www.example.org:80', '/foo/bar/baz.html'))
1226        self.assertEqual(splithost('//www.example.org:80'),
1227                         ('www.example.org:80', ''))
1228        self.assertEqual(splithost('/foo/bar/baz.html'),
1229                         (None, '/foo/bar/baz.html'))
1230
1231        # bpo-30500: # starts a fragment.
1232        self.assertEqual(splithost('//127.0.0.1#@host.com'),
1233                         ('127.0.0.1', '/#@host.com'))
1234        self.assertEqual(splithost('//127.0.0.1#@host.com:80'),
1235                         ('127.0.0.1', '/#@host.com:80'))
1236        self.assertEqual(splithost('//127.0.0.1:80#@host.com'),
1237                         ('127.0.0.1:80', '/#@host.com'))
1238
1239        # Empty host is returned as empty string.
1240        self.assertEqual(splithost("///file"),
1241                         ('', '/file'))
1242
1243        # Trailing semicolon, question mark and hash symbol are kept.
1244        self.assertEqual(splithost("//example.net/file;"),
1245                         ('example.net', '/file;'))
1246        self.assertEqual(splithost("//example.net/file?"),
1247                         ('example.net', '/file?'))
1248        self.assertEqual(splithost("//example.net/file#"),
1249                         ('example.net', '/file#'))
1250
1251    def test_splituser(self):
1252        splituser = urllib.parse._splituser
1253        self.assertEqual(splituser('User:[email protected]:080'),
1254                         ('User:Pass', 'www.python.org:080'))
1255        self.assertEqual(splituser('@www.python.org:080'),
1256                         ('', 'www.python.org:080'))
1257        self.assertEqual(splituser('www.python.org:080'),
1258                         (None, 'www.python.org:080'))
1259        self.assertEqual(splituser('User:Pass@'),
1260                         ('User:Pass', ''))
1261        self.assertEqual(splituser('[email protected]:[email protected]:080'),
1262                         ('[email protected]:Pass', 'www.python.org:080'))
1263
1264    def test_splitpasswd(self):
1265        # Some of the password examples are not sensible, but it is added to
1266        # confirming to RFC2617 and addressing issue4675.
1267        splitpasswd = urllib.parse._splitpasswd
1268        self.assertEqual(splitpasswd('user:ab'), ('user', 'ab'))
1269        self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb'))
1270        self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb'))
1271        self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb'))
1272        self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb'))
1273        self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb'))
1274        self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b'))
1275        self.assertEqual(splitpasswd('user:a b'), ('user', 'a b'))
1276        self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab'))
1277        self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b'))
1278        self.assertEqual(splitpasswd('user:'), ('user', ''))
1279        self.assertEqual(splitpasswd('user'), ('user', None))
1280        self.assertEqual(splitpasswd(':ab'), ('', 'ab'))
1281
1282    def test_splitport(self):
1283        splitport = urllib.parse._splitport
1284        self.assertEqual(splitport('parrot:88'), ('parrot', '88'))
1285        self.assertEqual(splitport('parrot'), ('parrot', None))
1286        self.assertEqual(splitport('parrot:'), ('parrot', None))
1287        self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None))
1288        self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None))
1289        self.assertEqual(splitport('[::1]:88'), ('[::1]', '88'))
1290        self.assertEqual(splitport('[::1]'), ('[::1]', None))
1291        self.assertEqual(splitport(':88'), ('', '88'))
1292
1293    def test_splitnport(self):
1294        splitnport = urllib.parse._splitnport
1295        self.assertEqual(splitnport('parrot:88'), ('parrot', 88))
1296        self.assertEqual(splitnport('parrot'), ('parrot', -1))
1297        self.assertEqual(splitnport('parrot', 55), ('parrot', 55))
1298        self.assertEqual(splitnport('parrot:'), ('parrot', -1))
1299        self.assertEqual(splitnport('parrot:', 55), ('parrot', 55))
1300        self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1))
1301        self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55))
1302        self.assertEqual(splitnport('parrot:cheese'), ('parrot', None))
1303        self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None))
1304        self.assertEqual(splitnport('parrot: +1_0 '), ('parrot', None))
1305
1306    def test_splitquery(self):
1307        # Normal cases are exercised by other tests; ensure that we also
1308        # catch cases with no port specified (testcase ensuring coverage)
1309        splitquery = urllib.parse._splitquery
1310        self.assertEqual(splitquery('http://python.org/fake?foo=bar'),
1311                         ('http://python.org/fake', 'foo=bar'))
1312        self.assertEqual(splitquery('http://python.org/fake?foo=bar?'),
1313                         ('http://python.org/fake?foo=bar', ''))
1314        self.assertEqual(splitquery('http://python.org/fake'),
1315                         ('http://python.org/fake', None))
1316        self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar'))
1317
1318    def test_splittag(self):
1319        splittag = urllib.parse._splittag
1320        self.assertEqual(splittag('http://example.com?foo=bar#baz'),
1321                         ('http://example.com?foo=bar', 'baz'))
1322        self.assertEqual(splittag('http://example.com?foo=bar#'),
1323                         ('http://example.com?foo=bar', ''))
1324        self.assertEqual(splittag('#baz'), ('', 'baz'))
1325        self.assertEqual(splittag('http://example.com?foo=bar'),
1326                         ('http://example.com?foo=bar', None))
1327        self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'),
1328                         ('http://example.com?foo=bar#baz', 'boo'))
1329
1330    def test_splitattr(self):
1331        splitattr = urllib.parse._splitattr
1332        self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'),
1333                         ('/path', ['attr1=value1', 'attr2=value2']))
1334        self.assertEqual(splitattr('/path;'), ('/path', ['']))
1335        self.assertEqual(splitattr(';attr1=value1;attr2=value2'),
1336                         ('', ['attr1=value1', 'attr2=value2']))
1337        self.assertEqual(splitattr('/path'), ('/path', []))
1338
1339    def test_splitvalue(self):
1340        # Normal cases are exercised by other tests; test pathological cases
1341        # with no key/value pairs. (testcase ensuring coverage)
1342        splitvalue = urllib.parse._splitvalue
1343        self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar'))
1344        self.assertEqual(splitvalue('foo='), ('foo', ''))
1345        self.assertEqual(splitvalue('=bar'), ('', 'bar'))
1346        self.assertEqual(splitvalue('foobar'), ('foobar', None))
1347        self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz'))
1348
1349    def test_to_bytes(self):
1350        result = urllib.parse._to_bytes('http://www.python.org')
1351        self.assertEqual(result, 'http://www.python.org')
1352        self.assertRaises(UnicodeError, urllib.parse._to_bytes,
1353                          'http://www.python.org/medi\u00e6val')
1354
1355    def test_unwrap(self):
1356        for wrapped_url in ('<URL:scheme://host/path>', '<scheme://host/path>',
1357                            'URL:scheme://host/path', 'scheme://host/path'):
1358            url = urllib.parse.unwrap(wrapped_url)
1359            self.assertEqual(url, 'scheme://host/path')
1360
1361
1362class DeprecationTest(unittest.TestCase):
1363
1364    def test_Quoter_deprecation(self):
1365        with self.assertWarns(DeprecationWarning) as cm:
1366            old_class = urllib.parse.Quoter
1367            self.assertIs(old_class, urllib.parse._Quoter)
1368        self.assertIn('Quoter will be removed', str(cm.warning))
1369
1370    def test_splittype_deprecation(self):
1371        with self.assertWarns(DeprecationWarning) as cm:
1372            urllib.parse.splittype('')
1373        self.assertEqual(str(cm.warning),
1374                         'urllib.parse.splittype() is deprecated as of 3.8, '
1375                         'use urllib.parse.urlparse() instead')
1376
1377    def test_splithost_deprecation(self):
1378        with self.assertWarns(DeprecationWarning) as cm:
1379            urllib.parse.splithost('')
1380        self.assertEqual(str(cm.warning),
1381                         'urllib.parse.splithost() is deprecated as of 3.8, '
1382                         'use urllib.parse.urlparse() instead')
1383
1384    def test_splituser_deprecation(self):
1385        with self.assertWarns(DeprecationWarning) as cm:
1386            urllib.parse.splituser('')
1387        self.assertEqual(str(cm.warning),
1388                         'urllib.parse.splituser() is deprecated as of 3.8, '
1389                         'use urllib.parse.urlparse() instead')
1390
1391    def test_splitpasswd_deprecation(self):
1392        with self.assertWarns(DeprecationWarning) as cm:
1393            urllib.parse.splitpasswd('')
1394        self.assertEqual(str(cm.warning),
1395                         'urllib.parse.splitpasswd() is deprecated as of 3.8, '
1396                         'use urllib.parse.urlparse() instead')
1397
1398    def test_splitport_deprecation(self):
1399        with self.assertWarns(DeprecationWarning) as cm:
1400            urllib.parse.splitport('')
1401        self.assertEqual(str(cm.warning),
1402                         'urllib.parse.splitport() is deprecated as of 3.8, '
1403                         'use urllib.parse.urlparse() instead')
1404
1405    def test_splitnport_deprecation(self):
1406        with self.assertWarns(DeprecationWarning) as cm:
1407            urllib.parse.splitnport('')
1408        self.assertEqual(str(cm.warning),
1409                         'urllib.parse.splitnport() is deprecated as of 3.8, '
1410                         'use urllib.parse.urlparse() instead')
1411
1412    def test_splitquery_deprecation(self):
1413        with self.assertWarns(DeprecationWarning) as cm:
1414            urllib.parse.splitquery('')
1415        self.assertEqual(str(cm.warning),
1416                         'urllib.parse.splitquery() is deprecated as of 3.8, '
1417                         'use urllib.parse.urlparse() instead')
1418
1419    def test_splittag_deprecation(self):
1420        with self.assertWarns(DeprecationWarning) as cm:
1421            urllib.parse.splittag('')
1422        self.assertEqual(str(cm.warning),
1423                         'urllib.parse.splittag() is deprecated as of 3.8, '
1424                         'use urllib.parse.urlparse() instead')
1425
1426    def test_splitattr_deprecation(self):
1427        with self.assertWarns(DeprecationWarning) as cm:
1428            urllib.parse.splitattr('')
1429        self.assertEqual(str(cm.warning),
1430                         'urllib.parse.splitattr() is deprecated as of 3.8, '
1431                         'use urllib.parse.urlparse() instead')
1432
1433    def test_splitvalue_deprecation(self):
1434        with self.assertWarns(DeprecationWarning) as cm:
1435            urllib.parse.splitvalue('')
1436        self.assertEqual(str(cm.warning),
1437                         'urllib.parse.splitvalue() is deprecated as of 3.8, '
1438                         'use urllib.parse.parse_qsl() instead')
1439
1440    def test_to_bytes_deprecation(self):
1441        with self.assertWarns(DeprecationWarning) as cm:
1442            urllib.parse.to_bytes('')
1443        self.assertEqual(str(cm.warning),
1444                         'urllib.parse.to_bytes() is deprecated as of 3.8')
1445
1446
1447if __name__ == "__main__":
1448    unittest.main()
1449