1"""An extensible library for opening URLs using a variety of protocols
2
3The simplest way to use this module is to call the urlopen function,
4which accepts a string containing a URL or a Request object (described
5below).  It opens the URL and returns the results as file-like
6object; the returned object has some extra methods described below.
7
8The OpenerDirector manages a collection of Handler objects that do
9all the actual work.  Each Handler implements a particular protocol or
10option.  The OpenerDirector is a composite object that invokes the
11Handlers needed to open the requested URL.  For example, the
12HTTPHandler performs HTTP GET and POST requests and deals with
13non-error returns.  The HTTPRedirectHandler automatically deals with
14HTTP 301, 302, 303, 307, and 308 redirect errors, and the
15HTTPDigestAuthHandler deals with digest authentication.
16
17urlopen(url, data=None) -- Basic usage is the same as original
18urllib.  pass the url and optionally data to post to an HTTP URL, and
19get a file-like object back.  One difference is that you can also pass
20a Request instance instead of URL.  Raises a URLError (subclass of
21OSError); for HTTP errors, raises an HTTPError, which can also be
22treated as a valid response.
23
24build_opener -- Function that creates a new OpenerDirector instance.
25Will install the default handlers.  Accepts one or more Handlers as
26arguments, either instances or Handler classes that it will
instantiate.  If one of the arguments is a subclass of a default
handler, that argument will be installed instead of the default.
29
30install_opener -- Installs a new opener as the default opener.
31
32objects of interest:
33
34OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35the Handler classes, while dealing with requests and responses.
36
37Request -- An object that encapsulates the state of a request.  The
38state can be as simple as the URL.  It can also include extra HTTP
39headers, e.g. a User-Agent.
40
41BaseHandler --
42
43internals:
44BaseHandler and parent
45_call_chain conventions
46
47Example usage:
48
49import urllib.request
50
51# set up authentication info
52authinfo = urllib.request.HTTPBasicAuthHandler()
53authinfo.add_password(realm='PDQ Application',
54                      uri='https://mahler:8092/site-updates.py',
55                      user='klem',
56                      passwd='geheim$parole')
57
58proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
59
60# build a new opener that adds authentication and caching FTP handlers
61opener = urllib.request.build_opener(proxy_support, authinfo,
62                                     urllib.request.CacheFTPHandler)
63
64# install it
65urllib.request.install_opener(opener)
66
67f = urllib.request.urlopen('https://www.python.org/')
68"""
69
70# XXX issues:
71# If an authentication error handler that tries to perform
72# authentication for some reason but fails, how should the error be
73# signalled?  The client needs to know the HTTP error code.  But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
# pass that information along to the client, too.
77# ftp errors aren't handled cleanly
78# check digest against correct (i.e. non-apache) implementation
79
80# Possible extensions:
81# complex proxies  XXX not sure what exactly was meant by this
82# abstract factory for opener
83
84import base64
85import bisect
86import email
87import hashlib
88import http.client
89import io
90import os
91import posixpath
92import re
93import socket
94import string
95import sys
96import time
97import tempfile
98import contextlib
99import warnings
100
101
102from urllib.error import URLError, HTTPError, ContentTooShortError
103from urllib.parse import (
104    urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105    _splittype, _splithost, _splitport, _splituser, _splitpasswd,
106    _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
107    unquote_to_bytes, urlunparse)
108from urllib.response import addinfourl, addclosehook
109
110# check for SSL
111try:
112    import ssl
113except ImportError:
114    _have_ssl = False
115else:
116    _have_ssl = True
117
118__all__ = [
119    # Classes
120    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
121    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
122    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
123    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
124    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
125    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
126    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
127    'UnknownHandler', 'HTTPErrorProcessor',
128    # Functions
129    'urlopen', 'install_opener', 'build_opener',
130    'pathname2url', 'url2pathname', 'getproxies',
131    # Legacy interface
132    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
133]
134
135# used in User-Agent header sent
136__version__ = '%d.%d' % sys.version_info[:2]
137
# Lazily-built module-wide default opener; see install_opener().
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used). This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options. See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests. cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files. More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.


    This function always returns an object which can work as a
    context manager and has the properties url, headers, and status.
    See urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified. In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # 'warnings' is already imported at module level, so the previous
        # redundant function-local import has been dropped.
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # send ALPN extension to indicate HTTP/1.1 protocol
        context.set_alpn_protocols(['http/1.1'])
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        # A custom SSL context needs its own opener; the global default
        # opener is deliberately left untouched.
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: build and cache the default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
217
def install_opener(opener):
    """Install *opener* as the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
221
# Paths of anonymous download targets created below; urlcleanup() removes them.
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # Anonymous target: delete=False keeps the file after close so
            # the caller can read it; remember it for urlcleanup().
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            # NOTE(review): the lowercase membership test and the
            # Title-Case read both assume the headers mapping is
            # case-insensitive (email.message-style) — confirm for
            # non-HTTP handlers.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial callback (block 0) fires before any data is read.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # A short read relative to Content-Length is reported as an error;
    # `result` is attached so the caller can inspect the partial file.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
285
def urlcleanup():
    """Remove temporary files from earlier urlretrieve() calls and drop
    the cached default opener."""
    global _opener
    # Empty the shared list in place so other references to it stay valid.
    while _url_tempfiles:
        path = _url_tempfiles.pop()
        try:
            os.unlink(path)
        except OSError:
            # Best effort: the file may already be gone.
            pass
    if _opener:
        _opener = None
298
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    # Prefer the netloc from the URL; fall back to the Host header for
    # URLs without one (e.g. relative forms).
    host = urlparse(request.full_url)[1]
    if not host:
        host = request.get_header("Host", "")

    # Strip a single trailing :port before lowercasing.
    return _cut_port_re.sub("", host, 1).lower()
316
class Request:
    """Encapsulate the state of a single URL request.

    At minimum this is the URL itself; it may also carry request data
    (making the default method POST), extra HTTP headers, redirect
    bookkeeping (origin_req_host/unverifiable) and an explicit method.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # Note: the `headers` default dict is only iterated, never
        # mutated, so the shared mutable default is harmless here.
        # Assigning full_url runs the property setter below, which strips
        # the fragment and parses out self.type/host/selector.
        self.full_url = url
        self.headers = {}
        self.unredirected_hdrs = {}
        # _data must exist before the data property setter compares
        # against it on the next line.
        self._data = None
        self.data = data
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            self.method = method

    @property
    def full_url(self):
        # Re-attach the fragment that the setter split off.
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # Deleting routes through the setter so the stale Content-length
        # header is removed too.
        self.data = None

    def _parse(self):
        # Split self._full_url into type (scheme), host and selector (the
        # path portion that goes into the request line).
        self.type, rest = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        # An explicit method (set in __init__) wins; otherwise the
        # presence of data implies POST.
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        # For https the first call records the real host for tunnelling
        # (CONNECT) and leaves the selector alone; otherwise the request
        # is rewritten to be sent through the proxy.
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector to the full URL.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        # Keys are stored capitalized; later lookups must use the same form.
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        # Exact-match lookup against the capitalized stored keys.
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        # Regular headers take precedence over unredirected ones.
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        # Remove from both maps; missing keys are ignored.
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # Merged view in which regular headers override unredirected ones.
        hdrs = {**self.unredirected_hdrs, **self.headers}
        return list(hdrs.items())
425
class OpenerDirector:
    """Manage a chain of BaseHandler instances and use them to open URLs.

    Handlers register themselves via add_handler(); open() then routes a
    request through the registered request pre-processors, protocol
    openers and response post-processors.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers: each dict maps a protocol name
        # (or, for errors, a protocol then status code) to the ordered
        # list of handlers that service it
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler*, indexed by every <protocol>_open,
        <protocol>_request, <protocol>_response and
        <protocol>_error_<code> method it defines."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split e.g. "http_error_302" into protocol ("http") and
            # condition ("error_302").
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # Error handlers are keyed by status code (as int when
                # numeric, e.g. 302) under a per-protocol sub-dict.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each list sorted by handler_order (BaseHandler.__lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return the
        response produced by the handler chain."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Dispatch to default_open, then <protocol>_open, then
        unknown_open, returning the first non-None result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Run the error-handler chain for *proto*; for http(s) the
        numeric status code in args[2] selects the handler method."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        # No code-specific handler produced a result: fall back to the
        # generic http_error_default chain.
        if http_err:
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
564
565# XXX probably also want an abstract factory that knows when it makes
566# sense to skip a superclass in favor of a subclass and when it might
567# make sense to include both
568
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _overridden(default):
        # A default class is displaced when the caller supplied the class
        # itself, a subclass of it, or an instance of either.
        for supplied in handlers:
            if isinstance(supplied, type):
                if issubclass(supplied, default):
                    return True
            elif isinstance(supplied, default):
                return True
        return False

    # Install the surviving defaults first, in their canonical order.
    for klass in default_classes:
        if not _overridden(klass):
            opener.add_handler(klass())

    # Then the caller's handlers, instantiating bare classes as needed.
    for supplied in handlers:
        opener.add_handler(supplied() if isinstance(supplied, type) else supplied)
    return opener
604
class BaseHandler:
    """Common base for the handler objects managed by OpenerDirector."""

    # Default sort key; lower values are consulted earlier in the chain.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was registered with."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        # Order by handler_order.  Objects without one (custom user
        # classes unaware of handler_order) always compare greater, which
        # preserves the old behavior of inserting them after default
        # handlers.
        try:
            other_order = other.handler_order
        except AttributeError:
            return True
        return self.handler_order < other_order
622
623
class HTTPErrorProcessor(BaseHandler):
    """Route non-2xx HTTP responses through the error-handler chain."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code = response.code
        msg = response.msg
        hdrs = response.info()

        # RFC 2616: only a "2xx" code indicates that the client's request
        # was successfully received, understood, and accepted.  Anything
        # else goes to the error machinery, which may raise HTTPError or
        # substitute a replacement response (e.g. after a redirect).
        if code < 200 or code >= 300:
            response = self.parent.error(
                'http', request, response, code, msg, hdrs)

        return response

    https_response = http_response
640
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: raise HTTPError for any unhandled HTTP error."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # No code-specific handler produced a result, so surface the
        # error response to the caller as an exception.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
644
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303, 307 and 308 redirect responses."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow any of the redirect codes; POST may follow
        # only 301/302/303 (where the redirect is replayed as GET).
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request is issued
        # without the original request body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        """Build the redirected request and re-open it, with loop
        detection via req.redirect_dict."""
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # An authority with an empty path gets a canonical "/" path.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
756
757
758def _parse_proxy(proxy):
759    """Return (scheme, user, password, host/port) given a URL or an authority.
760
761    If a URL is supplied, it must have an authority (host:port) component.
762    According to RFC 3986, having an authority component means the URL must
763    have two slashes after the scheme.
764    """
765    scheme, r_scheme = _splittype(proxy)
766    if not r_scheme.startswith("/"):
767        # authority
768        scheme = None
769        authority = proxy
770    else:
771        # URL
772        if not r_scheme.startswith("//"):
773            raise ValueError("proxy URL with no authority: %r" % proxy)
774        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
775        # and 3.3.), path is empty or starts with '/'
776        if '@' in r_scheme:
777            host_separator = r_scheme.find('@')
778            end = r_scheme.find("/", host_separator)
779        else:
780            end = r_scheme.find("/", 2)
781        if end == -1:
782            end = None
783        authority = r_scheme[2:end]
784    userinfo, hostport = _splituser(authority)
785    if userinfo is not None:
786        user, password = _splitpasswd(userinfo)
787    else:
788        user = password = None
789    return scheme, user, password, hostport
790
class ProxyHandler(BaseHandler):
    """Route requests through the proxies in a {scheme: proxy-url} map."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create a <scheme>_open method on this instance for each scheme
        # in the map.  The default arguments bind the current loop values,
        # so every lambda keeps its own proxy/type (avoids the classic
        # late-binding-closure pitfall).
        for type, url in proxies.items():
            type = type.lower()
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*, or return None to let
        other handlers proceed."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Proxy spec without a scheme: assume the request's scheme.
            proxy_type = orig_type

        # Honor the platform/environment bypass list (e.g. no_proxy).
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Attach Proxy-authorization as HTTP Basic credentials.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
833
class HTTPPasswordMgr:
    """Map (realm, URI prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # realm -> {tuple of reduced URIs: (user, password)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at *uri* (a URI or a sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        realm_map = self.passwd.setdefault(realm, {})
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uris)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) matching *authuri*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(u, target) for u in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # A full URI was given.
            scheme, authority = parts[0], parts[1]
            path = parts[2] or '/'
        else:
            # A bare host or host:port was given.
            scheme, authority, path = None, uri, '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Normalize to an explicit default port so http://h and
            # http://h:80 compare equal.
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced (authority, path) form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = base[1]
        if not prefix.endswith('/'):
            prefix += '/'
        return test[1].startswith(prefix)
896
897
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the catch-all realm None."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first, then retry under the default realm."""
        user, password = super().find_user_password(realm, authuri)
        if user is None:
            # No realm-specific entry; try credentials registered for
            # any realm (realm None).
            return super().find_user_password(None, authuri)
        return user, password
906
907
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also tracks which URIs may receive
    credentials pre-emptively, without waiting for a 401 challenge."""

    def __init__(self, *args, **kwargs):
        # reduced URI -> bool: send credentials without a challenge?
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials, optionally marking *uri* pre-authenticated."""
        self.update_authenticated(uri, is_authenticated)
        # Also register under the default realm for prior-auth requests.
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Record the prior-auth flag for *uri* (a URI or a sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        for default_port in (True, False):
            for u in uris:
                key = self.reduce_uri(u, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the stored prior-auth flag for *authuri*, or None."""
        for default_port in (True, False):
            target = self.reduce_uri(authuri, default_port)
            for uri, flag in self.authenticated.items():
                if self.is_suburi(uri, target):
                    return flag
937
938
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic authentication handlers."""

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)' # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        # *password_mgr* must provide add_password/find_user_password;
        # defaults to a fresh HTTPPasswordMgr.
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        """Yield (scheme, realm) for each challenge found in *header*.

        Accepts multiple challenges per header.  When no realm=
        challenge can be parsed at all, yields a single
        (first-word-of-header, None) pair as a fallback.
        """
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote, realm = mo.groups()
            if quote not in ['"', "'"]:
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Retry *req* with Basic credentials after a 401/407.

        *authreq* names the challenge header ('www-authenticate' or
        'proxy-authenticate'); raises ValueError when only non-Basic
        challenges were offered.
        """
        # host may be an authority (without userinfo) or a URL with an
        # authority
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            # Report the scheme that actually was unsupported, not
            # whichever challenge happened to be parsed last (which
            # could even be a realm-less Basic one).
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (unsupported,))

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with a Basic Authorization header for *realm*.

        Returns None when no credentials are known, or when the exact
        same header was already sent (to avoid retry loops).
        """
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        """Pre-emptively attach credentials when the password manager
        reports the URL as already authenticated (prior auth)."""
        if (not hasattr(self.passwd, 'is_authenticated') or
           not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        """Record request success/failure in the password manager's
        prior-auth map, when it keeps one."""
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
1047
1048
1049
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full URL identifies the protection space for the
        # password-manager lookup.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
1059
1060
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Basic proxy credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority.  Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
1074
1075
# Return n random bytes (used by AbstractDigestAuthHandler.get_cnonce).
_randombytes = os.urandom
1078
1079
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest authentication handlers."""

    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        # *passwd* must provide add_password/find_user_password;
        # defaults to a fresh HTTPPasswordMgr.
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0        # failed attempts since the last reset
        self.nonce_count = 0    # per-nonce request counter (RFC 2617 "nc")
        self.last_nonce = None  # server nonce the counter refers to

    def reset_retry_count(self):
        """Reset the failed-attempt counter (called by the concrete
        handlers after each authentication round)."""
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Retry *req* with Digest credentials after a 401/407.

        Raises HTTPError after more than 5 retries, and ValueError for
        challenge schemes other than Digest or Basic.
        """
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # (a Basic challenge is silently ignored here; the
                # Basic auth handlers may still pick it up)
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with an authorization header built from the
        server challenge string *auth* ("Digest key=value, ...")."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                # Exactly this header was already sent: give up rather
                # than loop.
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        """Return a 16-hex-char client nonce derived from the request
        state and 8 random bytes."""
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest authorization header value (without the
        'Digest ' prefix) for *req* from challenge dict *chal*.

        Returns None when the challenge lacks realm/nonce, the hash
        algorithm is unusable, or no credentials are known; raises
        URLError for a qop other than 'auth'.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        # A1/A2 are the RFC 2617 credential and request digests.
        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per  RFC 2617, when server sends "auth,auth-int", the client could use either `auth`
        #     or `auth-int` to the response back. we use `auth` to send the response back.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in qop.split(','):
            # The "nc" value must count requests made under the same
            # server nonce.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            # A truthy qop implies the 'auth' branch above ran, so
            # ncvalue and cnonce are defined here.
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) helpers for *algorithm* ('MD5' or 'SHA'):
        H hashes a string, KD(secret, data) hashes "secret:data"
        (RFC 2617 notation).  Raises ValueError for other algorithms."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1224
1225
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The URL's authority component identifies the protection space.
        authority = urlparse(req.full_url)[1]
        result = self.http_error_auth_reqed('www-authenticate',
                                            authority, req, headers)
        self.reset_retry_count()
        return result
1242
1243
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 407 responses by retrying with Digest proxy credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxies, the request host identifies the protection space.
        result = self.http_error_auth_reqed('proxy-authenticate',
                                            req.host, req, headers)
        self.reset_retry_count()
        return result
1255
class AbstractHTTPHandler(BaseHandler):
    """Shared request preparation and connection driving for the HTTP
    and HTTPS handlers."""

    def __init__(self, debuglevel=0):
        # Forwarded to the http.client connection object in do_open().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        """Set the debug level forwarded to http.client connections."""
        self._debuglevel = level

    def _get_content_length(self, request):
        # Reuse http.client's logic for deriving a Content-Length from
        # the body and method (returns None when not determinable).
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in default headers (Content-type, Content-length or
        Transfer-encoding, Host, and the opener's addheaders) on
        *request* and return it.

        Raises URLError when the request has no host and TypeError when
        the request data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                            'Content-length', str(content_length))
                else:
                    # Body length unknown (e.g. a file or iterable):
                    # stream it chunked instead.
                    request.add_unredirected_header(
                            'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # For proxied requests, derive the Host header from the
            # selector (which carries the full URL) rather than from
            # request.host.
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers win over normal ones of the same name.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err: # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1372
1373
class HTTPHandler(AbstractHTTPHandler):
    """Open http:// URLs using http.client.HTTPConnection."""

    def http_open(self, req):
        # All the heavy lifting lives in AbstractHTTPHandler.do_open.
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_
1380
# HTTPSHandler is only defined (and exported) when http.client was
# built with SSL support.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Open https:// URLs using http.client.HTTPSConnection."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            # *context* and *check_hostname* are forwarded to
            # http.client.HTTPSConnection in https_open().
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
1397
class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and harvest them from
    responses, using an http.cookiejar.CookieJar."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        # Add Cookie headers from the jar before the request goes out.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Store any cookies the response sets back into the jar.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1415
class UnknownHandler(BaseHandler):
    """Catch-all handler: any scheme nobody else claimed is an error."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
1420
def parse_keqv_list(l):
    """Parse a list of key=value strings into a dict.

    Keys are assumed not to be duplicated.  Surrounding double quotes
    are stripped from values; an empty value is kept as ''.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Slicing (rather than v[0]/v[-1]) tolerates an empty value
        # such as 'key=' without raising IndexError.
        if v[:1] == '"' and v[-1:] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed
1430
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    buf = []
    in_quote = False
    pending_escape = False

    for ch in s:
        if pending_escape:
            # The backslash consumed itself; keep this char verbatim.
            buf.append(ch)
            pending_escape = False
        elif in_quote:
            if ch == '\\':
                pending_escape = True
            else:
                if ch == '"':
                    in_quote = False
                buf.append(ch)
        elif ch == ',':
            # Unquoted comma ends the current element.
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quote = True
            buf.append(ch)

    # Flush the trailing element, if any.
    tail = ''.join(buf)
    if tail:
        items.append(tail)

    return [item.strip() for item in items]
1473
class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a file:// URL, but only when it refers to the local host."""
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
            # NOTE(review): when req.host IS one of the local names,
            # this branch falls through and returns None instead of
            # opening the file -- confirm that is intended.
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None
    def get_names(self):
        """Return (and cache on the class) the IP addresses that count
        as the local host."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                # Name resolution failed: fall back to 'localhost' only.
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for a local file, with Content-type,
        Content-length and Last-modified synthesized from the
        filesystem; raises URLError for non-local hosts or OS errors."""
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            # Only serve the file when no host was given, or the host
            # (with no explicit port) resolves to a local address.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')
1524
def _safe_gethostbyname(host):
    """Resolve *host* to an IP address string, or None on lookup failure."""
    try:
        address = socket.gethostbyname(host)
    except socket.gaierror:
        return None
    return address
1530
class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Retrieve a file or directory listing over FTP.

        Credentials may come from the URL's userinfo.  The transfer
        type defaults to binary ('I') for files and directory listing
        ('D') otherwise, and can be overridden by a ';type=' attribute
        on the URL.
        """
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        path, attrs = _splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            # Path began with '/': drop the empty leading segment.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # 'I'mage (binary) for a file, 'D'irectory listing otherwise.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            raise URLError(exp) from exp

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Create a fresh, non-persistent FTP connection (overridden by
        CacheFTPHandler to reuse connections)."""
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1587
class CacheFTPHandler(FTPHandler):
    """FTP handler keeping a bounded, time-limited cache of connections
    keyed by (user, host, port, path, timeout)."""

    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> ftpwrapper connection
        self.timeout = {}    # key -> absolute expiry time
        self.soonest = 0     # earliest expiry time currently known
        self.delay = 60      # seconds a cached connection stays alive
        self.max_conns = 16  # cache size bound

    def setTimeout(self, t):
        """Set how long (in seconds) cached connections are kept."""
        self.delay = t

    def setMaxConns(self, m):
        """Set the maximum number of cached connections."""
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for the target, creating one when
        needed, and refresh its expiry."""
        key = user, host, port, '/'.join(dirs), timeout
        if key not in self.cache:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
        self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections, then enforce the size bound."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
        # default=0 keeps min() from raising ValueError when every
        # entry just expired (or the cache was cleared).
        self.soonest = min(self.timeout.values(), default=0)

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(self.timeout.values(), default=0)

    def clear_cache(self):
        """Close and drop every cached connection."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1640
class DataHandler(BaseHandler):
    def data_open(self, req):
        """Open a data: URL as specified in RFC 2397.

        Any POSTed data is ignored.

        syntax:
        dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        mediatype := [ type "/" subtype ] *( ";" parameter )
        data      := *urlchar
        parameter := attribute "=" value
        """
        url = req.full_url

        scheme, rest = url.split(":",1)
        mediatype, payload = rest.split(",",1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        payload = unquote_to_bytes(payload)
        if mediatype.endswith(";base64"):
            payload = base64.decodebytes(payload)
            mediatype = mediatype[:-len(";base64")]

        # RFC 2397 default media type when none is supplied.
        mediatype = mediatype or "text/plain;charset=US-ASCII"

        raw_headers = "Content-type: %s\nContent-length: %d\n" % (
            mediatype, len(payload))
        headers = email.message_from_string(raw_headers)

        return addinfourl(io.BytesIO(payload), headers, url)
1670
1671
1672# Code move from the old urllib module
1673
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

# Helper for non-unix systems
if os.name == 'nt':
    # Windows paths (drive letters, backslashes) need dedicated handling.
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)


# Module-level cache of open FTP connections shared by URLopener instances
# (see URLopener.__init__ and URLopener.open_ftp).
ftpcache = {}
1692
1693
class URLopener:
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Class-level default so cleanup() (reached via __del__) is safe even if
    # __init__ raised before the instance attribute was assigned.
    __tempfiles = None

    version = "Python-urllib/%s" % __version__

    # Constructor
    def __init__(self, proxies=None, **x509):
        # This class is deprecated; steer callers towards urlopen() et al.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # x509 may carry 'key_file'/'cert_file' used for HTTPS connections.
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.

    def __del__(self):
        self.close()

    def close(self):
        self.cleanup()

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        self.addheaders.append(args)

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        # Percent-quote anything outside the listed safe characters.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to an open_<scheme> method; open_local_file is excluded
        # here so local files can only be reached through open_file().
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            raise OSError('socket error', msg) from msg

    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'unknown url type', type)

    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        type, url = _splittype(fullurl)
        raise OSError('url error', 'invalid proxy for %s' % type, proxy)

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum, bs, size)
        once before the first block is read and then after each block.
        """
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        # A local file needs no copy: return its own path and headers.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: download into a temp file whose suffix is
                # taken from the URL path, and remember it for cleanup().
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_factory.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: credentials, if any, are embedded in the host.
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxy request: url is the (proxyhost, full-url) pair built
            # by open() above.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        # Basic auth headers for proxy and/or origin server.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] =  "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

    def open_http(self, url, data=None):
        """Use HTTP protocol."""
        return self._open_generic_http(http.client.HTTPConnection, url, data)

    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
        """Handle http errors.

        Derived class can override this, or provide specific handlers
        named http_error_DDD where DDD is the 3-digit error code."""
        # First check if there's a specific handler for this error
        name = 'http_error_%d' % errcode
        if hasattr(self, name):
            method = getattr(self, name)
            if data is None:
                result = method(url, fp, errcode, errmsg, headers)
            else:
                result = method(url, fp, errcode, errmsg, headers, data)
            # A falsy result means the specific handler declined; fall
            # through to the default handler.
            if result: return result
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise OSError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)

    if _have_ssl:
        def _https_connection(self, host):
            # Factory passed to _open_generic_http(); carries the client
            # certificate options given to __init__ via **x509.
            return http.client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise URLError('file error: proxy support for file protocol currently not implemented')
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            raise ValueError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(url)

    def open_local_file(self, url):
        """Use local file."""
        import email.utils
        import mimetypes
        host, file = _splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Synthesize HTTP-like headers from the file's metadata.
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = _splitport(host)
        # A host was given: only serve the file if it names this machine.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

    def open_ftp(self, url):
        """Use FTP protocol."""
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing ('D') when no file
            # component, binary ('I') otherwise; ;type= attrs may override.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError(f'ftp error: {exp}') from exp

    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise URLError('data error: proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise OSError('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        # A trailing ";encoding" (no '=') names the transfer encoding.
        semi = type.rfind(';')
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            # XXX is this encoding/decoding ok?
            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        headers = email.message_from_string(msg)
        f = io.StringIO(msg)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
2137
2138
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}  # "realm@host" -> (user, passwd)
        self.tries = 0        # consecutive redirects followed so far
        self.maxtries = 10    # redirect limit before treating it as an error

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Unlike the base class, hand back the error body as a response.
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            # Too many consecutive redirects: report as a server error
            # instead of looping forever.
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Returns None (declining to handle) if no redirect target is given.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 308 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_301(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE: URLopener.http_error_default raises HTTPError, so each of
        # the calls below terminates the request when a retry is impossible.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # NOTE: URLopener.http_error_default raises HTTPError, so each of
        # the calls below terminates the request when a retry is impossible.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                    headers)
        # Dispatch to retry_proxy_http_basic_auth / retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Rebuild the http proxy URL with user:passwd embedded, then retry.
        host, selector = _splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # Same as retry_proxy_http_basic_auth, for the https proxy entry.
        host, selector = _splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-open the URL with user:passwd embedded in the host part.
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # Re-open the URL with user:passwd embedded in the host part.
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # clear_cache is truthy when the URL already carried (bad)
        # credentials, so the cached entry is dropped and the user re-asked.
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2356
2357
2358# Utility functions
2359
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The first lookup is memoized in a module-level variable, so the
    name is resolved at most once per process.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2367
_thishost = None
def thishost():
    """Return the IP addresses of the current host."""
    global _thishost
    if _thishost is None:
        try:
            addresses = socket.gethostbyname_ex(socket.gethostname())[2]
        except socket.gaierror:
            # The local hostname may not resolve; fall back to 'localhost'.
            addresses = socket.gethostbyname_ex('localhost')[2]
        # Memoize as an immutable tuple.
        _thishost = tuple(addresses)
    return _thishost
2378
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    # Import lazily so urllib does not pay for ftplib unless FTP is used.
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2387
_noheaders = None
def noheaders():
    """Return an empty email Message object."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    # Build the shared empty-headers sentinel on first use.
    _noheaders = email.message_from_string("")
    return _noheaders
2395
2396
2397# Utility classes
2398
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        # Connection parameters are kept so init() can reconnect later
        # if the cached control connection goes stale.
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        # dirs: sequence of path components, joined with '/' and used as
        # the initial working directory after login (see init()).
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() that are still
        # open; real_close() is deferred until this drops to zero.
        self.refcount = 0
        # When True, the control connection stays open between transfers
        # so the connection cache can reuse it.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Tear down whatever was partially opened before re-raising,
            # so a failed constructor does not leak a connection.
            self.close()
            raise

    def init(self):
        """Connect, log in, and change into the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* and return (file object, length or None).

        type 'd'/'D' forces ASCII mode for a directory listing; any other
        value is sent as 'TYPE <type>'.  If a RETR fails with a 550
        (e.g. *file* is a directory), falls back to a LIST.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Cached connection may have gone stale: reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file" -- fall through to a
                # directory listing; anything else is a real error.
                if str(reason)[:3] != '550':
                    raise URLError(f'ftp error: {reason}') from reason
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always restore the working directory, even on failure.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # Wrap the data socket so file_close() runs when the caller closes
        # the returned file object; the makefile() object keeps the socket
        # usable after conn.close().
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Finish any transfer in progress, consuming the server response."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            # Best effort: the connection may already be unusable.
            pass

    def close(self):
        """Disable keepalive; close now unless file objects are outstanding."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Hook run when a file object returned by retrfile() is closed."""
        self.endtransfer()
        self.refcount -= 1
        # Drop the control connection only when nothing references it and
        # the cache does not want it kept alive.
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Unconditionally close the underlying FTP control connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2497
2498# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    # Two passes over the environment so lowercase variable names win:
    # the first pass accepts any capitalization, the second pass (which
    # only matches an all-lowercase '_proxy' suffix) overwrites entries
    # and removes schemes whose value is explicitly empty.
    for var, url in os.environ.items():
        lowered = var.lower()
        if url and lowered[-6:] == '_proxy':
            proxies[lowered[:-6]] = url
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    for var, url in os.environ.items():
        if var[-6:] == '_proxy':
            scheme = var.lower()[:-6]
            if url:
                proxies[scheme] = url
            else:
                proxies.pop(scheme, None)
    return proxies
2529
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    no_proxy = proxies.get('no')
    if no_proxy is None:
        # no_proxy not specified: never bypass.
        return False
    if no_proxy == '*':
        # Wildcard: always bypass.
        return True
    host = host.lower()
    # Compare both with and without any trailing :port.
    hostonly, port = _splitport(host)
    for entry in no_proxy.split(','):
        entry = entry.strip()
        if not entry:
            continue
        entry = entry.lstrip('.').lower()  # leading dots are ignored
        if entry in (hostonly, host):
            return True
        suffix = '.' + entry
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    # No suffix matched: don't bypass.
    return False
2563
2564
# This code inspects a macOS-specific data structure, but is kept
# platform-independent so it can be tested on all platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = _splitport(host)

    def _addr_to_int(addr):
        # Big-endian packing of up to four dotted octets; short forms
        # like '10.1' are zero-padded on the right.
        octets = [int(p) for p in addr.split('.')]
        octets = (octets + [0, 0, 0, 0])[:4]
        return ((octets[0] << 24) | (octets[1] << 16) |
                (octets[2] << 8) | octets[3])

    # Simple (dot-less) host names bypass when exclude_simple is set.
    if '.' not in host and proxy_settings['exclude_simple']:
        return True

    host_num = None  # resolved lazily, only if a numeric exception appears

    for pattern in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not pattern:
            continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", pattern)
        if m is None:
            # Not numeric: treat as a shell-style wildcard on the host.
            if fnmatch(host, pattern):
                return True
            continue

        if host_num is None:
            try:
                host_num = _addr_to_int(socket.gethostbyname(hostonly))
            except OSError:
                continue

        prefix = m.group(2)
        if prefix is None:
            # No explicit prefix: 8 bits per dotted component present.
            width = 8 * (m.group(1).count('.') + 1)
        else:
            width = int(prefix[1:])

        if width < 0 or width > 32:
            # System libraries ignore invalid prefix lengths.
            continue

        shift = 32 - width
        if (host_num >> shift) == (_addr_to_int(m.group(1)) >> shift):
            return True

    return False
2630
2631
if sys.platform == 'darwin':
    # macOS stores proxy configuration in the SystemConfiguration
    # framework; the _scproxy extension module exposes it to Python.
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """Return True iff host should bypass the proxy per macOS settings."""
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            # Environment variables take precedence over system settings.
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return proxies from the environment, else from macOS settings."""
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' not in proxyServer and ';' not in proxyServer:
                    # Use one setting for all protocols.
                    proxyServer = 'http={0};https={0};ftp={0}'.format(proxyServer)
                for p in proxyServer.split(';'):
                    protocol, address = p.split('=', 1)
                    # See if address has a type:// prefix
                    if not re.match('(?:[^/:]+)://', address):
                        # Add type:// prefix to address without specifying type
                        if protocol in ('http', 'https', 'ftp'):
                            # The default proxy type of Windows is HTTP
                            address = 'http://' + address
                        elif protocol == 'socks':
                            address = 'socks://' + address
                    proxies[protocol] = address
                # Use SOCKS proxy for HTTP(S) protocols
                if proxies.get('socks'):
                    # The default SOCKS proxy type of Windows is SOCKS4
                    address = re.sub(r'^socks://', 'socks4://', proxies['socks'])
                    proxies['http'] = proxies.get('http') or address
                    proxies['https'] = proxies.get('https') or address
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if host matches the registry ProxyOverride list, else 0."""
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' matches plain (dot-less) host names.
                if '.' not in rawHost:
                    return 1
            # Translate the registry's glob syntax into a regex.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            # Environment variables take precedence over the registry.
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
2792