1"""HTTP server classes.
2
Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see
SimpleHTTPRequestHandler for simple implementations of GET and HEAD,
and CGIHTTPRequestHandler for CGI scripts (GET and POST).
6
7It does, however, optionally implement HTTP/1.1 persistent connections,
8as of version 0.3.
9
10Notes on CGIHTTPRequestHandler
11------------------------------
12
13This class implements GET and POST requests to cgi-bin scripts.
14
15If the os.fork() function is not present (e.g. on Windows),
16subprocess.Popen() is used as a fallback, with slightly altered semantics.
17
18In all cases, the implementation is intentionally naive -- all
19requests are executed synchronously.
20
21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
22-- it may execute arbitrary Python code or external programs.
23
24Note that status code 200 is sent prior to execution of a CGI script, so
25scripts cannot send other status codes such as 302 (redirect).
26
27XXX To do:
28
29- log requests even later (to capture byte count)
30- log user-agent header and other interesting goodies
31- send error log to separate file
32"""
33
34
35# See also:
36#
37# HTTP Working Group                                        T. Berners-Lee
38# INTERNET-DRAFT                                            R. T. Fielding
39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
40# Expires September 8, 1995                                  March 8, 1995
41#
42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
43#
44# and
45#
46# Network Working Group                                      R. Fielding
47# Request for Comments: 2616                                       et al
48# Obsoletes: 2068                                              June 1999
49# Category: Standards Track
50#
51# URL: http://www.faqs.org/rfcs/rfc2616.html
52
53# Log files
54# ---------
55#
56# Here's a quote from the NCSA httpd docs about log file format.
57#
58# | The logfile format is as follows. Each line consists of:
59# |
60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
61# |
62# |        host: Either the DNS name or the IP number of the remote client
63# |        rfc931: Any information returned by identd for this person,
64# |                - otherwise.
65# |        authuser: If user sent a userid for authentication, the user name,
66# |                  - otherwise.
67# |        DD: Day
68# |        Mon: Month (calendar name)
69# |        YYYY: Year
70# |        hh: hour (24-hour format, the machine's timezone)
71# |        mm: minutes
72# |        ss: seconds
73# |        request: The first line of the HTTP request as sent by the client.
74# |        ddd: the status code returned by the server, - if not available.
75# |        bbbb: the total number of bytes sent,
76# |              *not including the HTTP/1.0 header*, - if not available
77# |
78# | You can determine the name of the file accessed through request.
79#
80# (Actually, the latter is only true if you know the server configuration
81# at the time the request was made!)
82
# Version of this module.  It is appended to the handler classes'
# server_version strings ("BaseHTTP/<version>", "SimpleHTTP/<version>").
__version__ = "0.6"

# Names exported by a star-import of this module.
__all__ = [
    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",
    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",
]
89
90import copy
91import datetime
92import email.utils
93import html
94import http.client
95import io
96import itertools
97import mimetypes
98import os
99import posixpath
100import select
101import shutil
102import socket # For gethostbyaddr()
103import socketserver
104import sys
105import time
106import urllib.parse
107
108from http import HTTPStatus
109
110
# Default error message template.
#
# This is a %-style format string.  BaseHTTPRequestHandler.send_error()
# fills it from a mapping with the keys 'code', 'message' and 'explain';
# 'message' and 'explain' are HTML-escaped before substitution.
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE HTML>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: %(code)d</p>
        <p>Message: %(message)s.</p>
        <p>Error code explanation: %(code)s - %(explain)s.</p>
    </body>
</html>
"""

# Value of the Content-Type header sent together with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"
129
class HTTPServer(socketserver.TCPServer):
    """TCP server that records the name and port it has been bound to."""

    allow_reuse_address = 1    # Seems to make sense in testing environment

    def server_bind(self):
        """Bind the socket, then store the server name and port.

        After the base class has bound the socket, the first two items
        of self.server_address are taken as host and port.  The fully
        qualified name of the host is stored in self.server_name and the
        port in self.server_port.
        """
        socketserver.TCPServer.server_bind(self)
        bound_host, bound_port = self.server_address[:2]
        self.server_port = bound_port
        self.server_name = socket.getfqdn(bound_host)
140
141
class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn, using daemon threads."""

    daemon_threads = True
144
145
146class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):
147
148    """HTTP request handler base class.
149
150    The following explanation of HTTP serves to guide you through the
151    code as well as to expose any misunderstandings I may have about
152    HTTP (so you don't need to read the code to figure out I'm wrong
153    :-).
154
155    HTTP (HyperText Transfer Protocol) is an extensible protocol on
156    top of a reliable stream transport (e.g. TCP/IP).  The protocol
157    recognizes three parts to a request:
158
159    1. One line identifying the request type and path
160    2. An optional set of RFC-822-style headers
161    3. An optional data part
162
163    The headers and data are separated by a blank line.
164
165    The first line of the request has the form
166
167    <command> <path> <version>
168
169    where <command> is a (case-sensitive) keyword such as GET or POST,
170    <path> is a string containing path information for the request,
171    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
172    <path> is encoded using the URL encoding scheme (using %xx to signify
173    the ASCII character with hex code xx).
174
175    The specification specifies that lines are separated by CRLF but
176    for compatibility with the widest range of clients recommends
177    servers also handle LF.  Similarly, whitespace in the request line
178    is treated sensibly (allowing multiple spaces between components
179    and allowing trailing whitespace).
180
181    Similarly, for output, lines ought to be separated by CRLF pairs
182    but most clients grok LF characters just fine.
183
184    If the first line of the request has the form
185
186    <command> <path>
187
188    (i.e. <version> is left out) then this is assumed to be an HTTP
189    0.9 request; this form has no optional headers and data part and
190    the reply consists of just the data.
191
192    The reply form of the HTTP 1.x protocol again has three parts:
193
194    1. One line giving the response code
195    2. An optional set of RFC-822-style headers
196    3. The data
197
198    Again, the headers and data are separated by a blank line.
199
200    The response code line has the form
201
202    <version> <responsecode> <responsestring>
203
204    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
205    <responsecode> is a 3-digit response code indicating success or
206    failure of the request, and <responsestring> is an optional
207    human-readable string explaining what the response code means.
208
209    This server parses the request and the headers, and then calls a
210    function specific to the request type (<command>).  Specifically,
211    a request SPAM will be handled by a method do_SPAM().  If no
212    such method exists the server sends an error response to the
213    client.  If it exists, it is called with no arguments:
214
215    do_SPAM()
216
217    Note that the request name is case sensitive (i.e. SPAM and spam
218    are different requests).
219
220    The various request details are stored in instance variables:
221
222    - client_address is the client IP address in the form (host,
223    port);
224
225    - command, path and version are the broken-down request line;
226
227    - headers is an instance of email.message.Message (or a derived
228    class) containing the header information;
229
230    - rfile is a file object open for reading positioned at the
231    start of the optional input data part;
232
233    - wfile is a file object open for writing.
234
235    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!
236
237    The first thing to be written must be the response line.  Then
238    follow 0 or more header lines, then a blank line, and then the
239    actual data (if any).  The meaning of the header lines depends on
240    the command executed by the server; in most cases, when data is
241    returned, there should be at least one header line of the form
242
243    Content-type: <type>/<subtype>
244
245    where <type> and <subtype> should be registered MIME types,
246    e.g. "text/html" or "text/plain".
247
248    """
249
    # The Python system version, truncated to its first component
    # (the first whitespace-separated token of sys.version).
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    # version_string() joins this with sys_version.
    server_version = "BaseHTTP/" + __version__

    # Template and Content-Type header value used by send_error() to
    # build the HTML error page.
    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"
266
267    def parse_request(self):
268        """Parse a request (internal).
269
270        The request should be stored in self.raw_requestline; the results
271        are in self.command, self.path, self.request_version and
272        self.headers.
273
274        Return True for success, False for failure; on failure, any relevant
275        error response has already been sent back.
276
277        """
278        self.command = None  # set in case of error on the first line
279        self.request_version = version = self.default_request_version
280        self.close_connection = True
281        requestline = str(self.raw_requestline, 'iso-8859-1')
282        requestline = requestline.rstrip('\r\n')
283        self.requestline = requestline
284        words = requestline.split()
285        if len(words) == 0:
286            return False
287
288        if len(words) >= 3:  # Enough to determine protocol version
289            version = words[-1]
290            try:
291                if not version.startswith('HTTP/'):
292                    raise ValueError
293                base_version_number = version.split('/', 1)[1]
294                version_number = base_version_number.split(".")
295                # RFC 2145 section 3.1 says there can be only one "." and
296                #   - major and minor numbers MUST be treated as
297                #      separate integers;
298                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
299                #      turn is lower than HTTP/12.3;
300                #   - Leading zeros MUST be ignored by recipients.
301                if len(version_number) != 2:
302                    raise ValueError
303                if any(not component.isdigit() for component in version_number):
304                    raise ValueError("non digit in http version")
305                if any(len(component) > 10 for component in version_number):
306                    raise ValueError("unreasonable length http version")
307                version_number = int(version_number[0]), int(version_number[1])
308            except (ValueError, IndexError):
309                self.send_error(
310                    HTTPStatus.BAD_REQUEST,
311                    "Bad request version (%r)" % version)
312                return False
313            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
314                self.close_connection = False
315            if version_number >= (2, 0):
316                self.send_error(
317                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,
318                    "Invalid HTTP version (%s)" % base_version_number)
319                return False
320            self.request_version = version
321
322        if not 2 <= len(words) <= 3:
323            self.send_error(
324                HTTPStatus.BAD_REQUEST,
325                "Bad request syntax (%r)" % requestline)
326            return False
327        command, path = words[:2]
328        if len(words) == 2:
329            self.close_connection = True
330            if command != 'GET':
331                self.send_error(
332                    HTTPStatus.BAD_REQUEST,
333                    "Bad HTTP/0.9 request type (%r)" % command)
334                return False
335        self.command, self.path = command, path
336
337        # gh-87389: The purpose of replacing '//' with '/' is to protect
338        # against open redirect attacks possibly triggered if the path starts
339        # with '//' because http clients treat //path as an absolute URI
340        # without scheme (similar to http://path) rather than a path.
341        if self.path.startswith('//'):
342            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /
343
344        # Examine the headers and look for a Connection directive.
345        try:
346            self.headers = http.client.parse_headers(self.rfile,
347                                                     _class=self.MessageClass)
348        except http.client.LineTooLong as err:
349            self.send_error(
350                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
351                "Line too long",
352                str(err))
353            return False
354        except http.client.HTTPException as err:
355            self.send_error(
356                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,
357                "Too many headers",
358                str(err)
359            )
360            return False
361
362        conntype = self.headers.get('Connection', "")
363        if conntype.lower() == 'close':
364            self.close_connection = True
365        elif (conntype.lower() == 'keep-alive' and
366              self.protocol_version >= "HTTP/1.1"):
367            self.close_connection = False
368        # Examine the headers and look for an Expect directive
369        expect = self.headers.get('Expect', "")
370        if (expect.lower() == "100-continue" and
371                self.protocol_version >= "HTTP/1.1" and
372                self.request_version >= "HTTP/1.1"):
373            if not self.handle_expect_100():
374                return False
375        return True
376
377    def handle_expect_100(self):
378        """Decide what to do with an "Expect: 100-continue" header.
379
380        If the client is expecting a 100 Continue response, we must
381        respond with either a 100 Continue or a final response before
382        waiting for the request body. The default is to always respond
383        with a 100 Continue. You can behave differently (for example,
384        reject unauthorized requests) by overriding this method.
385
386        This method should either return True (possibly after sending
387        a 100 Continue response) or send an error response and return
388        False.
389
390        """
391        self.send_response_only(HTTPStatus.CONTINUE)
392        self.end_headers()
393        return True
394
395    def handle_one_request(self):
396        """Handle a single HTTP request.
397
398        You normally don't need to override this method; see the class
399        __doc__ string for information on how to handle specific HTTP
400        commands such as GET and POST.
401
402        """
403        try:
404            self.raw_requestline = self.rfile.readline(65537)
405            if len(self.raw_requestline) > 65536:
406                self.requestline = ''
407                self.request_version = ''
408                self.command = ''
409                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)
410                return
411            if not self.raw_requestline:
412                self.close_connection = True
413                return
414            if not self.parse_request():
415                # An error code has been sent, just exit
416                return
417            mname = 'do_' + self.command
418            if not hasattr(self, mname):
419                self.send_error(
420                    HTTPStatus.NOT_IMPLEMENTED,
421                    "Unsupported method (%r)" % self.command)
422                return
423            method = getattr(self, mname)
424            method()
425            self.wfile.flush() #actually send the response if not already done.
426        except TimeoutError as e:
427            #a read or a write timed out.  Discard this connection
428            self.log_error("Request timed out: %r", e)
429            self.close_connection = True
430            return
431
432    def handle(self):
433        """Handle multiple requests if necessary."""
434        self.close_connection = True
435
436        self.handle_one_request()
437        while not self.close_connection:
438            self.handle_one_request()
439
440    def send_error(self, code, message=None, explain=None):
441        """Send and log an error reply.
442
443        Arguments are
444        * code:    an HTTP error code
445                   3 digits
446        * message: a simple optional 1 line reason phrase.
447                   *( HTAB / SP / VCHAR / %x80-FF )
448                   defaults to short entry matching the response code
449        * explain: a detailed message defaults to the long entry
450                   matching the response code.
451
452        This sends an error response (so it must be called before any
453        output has been generated), logs the error, and finally sends
454        a piece of HTML explaining the error to the user.
455
456        """
457
458        try:
459            shortmsg, longmsg = self.responses[code]
460        except KeyError:
461            shortmsg, longmsg = '???', '???'
462        if message is None:
463            message = shortmsg
464        if explain is None:
465            explain = longmsg
466        self.log_error("code %d, message %s", code, message)
467        self.send_response(code, message)
468        self.send_header('Connection', 'close')
469
470        # Message body is omitted for cases described in:
471        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)
472        #  - RFC7231: 6.3.6. 205(Reset Content)
473        body = None
474        if (code >= 200 and
475            code not in (HTTPStatus.NO_CONTENT,
476                         HTTPStatus.RESET_CONTENT,
477                         HTTPStatus.NOT_MODIFIED)):
478            # HTML encode to prevent Cross Site Scripting attacks
479            # (see bug #1100201)
480            content = (self.error_message_format % {
481                'code': code,
482                'message': html.escape(message, quote=False),
483                'explain': html.escape(explain, quote=False)
484            })
485            body = content.encode('UTF-8', 'replace')
486            self.send_header("Content-Type", self.error_content_type)
487            self.send_header('Content-Length', str(len(body)))
488        self.end_headers()
489
490        if self.command != 'HEAD' and body:
491            self.wfile.write(body)
492
493    def send_response(self, code, message=None):
494        """Add the response header to the headers buffer and log the
495        response code.
496
497        Also send two standard headers with the server software
498        version and the current date.
499
500        """
501        self.log_request(code)
502        self.send_response_only(code, message)
503        self.send_header('Server', self.version_string())
504        self.send_header('Date', self.date_time_string())
505
506    def send_response_only(self, code, message=None):
507        """Send the response header only."""
508        if self.request_version != 'HTTP/0.9':
509            if message is None:
510                if code in self.responses:
511                    message = self.responses[code][0]
512                else:
513                    message = ''
514            if not hasattr(self, '_headers_buffer'):
515                self._headers_buffer = []
516            self._headers_buffer.append(("%s %d %s\r\n" %
517                    (self.protocol_version, code, message)).encode(
518                        'latin-1', 'strict'))
519
520    def send_header(self, keyword, value):
521        """Send a MIME header to the headers buffer."""
522        if self.request_version != 'HTTP/0.9':
523            if not hasattr(self, '_headers_buffer'):
524                self._headers_buffer = []
525            self._headers_buffer.append(
526                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))
527
528        if keyword.lower() == 'connection':
529            if value.lower() == 'close':
530                self.close_connection = True
531            elif value.lower() == 'keep-alive':
532                self.close_connection = False
533
534    def end_headers(self):
535        """Send the blank line ending the MIME headers."""
536        if self.request_version != 'HTTP/0.9':
537            self._headers_buffer.append(b"\r\n")
538            self.flush_headers()
539
540    def flush_headers(self):
541        if hasattr(self, '_headers_buffer'):
542            self.wfile.write(b"".join(self._headers_buffer))
543            self._headers_buffer = []
544
545    def log_request(self, code='-', size='-'):
546        """Log an accepted request.
547
548        This is called by send_response().
549
550        """
551        if isinstance(code, HTTPStatus):
552            code = code.value
553        self.log_message('"%s" %s %s',
554                         self.requestline, str(code), str(size))
555
556    def log_error(self, format, *args):
557        """Log an error.
558
559        This is called when a request cannot be fulfilled.  By
560        default it passes the message on to log_message().
561
562        Arguments are the same as for log_message().
563
564        XXX This should go to the separate error log.
565
566        """
567
568        self.log_message(format, *args)
569
    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes
    # Translation table applied by log_message() to every logged message.
    # Code points 0x00-0x1f and 0x7f-0x9f are replaced by a literal
    # backslash-x escape with two hex digits, and the backslash itself is
    # doubled.
    _control_char_table = str.maketrans(
            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})
    _control_char_table[ord('\\')] = r'\\'
574
575    def log_message(self, format, *args):
576        """Log an arbitrary message.
577
578        This is used by all other logging functions.  Override
579        it if you have specific logging wishes.
580
581        The first argument, FORMAT, is a format string for the
582        message to be logged.  If the format string contains
583        any % escapes requiring parameters, they should be
584        specified as subsequent arguments (it's just like
585        printf!).
586
587        The client ip and current date/time are prefixed to
588        every message.
589
590        Unicode control characters are replaced with escaped hex
591        before writing the output to stderr.
592
593        """
594
595        message = format % args
596        sys.stderr.write("%s - - [%s] %s\n" %
597                         (self.address_string(),
598                          self.log_date_time_string(),
599                          message.translate(self._control_char_table)))
600
601    def version_string(self):
602        """Return the server software version string."""
603        return self.server_version + ' ' + self.sys_version
604
605    def date_time_string(self, timestamp=None):
606        """Return the current date and time formatted for a message header."""
607        if timestamp is None:
608            timestamp = time.time()
609        return email.utils.formatdate(timestamp, usegmt=True)
610
611    def log_date_time_string(self):
612        """Return the current time formatted for logging."""
613        now = time.time()
614        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
615        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
616                day, self.monthname[month], year, hh, mm, ss)
617        return s
618
    # Abbreviated English weekday names, starting with Monday.
    # (Not referenced in the part of this file visible here.)
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Abbreviated English month names.  Index 0 is a None placeholder so
    # that a month number 1..12 can be used directly as the index, as
    # log_date_time_string() does.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
624
625    def address_string(self):
626        """Return the client address."""
627
628        return self.client_address[0]
629
    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    # It is also the version written into every status line.
    protocol_version = "HTTP/1.0"

    # MessageClass used to parse headers
    # (passed to http.client.parse_headers() by parse_request()).
    MessageClass = http.client.HTTPMessage

    # hack to maintain backwards compatibility
    # Maps every HTTPStatus member to a (short phrase, long description)
    # pair; send_error() and send_response_only() look codes up here.
    responses = {
        v: (v.phrase, v.description)
        for v in HTTPStatus.__members__.values()
    }
644
645
646class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
647
648    """Simple HTTP request handler with GET and HEAD commands.
649
650    This serves files from the current directory and any of its
651    subdirectories.  The MIME type for files is determined by
652    calling the .guess_type() method.
653
654    The GET and HEAD requests are identical except that the HEAD
655    request omits the actual contents of the file.
656
657    """
658
    # Overrides the base class value; reported via version_string().
    server_version = "SimpleHTTP/" + __version__
    # File extension -> MIME type table for compressed files.  Both names
    # are bound to the same dict object.
    # NOTE(review): presumably consulted by guess_type(), which is not
    # visible in this part of the file -- confirm there.
    extensions_map = _encodings_map_default = {
        '.gz': 'application/gzip',
        '.Z': 'application/octet-stream',
        '.bz2': 'application/x-bzip2',
        '.xz': 'application/x-xz',
    }
666
667    def __init__(self, *args, directory=None, **kwargs):
668        if directory is None:
669            directory = os.getcwd()
670        self.directory = os.fspath(directory)
671        super().__init__(*args, **kwargs)
672
673    def do_GET(self):
674        """Serve a GET request."""
675        f = self.send_head()
676        if f:
677            try:
678                self.copyfile(f, self.wfile)
679            finally:
680                f.close()
681
682    def do_HEAD(self):
683        """Serve a HEAD request."""
684        f = self.send_head()
685        if f:
686            f.close()
687
688    def send_head(self):
689        """Common code for GET and HEAD commands.
690
691        This sends the response code and MIME headers.
692
693        Return value is either a file object (which has to be copied
694        to the outputfile by the caller unless the command was HEAD,
695        and must be closed by the caller under all circumstances), or
696        None, in which case the caller has nothing further to do.
697
698        """
699        path = self.translate_path(self.path)
700        f = None
701        if os.path.isdir(path):
702            parts = urllib.parse.urlsplit(self.path)
703            if not parts.path.endswith('/'):
704                # redirect browser - doing basically what apache does
705                self.send_response(HTTPStatus.MOVED_PERMANENTLY)
706                new_parts = (parts[0], parts[1], parts[2] + '/',
707                             parts[3], parts[4])
708                new_url = urllib.parse.urlunsplit(new_parts)
709                self.send_header("Location", new_url)
710                self.send_header("Content-Length", "0")
711                self.end_headers()
712                return None
713            for index in "index.html", "index.htm":
714                index = os.path.join(path, index)
715                if os.path.isfile(index):
716                    path = index
717                    break
718            else:
719                return self.list_directory(path)
720        ctype = self.guess_type(path)
721        # check for trailing "/" which should return 404. See Issue17324
722        # The test for this was added in test_httpserver.py
723        # However, some OS platforms accept a trailingSlash as a filename
724        # See discussion on python-dev and Issue34711 regarding
725        # parsing and rejection of filenames with a trailing slash
726        if path.endswith("/"):
727            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
728            return None
729        try:
730            f = open(path, 'rb')
731        except OSError:
732            self.send_error(HTTPStatus.NOT_FOUND, "File not found")
733            return None
734
735        try:
736            fs = os.fstat(f.fileno())
737            # Use browser cache if possible
738            if ("If-Modified-Since" in self.headers
739                    and "If-None-Match" not in self.headers):
740                # compare If-Modified-Since and time of last file modification
741                try:
742                    ims = email.utils.parsedate_to_datetime(
743                        self.headers["If-Modified-Since"])
744                except (TypeError, IndexError, OverflowError, ValueError):
745                    # ignore ill-formed values
746                    pass
747                else:
748                    if ims.tzinfo is None:
749                        # obsolete format with no timezone, cf.
750                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1
751                        ims = ims.replace(tzinfo=datetime.timezone.utc)
752                    if ims.tzinfo is datetime.timezone.utc:
753                        # compare to UTC datetime of last modification
754                        last_modif = datetime.datetime.fromtimestamp(
755                            fs.st_mtime, datetime.timezone.utc)
756                        # remove microseconds, like in If-Modified-Since
757                        last_modif = last_modif.replace(microsecond=0)
758
759                        if last_modif <= ims:
760                            self.send_response(HTTPStatus.NOT_MODIFIED)
761                            self.end_headers()
762                            f.close()
763                            return None
764
765            self.send_response(HTTPStatus.OK)
766            self.send_header("Content-type", ctype)
767            self.send_header("Content-Length", str(fs[6]))
768            self.send_header("Last-Modified",
769                self.date_time_string(fs.st_mtime))
770            self.end_headers()
771            return f
772        except:
773            f.close()
774            raise
775
776    def list_directory(self, path):
777        """Helper to produce a directory listing (absent index.html).
778
779        Return value is either a file object, or None (indicating an
780        error).  In either case, the headers are sent, making the
781        interface the same as for send_head().
782
783        """
784        try:
785            list = os.listdir(path)
786        except OSError:
787            self.send_error(
788                HTTPStatus.NOT_FOUND,
789                "No permission to list directory")
790            return None
791        list.sort(key=lambda a: a.lower())
792        r = []
793        try:
794            displaypath = urllib.parse.unquote(self.path,
795                                               errors='surrogatepass')
796        except UnicodeDecodeError:
797            displaypath = urllib.parse.unquote(self.path)
798        displaypath = html.escape(displaypath, quote=False)
799        enc = sys.getfilesystemencoding()
800        title = f'Directory listing for {displaypath}'
801        r.append('<!DOCTYPE HTML>')
802        r.append('<html lang="en">')
803        r.append('<head>')
804        r.append(f'<meta charset="{enc}">')
805        r.append(f'<title>{title}</title>\n</head>')
806        r.append(f'<body>\n<h1>{title}</h1>')
807        r.append('<hr>\n<ul>')
808        for name in list:
809            fullname = os.path.join(path, name)
810            displayname = linkname = name
811            # Append / for directories or @ for symbolic links
812            if os.path.isdir(fullname):
813                displayname = name + "/"
814                linkname = name + "/"
815            if os.path.islink(fullname):
816                displayname = name + "@"
817                # Note: a link to a directory displays with @ and links with /
818            r.append('<li><a href="%s">%s</a></li>'
819                    % (urllib.parse.quote(linkname,
820                                          errors='surrogatepass'),
821                       html.escape(displayname, quote=False)))
822        r.append('</ul>\n<hr>\n</body>\n</html>\n')
823        encoded = '\n'.join(r).encode(enc, 'surrogateescape')
824        f = io.BytesIO()
825        f.write(encoded)
826        f.seek(0)
827        self.send_response(HTTPStatus.OK)
828        self.send_header("Content-type", "text/html; charset=%s" % enc)
829        self.send_header("Content-Length", str(len(encoded)))
830        self.end_headers()
831        return f
832
833    def translate_path(self, path):
834        """Translate a /-separated PATH to the local filename syntax.
835
836        Components that mean special things to the local file system
837        (e.g. drive or directory names) are ignored.  (XXX They should
838        probably be diagnosed.)
839
840        """
841        # abandon query parameters
842        path = path.split('?',1)[0]
843        path = path.split('#',1)[0]
844        # Don't forget explicit trailing slash when normalizing. Issue17324
845        trailing_slash = path.rstrip().endswith('/')
846        try:
847            path = urllib.parse.unquote(path, errors='surrogatepass')
848        except UnicodeDecodeError:
849            path = urllib.parse.unquote(path)
850        path = posixpath.normpath(path)
851        words = path.split('/')
852        words = filter(None, words)
853        path = self.directory
854        for word in words:
855            if os.path.dirname(word) or word in (os.curdir, os.pardir):
856                # Ignore components that are not a simple file/directory name
857                continue
858            path = os.path.join(path, word)
859        if trailing_slash:
860            path += '/'
861        return path
862
863    def copyfile(self, source, outputfile):
864        """Copy all data between two file objects.
865
866        The SOURCE argument is a file object open for reading
867        (or anything with a read() method) and the DESTINATION
868        argument is a file object open for writing (or
869        anything with a write() method).
870
871        The only reason for overriding this would be to change
872        the block size or perhaps to replace newlines by CRLF
873        -- note however that this the default server uses this
874        to copy binary data as well.
875
876        """
877        shutil.copyfileobj(source, outputfile)
878
879    def guess_type(self, path):
880        """Guess the type of a file.
881
882        Argument is a PATH (a filename).
883
884        Return value is a string of the form type/subtype,
885        usable for a MIME Content-type header.
886
887        The default implementation looks the file's extension
888        up in the table self.extensions_map, using application/octet-stream
889        as a default; however it would be permissible (if
890        slow) to look inside the data to make a better guess.
891
892        """
893        base, ext = posixpath.splitext(path)
894        if ext in self.extensions_map:
895            return self.extensions_map[ext]
896        ext = ext.lower()
897        if ext in self.extensions_map:
898            return self.extensions_map[ext]
899        guess, _ = mimetypes.guess_type(path)
900        if guess:
901            return guess
902        return 'application/octet-stream'
903
904
905# Utilities for CGIHTTPRequestHandler
906
def _url_collapse_path(path):
    """
    Collapse a URL path: drop empty and '.' elements and resolve '..'
    references, leaving the query string (if any) untouched.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.
    The utility of this function is limited to is_cgi method and helps
    preventing some security attacks.

    Returns: The reconstituted URL, which will always start with a '/'.

    Raises: IndexError if too many '..' occur within the path.

    """
    # The query component takes no part in the normalization.
    url_path, _, query = path.partition('?')

    # Like os.path.split(os.path.normpath(path)), but following URL path
    # semantics rather than those of the local operating system.
    # str.split() always yields at least one element, so a tail exists.
    *leading, tail = urllib.parse.unquote(url_path).split('/')

    kept = []
    for segment in leading:
        if segment == '..':
            kept.pop()  # IndexError if more '..' than prior parts
        elif segment and segment != '.':
            kept.append(segment)

    if tail == '..':
        kept.pop()
        tail = ''
    elif tail == '.':
        tail = ''

    if query:
        tail = tail + '?' + query

    return '/' + '/'.join(kept) + '/' + tail
952
953
954
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    The value is computed once and cached in the module-level name
    'nobody'.  Returns -1 when the pwd module cannot be imported.  If
    there is no 'nobody' account, one more than the largest uid in the
    password database is used instead.
    """
    global nobody
    if not nobody:
        try:
            import pwd
        except ImportError:
            return -1
        try:
            nobody = pwd.getpwnam('nobody').pw_uid
        except KeyError:
            nobody = max(entry.pw_uid for entry in pwd.getpwall()) + 1
    return nobody
971
972
def executable(path):
    """Report whether os.access() grants execute (X_OK) access to path."""
    may_execute = os.access(path, os.X_OK)
    return may_execute
976
977
class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts; any other target is
        answered with 501 (Not Implemented).

        """

        if self.is_cgi():
            self.run_cgi()
        else:
            self.send_error(
                HTTPStatus.NOT_IMPLEMENTED,
                "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        CGI requests are handed to run_cgi() (whose result, None, is
        returned); everything else is delegated to
        SimpleHTTPRequestHandler.send_head().
        """
        if self.is_cgi():
            return self.run_cgi()
        else:
            return SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).

        """
        collapsed_path = _url_collapse_path(self.path)
        # Try each successively longer '/'-terminated prefix until one of
        # them is a configured CGI directory.
        dir_sep = collapsed_path.find('/', 1)
        while (dir_sep > 0
               and collapsed_path[:dir_sep] not in self.cgi_directories):
            dir_sep = collapsed_path.find('/', dir_sep+1)
        if dir_sep > 0:
            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
            self.cgi_info = head, tail
            return True
        return False


    # URL path prefixes that is_cgi() treats as CGI directories.
    cgi_directories = ['/cgi-bin', '/htbin']

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script.

        The decision is made purely on the extension (.py or .pyw,
        compared case-insensitively).
        """
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def run_cgi(self):
        """Execute a CGI script.

        The script is located from self.cgi_info (as set by is_cgi()).
        On failure to find a runnable script an error response is sent.
        Otherwise a CGI environment is built from the request, a 200
        status line is sent, and the script is run either in a forked
        child (when os.fork exists) or through subprocess.Popen.

        Always returns None.
        """
        dir, rest = self.cgi_info
        path = dir + '/' + rest
        # Extend 'dir' over every leading component of 'rest' that is an
        # existing directory on disk, so scripts may live in subdirectories.
        i = path.find('/', len(dir)+1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]

            scriptdir = self.translate_path(nextdir)
            if os.path.isdir(scriptdir):
                dir, rest = nextdir, nextrest
                i = path.find('/', len(dir)+1)
            else:
                break

        # find an explicit query string, if present.
        rest, _, query = rest.partition('?')

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(
                HTTPStatus.NOT_FOUND,
                "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(
                HTTPStatus.FORBIDDEN,
                "CGI script is not a plain file (%r)" % scriptname)
            return
        ispy = self.is_python(scriptname)
        # Without fork, Python scripts are started through the interpreter
        # (see below), so only then is the executable check skipped.
        if self.have_fork or not ispy:
            if not self.is_executable(scriptfile):
                self.send_error(
                    HTTPStatus.FORBIDDEN,
                    "CGI script is not executable (%r)" % scriptname)
                return

        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = copy.deepcopy(os.environ)
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.parse.unquote(rest)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        env['QUERY_STRING'] = query
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.get("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = authorization[1].encode('ascii')
                        authorization = base64.decodebytes(authorization).\
                                        decode('ascii')
                    except (binascii.Error, UnicodeError):
                        pass
                    else:
                        # Credentials are "user-id:password".  Only the
                        # first colon separates them; the password itself
                        # may contain further colons, so split once.
                        user, colon, _ = authorization.partition(':')
                        if colon:
                            env['REMOTE_USER'] = user
        # XXX REMOTE_IDENT
        if self.headers.get('content-type') is None:
            env['CONTENT_TYPE'] = self.headers.get_content_type()
        else:
            env['CONTENT_TYPE'] = self.headers['content-type']
        length = self.headers.get('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.get('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = self.headers.get_all('accept', ())
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.get('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.get_all('cookie', []))
        cookie_str = ', '.join(co)
        if cookie_str:
            env['HTTP_COOKIE'] = cookie_str
        # XXX Other HTTP_* headers
        # Since we're setting the env in the parent, provide empty
        # values to override previously set values
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")

        # The status line goes out before the script runs, so the script
        # itself cannot choose a different status code.
        self.send_response(HTTPStatus.OK, "Script output follows")
        self.flush_headers()

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            # A query without '=' is passed to the script as an argument.
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                exitcode = os.waitstatus_to_exitcode(sts)
                if exitcode:
                    self.log_error(f"CGI script exit code {exitcode}")
                return
            # Child
            try:
                # Best effort: try to drop to the 'nobody' uid, and carry
                # on with the current uid if that is not permitted.
                try:
                    os.setuid(nobody)
                except OSError:
                    pass
                # Wire the connection to the script's stdin/stdout.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately catches everything: whatever goes wrong, the
                # child reports it and terminates via os._exit().
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non-Unix -- use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)
            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            # A missing or malformed Content-Length is treated as no body.
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env = env
                                 )
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            p.stderr.close()
            p.stdout.close()
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")
1238
1239
def _get_best_family(*address):
    """Return (family, sockaddr) for the first getaddrinfo() result.

    ADDRESS is passed straight through to socket.getaddrinfo(), which
    is asked for passive (AI_PASSIVE) stream-socket addresses.
    """
    candidates = socket.getaddrinfo(
        *address,
        type=socket.SOCK_STREAM,
        flags=socket.AI_PASSIVE,
    )
    first = next(iter(candidates))
    family, _socktype, _proto, _canonname, sockaddr = first
    return family, sockaddr
1248
1249
def test(HandlerClass=BaseHTTPRequestHandler,
         ServerClass=ThreadingHTTPServer,
         protocol="HTTP/1.0", port=8000, bind=None):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the port argument),
    bound to the address chosen by _get_best_family(bind, port).  The
    address family is stored on ServerClass and the protocol version
    on HandlerClass before the server is created.  The server runs
    until interrupted from the keyboard, at which point the process
    exits with status 0.

    """
    family, addr = _get_best_family(bind, port)
    ServerClass.address_family = family
    HandlerClass.protocol_version = protocol
    with ServerClass(addr, HandlerClass) as httpd:
        # Report the address actually bound, not the one requested.
        host, port = httpd.socket.getsockname()[:2]
        if ':' in host:
            # Addresses containing ':' are bracketed inside the URL.
            url_host = f'[{host}]'
        else:
            url_host = host
        banner = (
            f"Serving HTTP on {host} port {port} "
            f"(http://{url_host}:{port}/) ..."
        )
        print(banner)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nKeyboard interrupt received, exiting.")
            sys.exit(0)
1272
if __name__ == '__main__':
    import argparse
    import contextlib

    # Command-line interface: optional flags plus an optional port.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--cgi', action='store_true',
        help='run as CGI server')
    parser.add_argument(
        '-b', '--bind', metavar='ADDRESS',
        help='bind to this address (default: all interfaces)')
    parser.add_argument(
        '-d', '--directory', default=os.getcwd(),
        help='serve this directory (default: current directory)')
    parser.add_argument(
        '-p', '--protocol', metavar='VERSION', default='HTTP/1.0',
        help='conform to this HTTP version (default: %(default)s)')
    parser.add_argument(
        'port', default=8000, type=int, nargs='?',
        help='bind to this port (default: %(default)s)')
    args = parser.parse_args()

    handler_class = (CGIHTTPRequestHandler if args.cgi
                     else SimpleHTTPRequestHandler)

    # ensure dual-stack is not disabled; ref #38907
    class DualStackServer(ThreadingHTTPServer):
        """Threading server that clears IPV6_V6ONLY and serves args.directory."""

        def server_bind(self):
            # suppress exception when protocol is IPv4
            with contextlib.suppress(Exception):
                self.socket.setsockopt(
                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)
            return super().server_bind()

        def finish_request(self, request, client_address):
            # Hand the directory chosen on the command line to the handler.
            self.RequestHandlerClass(request, client_address, self,
                                     directory=args.directory)

    test(
        HandlerClass=handler_class,
        ServerClass=DualStackServer,
        protocol=args.protocol,
        port=args.port,
        bind=args.bind,
    )
1320