1"""HTTP server classes. 2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see 4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, 5and CGIHTTPRequestHandler for CGI scripts. 6 7It does, however, optionally implement HTTP/1.1 persistent connections, 8as of version 0.3. 9 10Notes on CGIHTTPRequestHandler 11------------------------------ 12 13This class implements GET and POST requests to cgi-bin scripts. 14 15If the os.fork() function is not present (e.g. on Windows), 16subprocess.Popen() is used as a fallback, with slightly altered semantics. 17 18In all cases, the implementation is intentionally naive -- all 19requests are executed synchronously. 20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL 22-- it may execute arbitrary Python code or external programs. 23 24Note that status code 200 is sent prior to execution of a CGI script, so 25scripts cannot send other status codes such as 302 (redirect). 26 27XXX To do: 28 29- log requests even later (to capture byte count) 30- log user-agent header and other interesting goodies 31- send error log to separate file 32""" 33 34 35# See also: 36# 37# HTTP Working Group T. Berners-Lee 38# INTERNET-DRAFT R. T. Fielding 39# <draft-ietf-http-v10-spec-00.txt> H. Frystyk Nielsen 40# Expires September 8, 1995 March 8, 1995 41# 42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt 43# 44# and 45# 46# Network Working Group R. Fielding 47# Request for Comments: 2616 et al 48# Obsoletes: 2068 June 1999 49# Category: Standards Track 50# 51# URL: http://www.faqs.org/rfcs/rfc2616.html 52 53# Log files 54# --------- 55# 56# Here's a quote from the NCSA httpd docs about log file format. 57# 58# | The logfile format is as follows. Each line consists of: 59# | 60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb 61# | 62# | host: Either the DNS name or the IP number of the remote client 63# | rfc931: Any information returned by identd for this person, 64# | - otherwise. 65# | authuser: If user sent a userid for authentication, the user name, 66# | - otherwise. 67# | DD: Day 68# | Mon: Month (calendar name) 69# | YYYY: Year 70# | hh: hour (24-hour format, the machine's timezone) 71# | mm: minutes 72# | ss: seconds 73# | request: The first line of the HTTP request as sent by the client. 74# | ddd: the status code returned by the server, - if not available. 75# | bbbb: the total number of bytes sent, 76# | *not including the HTTP/1.0 header*, - if not available 77# | 78# | You can determine the name of the file accessed through request. 79# 80# (Actually, the latter is only true if you know the server configuration 81# at the time the request was made!) 82 83__version__ = "0.6" 84 85__all__ = [ 86 "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler", 87 "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", 88] 89 90import copy 91import datetime 92import email.utils 93import html 94import http.client 95import io 96import itertools 97import mimetypes 98import os 99import posixpath 100import select 101import shutil 102import socket # For gethostbyaddr() 103import socketserver 104import sys 105import time 106import urllib.parse 107 108from http import HTTPStatus 109 110 111# Default error message template 112DEFAULT_ERROR_MESSAGE = """\ 113<!DOCTYPE HTML> 114<html lang="en"> 115 <head> 116 <meta charset="utf-8"> 117 <title>Error response</title> 118 </head> 119 <body> 120 <h1>Error response</h1> 121 <p>Error code: %(code)d</p> 122 <p>Message: %(message)s.</p> 123 <p>Error code explanation: %(code)s - %(explain)s.</p> 124 </body> 125</html> 126""" 127 128DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" 129 130class HTTPServer(socketserver.TCPServer): 131 132 allow_reuse_address = 1 # Seems to make sense in testing environment 133 134 def server_bind(self): 135 """Override server_bind to store the server name.""" 136 socketserver.TCPServer.server_bind(self) 137 host, port = self.server_address[:2] 138 self.server_name = socket.getfqdn(host) 139 self.server_port = port 140 141 142class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer): 143 daemon_threads = True 144 145 146class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 147 148 """HTTP request handler base class. 149 150 The following explanation of HTTP serves to guide you through the 151 code as well as to expose any misunderstandings I may have about 152 HTTP (so you don't need to read the code to figure out I'm wrong 153 :-). 154 155 HTTP (HyperText Transfer Protocol) is an extensible protocol on 156 top of a reliable stream transport (e.g. TCP/IP). The protocol 157 recognizes three parts to a request: 158 159 1. One line identifying the request type and path 160 2. An optional set of RFC-822-style headers 161 3. An optional data part 162 163 The headers and data are separated by a blank line. 164 165 The first line of the request has the form 166 167 <command> <path> <version> 168 169 where <command> is a (case-sensitive) keyword such as GET or POST, 170 <path> is a string containing path information for the request, 171 and <version> should be the string "HTTP/1.0" or "HTTP/1.1". 172 <path> is encoded using the URL encoding scheme (using %xx to signify 173 the ASCII character with hex code xx). 174 175 The specification specifies that lines are separated by CRLF but 176 for compatibility with the widest range of clients recommends 177 servers also handle LF. Similarly, whitespace in the request line 178 is treated sensibly (allowing multiple spaces between components 179 and allowing trailing whitespace). 180 181 Similarly, for output, lines ought to be separated by CRLF pairs 182 but most clients grok LF characters just fine. 183 184 If the first line of the request has the form 185 186 <command> <path> 187 188 (i.e. <version> is left out) then this is assumed to be an HTTP 189 0.9 request; this form has no optional headers and data part and 190 the reply consists of just the data. 191 192 The reply form of the HTTP 1.x protocol again has three parts: 193 194 1. One line giving the response code 195 2. An optional set of RFC-822-style headers 196 3. The data 197 198 Again, the headers and data are separated by a blank line. 199 200 The response code line has the form 201 202 <version> <responsecode> <responsestring> 203 204 where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"), 205 <responsecode> is a 3-digit response code indicating success or 206 failure of the request, and <responsestring> is an optional 207 human-readable string explaining what the response code means. 208 209 This server parses the request and the headers, and then calls a 210 function specific to the request type (<command>). Specifically, 211 a request SPAM will be handled by a method do_SPAM(). If no 212 such method exists the server sends an error response to the 213 client. If it exists, it is called with no arguments: 214 215 do_SPAM() 216 217 Note that the request name is case sensitive (i.e. SPAM and spam 218 are different requests). 219 220 The various request details are stored in instance variables: 221 222 - client_address is the client IP address in the form (host, 223 port); 224 225 - command, path and version are the broken-down request line; 226 227 - headers is an instance of email.message.Message (or a derived 228 class) containing the header information; 229 230 - rfile is a file object open for reading positioned at the 231 start of the optional input data part; 232 233 - wfile is a file object open for writing. 234 235 IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! 236 237 The first thing to be written must be the response line. Then 238 follow 0 or more header lines, then a blank line, and then the 239 actual data (if any). The meaning of the header lines depends on 240 the command executed by the server; in most cases, when data is 241 returned, there should be at least one header line of the form 242 243 Content-type: <type>/<subtype> 244 245 where <type> and <subtype> should be registered MIME types, 246 e.g. "text/html" or "text/plain". 247 248 """ 249 250 # The Python system version, truncated to its first component. 251 sys_version = "Python/" + sys.version.split()[0] 252 253 # The server software version. You may want to override this. 254 # The format is multiple whitespace-separated strings, 255 # where each string is of the form name[/version]. 256 server_version = "BaseHTTP/" + __version__ 257 258 error_message_format = DEFAULT_ERROR_MESSAGE 259 error_content_type = DEFAULT_ERROR_CONTENT_TYPE 260 261 # The default request version. This only affects responses up until 262 # the point where the request line is parsed, so it mainly decides what 263 # the client gets back when sending a malformed request line. 264 # Most web servers default to HTTP 0.9, i.e. don't send a status line. 265 default_request_version = "HTTP/0.9" 266 267 def parse_request(self): 268 """Parse a request (internal). 269 270 The request should be stored in self.raw_requestline; the results 271 are in self.command, self.path, self.request_version and 272 self.headers. 273 274 Return True for success, False for failure; on failure, any relevant 275 error response has already been sent back. 276 277 """ 278 self.command = None # set in case of error on the first line 279 self.request_version = version = self.default_request_version 280 self.close_connection = True 281 requestline = str(self.raw_requestline, 'iso-8859-1') 282 requestline = requestline.rstrip('\r\n') 283 self.requestline = requestline 284 words = requestline.split() 285 if len(words) == 0: 286 return False 287 288 if len(words) >= 3: # Enough to determine protocol version 289 version = words[-1] 290 try: 291 if not version.startswith('HTTP/'): 292 raise ValueError 293 base_version_number = version.split('/', 1)[1] 294 version_number = base_version_number.split(".") 295 # RFC 2145 section 3.1 says there can be only one "." and 296 # - major and minor numbers MUST be treated as 297 # separate integers; 298 # - HTTP/2.4 is a lower version than HTTP/2.13, which in 299 # turn is lower than HTTP/12.3; 300 # - Leading zeros MUST be ignored by recipients. 301 if len(version_number) != 2: 302 raise ValueError 303 if any(not component.isdigit() for component in version_number): 304 raise ValueError("non digit in http version") 305 if any(len(component) > 10 for component in version_number): 306 raise ValueError("unreasonable length http version") 307 version_number = int(version_number[0]), int(version_number[1]) 308 except (ValueError, IndexError): 309 self.send_error( 310 HTTPStatus.BAD_REQUEST, 311 "Bad request version (%r)" % version) 312 return False 313 if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": 314 self.close_connection = False 315 if version_number >= (2, 0): 316 self.send_error( 317 HTTPStatus.HTTP_VERSION_NOT_SUPPORTED, 318 "Invalid HTTP version (%s)" % base_version_number) 319 return False 320 self.request_version = version 321 322 if not 2 <= len(words) <= 3: 323 self.send_error( 324 HTTPStatus.BAD_REQUEST, 325 "Bad request syntax (%r)" % requestline) 326 return False 327 command, path = words[:2] 328 if len(words) == 2: 329 self.close_connection = True 330 if command != 'GET': 331 self.send_error( 332 HTTPStatus.BAD_REQUEST, 333 "Bad HTTP/0.9 request type (%r)" % command) 334 return False 335 self.command, self.path = command, path 336 337 # gh-87389: The purpose of replacing '//' with '/' is to protect 338 # against open redirect attacks possibly triggered if the path starts 339 # with '//' because http clients treat //path as an absolute URI 340 # without scheme (similar to http://path) rather than a path. 341 if self.path.startswith('//'): 342 self.path = '/' + self.path.lstrip('/') # Reduce to a single / 343 344 # Examine the headers and look for a Connection directive. 345 try: 346 self.headers = http.client.parse_headers(self.rfile, 347 _class=self.MessageClass) 348 except http.client.LineTooLong as err: 349 self.send_error( 350 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 351 "Line too long", 352 str(err)) 353 return False 354 except http.client.HTTPException as err: 355 self.send_error( 356 HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE, 357 "Too many headers", 358 str(err) 359 ) 360 return False 361 362 conntype = self.headers.get('Connection', "") 363 if conntype.lower() == 'close': 364 self.close_connection = True 365 elif (conntype.lower() == 'keep-alive' and 366 self.protocol_version >= "HTTP/1.1"): 367 self.close_connection = False 368 # Examine the headers and look for an Expect directive 369 expect = self.headers.get('Expect', "") 370 if (expect.lower() == "100-continue" and 371 self.protocol_version >= "HTTP/1.1" and 372 self.request_version >= "HTTP/1.1"): 373 if not self.handle_expect_100(): 374 return False 375 return True 376 377 def handle_expect_100(self): 378 """Decide what to do with an "Expect: 100-continue" header. 379 380 If the client is expecting a 100 Continue response, we must 381 respond with either a 100 Continue or a final response before 382 waiting for the request body. The default is to always respond 383 with a 100 Continue. You can behave differently (for example, 384 reject unauthorized requests) by overriding this method. 385 386 This method should either return True (possibly after sending 387 a 100 Continue response) or send an error response and return 388 False. 389 390 """ 391 self.send_response_only(HTTPStatus.CONTINUE) 392 self.end_headers() 393 return True 394 395 def handle_one_request(self): 396 """Handle a single HTTP request. 397 398 You normally don't need to override this method; see the class 399 __doc__ string for information on how to handle specific HTTP 400 commands such as GET and POST. 401 402 """ 403 try: 404 self.raw_requestline = self.rfile.readline(65537) 405 if len(self.raw_requestline) > 65536: 406 self.requestline = '' 407 self.request_version = '' 408 self.command = '' 409 self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG) 410 return 411 if not self.raw_requestline: 412 self.close_connection = True 413 return 414 if not self.parse_request(): 415 # An error code has been sent, just exit 416 return 417 mname = 'do_' + self.command 418 if not hasattr(self, mname): 419 self.send_error( 420 HTTPStatus.NOT_IMPLEMENTED, 421 "Unsupported method (%r)" % self.command) 422 return 423 method = getattr(self, mname) 424 method() 425 self.wfile.flush() #actually send the response if not already done. 426 except TimeoutError as e: 427 #a read or a write timed out. Discard this connection 428 self.log_error("Request timed out: %r", e) 429 self.close_connection = True 430 return 431 432 def handle(self): 433 """Handle multiple requests if necessary.""" 434 self.close_connection = True 435 436 self.handle_one_request() 437 while not self.close_connection: 438 self.handle_one_request() 439 440 def send_error(self, code, message=None, explain=None): 441 """Send and log an error reply. 442 443 Arguments are 444 * code: an HTTP error code 445 3 digits 446 * message: a simple optional 1 line reason phrase. 447 *( HTAB / SP / VCHAR / %x80-FF ) 448 defaults to short entry matching the response code 449 * explain: a detailed message defaults to the long entry 450 matching the response code. 451 452 This sends an error response (so it must be called before any 453 output has been generated), logs the error, and finally sends 454 a piece of HTML explaining the error to the user. 455 456 """ 457 458 try: 459 shortmsg, longmsg = self.responses[code] 460 except KeyError: 461 shortmsg, longmsg = '???', '???' 462 if message is None: 463 message = shortmsg 464 if explain is None: 465 explain = longmsg 466 self.log_error("code %d, message %s", code, message) 467 self.send_response(code, message) 468 self.send_header('Connection', 'close') 469 470 # Message body is omitted for cases described in: 471 # - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified) 472 # - RFC7231: 6.3.6. 205(Reset Content) 473 body = None 474 if (code >= 200 and 475 code not in (HTTPStatus.NO_CONTENT, 476 HTTPStatus.RESET_CONTENT, 477 HTTPStatus.NOT_MODIFIED)): 478 # HTML encode to prevent Cross Site Scripting attacks 479 # (see bug #1100201) 480 content = (self.error_message_format % { 481 'code': code, 482 'message': html.escape(message, quote=False), 483 'explain': html.escape(explain, quote=False) 484 }) 485 body = content.encode('UTF-8', 'replace') 486 self.send_header("Content-Type", self.error_content_type) 487 self.send_header('Content-Length', str(len(body))) 488 self.end_headers() 489 490 if self.command != 'HEAD' and body: 491 self.wfile.write(body) 492 493 def send_response(self, code, message=None): 494 """Add the response header to the headers buffer and log the 495 response code. 496 497 Also send two standard headers with the server software 498 version and the current date. 499 500 """ 501 self.log_request(code) 502 self.send_response_only(code, message) 503 self.send_header('Server', self.version_string()) 504 self.send_header('Date', self.date_time_string()) 505 506 def send_response_only(self, code, message=None): 507 """Send the response header only.""" 508 if self.request_version != 'HTTP/0.9': 509 if message is None: 510 if code in self.responses: 511 message = self.responses[code][0] 512 else: 513 message = '' 514 if not hasattr(self, '_headers_buffer'): 515 self._headers_buffer = [] 516 self._headers_buffer.append(("%s %d %s\r\n" % 517 (self.protocol_version, code, message)).encode( 518 'latin-1', 'strict')) 519 520 def send_header(self, keyword, value): 521 """Send a MIME header to the headers buffer.""" 522 if self.request_version != 'HTTP/0.9': 523 if not hasattr(self, '_headers_buffer'): 524 self._headers_buffer = [] 525 self._headers_buffer.append( 526 ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) 527 528 if keyword.lower() == 'connection': 529 if value.lower() == 'close': 530 self.close_connection = True 531 elif value.lower() == 'keep-alive': 532 self.close_connection = False 533 534 def end_headers(self): 535 """Send the blank line ending the MIME headers.""" 536 if self.request_version != 'HTTP/0.9': 537 self._headers_buffer.append(b"\r\n") 538 self.flush_headers() 539 540 def flush_headers(self): 541 if hasattr(self, '_headers_buffer'): 542 self.wfile.write(b"".join(self._headers_buffer)) 543 self._headers_buffer = [] 544 545 def log_request(self, code='-', size='-'): 546 """Log an accepted request. 547 548 This is called by send_response(). 549 550 """ 551 if isinstance(code, HTTPStatus): 552 code = code.value 553 self.log_message('"%s" %s %s', 554 self.requestline, str(code), str(size)) 555 556 def log_error(self, format, *args): 557 """Log an error. 558 559 This is called when a request cannot be fulfilled. By 560 default it passes the message on to log_message(). 561 562 Arguments are the same as for log_message(). 563 564 XXX This should go to the separate error log. 565 566 """ 567 568 self.log_message(format, *args) 569 570 # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes 571 _control_char_table = str.maketrans( 572 {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) 573 _control_char_table[ord('\\')] = r'\\' 574 575 def log_message(self, format, *args): 576 """Log an arbitrary message. 577 578 This is used by all other logging functions. Override 579 it if you have specific logging wishes. 580 581 The first argument, FORMAT, is a format string for the 582 message to be logged. If the format string contains 583 any % escapes requiring parameters, they should be 584 specified as subsequent arguments (it's just like 585 printf!). 586 587 The client ip and current date/time are prefixed to 588 every message. 589 590 Unicode control characters are replaced with escaped hex 591 before writing the output to stderr. 592 593 """ 594 595 message = format % args 596 sys.stderr.write("%s - - [%s] %s\n" % 597 (self.address_string(), 598 self.log_date_time_string(), 599 message.translate(self._control_char_table))) 600 601 def version_string(self): 602 """Return the server software version string.""" 603 return self.server_version + ' ' + self.sys_version 604 605 def date_time_string(self, timestamp=None): 606 """Return the current date and time formatted for a message header.""" 607 if timestamp is None: 608 timestamp = time.time() 609 return email.utils.formatdate(timestamp, usegmt=True) 610 611 def log_date_time_string(self): 612 """Return the current time formatted for logging.""" 613 now = time.time() 614 year, month, day, hh, mm, ss, x, y, z = time.localtime(now) 615 s = "%02d/%3s/%04d %02d:%02d:%02d" % ( 616 day, self.monthname[month], year, hh, mm, ss) 617 return s 618 619 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 620 621 monthname = [None, 622 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 623 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 624 625 def address_string(self): 626 """Return the client address.""" 627 628 return self.client_address[0] 629 630 # Essentially static class variables 631 632 # The version of the HTTP protocol we support. 633 # Set this to HTTP/1.1 to enable automatic keepalive 634 protocol_version = "HTTP/1.0" 635 636 # MessageClass used to parse headers 637 MessageClass = http.client.HTTPMessage 638 639 # hack to maintain backwards compatibility 640 responses = { 641 v: (v.phrase, v.description) 642 for v in HTTPStatus.__members__.values() 643 } 644 645 646class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): 647 648 """Simple HTTP request handler with GET and HEAD commands. 649 650 This serves files from the current directory and any of its 651 subdirectories. The MIME type for files is determined by 652 calling the .guess_type() method. 653 654 The GET and HEAD requests are identical except that the HEAD 655 request omits the actual contents of the file. 656 657 """ 658 659 server_version = "SimpleHTTP/" + __version__ 660 extensions_map = _encodings_map_default = { 661 '.gz': 'application/gzip', 662 '.Z': 'application/octet-stream', 663 '.bz2': 'application/x-bzip2', 664 '.xz': 'application/x-xz', 665 } 666 667 def __init__(self, *args, directory=None, **kwargs): 668 if directory is None: 669 directory = os.getcwd() 670 self.directory = os.fspath(directory) 671 super().__init__(*args, **kwargs) 672 673 def do_GET(self): 674 """Serve a GET request.""" 675 f = self.send_head() 676 if f: 677 try: 678 self.copyfile(f, self.wfile) 679 finally: 680 f.close() 681 682 def do_HEAD(self): 683 """Serve a HEAD request.""" 684 f = self.send_head() 685 if f: 686 f.close() 687 688 def send_head(self): 689 """Common code for GET and HEAD commands. 690 691 This sends the response code and MIME headers. 692 693 Return value is either a file object (which has to be copied 694 to the outputfile by the caller unless the command was HEAD, 695 and must be closed by the caller under all circumstances), or 696 None, in which case the caller has nothing further to do. 697 698 """ 699 path = self.translate_path(self.path) 700 f = None 701 if os.path.isdir(path): 702 parts = urllib.parse.urlsplit(self.path) 703 if not parts.path.endswith('/'): 704 # redirect browser - doing basically what apache does 705 self.send_response(HTTPStatus.MOVED_PERMANENTLY) 706 new_parts = (parts[0], parts[1], parts[2] + '/', 707 parts[3], parts[4]) 708 new_url = urllib.parse.urlunsplit(new_parts) 709 self.send_header("Location", new_url) 710 self.send_header("Content-Length", "0") 711 self.end_headers() 712 return None 713 for index in "index.html", "index.htm": 714 index = os.path.join(path, index) 715 if os.path.isfile(index): 716 path = index 717 break 718 else: 719 return self.list_directory(path) 720 ctype = self.guess_type(path) 721 # check for trailing "/" which should return 404. See Issue17324 722 # The test for this was added in test_httpserver.py 723 # However, some OS platforms accept a trailingSlash as a filename 724 # See discussion on python-dev and Issue34711 regarding 725 # parsing and rejection of filenames with a trailing slash 726 if path.endswith("/"): 727 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 728 return None 729 try: 730 f = open(path, 'rb') 731 except OSError: 732 self.send_error(HTTPStatus.NOT_FOUND, "File not found") 733 return None 734 735 try: 736 fs = os.fstat(f.fileno()) 737 # Use browser cache if possible 738 if ("If-Modified-Since" in self.headers 739 and "If-None-Match" not in self.headers): 740 # compare If-Modified-Since and time of last file modification 741 try: 742 ims = email.utils.parsedate_to_datetime( 743 self.headers["If-Modified-Since"]) 744 except (TypeError, IndexError, OverflowError, ValueError): 745 # ignore ill-formed values 746 pass 747 else: 748 if ims.tzinfo is None: 749 # obsolete format with no timezone, cf. 750 # https://tools.ietf.org/html/rfc7231#section-7.1.1.1 751 ims = ims.replace(tzinfo=datetime.timezone.utc) 752 if ims.tzinfo is datetime.timezone.utc: 753 # compare to UTC datetime of last modification 754 last_modif = datetime.datetime.fromtimestamp( 755 fs.st_mtime, datetime.timezone.utc) 756 # remove microseconds, like in If-Modified-Since 757 last_modif = last_modif.replace(microsecond=0) 758 759 if last_modif <= ims: 760 self.send_response(HTTPStatus.NOT_MODIFIED) 761 self.end_headers() 762 f.close() 763 return None 764 765 self.send_response(HTTPStatus.OK) 766 self.send_header("Content-type", ctype) 767 self.send_header("Content-Length", str(fs[6])) 768 self.send_header("Last-Modified", 769 self.date_time_string(fs.st_mtime)) 770 self.end_headers() 771 return f 772 except: 773 f.close() 774 raise 775 776 def list_directory(self, path): 777 """Helper to produce a directory listing (absent index.html). 778 779 Return value is either a file object, or None (indicating an 780 error). In either case, the headers are sent, making the 781 interface the same as for send_head(). 782 783 """ 784 try: 785 list = os.listdir(path) 786 except OSError: 787 self.send_error( 788 HTTPStatus.NOT_FOUND, 789 "No permission to list directory") 790 return None 791 list.sort(key=lambda a: a.lower()) 792 r = [] 793 try: 794 displaypath = urllib.parse.unquote(self.path, 795 errors='surrogatepass') 796 except UnicodeDecodeError: 797 displaypath = urllib.parse.unquote(self.path) 798 displaypath = html.escape(displaypath, quote=False) 799 enc = sys.getfilesystemencoding() 800 title = f'Directory listing for {displaypath}' 801 r.append('<!DOCTYPE HTML>') 802 r.append('<html lang="en">') 803 r.append('<head>') 804 r.append(f'<meta charset="{enc}">') 805 r.append(f'<title>{title}</title>\n</head>') 806 r.append(f'<body>\n<h1>{title}</h1>') 807 r.append('<hr>\n<ul>') 808 for name in list: 809 fullname = os.path.join(path, name) 810 displayname = linkname = name 811 # Append / for directories or @ for symbolic links 812 if os.path.isdir(fullname): 813 displayname = name + "/" 814 linkname = name + "/" 815 if os.path.islink(fullname): 816 displayname = name + "@" 817 # Note: a link to a directory displays with @ and links with / 818 r.append('<li><a href="%s">%s</a></li>' 819 % (urllib.parse.quote(linkname, 820 errors='surrogatepass'), 821 html.escape(displayname, quote=False))) 822 r.append('</ul>\n<hr>\n</body>\n</html>\n') 823 encoded = '\n'.join(r).encode(enc, 'surrogateescape') 824 f = io.BytesIO() 825 f.write(encoded) 826 f.seek(0) 827 self.send_response(HTTPStatus.OK) 828 self.send_header("Content-type", "text/html; charset=%s" % enc) 829 self.send_header("Content-Length", str(len(encoded))) 830 self.end_headers() 831 return f 832 833 def translate_path(self, path): 834 """Translate a /-separated PATH to the local filename syntax. 835 836 Components that mean special things to the local file system 837 (e.g. drive or directory names) are ignored. (XXX They should 838 probably be diagnosed.) 839 840 """ 841 # abandon query parameters 842 path = path.split('?',1)[0] 843 path = path.split('#',1)[0] 844 # Don't forget explicit trailing slash when normalizing. Issue17324 845 trailing_slash = path.rstrip().endswith('/') 846 try: 847 path = urllib.parse.unquote(path, errors='surrogatepass') 848 except UnicodeDecodeError: 849 path = urllib.parse.unquote(path) 850 path = posixpath.normpath(path) 851 words = path.split('/') 852 words = filter(None, words) 853 path = self.directory 854 for word in words: 855 if os.path.dirname(word) or word in (os.curdir, os.pardir): 856 # Ignore components that are not a simple file/directory name 857 continue 858 path = os.path.join(path, word) 859 if trailing_slash: 860 path += '/' 861 return path 862 863 def copyfile(self, source, outputfile): 864 """Copy all data between two file objects. 865 866 The SOURCE argument is a file object open for reading 867 (or anything with a read() method) and the DESTINATION 868 argument is a file object open for writing (or 869 anything with a write() method). 870 871 The only reason for overriding this would be to change 872 the block size or perhaps to replace newlines by CRLF 873 -- note however that this the default server uses this 874 to copy binary data as well. 875 876 """ 877 shutil.copyfileobj(source, outputfile) 878 879 def guess_type(self, path): 880 """Guess the type of a file. 881 882 Argument is a PATH (a filename). 883 884 Return value is a string of the form type/subtype, 885 usable for a MIME Content-type header. 886 887 The default implementation looks the file's extension 888 up in the table self.extensions_map, using application/octet-stream 889 as a default; however it would be permissible (if 890 slow) to look inside the data to make a better guess. 891 892 """ 893 base, ext = posixpath.splitext(path) 894 if ext in self.extensions_map: 895 return self.extensions_map[ext] 896 ext = ext.lower() 897 if ext in self.extensions_map: 898 return self.extensions_map[ext] 899 guess, _ = mimetypes.guess_type(path) 900 if guess: 901 return guess 902 return 'application/octet-stream' 903 904 905# Utilities for CGIHTTPRequestHandler 906 907def _url_collapse_path(path): 908 """ 909 Given a URL path, remove extra '/'s and '.' path elements and collapse 910 any '..' references and returns a collapsed path. 911 912 Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. 913 The utility of this function is limited to is_cgi method and helps 914 preventing some security attacks. 915 916 Returns: The reconstituted URL, which will always start with a '/'. 917 918 Raises: IndexError if too many '..' occur within the path. 919 920 """ 921 # Query component should not be involved. 922 path, _, query = path.partition('?') 923 path = urllib.parse.unquote(path) 924 925 # Similar to os.path.split(os.path.normpath(path)) but specific to URL 926 # path semantics rather than local operating system semantics. 927 path_parts = path.split('/') 928 head_parts = [] 929 for part in path_parts[:-1]: 930 if part == '..': 931 head_parts.pop() # IndexError if more '..' than prior parts 932 elif part and part != '.': 933 head_parts.append( part ) 934 if path_parts: 935 tail_part = path_parts.pop() 936 if tail_part: 937 if tail_part == '..': 938 head_parts.pop() 939 tail_part = '' 940 elif tail_part == '.': 941 tail_part = '' 942 else: 943 tail_part = '' 944 945 if query: 946 tail_part = '?'.join((tail_part, query)) 947 948 splitpath = ('/' + '/'.join(head_parts), tail_part) 949 collapsed_path = "/".join(splitpath) 950 951 return collapsed_path 952 953 954 955nobody = None 956 957def nobody_uid(): 958 """Internal routine to get nobody's uid""" 959 global nobody 960 if nobody: 961 return nobody 962 try: 963 import pwd 964 except ImportError: 965 return -1 966 try: 967 nobody = pwd.getpwnam('nobody')[2] 968 except KeyError: 969 nobody = 1 + max(x[2] for x in pwd.getpwall()) 970 return nobody 971 972 973def executable(path): 974 """Test for executable file.""" 975 return os.access(path, os.X_OK) 976 977 978class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): 979 980 """Complete HTTP server with GET, HEAD and POST commands. 981 982 GET and HEAD also support running CGI scripts. 983 984 The POST command is *only* implemented for CGI scripts. 985 986 """ 987 988 # Determine platform specifics 989 have_fork = hasattr(os, 'fork') 990 991 # Make rfile unbuffered -- we need to read one line and then pass 992 # the rest to a subprocess, so we can't use buffered input. 993 rbufsize = 0 994 995 def do_POST(self): 996 """Serve a POST request. 997 998 This is only implemented for CGI scripts. 999 1000 """ 1001 1002 if self.is_cgi(): 1003 self.run_cgi() 1004 else: 1005 self.send_error( 1006 HTTPStatus.NOT_IMPLEMENTED, 1007 "Can only POST to CGI scripts") 1008 1009 def send_head(self): 1010 """Version of send_head that support CGI scripts""" 1011 if self.is_cgi(): 1012 return self.run_cgi() 1013 else: 1014 return SimpleHTTPRequestHandler.send_head(self) 1015 1016 def is_cgi(self): 1017 """Test whether self.path corresponds to a CGI script. 1018 1019 Returns True and updates the cgi_info attribute to the tuple 1020 (dir, rest) if self.path requires running a CGI script. 1021 Returns False otherwise. 1022 1023 If any exception is raised, the caller should assume that 1024 self.path was rejected as invalid and act accordingly. 1025 1026 The default implementation tests whether the normalized url 1027 path begins with one of the strings in self.cgi_directories 1028 (and the next character is a '/' or the end of the string). 1029 1030 """ 1031 collapsed_path = _url_collapse_path(self.path) 1032 dir_sep = collapsed_path.find('/', 1) 1033 while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories: 1034 dir_sep = collapsed_path.find('/', dir_sep+1) 1035 if dir_sep > 0: 1036 head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] 1037 self.cgi_info = head, tail 1038 return True 1039 return False 1040 1041 1042 cgi_directories = ['/cgi-bin', '/htbin'] 1043 1044 def is_executable(self, path): 1045 """Test whether argument path is an executable file.""" 1046 return executable(path) 1047 1048 def is_python(self, path): 1049 """Test whether argument path is a Python script.""" 1050 head, tail = os.path.splitext(path) 1051 return tail.lower() in (".py", ".pyw") 1052 1053 def run_cgi(self): 1054 """Execute a CGI script.""" 1055 dir, rest = self.cgi_info 1056 path = dir + '/' + rest 1057 i = path.find('/', len(dir)+1) 1058 while i >= 0: 1059 nextdir = path[:i] 1060 nextrest = path[i+1:] 1061 1062 scriptdir = self.translate_path(nextdir) 1063 if os.path.isdir(scriptdir): 1064 dir, rest = nextdir, nextrest 1065 i = path.find('/', len(dir)+1) 1066 else: 1067 break 1068 1069 # find an explicit query string, if present. 1070 rest, _, query = rest.partition('?') 1071 1072 # dissect the part after the directory name into a script name & 1073 # a possible additional path, to be stored in PATH_INFO. 1074 i = rest.find('/') 1075 if i >= 0: 1076 script, rest = rest[:i], rest[i:] 1077 else: 1078 script, rest = rest, '' 1079 1080 scriptname = dir + '/' + script 1081 scriptfile = self.translate_path(scriptname) 1082 if not os.path.exists(scriptfile): 1083 self.send_error( 1084 HTTPStatus.NOT_FOUND, 1085 "No such CGI script (%r)" % scriptname) 1086 return 1087 if not os.path.isfile(scriptfile): 1088 self.send_error( 1089 HTTPStatus.FORBIDDEN, 1090 "CGI script is not a plain file (%r)" % scriptname) 1091 return 1092 ispy = self.is_python(scriptname) 1093 if self.have_fork or not ispy: 1094 if not self.is_executable(scriptfile): 1095 self.send_error( 1096 HTTPStatus.FORBIDDEN, 1097 "CGI script is not executable (%r)" % scriptname) 1098 return 1099 1100 # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html 1101 # XXX Much of the following could be prepared ahead of time! 1102 env = copy.deepcopy(os.environ) 1103 env['SERVER_SOFTWARE'] = self.version_string() 1104 env['SERVER_NAME'] = self.server.server_name 1105 env['GATEWAY_INTERFACE'] = 'CGI/1.1' 1106 env['SERVER_PROTOCOL'] = self.protocol_version 1107 env['SERVER_PORT'] = str(self.server.server_port) 1108 env['REQUEST_METHOD'] = self.command 1109 uqrest = urllib.parse.unquote(rest) 1110 env['PATH_INFO'] = uqrest 1111 env['PATH_TRANSLATED'] = self.translate_path(uqrest) 1112 env['SCRIPT_NAME'] = scriptname 1113 env['QUERY_STRING'] = query 1114 env['REMOTE_ADDR'] = self.client_address[0] 1115 authorization = self.headers.get("authorization") 1116 if authorization: 1117 authorization = authorization.split() 1118 if len(authorization) == 2: 1119 import base64, binascii 1120 env['AUTH_TYPE'] = authorization[0] 1121 if authorization[0].lower() == "basic": 1122 try: 1123 authorization = authorization[1].encode('ascii') 1124 authorization = base64.decodebytes(authorization).\ 1125 decode('ascii') 1126 except (binascii.Error, UnicodeError): 1127 pass 1128 else: 1129 authorization = authorization.split(':') 1130 if len(authorization) == 2: 1131 env['REMOTE_USER'] = authorization[0] 1132 # XXX REMOTE_IDENT 1133 if self.headers.get('content-type') is None: 1134 env['CONTENT_TYPE'] = self.headers.get_content_type() 1135 else: 1136 env['CONTENT_TYPE'] = self.headers['content-type'] 1137 length = self.headers.get('content-length') 1138 if length: 1139 env['CONTENT_LENGTH'] = length 1140 referer = self.headers.get('referer') 1141 if referer: 1142 env['HTTP_REFERER'] = referer 1143 accept = self.headers.get_all('accept', ()) 1144 env['HTTP_ACCEPT'] = ','.join(accept) 1145 ua = self.headers.get('user-agent') 1146 if ua: 1147 env['HTTP_USER_AGENT'] = ua 1148 co = filter(None, self.headers.get_all('cookie', [])) 1149 cookie_str = ', '.join(co) 1150 if cookie_str: 1151 env['HTTP_COOKIE'] = cookie_str 1152 # XXX Other HTTP_* headers 1153 # Since we're setting the env in the parent, provide empty 1154 # values to override previously set values 1155 for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', 1156 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): 1157 env.setdefault(k, "") 1158 1159 self.send_response(HTTPStatus.OK, "Script output follows") 1160 self.flush_headers() 1161 1162 decoded_query = query.replace('+', ' ') 1163 1164 if self.have_fork: 1165 # Unix -- fork as we should 1166 args = [script] 1167 if '=' not in decoded_query: 1168 args.append(decoded_query) 1169 nobody = nobody_uid() 1170 self.wfile.flush() # Always flush before forking 1171 pid = os.fork() 1172 if pid != 0: 1173 # Parent 1174 pid, sts = os.waitpid(pid, 0) 1175 # throw away additional data [see bug #427345] 1176 while select.select([self.rfile], [], [], 0)[0]: 1177 if not self.rfile.read(1): 1178 break 1179 exitcode = os.waitstatus_to_exitcode(sts) 1180 if exitcode: 1181 self.log_error(f"CGI script exit code {exitcode}") 1182 return 1183 # Child 1184 try: 1185 try: 1186 os.setuid(nobody) 1187 except OSError: 1188 pass 1189 os.dup2(self.rfile.fileno(), 0) 1190 os.dup2(self.wfile.fileno(), 1) 1191 os.execve(scriptfile, args, env) 1192 except: 1193 self.server.handle_error(self.request, self.client_address) 1194 os._exit(127) 1195 1196 else: 1197 # Non-Unix -- use subprocess 1198 import subprocess 1199 cmdline = [scriptfile] 1200 if self.is_python(scriptfile): 1201 interp = sys.executable 1202 if interp.lower().endswith("w.exe"): 1203 # On Windows, use python.exe, not pythonw.exe 1204 interp = interp[:-5] + interp[-4:] 1205 cmdline = [interp, '-u'] + cmdline 1206 if '=' not in query: 1207 cmdline.append(query) 1208 self.log_message("command: %s", subprocess.list2cmdline(cmdline)) 1209 try: 1210 nbytes = int(length) 1211 except (TypeError, ValueError): 1212 nbytes = 0 1213 p = subprocess.Popen(cmdline, 1214 stdin=subprocess.PIPE, 1215 stdout=subprocess.PIPE, 1216 stderr=subprocess.PIPE, 1217 env = env 1218 ) 1219 if self.command.lower() == "post" and nbytes > 0: 1220 data = self.rfile.read(nbytes) 1221 else: 1222 data = None 1223 # throw away additional data [see bug #427345] 1224 while select.select([self.rfile._sock], [], [], 0)[0]: 1225 if not self.rfile._sock.recv(1): 1226 break 1227 stdout, stderr = p.communicate(data) 1228 self.wfile.write(stdout) 1229 if stderr: 1230 self.log_error('%s', stderr) 1231 p.stderr.close() 1232 p.stdout.close() 1233 status = p.returncode 1234 if status: 1235 self.log_error("CGI script exit status %#x", status) 1236 else: 1237 self.log_message("CGI script exited OK") 1238 1239 1240def _get_best_family(*address): 1241 infos = socket.getaddrinfo( 1242 *address, 1243 type=socket.SOCK_STREAM, 1244 flags=socket.AI_PASSIVE, 1245 ) 1246 family, type, proto, canonname, sockaddr = next(iter(infos)) 1247 return family, sockaddr 1248 1249 1250def test(HandlerClass=BaseHTTPRequestHandler, 1251 ServerClass=ThreadingHTTPServer, 1252 protocol="HTTP/1.0", port=8000, bind=None): 1253 """Test the HTTP request handler class. 1254 1255 This runs an HTTP server on port 8000 (or the port argument). 1256 1257 """ 1258 ServerClass.address_family, addr = _get_best_family(bind, port) 1259 HandlerClass.protocol_version = protocol 1260 with ServerClass(addr, HandlerClass) as httpd: 1261 host, port = httpd.socket.getsockname()[:2] 1262 url_host = f'[{host}]' if ':' in host else host 1263 print( 1264 f"Serving HTTP on {host} port {port} " 1265 f"(http://{url_host}:{port}/) ..." 1266 ) 1267 try: 1268 httpd.serve_forever() 1269 except KeyboardInterrupt: 1270 print("\nKeyboard interrupt received, exiting.") 1271 sys.exit(0) 1272 1273if __name__ == '__main__': 1274 import argparse 1275 import contextlib 1276 1277 parser = argparse.ArgumentParser() 1278 parser.add_argument('--cgi', action='store_true', 1279 help='run as CGI server') 1280 parser.add_argument('-b', '--bind', metavar='ADDRESS', 1281 help='bind to this address ' 1282 '(default: all interfaces)') 1283 parser.add_argument('-d', '--directory', default=os.getcwd(), 1284 help='serve this directory ' 1285 '(default: current directory)') 1286 parser.add_argument('-p', '--protocol', metavar='VERSION', 1287 default='HTTP/1.0', 1288 help='conform to this HTTP version ' 1289 '(default: %(default)s)') 1290 parser.add_argument('port', default=8000, type=int, nargs='?', 1291 help='bind to this port ' 1292 '(default: %(default)s)') 1293 args = parser.parse_args() 1294 if args.cgi: 1295 handler_class = CGIHTTPRequestHandler 1296 else: 1297 handler_class = SimpleHTTPRequestHandler 1298 1299 # ensure dual-stack is not disabled; ref #38907 1300 class DualStackServer(ThreadingHTTPServer): 1301 1302 def server_bind(self): 1303 # suppress exception when protocol is IPv4 1304 with contextlib.suppress(Exception): 1305 self.socket.setsockopt( 1306 socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0) 1307 return super().server_bind() 1308 1309 def finish_request(self, request, client_address): 1310 self.RequestHandlerClass(request, client_address, self, 1311 directory=args.directory) 1312 1313 test( 1314 HandlerClass=handler_class, 1315 ServerClass=DualStackServer, 1316 port=args.port, 1317 bind=args.bind, 1318 protocol=args.protocol, 1319 ) 1320