1""" 2Python implementation of the io module. 3""" 4 5import os 6import abc 7import codecs 8import errno 9import stat 10import sys 11# Import _thread instead of threading to reduce startup cost 12from _thread import allocate_lock as Lock 13if sys.platform in {'win32', 'cygwin'}: 14 from msvcrt import setmode as _setmode 15else: 16 _setmode = None 17 18import io 19from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) 20 21valid_seek_flags = {0, 1, 2} # Hardwired values 22if hasattr(os, 'SEEK_HOLE') : 23 valid_seek_flags.add(os.SEEK_HOLE) 24 valid_seek_flags.add(os.SEEK_DATA) 25 26# open() uses st_blksize whenever we can 27DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 28 29# NOTE: Base classes defined here are registered with the "official" ABCs 30# defined in io.py. We don't use real inheritance though, because we don't want 31# to inherit the C implementations. 32 33# Rebind for compatibility 34BlockingIOError = BlockingIOError 35 36# Does io.IOBase finalizer log the exception if the close() method fails? 37# The exception is ignored silently by default in release build. 38_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) 39# Does open() check its 'errors' argument? 40_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE 41 42 43def text_encoding(encoding, stacklevel=2): 44 """ 45 A helper function to choose the text encoding. 46 47 When encoding is not None, this function returns it. 48 Otherwise, this function returns the default text encoding 49 (i.e. "locale" or "utf-8" depends on UTF-8 mode). 50 51 This function emits an EncodingWarning if *encoding* is None and 52 sys.flags.warn_default_encoding is true. 53 54 This can be used in APIs with an encoding=None parameter 55 that pass it to TextIOWrapper or open. 56 However, please consider using encoding="utf-8" for new APIs. 57 """ 58 if encoding is None: 59 if sys.flags.utf8_mode: 60 encoding = "utf-8" 61 else: 62 encoding = "locale" 63 if sys.flags.warn_default_encoding: 64 import warnings 65 warnings.warn("'encoding' argument not specified.", 66 EncodingWarning, stacklevel + 1) 67 return encoding 68 69 70# Wrapper for builtins.open 71# 72# Trick so that open() won't become a bound method when stored 73# as a class variable (as dbm.dumb does). 74# 75# See init_set_builtins_open() in Python/pylifecycle.c. 76@staticmethod 77def open(file, mode="r", buffering=-1, encoding=None, errors=None, 78 newline=None, closefd=True, opener=None): 79 80 r"""Open file and return a stream. Raise OSError upon failure. 81 82 file is either a text or byte string giving the name (and the path 83 if the file isn't in the current working directory) of the file to 84 be opened or an integer file descriptor of the file to be 85 wrapped. (If a file descriptor is given, it is closed when the 86 returned I/O object is closed, unless closefd is set to False.) 87 88 mode is an optional string that specifies the mode in which the file is 89 opened. It defaults to 'r' which means open for reading in text mode. Other 90 common values are 'w' for writing (truncating the file if it already 91 exists), 'x' for exclusive creation of a new file, and 'a' for appending 92 (which on some Unix systems, means that all writes append to the end of the 93 file regardless of the current seek position). In text mode, if encoding is 94 not specified the encoding used is platform dependent. (For reading and 95 writing raw bytes use binary mode and leave encoding unspecified.) The 96 available modes are: 97 98 ========= =============================================================== 99 Character Meaning 100 --------- --------------------------------------------------------------- 101 'r' open for reading (default) 102 'w' open for writing, truncating the file first 103 'x' create a new file and open it for writing 104 'a' open for writing, appending to the end of the file if it exists 105 'b' binary mode 106 't' text mode (default) 107 '+' open a disk file for updating (reading and writing) 108 ========= =============================================================== 109 110 The default mode is 'rt' (open for reading text). For binary random 111 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 112 'r+b' opens the file without truncation. The 'x' mode implies 'w' and 113 raises an `FileExistsError` if the file already exists. 114 115 Python distinguishes between files opened in binary and text modes, 116 even when the underlying operating system doesn't. Files opened in 117 binary mode (appending 'b' to the mode argument) return contents as 118 bytes objects without any decoding. In text mode (the default, or when 119 't' is appended to the mode argument), the contents of the file are 120 returned as strings, the bytes having been first decoded using a 121 platform-dependent encoding or using the specified encoding if given. 122 123 buffering is an optional integer used to set the buffering policy. 124 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select 125 line buffering (only usable in text mode), and an integer > 1 to indicate 126 the size of a fixed-size chunk buffer. When no buffering argument is 127 given, the default buffering policy works as follows: 128 129 * Binary files are buffered in fixed-size chunks; the size of the buffer 130 is chosen using a heuristic trying to determine the underlying device's 131 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. 132 On many systems, the buffer will typically be 4096 or 8192 bytes long. 133 134 * "Interactive" text files (files for which isatty() returns True) 135 use line buffering. Other text files use the policy described above 136 for binary files. 137 138 encoding is the str name of the encoding used to decode or encode the 139 file. This should only be used in text mode. The default encoding is 140 platform dependent, but any encoding supported by Python can be 141 passed. See the codecs module for the list of supported encodings. 142 143 errors is an optional string that specifies how encoding errors are to 144 be handled---this argument should not be used in binary mode. Pass 145 'strict' to raise a ValueError exception if there is an encoding error 146 (the default of None has the same effect), or pass 'ignore' to ignore 147 errors. (Note that ignoring encoding errors can lead to data loss.) 148 See the documentation for codecs.register for a list of the permitted 149 encoding error strings. 150 151 newline is a string controlling how universal newlines works (it only 152 applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works 153 as follows: 154 155 * On input, if newline is None, universal newlines mode is 156 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 157 these are translated into '\n' before being returned to the 158 caller. If it is '', universal newline mode is enabled, but line 159 endings are returned to the caller untranslated. If it has any of 160 the other legal values, input lines are only terminated by the given 161 string, and the line ending is returned to the caller untranslated. 162 163 * On output, if newline is None, any '\n' characters written are 164 translated to the system default line separator, os.linesep. If 165 newline is '', no translation takes place. If newline is any of the 166 other legal values, any '\n' characters written are translated to 167 the given string. 168 169 closedfd is a bool. If closefd is False, the underlying file descriptor will 170 be kept open when the file is closed. This does not work when a file name is 171 given and must be True in that case. 172 173 The newly created file is non-inheritable. 174 175 A custom opener can be used by passing a callable as *opener*. The 176 underlying file descriptor for the file object is then obtained by calling 177 *opener* with (*file*, *flags*). *opener* must return an open file 178 descriptor (passing os.open as *opener* results in functionality similar to 179 passing None). 180 181 open() returns a file object whose type depends on the mode, and 182 through which the standard file operations such as reading and writing 183 are performed. When open() is used to open a file in a text mode ('w', 184 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open 185 a file in a binary mode, the returned class varies: in read binary 186 mode, it returns a BufferedReader; in write binary and append binary 187 modes, it returns a BufferedWriter, and in read/write mode, it returns 188 a BufferedRandom. 189 190 It is also possible to use a string or bytearray as a file for both 191 reading and writing. For strings StringIO can be used like a file 192 opened in a text mode, and for bytes a BytesIO can be used like a file 193 opened in a binary mode. 194 """ 195 if not isinstance(file, int): 196 file = os.fspath(file) 197 if not isinstance(file, (str, bytes, int)): 198 raise TypeError("invalid file: %r" % file) 199 if not isinstance(mode, str): 200 raise TypeError("invalid mode: %r" % mode) 201 if not isinstance(buffering, int): 202 raise TypeError("invalid buffering: %r" % buffering) 203 if encoding is not None and not isinstance(encoding, str): 204 raise TypeError("invalid encoding: %r" % encoding) 205 if errors is not None and not isinstance(errors, str): 206 raise TypeError("invalid errors: %r" % errors) 207 modes = set(mode) 208 if modes - set("axrwb+t") or len(mode) > len(modes): 209 raise ValueError("invalid mode: %r" % mode) 210 creating = "x" in modes 211 reading = "r" in modes 212 writing = "w" in modes 213 appending = "a" in modes 214 updating = "+" in modes 215 text = "t" in modes 216 binary = "b" in modes 217 if text and binary: 218 raise ValueError("can't have text and binary mode at once") 219 if creating + reading + writing + appending > 1: 220 raise ValueError("can't have read/write/append mode at once") 221 if not (creating or reading or writing or appending): 222 raise ValueError("must have exactly one of read/write/append mode") 223 if binary and encoding is not None: 224 raise ValueError("binary mode doesn't take an encoding argument") 225 if binary and errors is not None: 226 raise ValueError("binary mode doesn't take an errors argument") 227 if binary and newline is not None: 228 raise ValueError("binary mode doesn't take a newline argument") 229 if binary and buffering == 1: 230 import warnings 231 warnings.warn("line buffering (buffering=1) isn't supported in binary " 232 "mode, the default buffer size will be used", 233 RuntimeWarning, 2) 234 raw = FileIO(file, 235 (creating and "x" or "") + 236 (reading and "r" or "") + 237 (writing and "w" or "") + 238 (appending and "a" or "") + 239 (updating and "+" or ""), 240 closefd, opener=opener) 241 result = raw 242 try: 243 line_buffering = False 244 if buffering == 1 or buffering < 0 and raw.isatty(): 245 buffering = -1 246 line_buffering = True 247 if buffering < 0: 248 buffering = DEFAULT_BUFFER_SIZE 249 try: 250 bs = os.fstat(raw.fileno()).st_blksize 251 except (OSError, AttributeError): 252 pass 253 else: 254 if bs > 1: 255 buffering = bs 256 if buffering < 0: 257 raise ValueError("invalid buffering size") 258 if buffering == 0: 259 if binary: 260 return result 261 raise ValueError("can't have unbuffered text I/O") 262 if updating: 263 buffer = BufferedRandom(raw, buffering) 264 elif creating or writing or appending: 265 buffer = BufferedWriter(raw, buffering) 266 elif reading: 267 buffer = BufferedReader(raw, buffering) 268 else: 269 raise ValueError("unknown mode: %r" % mode) 270 result = buffer 271 if binary: 272 return result 273 encoding = text_encoding(encoding) 274 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 275 result = text 276 text.mode = mode 277 return result 278 except: 279 result.close() 280 raise 281 282# Define a default pure-Python implementation for open_code() 283# that does not allow hooks. Warn on first use. Defined for tests. 284def _open_code_with_warning(path): 285 """Opens the provided file with mode ``'rb'``. This function 286 should be used when the intent is to treat the contents as 287 executable code. 288 289 ``path`` should be an absolute path. 290 291 When supported by the runtime, this function can be hooked 292 in order to allow embedders more control over code files. 293 This functionality is not supported on the current runtime. 294 """ 295 import warnings 296 warnings.warn("_pyio.open_code() may not be using hooks", 297 RuntimeWarning, 2) 298 return open(path, "rb") 299 300try: 301 open_code = io.open_code 302except AttributeError: 303 open_code = _open_code_with_warning 304 305 306def __getattr__(name): 307 if name == "OpenWrapper": 308 # bpo-43680: Until Python 3.9, _pyio.open was not a static method and 309 # builtins.open was set to OpenWrapper to not become a bound method 310 # when set to a class variable. _io.open is a built-in function whereas 311 # _pyio.open is a Python function. In Python 3.10, _pyio.open() is now 312 # a static method, and builtins.open() is now io.open(). 313 import warnings 314 warnings.warn('OpenWrapper is deprecated, use open instead', 315 DeprecationWarning, stacklevel=2) 316 global OpenWrapper 317 OpenWrapper = open 318 return OpenWrapper 319 raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 320 321 322# In normal operation, both `UnsupportedOperation`s should be bound to the 323# same object. 324try: 325 UnsupportedOperation = io.UnsupportedOperation 326except AttributeError: 327 class UnsupportedOperation(OSError, ValueError): 328 pass 329 330 331class IOBase(metaclass=abc.ABCMeta): 332 333 """The abstract base class for all I/O classes. 334 335 This class provides dummy implementations for many methods that 336 derived classes can override selectively; the default implementations 337 represent a file that cannot be read, written or seeked. 338 339 Even though IOBase does not declare read or write because 340 their signatures will vary, implementations and clients should 341 consider those methods part of the interface. Also, implementations 342 may raise UnsupportedOperation when operations they do not support are 343 called. 344 345 The basic type used for binary data read from or written to a file is 346 bytes. Other bytes-like objects are accepted as method arguments too. 347 Text I/O classes work with str data. 348 349 Note that calling any method (even inquiries) on a closed stream is 350 undefined. Implementations may raise OSError in this case. 351 352 IOBase (and its subclasses) support the iterator protocol, meaning 353 that an IOBase object can be iterated over yielding the lines in a 354 stream. 355 356 IOBase also supports the :keyword:`with` statement. In this example, 357 fp is closed after the suite of the with statement is complete: 358 359 with open('spam.txt', 'r') as fp: 360 fp.write('Spam and eggs!') 361 """ 362 363 ### Internal ### 364 365 def _unsupported(self, name): 366 """Internal: raise an OSError exception for unsupported operations.""" 367 raise UnsupportedOperation("%s.%s() not supported" % 368 (self.__class__.__name__, name)) 369 370 ### Positioning ### 371 372 def seek(self, pos, whence=0): 373 """Change stream position. 374 375 Change the stream position to byte offset pos. Argument pos is 376 interpreted relative to the position indicated by whence. Values 377 for whence are ints: 378 379 * 0 -- start of stream (the default); offset should be zero or positive 380 * 1 -- current stream position; offset may be negative 381 * 2 -- end of stream; offset is usually negative 382 Some operating systems / file systems could provide additional values. 383 384 Return an int indicating the new absolute position. 385 """ 386 self._unsupported("seek") 387 388 def tell(self): 389 """Return an int indicating the current stream position.""" 390 return self.seek(0, 1) 391 392 def truncate(self, pos=None): 393 """Truncate file to size bytes. 394 395 Size defaults to the current IO position as reported by tell(). Return 396 the new size. 397 """ 398 self._unsupported("truncate") 399 400 ### Flush and close ### 401 402 def flush(self): 403 """Flush write buffers, if applicable. 404 405 This is not implemented for read-only and non-blocking streams. 406 """ 407 self._checkClosed() 408 # XXX Should this return the number of bytes written??? 409 410 __closed = False 411 412 def close(self): 413 """Flush and close the IO object. 414 415 This method has no effect if the file is already closed. 416 """ 417 if not self.__closed: 418 try: 419 self.flush() 420 finally: 421 self.__closed = True 422 423 def __del__(self): 424 """Destructor. Calls close().""" 425 try: 426 closed = self.closed 427 except AttributeError: 428 # If getting closed fails, then the object is probably 429 # in an unusable state, so ignore. 430 return 431 432 if closed: 433 return 434 435 if _IOBASE_EMITS_UNRAISABLE: 436 self.close() 437 else: 438 # The try/except block is in case this is called at program 439 # exit time, when it's possible that globals have already been 440 # deleted, and then the close() call might fail. Since 441 # there's nothing we can do about such failures and they annoy 442 # the end users, we suppress the traceback. 443 try: 444 self.close() 445 except: 446 pass 447 448 ### Inquiries ### 449 450 def seekable(self): 451 """Return a bool indicating whether object supports random access. 452 453 If False, seek(), tell() and truncate() will raise OSError. 454 This method may need to do a test seek(). 455 """ 456 return False 457 458 def _checkSeekable(self, msg=None): 459 """Internal: raise UnsupportedOperation if file is not seekable 460 """ 461 if not self.seekable(): 462 raise UnsupportedOperation("File or stream is not seekable." 463 if msg is None else msg) 464 465 def readable(self): 466 """Return a bool indicating whether object was opened for reading. 467 468 If False, read() will raise OSError. 469 """ 470 return False 471 472 def _checkReadable(self, msg=None): 473 """Internal: raise UnsupportedOperation if file is not readable 474 """ 475 if not self.readable(): 476 raise UnsupportedOperation("File or stream is not readable." 477 if msg is None else msg) 478 479 def writable(self): 480 """Return a bool indicating whether object was opened for writing. 481 482 If False, write() and truncate() will raise OSError. 483 """ 484 return False 485 486 def _checkWritable(self, msg=None): 487 """Internal: raise UnsupportedOperation if file is not writable 488 """ 489 if not self.writable(): 490 raise UnsupportedOperation("File or stream is not writable." 491 if msg is None else msg) 492 493 @property 494 def closed(self): 495 """closed: bool. True iff the file has been closed. 496 497 For backwards compatibility, this is a property, not a predicate. 498 """ 499 return self.__closed 500 501 def _checkClosed(self, msg=None): 502 """Internal: raise a ValueError if file is closed 503 """ 504 if self.closed: 505 raise ValueError("I/O operation on closed file." 506 if msg is None else msg) 507 508 ### Context manager ### 509 510 def __enter__(self): # That's a forward reference 511 """Context management protocol. Returns self (an instance of IOBase).""" 512 self._checkClosed() 513 return self 514 515 def __exit__(self, *args): 516 """Context management protocol. Calls close()""" 517 self.close() 518 519 ### Lower-level APIs ### 520 521 # XXX Should these be present even if unimplemented? 522 523 def fileno(self): 524 """Returns underlying file descriptor (an int) if one exists. 525 526 An OSError is raised if the IO object does not use a file descriptor. 527 """ 528 self._unsupported("fileno") 529 530 def isatty(self): 531 """Return a bool indicating whether this is an 'interactive' stream. 532 533 Return False if it can't be determined. 534 """ 535 self._checkClosed() 536 return False 537 538 ### Readline[s] and writelines ### 539 540 def readline(self, size=-1): 541 r"""Read and return a line of bytes from the stream. 542 543 If size is specified, at most size bytes will be read. 544 Size should be an int. 545 546 The line terminator is always b'\n' for binary files; for text 547 files, the newlines argument to open can be used to select the line 548 terminator(s) recognized. 549 """ 550 # For backwards compatibility, a (slowish) readline(). 551 if hasattr(self, "peek"): 552 def nreadahead(): 553 readahead = self.peek(1) 554 if not readahead: 555 return 1 556 n = (readahead.find(b"\n") + 1) or len(readahead) 557 if size >= 0: 558 n = min(n, size) 559 return n 560 else: 561 def nreadahead(): 562 return 1 563 if size is None: 564 size = -1 565 else: 566 try: 567 size_index = size.__index__ 568 except AttributeError: 569 raise TypeError(f"{size!r} is not an integer") 570 else: 571 size = size_index() 572 res = bytearray() 573 while size < 0 or len(res) < size: 574 b = self.read(nreadahead()) 575 if not b: 576 break 577 res += b 578 if res.endswith(b"\n"): 579 break 580 return bytes(res) 581 582 def __iter__(self): 583 self._checkClosed() 584 return self 585 586 def __next__(self): 587 line = self.readline() 588 if not line: 589 raise StopIteration 590 return line 591 592 def readlines(self, hint=None): 593 """Return a list of lines from the stream. 594 595 hint can be specified to control the number of lines read: no more 596 lines will be read if the total size (in bytes/characters) of all 597 lines so far exceeds hint. 598 """ 599 if hint is None or hint <= 0: 600 return list(self) 601 n = 0 602 lines = [] 603 for line in self: 604 lines.append(line) 605 n += len(line) 606 if n >= hint: 607 break 608 return lines 609 610 def writelines(self, lines): 611 """Write a list of lines to the stream. 612 613 Line separators are not added, so it is usual for each of the lines 614 provided to have a line separator at the end. 615 """ 616 self._checkClosed() 617 for line in lines: 618 self.write(line) 619 620io.IOBase.register(IOBase) 621 622 623class RawIOBase(IOBase): 624 625 """Base class for raw binary I/O.""" 626 627 # The read() method is implemented by calling readinto(); derived 628 # classes that want to support read() only need to implement 629 # readinto() as a primitive operation. In general, readinto() can be 630 # more efficient than read(). 631 632 # (It would be tempting to also provide an implementation of 633 # readinto() in terms of read(), in case the latter is a more suitable 634 # primitive operation, but that would lead to nasty recursion in case 635 # a subclass doesn't implement either.) 636 637 def read(self, size=-1): 638 """Read and return up to size bytes, where size is an int. 639 640 Returns an empty bytes object on EOF, or None if the object is 641 set not to block and has no data to read. 642 """ 643 if size is None: 644 size = -1 645 if size < 0: 646 return self.readall() 647 b = bytearray(size.__index__()) 648 n = self.readinto(b) 649 if n is None: 650 return None 651 del b[n:] 652 return bytes(b) 653 654 def readall(self): 655 """Read until EOF, using multiple read() call.""" 656 res = bytearray() 657 while True: 658 data = self.read(DEFAULT_BUFFER_SIZE) 659 if not data: 660 break 661 res += data 662 if res: 663 return bytes(res) 664 else: 665 # b'' or None 666 return data 667 668 def readinto(self, b): 669 """Read bytes into a pre-allocated bytes-like object b. 670 671 Returns an int representing the number of bytes read (0 for EOF), or 672 None if the object is set not to block and has no data to read. 673 """ 674 self._unsupported("readinto") 675 676 def write(self, b): 677 """Write the given buffer to the IO stream. 678 679 Returns the number of bytes written, which may be less than the 680 length of b in bytes. 681 """ 682 self._unsupported("write") 683 684io.RawIOBase.register(RawIOBase) 685from _io import FileIO 686RawIOBase.register(FileIO) 687 688 689class BufferedIOBase(IOBase): 690 691 """Base class for buffered IO objects. 692 693 The main difference with RawIOBase is that the read() method 694 supports omitting the size argument, and does not have a default 695 implementation that defers to readinto(). 696 697 In addition, read(), readinto() and write() may raise 698 BlockingIOError if the underlying raw stream is in non-blocking 699 mode and not ready; unlike their raw counterparts, they will never 700 return None. 701 702 A typical implementation should not inherit from a RawIOBase 703 implementation, but wrap one. 704 """ 705 706 def read(self, size=-1): 707 """Read and return up to size bytes, where size is an int. 708 709 If the argument is omitted, None, or negative, reads and 710 returns all data until EOF. 711 712 If the argument is positive, and the underlying raw stream is 713 not 'interactive', multiple raw reads may be issued to satisfy 714 the byte count (unless EOF is reached first). But for 715 interactive raw streams (XXX and for pipes?), at most one raw 716 read will be issued, and a short result does not imply that 717 EOF is imminent. 718 719 Returns an empty bytes array on EOF. 720 721 Raises BlockingIOError if the underlying raw stream has no 722 data at the moment. 723 """ 724 self._unsupported("read") 725 726 def read1(self, size=-1): 727 """Read up to size bytes with at most one read() system call, 728 where size is an int. 729 """ 730 self._unsupported("read1") 731 732 def readinto(self, b): 733 """Read bytes into a pre-allocated bytes-like object b. 734 735 Like read(), this may issue multiple reads to the underlying raw 736 stream, unless the latter is 'interactive'. 737 738 Returns an int representing the number of bytes read (0 for EOF). 739 740 Raises BlockingIOError if the underlying raw stream has no 741 data at the moment. 742 """ 743 744 return self._readinto(b, read1=False) 745 746 def readinto1(self, b): 747 """Read bytes into buffer *b*, using at most one system call 748 749 Returns an int representing the number of bytes read (0 for EOF). 750 751 Raises BlockingIOError if the underlying raw stream has no 752 data at the moment. 753 """ 754 755 return self._readinto(b, read1=True) 756 757 def _readinto(self, b, read1): 758 if not isinstance(b, memoryview): 759 b = memoryview(b) 760 b = b.cast('B') 761 762 if read1: 763 data = self.read1(len(b)) 764 else: 765 data = self.read(len(b)) 766 n = len(data) 767 768 b[:n] = data 769 770 return n 771 772 def write(self, b): 773 """Write the given bytes buffer to the IO stream. 774 775 Return the number of bytes written, which is always the length of b 776 in bytes. 777 778 Raises BlockingIOError if the buffer is full and the 779 underlying raw stream cannot accept more data at the moment. 780 """ 781 self._unsupported("write") 782 783 def detach(self): 784 """ 785 Separate the underlying raw stream from the buffer and return it. 786 787 After the raw stream has been detached, the buffer is in an unusable 788 state. 789 """ 790 self._unsupported("detach") 791 792io.BufferedIOBase.register(BufferedIOBase) 793 794 795class _BufferedIOMixin(BufferedIOBase): 796 797 """A mixin implementation of BufferedIOBase with an underlying raw stream. 798 799 This passes most requests on to the underlying raw stream. It 800 does *not* provide implementations of read(), readinto() or 801 write(). 802 """ 803 804 def __init__(self, raw): 805 self._raw = raw 806 807 ### Positioning ### 808 809 def seek(self, pos, whence=0): 810 new_position = self.raw.seek(pos, whence) 811 if new_position < 0: 812 raise OSError("seek() returned an invalid position") 813 return new_position 814 815 def tell(self): 816 pos = self.raw.tell() 817 if pos < 0: 818 raise OSError("tell() returned an invalid position") 819 return pos 820 821 def truncate(self, pos=None): 822 self._checkClosed() 823 self._checkWritable() 824 825 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 826 # and a flush may be necessary to synch both views of the current 827 # file state. 828 self.flush() 829 830 if pos is None: 831 pos = self.tell() 832 # XXX: Should seek() be used, instead of passing the position 833 # XXX directly to truncate? 834 return self.raw.truncate(pos) 835 836 ### Flush and close ### 837 838 def flush(self): 839 if self.closed: 840 raise ValueError("flush on closed file") 841 self.raw.flush() 842 843 def close(self): 844 if self.raw is not None and not self.closed: 845 try: 846 # may raise BlockingIOError or BrokenPipeError etc 847 self.flush() 848 finally: 849 self.raw.close() 850 851 def detach(self): 852 if self.raw is None: 853 raise ValueError("raw stream already detached") 854 self.flush() 855 raw = self._raw 856 self._raw = None 857 return raw 858 859 ### Inquiries ### 860 861 def seekable(self): 862 return self.raw.seekable() 863 864 @property 865 def raw(self): 866 return self._raw 867 868 @property 869 def closed(self): 870 return self.raw.closed 871 872 @property 873 def name(self): 874 return self.raw.name 875 876 @property 877 def mode(self): 878 return self.raw.mode 879 880 def __getstate__(self): 881 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 882 883 def __repr__(self): 884 modname = self.__class__.__module__ 885 clsname = self.__class__.__qualname__ 886 try: 887 name = self.name 888 except AttributeError: 889 return "<{}.{}>".format(modname, clsname) 890 else: 891 return "<{}.{} name={!r}>".format(modname, clsname, name) 892 893 ### Lower-level APIs ### 894 895 def fileno(self): 896 return self.raw.fileno() 897 898 def isatty(self): 899 return self.raw.isatty() 900 901 902class BytesIO(BufferedIOBase): 903 904 """Buffered I/O implementation using an in-memory bytes buffer.""" 905 906 # Initialize _buffer as soon as possible since it's used by __del__() 907 # which calls close() 908 _buffer = None 909 910 def __init__(self, initial_bytes=None): 911 buf = bytearray() 912 if initial_bytes is not None: 913 buf += initial_bytes 914 self._buffer = buf 915 self._pos = 0 916 917 def __getstate__(self): 918 if self.closed: 919 raise ValueError("__getstate__ on closed file") 920 return self.__dict__.copy() 921 922 def getvalue(self): 923 """Return the bytes value (contents) of the buffer 924 """ 925 if self.closed: 926 raise ValueError("getvalue on closed file") 927 return bytes(self._buffer) 928 929 def getbuffer(self): 930 """Return a readable and writable view of the buffer. 931 """ 932 if self.closed: 933 raise ValueError("getbuffer on closed file") 934 return memoryview(self._buffer) 935 936 def close(self): 937 if self._buffer is not None: 938 self._buffer.clear() 939 super().close() 940 941 def read(self, size=-1): 942 if self.closed: 943 raise ValueError("read from closed file") 944 if size is None: 945 size = -1 946 else: 947 try: 948 size_index = size.__index__ 949 except AttributeError: 950 raise TypeError(f"{size!r} is not an integer") 951 else: 952 size = size_index() 953 if size < 0: 954 size = len(self._buffer) 955 if len(self._buffer) <= self._pos: 956 return b"" 957 newpos = min(len(self._buffer), self._pos + size) 958 b = self._buffer[self._pos : newpos] 959 self._pos = newpos 960 return bytes(b) 961 962 def read1(self, size=-1): 963 """This is the same as read. 964 """ 965 return self.read(size) 966 967 def write(self, b): 968 if self.closed: 969 raise ValueError("write to closed file") 970 if isinstance(b, str): 971 raise TypeError("can't write str to binary stream") 972 with memoryview(b) as view: 973 n = view.nbytes # Size of any bytes-like object 974 if n == 0: 975 return 0 976 pos = self._pos 977 if pos > len(self._buffer): 978 # Inserts null bytes between the current end of the file 979 # and the new write position. 980 padding = b'\x00' * (pos - len(self._buffer)) 981 self._buffer += padding 982 self._buffer[pos:pos + n] = b 983 self._pos += n 984 return n 985 986 def seek(self, pos, whence=0): 987 if self.closed: 988 raise ValueError("seek on closed file") 989 try: 990 pos_index = pos.__index__ 991 except AttributeError: 992 raise TypeError(f"{pos!r} is not an integer") 993 else: 994 pos = pos_index() 995 if whence == 0: 996 if pos < 0: 997 raise ValueError("negative seek position %r" % (pos,)) 998 self._pos = pos 999 elif whence == 1: 1000 self._pos = max(0, self._pos + pos) 1001 elif whence == 2: 1002 self._pos = max(0, len(self._buffer) + pos) 1003 else: 1004 raise ValueError("unsupported whence value") 1005 return self._pos 1006 1007 def tell(self): 1008 if self.closed: 1009 raise ValueError("tell on closed file") 1010 return self._pos 1011 1012 def truncate(self, pos=None): 1013 if self.closed: 1014 raise ValueError("truncate on closed file") 1015 if pos is None: 1016 pos = self._pos 1017 else: 1018 try: 1019 pos_index = pos.__index__ 1020 except AttributeError: 1021 raise TypeError(f"{pos!r} is not an integer") 1022 else: 1023 pos = pos_index() 1024 if pos < 0: 1025 raise ValueError("negative truncate position %r" % (pos,)) 1026 del self._buffer[pos:] 1027 return pos 1028 1029 def readable(self): 1030 if self.closed: 1031 raise ValueError("I/O operation on closed file.") 1032 return True 1033 1034 def writable(self): 1035 if self.closed: 1036 raise ValueError("I/O operation on closed file.") 1037 return True 1038 1039 def seekable(self): 1040 if self.closed: 1041 raise ValueError("I/O operation on closed file.") 1042 return True 1043 1044 1045class BufferedReader(_BufferedIOMixin): 1046 1047 """BufferedReader(raw[, buffer_size]) 1048 1049 A buffer for a readable, sequential BaseRawIO object. 1050 1051 The constructor creates a BufferedReader for the given readable raw 1052 stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE 1053 is used. 1054 """ 1055 1056 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1057 """Create a new buffered reader using the given readable raw IO object. 1058 """ 1059 if not raw.readable(): 1060 raise OSError('"raw" argument must be readable.') 1061 1062 _BufferedIOMixin.__init__(self, raw) 1063 if buffer_size <= 0: 1064 raise ValueError("invalid buffer size") 1065 self.buffer_size = buffer_size 1066 self._reset_read_buf() 1067 self._read_lock = Lock() 1068 1069 def readable(self): 1070 return self.raw.readable() 1071 1072 def _reset_read_buf(self): 1073 self._read_buf = b"" 1074 self._read_pos = 0 1075 1076 def read(self, size=None): 1077 """Read size bytes. 1078 1079 Returns exactly size bytes of data unless the underlying raw IO 1080 stream reaches EOF or if the call would block in non-blocking 1081 mode. If size is negative, read until EOF or until read() would 1082 block. 1083 """ 1084 if size is not None and size < -1: 1085 raise ValueError("invalid number of bytes to read") 1086 with self._read_lock: 1087 return self._read_unlocked(size) 1088 1089 def _read_unlocked(self, n=None): 1090 nodata_val = b"" 1091 empty_values = (b"", None) 1092 buf = self._read_buf 1093 pos = self._read_pos 1094 1095 # Special case for when the number of bytes to read is unspecified. 1096 if n is None or n == -1: 1097 self._reset_read_buf() 1098 if hasattr(self.raw, 'readall'): 1099 chunk = self.raw.readall() 1100 if chunk is None: 1101 return buf[pos:] or None 1102 else: 1103 return buf[pos:] + chunk 1104 chunks = [buf[pos:]] # Strip the consumed bytes. 1105 current_size = 0 1106 while True: 1107 # Read until EOF or until read() would block. 1108 chunk = self.raw.read() 1109 if chunk in empty_values: 1110 nodata_val = chunk 1111 break 1112 current_size += len(chunk) 1113 chunks.append(chunk) 1114 return b"".join(chunks) or nodata_val 1115 1116 # The number of bytes to read is specified, return at most n bytes. 1117 avail = len(buf) - pos # Length of the available buffered data. 1118 if n <= avail: 1119 # Fast path: the data to read is fully buffered. 1120 self._read_pos += n 1121 return buf[pos:pos+n] 1122 # Slow path: read from the stream until enough bytes are read, 1123 # or until an EOF occurs or until read() would block. 1124 chunks = [buf[pos:]] 1125 wanted = max(self.buffer_size, n) 1126 while avail < n: 1127 chunk = self.raw.read(wanted) 1128 if chunk in empty_values: 1129 nodata_val = chunk 1130 break 1131 avail += len(chunk) 1132 chunks.append(chunk) 1133 # n is more than avail only when an EOF occurred or when 1134 # read() would have blocked. 1135 n = min(n, avail) 1136 out = b"".join(chunks) 1137 self._read_buf = out[n:] # Save the extra data in the buffer. 1138 self._read_pos = 0 1139 return out[:n] if out else nodata_val 1140 1141 def peek(self, size=0): 1142 """Returns buffered bytes without advancing the position. 1143 1144 The argument indicates a desired minimal number of bytes; we 1145 do at most one raw read to satisfy it. We never return more 1146 than self.buffer_size. 1147 """ 1148 with self._read_lock: 1149 return self._peek_unlocked(size) 1150 1151 def _peek_unlocked(self, n=0): 1152 want = min(n, self.buffer_size) 1153 have = len(self._read_buf) - self._read_pos 1154 if have < want or have <= 0: 1155 to_read = self.buffer_size - have 1156 current = self.raw.read(to_read) 1157 if current: 1158 self._read_buf = self._read_buf[self._read_pos:] + current 1159 self._read_pos = 0 1160 return self._read_buf[self._read_pos:] 1161 1162 def read1(self, size=-1): 1163 """Reads up to size bytes, with at most one read() system call.""" 1164 # Returns up to size bytes. If at least one byte is buffered, we 1165 # only return buffered bytes. Otherwise, we do one raw read. 1166 if size < 0: 1167 size = self.buffer_size 1168 if size == 0: 1169 return b"" 1170 with self._read_lock: 1171 self._peek_unlocked(1) 1172 return self._read_unlocked( 1173 min(size, len(self._read_buf) - self._read_pos)) 1174 1175 # Implementing readinto() and readinto1() is not strictly necessary (we 1176 # could rely on the base class that provides an implementation in terms of 1177 # read() and read1()). We do it anyway to keep the _pyio implementation 1178 # similar to the io implementation (which implements the methods for 1179 # performance reasons). 1180 def _readinto(self, buf, read1): 1181 """Read data into *buf* with at most one system call.""" 1182 1183 # Need to create a memoryview object of type 'b', otherwise 1184 # we may not be able to assign bytes to it, and slicing it 1185 # would create a new object. 1186 if not isinstance(buf, memoryview): 1187 buf = memoryview(buf) 1188 if buf.nbytes == 0: 1189 return 0 1190 buf = buf.cast('B') 1191 1192 written = 0 1193 with self._read_lock: 1194 while written < len(buf): 1195 1196 # First try to read from internal buffer 1197 avail = min(len(self._read_buf) - self._read_pos, len(buf)) 1198 if avail: 1199 buf[written:written+avail] = \ 1200 self._read_buf[self._read_pos:self._read_pos+avail] 1201 self._read_pos += avail 1202 written += avail 1203 if written == len(buf): 1204 break 1205 1206 # If remaining space in callers buffer is larger than 1207 # internal buffer, read directly into callers buffer 1208 if len(buf) - written > self.buffer_size: 1209 n = self.raw.readinto(buf[written:]) 1210 if not n: 1211 break # eof 1212 written += n 1213 1214 # Otherwise refill internal buffer - unless we're 1215 # in read1 mode and already got some data 1216 elif not (read1 and written): 1217 if not self._peek_unlocked(1): 1218 break # eof 1219 1220 # In readinto1 mode, return as soon as we have some data 1221 if read1 and written: 1222 break 1223 1224 return written 1225 1226 def tell(self): 1227 return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos 1228 1229 def seek(self, pos, whence=0): 1230 if whence not in valid_seek_flags: 1231 raise ValueError("invalid whence value") 1232 with self._read_lock: 1233 if whence == 1: 1234 pos -= len(self._read_buf) - self._read_pos 1235 pos = _BufferedIOMixin.seek(self, pos, whence) 1236 self._reset_read_buf() 1237 return pos 1238 1239class BufferedWriter(_BufferedIOMixin): 1240 1241 """A buffer for a writeable sequential RawIO object. 1242 1243 The constructor creates a BufferedWriter for the given writeable raw 1244 stream. If the buffer_size is not given, it defaults to 1245 DEFAULT_BUFFER_SIZE. 1246 """ 1247 1248 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1249 if not raw.writable(): 1250 raise OSError('"raw" argument must be writable.') 1251 1252 _BufferedIOMixin.__init__(self, raw) 1253 if buffer_size <= 0: 1254 raise ValueError("invalid buffer size") 1255 self.buffer_size = buffer_size 1256 self._write_buf = bytearray() 1257 self._write_lock = Lock() 1258 1259 def writable(self): 1260 return self.raw.writable() 1261 1262 def write(self, b): 1263 if isinstance(b, str): 1264 raise TypeError("can't write str to binary stream") 1265 with self._write_lock: 1266 if self.closed: 1267 raise ValueError("write to closed file") 1268 # XXX we can implement some more tricks to try and avoid 1269 # partial writes 1270 if len(self._write_buf) > self.buffer_size: 1271 # We're full, so let's pre-flush the buffer. (This may 1272 # raise BlockingIOError with characters_written == 0.) 1273 self._flush_unlocked() 1274 before = len(self._write_buf) 1275 self._write_buf.extend(b) 1276 written = len(self._write_buf) - before 1277 if len(self._write_buf) > self.buffer_size: 1278 try: 1279 self._flush_unlocked() 1280 except BlockingIOError as e: 1281 if len(self._write_buf) > self.buffer_size: 1282 # We've hit the buffer_size. We have to accept a partial 1283 # write and cut back our buffer. 1284 overage = len(self._write_buf) - self.buffer_size 1285 written -= overage 1286 self._write_buf = self._write_buf[:self.buffer_size] 1287 raise BlockingIOError(e.errno, e.strerror, written) 1288 return written 1289 1290 def truncate(self, pos=None): 1291 with self._write_lock: 1292 self._flush_unlocked() 1293 if pos is None: 1294 pos = self.raw.tell() 1295 return self.raw.truncate(pos) 1296 1297 def flush(self): 1298 with self._write_lock: 1299 self._flush_unlocked() 1300 1301 def _flush_unlocked(self): 1302 if self.closed: 1303 raise ValueError("flush on closed file") 1304 while self._write_buf: 1305 try: 1306 n = self.raw.write(self._write_buf) 1307 except BlockingIOError: 1308 raise RuntimeError("self.raw should implement RawIOBase: it " 1309 "should not raise BlockingIOError") 1310 if n is None: 1311 raise BlockingIOError( 1312 errno.EAGAIN, 1313 "write could not complete without blocking", 0) 1314 if n > len(self._write_buf) or n < 0: 1315 raise OSError("write() returned incorrect number of bytes") 1316 del self._write_buf[:n] 1317 1318 def tell(self): 1319 return _BufferedIOMixin.tell(self) + len(self._write_buf) 1320 1321 def seek(self, pos, whence=0): 1322 if whence not in valid_seek_flags: 1323 raise ValueError("invalid whence value") 1324 with self._write_lock: 1325 self._flush_unlocked() 1326 return _BufferedIOMixin.seek(self, pos, whence) 1327 1328 def close(self): 1329 with self._write_lock: 1330 if self.raw is None or self.closed: 1331 return 1332 # We have to release the lock and call self.flush() (which will 1333 # probably just re-take the lock) in case flush has been overridden in 1334 # a subclass or the user set self.flush to something. This is the same 1335 # behavior as the C implementation. 1336 try: 1337 # may raise BlockingIOError or BrokenPipeError etc 1338 self.flush() 1339 finally: 1340 with self._write_lock: 1341 self.raw.close() 1342 1343 1344class BufferedRWPair(BufferedIOBase): 1345 1346 """A buffered reader and writer object together. 1347 1348 A buffered reader object and buffered writer object put together to 1349 form a sequential IO object that can read and write. This is typically 1350 used with a socket or two-way pipe. 1351 1352 reader and writer are RawIOBase objects that are readable and 1353 writeable respectively. If the buffer_size is omitted it defaults to 1354 DEFAULT_BUFFER_SIZE. 1355 """ 1356 1357 # XXX The usefulness of this (compared to having two separate IO 1358 # objects) is questionable. 1359 1360 def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE): 1361 """Constructor. 1362 1363 The arguments are two RawIO instances. 1364 """ 1365 if not reader.readable(): 1366 raise OSError('"reader" argument must be readable.') 1367 1368 if not writer.writable(): 1369 raise OSError('"writer" argument must be writable.') 1370 1371 self.reader = BufferedReader(reader, buffer_size) 1372 self.writer = BufferedWriter(writer, buffer_size) 1373 1374 def read(self, size=-1): 1375 if size is None: 1376 size = -1 1377 return self.reader.read(size) 1378 1379 def readinto(self, b): 1380 return self.reader.readinto(b) 1381 1382 def write(self, b): 1383 return self.writer.write(b) 1384 1385 def peek(self, size=0): 1386 return self.reader.peek(size) 1387 1388 def read1(self, size=-1): 1389 return self.reader.read1(size) 1390 1391 def readinto1(self, b): 1392 return self.reader.readinto1(b) 1393 1394 def readable(self): 1395 return self.reader.readable() 1396 1397 def writable(self): 1398 return self.writer.writable() 1399 1400 def flush(self): 1401 return self.writer.flush() 1402 1403 def close(self): 1404 try: 1405 self.writer.close() 1406 finally: 1407 self.reader.close() 1408 1409 def isatty(self): 1410 return self.reader.isatty() or self.writer.isatty() 1411 1412 @property 1413 def closed(self): 1414 return self.writer.closed 1415 1416 1417class BufferedRandom(BufferedWriter, BufferedReader): 1418 1419 """A buffered interface to random access streams. 1420 1421 The constructor creates a reader and writer for a seekable stream, 1422 raw, given in the first argument. If the buffer_size is omitted it 1423 defaults to DEFAULT_BUFFER_SIZE. 1424 """ 1425 1426 def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE): 1427 raw._checkSeekable() 1428 BufferedReader.__init__(self, raw, buffer_size) 1429 BufferedWriter.__init__(self, raw, buffer_size) 1430 1431 def seek(self, pos, whence=0): 1432 if whence not in valid_seek_flags: 1433 raise ValueError("invalid whence value") 1434 self.flush() 1435 if self._read_buf: 1436 # Undo read ahead. 1437 with self._read_lock: 1438 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1439 # First do the raw seek, then empty the read buffer, so that 1440 # if the raw seek fails, we don't lose buffered data forever. 1441 pos = self.raw.seek(pos, whence) 1442 with self._read_lock: 1443 self._reset_read_buf() 1444 if pos < 0: 1445 raise OSError("seek() returned invalid position") 1446 return pos 1447 1448 def tell(self): 1449 if self._write_buf: 1450 return BufferedWriter.tell(self) 1451 else: 1452 return BufferedReader.tell(self) 1453 1454 def truncate(self, pos=None): 1455 if pos is None: 1456 pos = self.tell() 1457 # Use seek to flush the read buffer. 1458 return BufferedWriter.truncate(self, pos) 1459 1460 def read(self, size=None): 1461 if size is None: 1462 size = -1 1463 self.flush() 1464 return BufferedReader.read(self, size) 1465 1466 def readinto(self, b): 1467 self.flush() 1468 return BufferedReader.readinto(self, b) 1469 1470 def peek(self, size=0): 1471 self.flush() 1472 return BufferedReader.peek(self, size) 1473 1474 def read1(self, size=-1): 1475 self.flush() 1476 return BufferedReader.read1(self, size) 1477 1478 def readinto1(self, b): 1479 self.flush() 1480 return BufferedReader.readinto1(self, b) 1481 1482 def write(self, b): 1483 if self._read_buf: 1484 # Undo readahead 1485 with self._read_lock: 1486 self.raw.seek(self._read_pos - len(self._read_buf), 1) 1487 self._reset_read_buf() 1488 return BufferedWriter.write(self, b) 1489 1490 1491class FileIO(RawIOBase): 1492 _fd = -1 1493 _created = False 1494 _readable = False 1495 _writable = False 1496 _appending = False 1497 _seekable = None 1498 _closefd = True 1499 1500 def __init__(self, file, mode='r', closefd=True, opener=None): 1501 """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, 1502 writing, exclusive creation or appending. The file will be created if it 1503 doesn't exist when opened for writing or appending; it will be truncated 1504 when opened for writing. A FileExistsError will be raised if it already 1505 exists when opened for creating. Opening a file for creating implies 1506 writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode 1507 to allow simultaneous reading and writing. A custom opener can be used by 1508 passing a callable as *opener*. The underlying file descriptor for the file 1509 object is then obtained by calling opener with (*name*, *flags*). 1510 *opener* must return an open file descriptor (passing os.open as *opener* 1511 results in functionality similar to passing None). 1512 """ 1513 if self._fd >= 0: 1514 # Have to close the existing file first. 1515 try: 1516 if self._closefd: 1517 os.close(self._fd) 1518 finally: 1519 self._fd = -1 1520 1521 if isinstance(file, float): 1522 raise TypeError('integer argument expected, got float') 1523 if isinstance(file, int): 1524 fd = file 1525 if fd < 0: 1526 raise ValueError('negative file descriptor') 1527 else: 1528 fd = -1 1529 1530 if not isinstance(mode, str): 1531 raise TypeError('invalid mode: %s' % (mode,)) 1532 if not set(mode) <= set('xrwab+'): 1533 raise ValueError('invalid mode: %s' % (mode,)) 1534 if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1: 1535 raise ValueError('Must have exactly one of create/read/write/append ' 1536 'mode and at most one plus') 1537 1538 if 'x' in mode: 1539 self._created = True 1540 self._writable = True 1541 flags = os.O_EXCL | os.O_CREAT 1542 elif 'r' in mode: 1543 self._readable = True 1544 flags = 0 1545 elif 'w' in mode: 1546 self._writable = True 1547 flags = os.O_CREAT | os.O_TRUNC 1548 elif 'a' in mode: 1549 self._writable = True 1550 self._appending = True 1551 flags = os.O_APPEND | os.O_CREAT 1552 1553 if '+' in mode: 1554 self._readable = True 1555 self._writable = True 1556 1557 if self._readable and self._writable: 1558 flags |= os.O_RDWR 1559 elif self._readable: 1560 flags |= os.O_RDONLY 1561 else: 1562 flags |= os.O_WRONLY 1563 1564 flags |= getattr(os, 'O_BINARY', 0) 1565 1566 noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or 1567 getattr(os, 'O_CLOEXEC', 0)) 1568 flags |= noinherit_flag 1569 1570 owned_fd = None 1571 try: 1572 if fd < 0: 1573 if not closefd: 1574 raise ValueError('Cannot use closefd=False with file name') 1575 if opener is None: 1576 fd = os.open(file, flags, 0o666) 1577 else: 1578 fd = opener(file, flags) 1579 if not isinstance(fd, int): 1580 raise TypeError('expected integer from opener') 1581 if fd < 0: 1582 raise OSError('Negative file descriptor') 1583 owned_fd = fd 1584 if not noinherit_flag: 1585 os.set_inheritable(fd, False) 1586 1587 self._closefd = closefd 1588 fdfstat = os.fstat(fd) 1589 try: 1590 if stat.S_ISDIR(fdfstat.st_mode): 1591 raise IsADirectoryError(errno.EISDIR, 1592 os.strerror(errno.EISDIR), file) 1593 except AttributeError: 1594 # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR 1595 # don't exist. 1596 pass 1597 self._blksize = getattr(fdfstat, 'st_blksize', 0) 1598 if self._blksize <= 1: 1599 self._blksize = DEFAULT_BUFFER_SIZE 1600 1601 if _setmode: 1602 # don't translate newlines (\r\n <=> \n) 1603 _setmode(fd, os.O_BINARY) 1604 1605 self.name = file 1606 if self._appending: 1607 # For consistent behaviour, we explicitly seek to the 1608 # end of file (otherwise, it might be done only on the 1609 # first write()). 1610 try: 1611 os.lseek(fd, 0, SEEK_END) 1612 except OSError as e: 1613 if e.errno != errno.ESPIPE: 1614 raise 1615 except: 1616 if owned_fd is not None: 1617 os.close(owned_fd) 1618 raise 1619 self._fd = fd 1620 1621 def __del__(self): 1622 if self._fd >= 0 and self._closefd and not self.closed: 1623 import warnings 1624 warnings.warn('unclosed file %r' % (self,), ResourceWarning, 1625 stacklevel=2, source=self) 1626 self.close() 1627 1628 def __getstate__(self): 1629 raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") 1630 1631 def __repr__(self): 1632 class_name = '%s.%s' % (self.__class__.__module__, 1633 self.__class__.__qualname__) 1634 if self.closed: 1635 return '<%s [closed]>' % class_name 1636 try: 1637 name = self.name 1638 except AttributeError: 1639 return ('<%s fd=%d mode=%r closefd=%r>' % 1640 (class_name, self._fd, self.mode, self._closefd)) 1641 else: 1642 return ('<%s name=%r mode=%r closefd=%r>' % 1643 (class_name, name, self.mode, self._closefd)) 1644 1645 def _checkReadable(self): 1646 if not self._readable: 1647 raise UnsupportedOperation('File not open for reading') 1648 1649 def _checkWritable(self, msg=None): 1650 if not self._writable: 1651 raise UnsupportedOperation('File not open for writing') 1652 1653 def read(self, size=None): 1654 """Read at most size bytes, returned as bytes. 1655 1656 Only makes one system call, so less data may be returned than requested 1657 In non-blocking mode, returns None if no data is available. 1658 Return an empty bytes object at EOF. 1659 """ 1660 self._checkClosed() 1661 self._checkReadable() 1662 if size is None or size < 0: 1663 return self.readall() 1664 try: 1665 return os.read(self._fd, size) 1666 except BlockingIOError: 1667 return None 1668 1669 def readall(self): 1670 """Read all data from the file, returned as bytes. 1671 1672 In non-blocking mode, returns as much as is immediately available, 1673 or None if no data is available. Return an empty bytes object at EOF. 1674 """ 1675 self._checkClosed() 1676 self._checkReadable() 1677 bufsize = DEFAULT_BUFFER_SIZE 1678 try: 1679 pos = os.lseek(self._fd, 0, SEEK_CUR) 1680 end = os.fstat(self._fd).st_size 1681 if end >= pos: 1682 bufsize = end - pos + 1 1683 except OSError: 1684 pass 1685 1686 result = bytearray() 1687 while True: 1688 if len(result) >= bufsize: 1689 bufsize = len(result) 1690 bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) 1691 n = bufsize - len(result) 1692 try: 1693 chunk = os.read(self._fd, n) 1694 except BlockingIOError: 1695 if result: 1696 break 1697 return None 1698 if not chunk: # reached the end of the file 1699 break 1700 result += chunk 1701 1702 return bytes(result) 1703 1704 def readinto(self, b): 1705 """Same as RawIOBase.readinto().""" 1706 m = memoryview(b).cast('B') 1707 data = self.read(len(m)) 1708 n = len(data) 1709 m[:n] = data 1710 return n 1711 1712 def write(self, b): 1713 """Write bytes b to file, return number written. 1714 1715 Only makes one system call, so not all of the data may be written. 1716 The number of bytes actually written is returned. In non-blocking mode, 1717 returns None if the write would block. 1718 """ 1719 self._checkClosed() 1720 self._checkWritable() 1721 try: 1722 return os.write(self._fd, b) 1723 except BlockingIOError: 1724 return None 1725 1726 def seek(self, pos, whence=SEEK_SET): 1727 """Move to new file position. 1728 1729 Argument offset is a byte count. Optional argument whence defaults to 1730 SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values 1731 are SEEK_CUR or 1 (move relative to current position, positive or negative), 1732 and SEEK_END or 2 (move relative to end of file, usually negative, although 1733 many platforms allow seeking beyond the end of a file). 1734 1735 Note that not all file objects are seekable. 1736 """ 1737 if isinstance(pos, float): 1738 raise TypeError('an integer is required') 1739 self._checkClosed() 1740 return os.lseek(self._fd, pos, whence) 1741 1742 def tell(self): 1743 """tell() -> int. Current file position. 1744 1745 Can raise OSError for non seekable files.""" 1746 self._checkClosed() 1747 return os.lseek(self._fd, 0, SEEK_CUR) 1748 1749 def truncate(self, size=None): 1750 """Truncate the file to at most size bytes. 1751 1752 Size defaults to the current file position, as returned by tell(). 1753 The current file position is changed to the value of size. 1754 """ 1755 self._checkClosed() 1756 self._checkWritable() 1757 if size is None: 1758 size = self.tell() 1759 os.ftruncate(self._fd, size) 1760 return size 1761 1762 def close(self): 1763 """Close the file. 1764 1765 A closed file cannot be used for further I/O operations. close() may be 1766 called more than once without error. 1767 """ 1768 if not self.closed: 1769 try: 1770 if self._closefd: 1771 os.close(self._fd) 1772 finally: 1773 super().close() 1774 1775 def seekable(self): 1776 """True if file supports random-access.""" 1777 self._checkClosed() 1778 if self._seekable is None: 1779 try: 1780 self.tell() 1781 except OSError: 1782 self._seekable = False 1783 else: 1784 self._seekable = True 1785 return self._seekable 1786 1787 def readable(self): 1788 """True if file was opened in a read mode.""" 1789 self._checkClosed() 1790 return self._readable 1791 1792 def writable(self): 1793 """True if file was opened in a write mode.""" 1794 self._checkClosed() 1795 return self._writable 1796 1797 def fileno(self): 1798 """Return the underlying file descriptor (an integer).""" 1799 self._checkClosed() 1800 return self._fd 1801 1802 def isatty(self): 1803 """True if the file is connected to a TTY device.""" 1804 self._checkClosed() 1805 return os.isatty(self._fd) 1806 1807 @property 1808 def closefd(self): 1809 """True if the file descriptor will be closed by close().""" 1810 return self._closefd 1811 1812 @property 1813 def mode(self): 1814 """String giving the file mode""" 1815 if self._created: 1816 if self._readable: 1817 return 'xb+' 1818 else: 1819 return 'xb' 1820 elif self._appending: 1821 if self._readable: 1822 return 'ab+' 1823 else: 1824 return 'ab' 1825 elif self._readable: 1826 if self._writable: 1827 return 'rb+' 1828 else: 1829 return 'rb' 1830 else: 1831 return 'wb' 1832 1833 1834class TextIOBase(IOBase): 1835 1836 """Base class for text I/O. 1837 1838 This class provides a character and line based interface to stream 1839 I/O. 1840 """ 1841 1842 def read(self, size=-1): 1843 """Read at most size characters from stream, where size is an int. 1844 1845 Read from underlying buffer until we have size characters or we hit EOF. 1846 If size is negative or omitted, read until EOF. 1847 1848 Returns a string. 1849 """ 1850 self._unsupported("read") 1851 1852 def write(self, s): 1853 """Write string s to stream and returning an int.""" 1854 self._unsupported("write") 1855 1856 def truncate(self, pos=None): 1857 """Truncate size to pos, where pos is an int.""" 1858 self._unsupported("truncate") 1859 1860 def readline(self): 1861 """Read until newline or EOF. 1862 1863 Returns an empty string if EOF is hit immediately. 1864 """ 1865 self._unsupported("readline") 1866 1867 def detach(self): 1868 """ 1869 Separate the underlying buffer from the TextIOBase and return it. 1870 1871 After the underlying buffer has been detached, the TextIO is in an 1872 unusable state. 1873 """ 1874 self._unsupported("detach") 1875 1876 @property 1877 def encoding(self): 1878 """Subclasses should override.""" 1879 return None 1880 1881 @property 1882 def newlines(self): 1883 """Line endings translated so far. 1884 1885 Only line endings translated during reading are considered. 1886 1887 Subclasses should override. 1888 """ 1889 return None 1890 1891 @property 1892 def errors(self): 1893 """Error setting of the decoder or encoder. 1894 1895 Subclasses should override.""" 1896 return None 1897 1898io.TextIOBase.register(TextIOBase) 1899 1900 1901class IncrementalNewlineDecoder(codecs.IncrementalDecoder): 1902 r"""Codec used when reading a file in universal newlines mode. It wraps 1903 another incremental decoder, translating \r\n and \r into \n. It also 1904 records the types of newlines encountered. When used with 1905 translate=False, it ensures that the newline sequence is returned in 1906 one piece. 1907 """ 1908 def __init__(self, decoder, translate, errors='strict'): 1909 codecs.IncrementalDecoder.__init__(self, errors=errors) 1910 self.translate = translate 1911 self.decoder = decoder 1912 self.seennl = 0 1913 self.pendingcr = False 1914 1915 def decode(self, input, final=False): 1916 # decode input (with the eventual \r from a previous pass) 1917 if self.decoder is None: 1918 output = input 1919 else: 1920 output = self.decoder.decode(input, final=final) 1921 if self.pendingcr and (output or final): 1922 output = "\r" + output 1923 self.pendingcr = False 1924 1925 # retain last \r even when not translating data: 1926 # then readline() is sure to get \r\n in one pass 1927 if output.endswith("\r") and not final: 1928 output = output[:-1] 1929 self.pendingcr = True 1930 1931 # Record which newlines are read 1932 crlf = output.count('\r\n') 1933 cr = output.count('\r') - crlf 1934 lf = output.count('\n') - crlf 1935 self.seennl |= (lf and self._LF) | (cr and self._CR) \ 1936 | (crlf and self._CRLF) 1937 1938 if self.translate: 1939 if crlf: 1940 output = output.replace("\r\n", "\n") 1941 if cr: 1942 output = output.replace("\r", "\n") 1943 1944 return output 1945 1946 def getstate(self): 1947 if self.decoder is None: 1948 buf = b"" 1949 flag = 0 1950 else: 1951 buf, flag = self.decoder.getstate() 1952 flag <<= 1 1953 if self.pendingcr: 1954 flag |= 1 1955 return buf, flag 1956 1957 def setstate(self, state): 1958 buf, flag = state 1959 self.pendingcr = bool(flag & 1) 1960 if self.decoder is not None: 1961 self.decoder.setstate((buf, flag >> 1)) 1962 1963 def reset(self): 1964 self.seennl = 0 1965 self.pendingcr = False 1966 if self.decoder is not None: 1967 self.decoder.reset() 1968 1969 _LF = 1 1970 _CR = 2 1971 _CRLF = 4 1972 1973 @property 1974 def newlines(self): 1975 return (None, 1976 "\n", 1977 "\r", 1978 ("\r", "\n"), 1979 "\r\n", 1980 ("\n", "\r\n"), 1981 ("\r", "\r\n"), 1982 ("\r", "\n", "\r\n") 1983 )[self.seennl] 1984 1985 1986class TextIOWrapper(TextIOBase): 1987 1988 r"""Character and line based layer over a BufferedIOBase object, buffer. 1989 1990 encoding gives the name of the encoding that the stream will be 1991 decoded or encoded with. It defaults to locale.getencoding(). 1992 1993 errors determines the strictness of encoding and decoding (see the 1994 codecs.register) and defaults to "strict". 1995 1996 newline can be None, '', '\n', '\r', or '\r\n'. It controls the 1997 handling of line endings. If it is None, universal newlines is 1998 enabled. With this enabled, on input, the lines endings '\n', '\r', 1999 or '\r\n' are translated to '\n' before being returned to the 2000 caller. Conversely, on output, '\n' is translated to the system 2001 default line separator, os.linesep. If newline is any other of its 2002 legal values, that newline becomes the newline when the file is read 2003 and it is returned untranslated. On output, '\n' is converted to the 2004 newline. 2005 2006 If line_buffering is True, a call to flush is implied when a call to 2007 write contains a newline character. 2008 """ 2009 2010 _CHUNK_SIZE = 2048 2011 2012 # Initialize _buffer as soon as possible since it's used by __del__() 2013 # which calls close() 2014 _buffer = None 2015 2016 # The write_through argument has no effect here since this 2017 # implementation always writes through. The argument is present only 2018 # so that the signature can match the signature of the C version. 2019 def __init__(self, buffer, encoding=None, errors=None, newline=None, 2020 line_buffering=False, write_through=False): 2021 self._check_newline(newline) 2022 encoding = text_encoding(encoding) 2023 2024 if encoding == "locale": 2025 encoding = self._get_locale_encoding() 2026 2027 if not isinstance(encoding, str): 2028 raise ValueError("invalid encoding: %r" % encoding) 2029 2030 if not codecs.lookup(encoding)._is_text_encoding: 2031 msg = ("%r is not a text encoding; " 2032 "use codecs.open() to handle arbitrary codecs") 2033 raise LookupError(msg % encoding) 2034 2035 if errors is None: 2036 errors = "strict" 2037 else: 2038 if not isinstance(errors, str): 2039 raise ValueError("invalid errors: %r" % errors) 2040 if _CHECK_ERRORS: 2041 codecs.lookup_error(errors) 2042 2043 self._buffer = buffer 2044 self._decoded_chars = '' # buffer for text returned from decoder 2045 self._decoded_chars_used = 0 # offset into _decoded_chars for read() 2046 self._snapshot = None # info for reconstructing decoder state 2047 self._seekable = self._telling = self.buffer.seekable() 2048 self._has_read1 = hasattr(self.buffer, 'read1') 2049 self._configure(encoding, errors, newline, 2050 line_buffering, write_through) 2051 2052 def _check_newline(self, newline): 2053 if newline is not None and not isinstance(newline, str): 2054 raise TypeError("illegal newline type: %r" % (type(newline),)) 2055 if newline not in (None, "", "\n", "\r", "\r\n"): 2056 raise ValueError("illegal newline value: %r" % (newline,)) 2057 2058 def _configure(self, encoding=None, errors=None, newline=None, 2059 line_buffering=False, write_through=False): 2060 self._encoding = encoding 2061 self._errors = errors 2062 self._encoder = None 2063 self._decoder = None 2064 self._b2cratio = 0.0 2065 2066 self._readuniversal = not newline 2067 self._readtranslate = newline is None 2068 self._readnl = newline 2069 self._writetranslate = newline != '' 2070 self._writenl = newline or os.linesep 2071 2072 self._line_buffering = line_buffering 2073 self._write_through = write_through 2074 2075 # don't write a BOM in the middle of a file 2076 if self._seekable and self.writable(): 2077 position = self.buffer.tell() 2078 if position != 0: 2079 try: 2080 self._get_encoder().setstate(0) 2081 except LookupError: 2082 # Sometimes the encoder doesn't exist 2083 pass 2084 2085 # self._snapshot is either None, or a tuple (dec_flags, next_input) 2086 # where dec_flags is the second (integer) item of the decoder state 2087 # and next_input is the chunk of input bytes that comes next after the 2088 # snapshot point. We use this to reconstruct decoder states in tell(). 2089 2090 # Naming convention: 2091 # - "bytes_..." for integer variables that count input bytes 2092 # - "chars_..." for integer variables that count decoded characters 2093 2094 def __repr__(self): 2095 result = "<{}.{}".format(self.__class__.__module__, 2096 self.__class__.__qualname__) 2097 try: 2098 name = self.name 2099 except AttributeError: 2100 pass 2101 else: 2102 result += " name={0!r}".format(name) 2103 try: 2104 mode = self.mode 2105 except AttributeError: 2106 pass 2107 else: 2108 result += " mode={0!r}".format(mode) 2109 return result + " encoding={0!r}>".format(self.encoding) 2110 2111 @property 2112 def encoding(self): 2113 return self._encoding 2114 2115 @property 2116 def errors(self): 2117 return self._errors 2118 2119 @property 2120 def line_buffering(self): 2121 return self._line_buffering 2122 2123 @property 2124 def write_through(self): 2125 return self._write_through 2126 2127 @property 2128 def buffer(self): 2129 return self._buffer 2130 2131 def reconfigure(self, *, 2132 encoding=None, errors=None, newline=Ellipsis, 2133 line_buffering=None, write_through=None): 2134 """Reconfigure the text stream with new parameters. 2135 2136 This also flushes the stream. 2137 """ 2138 if (self._decoder is not None 2139 and (encoding is not None or errors is not None 2140 or newline is not Ellipsis)): 2141 raise UnsupportedOperation( 2142 "It is not possible to set the encoding or newline of stream " 2143 "after the first read") 2144 2145 if errors is None: 2146 if encoding is None: 2147 errors = self._errors 2148 else: 2149 errors = 'strict' 2150 elif not isinstance(errors, str): 2151 raise TypeError("invalid errors: %r" % errors) 2152 2153 if encoding is None: 2154 encoding = self._encoding 2155 else: 2156 if not isinstance(encoding, str): 2157 raise TypeError("invalid encoding: %r" % encoding) 2158 if encoding == "locale": 2159 encoding = self._get_locale_encoding() 2160 2161 if newline is Ellipsis: 2162 newline = self._readnl 2163 self._check_newline(newline) 2164 2165 if line_buffering is None: 2166 line_buffering = self.line_buffering 2167 if write_through is None: 2168 write_through = self.write_through 2169 2170 self.flush() 2171 self._configure(encoding, errors, newline, 2172 line_buffering, write_through) 2173 2174 def seekable(self): 2175 if self.closed: 2176 raise ValueError("I/O operation on closed file.") 2177 return self._seekable 2178 2179 def readable(self): 2180 return self.buffer.readable() 2181 2182 def writable(self): 2183 return self.buffer.writable() 2184 2185 def flush(self): 2186 self.buffer.flush() 2187 self._telling = self._seekable 2188 2189 def close(self): 2190 if self.buffer is not None and not self.closed: 2191 try: 2192 self.flush() 2193 finally: 2194 self.buffer.close() 2195 2196 @property 2197 def closed(self): 2198 return self.buffer.closed 2199 2200 @property 2201 def name(self): 2202 return self.buffer.name 2203 2204 def fileno(self): 2205 return self.buffer.fileno() 2206 2207 def isatty(self): 2208 return self.buffer.isatty() 2209 2210 def write(self, s): 2211 'Write data, where s is a str' 2212 if self.closed: 2213 raise ValueError("write to closed file") 2214 if not isinstance(s, str): 2215 raise TypeError("can't write %s to text stream" % 2216 s.__class__.__name__) 2217 length = len(s) 2218 haslf = (self._writetranslate or self._line_buffering) and "\n" in s 2219 if haslf and self._writetranslate and self._writenl != "\n": 2220 s = s.replace("\n", self._writenl) 2221 encoder = self._encoder or self._get_encoder() 2222 # XXX What if we were just reading? 2223 b = encoder.encode(s) 2224 self.buffer.write(b) 2225 if self._line_buffering and (haslf or "\r" in s): 2226 self.flush() 2227 self._set_decoded_chars('') 2228 self._snapshot = None 2229 if self._decoder: 2230 self._decoder.reset() 2231 return length 2232 2233 def _get_encoder(self): 2234 make_encoder = codecs.getincrementalencoder(self._encoding) 2235 self._encoder = make_encoder(self._errors) 2236 return self._encoder 2237 2238 def _get_decoder(self): 2239 make_decoder = codecs.getincrementaldecoder(self._encoding) 2240 decoder = make_decoder(self._errors) 2241 if self._readuniversal: 2242 decoder = IncrementalNewlineDecoder(decoder, self._readtranslate) 2243 self._decoder = decoder 2244 return decoder 2245 2246 # The following three methods implement an ADT for _decoded_chars. 2247 # Text returned from the decoder is buffered here until the client 2248 # requests it by calling our read() or readline() method. 2249 def _set_decoded_chars(self, chars): 2250 """Set the _decoded_chars buffer.""" 2251 self._decoded_chars = chars 2252 self._decoded_chars_used = 0 2253 2254 def _get_decoded_chars(self, n=None): 2255 """Advance into the _decoded_chars buffer.""" 2256 offset = self._decoded_chars_used 2257 if n is None: 2258 chars = self._decoded_chars[offset:] 2259 else: 2260 chars = self._decoded_chars[offset:offset + n] 2261 self._decoded_chars_used += len(chars) 2262 return chars 2263 2264 def _get_locale_encoding(self): 2265 try: 2266 import locale 2267 except ImportError: 2268 # Importing locale may fail if Python is being built 2269 return "utf-8" 2270 else: 2271 return locale.getencoding() 2272 2273 def _rewind_decoded_chars(self, n): 2274 """Rewind the _decoded_chars buffer.""" 2275 if self._decoded_chars_used < n: 2276 raise AssertionError("rewind decoded_chars out of bounds") 2277 self._decoded_chars_used -= n 2278 2279 def _read_chunk(self): 2280 """ 2281 Read and decode the next chunk of data from the BufferedReader. 2282 """ 2283 2284 # The return value is True unless EOF was reached. The decoded 2285 # string is placed in self._decoded_chars (replacing its previous 2286 # value). The entire input chunk is sent to the decoder, though 2287 # some of it may remain buffered in the decoder, yet to be 2288 # converted. 2289 2290 if self._decoder is None: 2291 raise ValueError("no decoder") 2292 2293 if self._telling: 2294 # To prepare for tell(), we need to snapshot a point in the 2295 # file where the decoder's input buffer is empty. 2296 2297 dec_buffer, dec_flags = self._decoder.getstate() 2298 # Given this, we know there was a valid snapshot point 2299 # len(dec_buffer) bytes ago with decoder state (b'', dec_flags). 2300 2301 # Read a chunk, decode it, and put the result in self._decoded_chars. 2302 if self._has_read1: 2303 input_chunk = self.buffer.read1(self._CHUNK_SIZE) 2304 else: 2305 input_chunk = self.buffer.read(self._CHUNK_SIZE) 2306 eof = not input_chunk 2307 decoded_chars = self._decoder.decode(input_chunk, eof) 2308 self._set_decoded_chars(decoded_chars) 2309 if decoded_chars: 2310 self._b2cratio = len(input_chunk) / len(self._decoded_chars) 2311 else: 2312 self._b2cratio = 0.0 2313 2314 if self._telling: 2315 # At the snapshot point, len(dec_buffer) bytes before the read, 2316 # the next input to be decoded is dec_buffer + input_chunk. 2317 self._snapshot = (dec_flags, dec_buffer + input_chunk) 2318 2319 return not eof 2320 2321 def _pack_cookie(self, position, dec_flags=0, 2322 bytes_to_feed=0, need_eof=False, chars_to_skip=0): 2323 # The meaning of a tell() cookie is: seek to position, set the 2324 # decoder flags to dec_flags, read bytes_to_feed bytes, feed them 2325 # into the decoder with need_eof as the EOF flag, then skip 2326 # chars_to_skip characters of the decoded result. For most simple 2327 # decoders, tell() will often just give a byte offset in the file. 2328 return (position | (dec_flags<<64) | (bytes_to_feed<<128) | 2329 (chars_to_skip<<192) | bool(need_eof)<<256) 2330 2331 def _unpack_cookie(self, bigint): 2332 rest, position = divmod(bigint, 1<<64) 2333 rest, dec_flags = divmod(rest, 1<<64) 2334 rest, bytes_to_feed = divmod(rest, 1<<64) 2335 need_eof, chars_to_skip = divmod(rest, 1<<64) 2336 return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip 2337 2338 def tell(self): 2339 if not self._seekable: 2340 raise UnsupportedOperation("underlying stream is not seekable") 2341 if not self._telling: 2342 raise OSError("telling position disabled by next() call") 2343 self.flush() 2344 position = self.buffer.tell() 2345 decoder = self._decoder 2346 if decoder is None or self._snapshot is None: 2347 if self._decoded_chars: 2348 # This should never happen. 2349 raise AssertionError("pending decoded text") 2350 return position 2351 2352 # Skip backward to the snapshot point (see _read_chunk). 2353 dec_flags, next_input = self._snapshot 2354 position -= len(next_input) 2355 2356 # How many decoded characters have been used up since the snapshot? 2357 chars_to_skip = self._decoded_chars_used 2358 if chars_to_skip == 0: 2359 # We haven't moved from the snapshot point. 2360 return self._pack_cookie(position, dec_flags) 2361 2362 # Starting from the snapshot position, we will walk the decoder 2363 # forward until it gives us enough decoded characters. 2364 saved_state = decoder.getstate() 2365 try: 2366 # Fast search for an acceptable start point, close to our 2367 # current pos. 2368 # Rationale: calling decoder.decode() has a large overhead 2369 # regardless of chunk size; we want the number of such calls to 2370 # be O(1) in most situations (common decoders, sensible input). 2371 # Actually, it will be exactly 1 for fixed-size codecs (all 2372 # 8-bit codecs, also UTF-16 and UTF-32). 2373 skip_bytes = int(self._b2cratio * chars_to_skip) 2374 skip_back = 1 2375 assert skip_bytes <= len(next_input) 2376 while skip_bytes > 0: 2377 decoder.setstate((b'', dec_flags)) 2378 # Decode up to temptative start point 2379 n = len(decoder.decode(next_input[:skip_bytes])) 2380 if n <= chars_to_skip: 2381 b, d = decoder.getstate() 2382 if not b: 2383 # Before pos and no bytes buffered in decoder => OK 2384 dec_flags = d 2385 chars_to_skip -= n 2386 break 2387 # Skip back by buffered amount and reset heuristic 2388 skip_bytes -= len(b) 2389 skip_back = 1 2390 else: 2391 # We're too far ahead, skip back a bit 2392 skip_bytes -= skip_back 2393 skip_back = skip_back * 2 2394 else: 2395 skip_bytes = 0 2396 decoder.setstate((b'', dec_flags)) 2397 2398 # Note our initial start point. 2399 start_pos = position + skip_bytes 2400 start_flags = dec_flags 2401 if chars_to_skip == 0: 2402 # We haven't moved from the start point. 2403 return self._pack_cookie(start_pos, start_flags) 2404 2405 # Feed the decoder one byte at a time. As we go, note the 2406 # nearest "safe start point" before the current location 2407 # (a point where the decoder has nothing buffered, so seek() 2408 # can safely start from there and advance to this location). 2409 bytes_fed = 0 2410 need_eof = False 2411 # Chars decoded since `start_pos` 2412 chars_decoded = 0 2413 for i in range(skip_bytes, len(next_input)): 2414 bytes_fed += 1 2415 chars_decoded += len(decoder.decode(next_input[i:i+1])) 2416 dec_buffer, dec_flags = decoder.getstate() 2417 if not dec_buffer and chars_decoded <= chars_to_skip: 2418 # Decoder buffer is empty, so this is a safe start point. 2419 start_pos += bytes_fed 2420 chars_to_skip -= chars_decoded 2421 start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 2422 if chars_decoded >= chars_to_skip: 2423 break 2424 else: 2425 # We didn't get enough decoded data; signal EOF to get more. 2426 chars_decoded += len(decoder.decode(b'', final=True)) 2427 need_eof = True 2428 if chars_decoded < chars_to_skip: 2429 raise OSError("can't reconstruct logical file position") 2430 2431 # The returned cookie corresponds to the last safe start point. 2432 return self._pack_cookie( 2433 start_pos, start_flags, bytes_fed, need_eof, chars_to_skip) 2434 finally: 2435 decoder.setstate(saved_state) 2436 2437 def truncate(self, pos=None): 2438 self.flush() 2439 if pos is None: 2440 pos = self.tell() 2441 return self.buffer.truncate(pos) 2442 2443 def detach(self): 2444 if self.buffer is None: 2445 raise ValueError("buffer is already detached") 2446 self.flush() 2447 buffer = self._buffer 2448 self._buffer = None 2449 return buffer 2450 2451 def seek(self, cookie, whence=0): 2452 def _reset_encoder(position): 2453 """Reset the encoder (merely useful for proper BOM handling)""" 2454 try: 2455 encoder = self._encoder or self._get_encoder() 2456 except LookupError: 2457 # Sometimes the encoder doesn't exist 2458 pass 2459 else: 2460 if position != 0: 2461 encoder.setstate(0) 2462 else: 2463 encoder.reset() 2464 2465 if self.closed: 2466 raise ValueError("tell on closed file") 2467 if not self._seekable: 2468 raise UnsupportedOperation("underlying stream is not seekable") 2469 if whence == SEEK_CUR: 2470 if cookie != 0: 2471 raise UnsupportedOperation("can't do nonzero cur-relative seeks") 2472 # Seeking to the current position should attempt to 2473 # sync the underlying buffer with the current position. 2474 whence = 0 2475 cookie = self.tell() 2476 elif whence == SEEK_END: 2477 if cookie != 0: 2478 raise UnsupportedOperation("can't do nonzero end-relative seeks") 2479 self.flush() 2480 position = self.buffer.seek(0, whence) 2481 self._set_decoded_chars('') 2482 self._snapshot = None 2483 if self._decoder: 2484 self._decoder.reset() 2485 _reset_encoder(position) 2486 return position 2487 if whence != 0: 2488 raise ValueError("unsupported whence (%r)" % (whence,)) 2489 if cookie < 0: 2490 raise ValueError("negative seek position %r" % (cookie,)) 2491 self.flush() 2492 2493 # The strategy of seek() is to go back to the safe start point 2494 # and replay the effect of read(chars_to_skip) from there. 2495 start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \ 2496 self._unpack_cookie(cookie) 2497 2498 # Seek back to the safe start point. 2499 self.buffer.seek(start_pos) 2500 self._set_decoded_chars('') 2501 self._snapshot = None 2502 2503 # Restore the decoder to its state from the safe start point. 2504 if cookie == 0 and self._decoder: 2505 self._decoder.reset() 2506 elif self._decoder or dec_flags or chars_to_skip: 2507 self._decoder = self._decoder or self._get_decoder() 2508 self._decoder.setstate((b'', dec_flags)) 2509 self._snapshot = (dec_flags, b'') 2510 2511 if chars_to_skip: 2512 # Just like _read_chunk, feed the decoder and save a snapshot. 2513 input_chunk = self.buffer.read(bytes_to_feed) 2514 self._set_decoded_chars( 2515 self._decoder.decode(input_chunk, need_eof)) 2516 self._snapshot = (dec_flags, input_chunk) 2517 2518 # Skip chars_to_skip of the decoded characters. 2519 if len(self._decoded_chars) < chars_to_skip: 2520 raise OSError("can't restore logical file position") 2521 self._decoded_chars_used = chars_to_skip 2522 2523 _reset_encoder(cookie) 2524 return cookie 2525 2526 def read(self, size=None): 2527 self._checkReadable() 2528 if size is None: 2529 size = -1 2530 else: 2531 try: 2532 size_index = size.__index__ 2533 except AttributeError: 2534 raise TypeError(f"{size!r} is not an integer") 2535 else: 2536 size = size_index() 2537 decoder = self._decoder or self._get_decoder() 2538 if size < 0: 2539 # Read everything. 2540 result = (self._get_decoded_chars() + 2541 decoder.decode(self.buffer.read(), final=True)) 2542 self._set_decoded_chars('') 2543 self._snapshot = None 2544 return result 2545 else: 2546 # Keep reading chunks until we have size characters to return. 2547 eof = False 2548 result = self._get_decoded_chars(size) 2549 while len(result) < size and not eof: 2550 eof = not self._read_chunk() 2551 result += self._get_decoded_chars(size - len(result)) 2552 return result 2553 2554 def __next__(self): 2555 self._telling = False 2556 line = self.readline() 2557 if not line: 2558 self._snapshot = None 2559 self._telling = self._seekable 2560 raise StopIteration 2561 return line 2562 2563 def readline(self, size=None): 2564 if self.closed: 2565 raise ValueError("read from closed file") 2566 if size is None: 2567 size = -1 2568 else: 2569 try: 2570 size_index = size.__index__ 2571 except AttributeError: 2572 raise TypeError(f"{size!r} is not an integer") 2573 else: 2574 size = size_index() 2575 2576 # Grab all the decoded text (we will rewind any extra bits later). 2577 line = self._get_decoded_chars() 2578 2579 start = 0 2580 # Make the decoder if it doesn't already exist. 2581 if not self._decoder: 2582 self._get_decoder() 2583 2584 pos = endpos = None 2585 while True: 2586 if self._readtranslate: 2587 # Newlines are already translated, only search for \n 2588 pos = line.find('\n', start) 2589 if pos >= 0: 2590 endpos = pos + 1 2591 break 2592 else: 2593 start = len(line) 2594 2595 elif self._readuniversal: 2596 # Universal newline search. Find any of \r, \r\n, \n 2597 # The decoder ensures that \r\n are not split in two pieces 2598 2599 # In C we'd look for these in parallel of course. 2600 nlpos = line.find("\n", start) 2601 crpos = line.find("\r", start) 2602 if crpos == -1: 2603 if nlpos == -1: 2604 # Nothing found 2605 start = len(line) 2606 else: 2607 # Found \n 2608 endpos = nlpos + 1 2609 break 2610 elif nlpos == -1: 2611 # Found lone \r 2612 endpos = crpos + 1 2613 break 2614 elif nlpos < crpos: 2615 # Found \n 2616 endpos = nlpos + 1 2617 break 2618 elif nlpos == crpos + 1: 2619 # Found \r\n 2620 endpos = crpos + 2 2621 break 2622 else: 2623 # Found \r 2624 endpos = crpos + 1 2625 break 2626 else: 2627 # non-universal 2628 pos = line.find(self._readnl) 2629 if pos >= 0: 2630 endpos = pos + len(self._readnl) 2631 break 2632 2633 if size >= 0 and len(line) >= size: 2634 endpos = size # reached length size 2635 break 2636 2637 # No line ending seen yet - get more data' 2638 while self._read_chunk(): 2639 if self._decoded_chars: 2640 break 2641 if self._decoded_chars: 2642 line += self._get_decoded_chars() 2643 else: 2644 # end of file 2645 self._set_decoded_chars('') 2646 self._snapshot = None 2647 return line 2648 2649 if size >= 0 and endpos > size: 2650 endpos = size # don't exceed size 2651 2652 # Rewind _decoded_chars to just after the line ending we found. 2653 self._rewind_decoded_chars(len(line) - endpos) 2654 return line[:endpos] 2655 2656 @property 2657 def newlines(self): 2658 return self._decoder.newlines if self._decoder else None 2659 2660 2661class StringIO(TextIOWrapper): 2662 """Text I/O implementation using an in-memory buffer. 2663 2664 The initial_value argument sets the value of object. The newline 2665 argument is like the one of TextIOWrapper's constructor. 2666 """ 2667 2668 def __init__(self, initial_value="", newline="\n"): 2669 super(StringIO, self).__init__(BytesIO(), 2670 encoding="utf-8", 2671 errors="surrogatepass", 2672 newline=newline) 2673 # Issue #5645: make universal newlines semantics the same as in the 2674 # C version, even under Windows. 2675 if newline is None: 2676 self._writetranslate = False 2677 if initial_value is not None: 2678 if not isinstance(initial_value, str): 2679 raise TypeError("initial_value must be str or None, not {0}" 2680 .format(type(initial_value).__name__)) 2681 self.write(initial_value) 2682 self.seek(0) 2683 2684 def getvalue(self): 2685 self.flush() 2686 decoder = self._decoder or self._get_decoder() 2687 old_state = decoder.getstate() 2688 decoder.reset() 2689 try: 2690 return decoder.decode(self.buffer.getvalue(), final=True) 2691 finally: 2692 decoder.setstate(old_state) 2693 2694 def __repr__(self): 2695 # TextIOWrapper tells the encoding in its repr. In StringIO, 2696 # that's an implementation detail. 2697 return object.__repr__(self) 2698 2699 @property 2700 def errors(self): 2701 return None 2702 2703 @property 2704 def encoding(self): 2705 return None 2706 2707 def detach(self): 2708 # This doesn't make sense on StringIO. 2709 self._unsupported("detach") 2710