1#!/usr/bin/env python3 2#------------------------------------------------------------------- 3# tarfile.py 4#------------------------------------------------------------------- 5# Copyright (C) 2002 Lars Gustaebel <[email protected]> 6# All rights reserved. 7# 8# Permission is hereby granted, free of charge, to any person 9# obtaining a copy of this software and associated documentation 10# files (the "Software"), to deal in the Software without 11# restriction, including without limitation the rights to use, 12# copy, modify, merge, publish, distribute, sublicense, and/or sell 13# copies of the Software, and to permit persons to whom the 14# Software is furnished to do so, subject to the following 15# conditions: 16# 17# The above copyright notice and this permission notice shall be 18# included in all copies or substantial portions of the Software. 19# 20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27# OTHER DEALINGS IN THE SOFTWARE. 28# 29"""Read from and write to tar format archives. 30""" 31 32version = "0.9.0" 33__author__ = "Lars Gust\u00e4bel ([email protected])" 34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." 
35 36#--------- 37# Imports 38#--------- 39from builtins import open as bltn_open 40import sys 41import os 42import io 43import shutil 44import stat 45import time 46import struct 47import copy 48import re 49import warnings 50 51try: 52 import pwd 53except ImportError: 54 pwd = None 55try: 56 import grp 57except ImportError: 58 grp = None 59 60# os.symlink on Windows prior to 6.0 raises NotImplementedError 61symlink_exception = (AttributeError, NotImplementedError) 62try: 63 # OSError (winerror=1314) will be raised if the caller does not hold the 64 # SeCreateSymbolicLinkPrivilege privilege 65 symlink_exception += (OSError,) 66except NameError: 67 pass 68 69# from tarfile import * 70__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError", 71 "CompressionError", "StreamError", "ExtractError", "HeaderError", 72 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT", 73 "DEFAULT_FORMAT", "open"] 74 75 76#--------------------------------------------------------- 77# tar constants 78#--------------------------------------------------------- 79NUL = b"\0" # the null character 80BLOCKSIZE = 512 # length of processing blocks 81RECORDSIZE = BLOCKSIZE * 20 # length of records 82GNU_MAGIC = b"ustar \0" # magic gnu tar string 83POSIX_MAGIC = b"ustar\x0000" # magic posix tar string 84 85LENGTH_NAME = 100 # maximum length of a filename 86LENGTH_LINK = 100 # maximum length of a linkname 87LENGTH_PREFIX = 155 # maximum length of the prefix field 88 89REGTYPE = b"0" # regular file 90AREGTYPE = b"\0" # regular file 91LNKTYPE = b"1" # link (inside tarfile) 92SYMTYPE = b"2" # symbolic link 93CHRTYPE = b"3" # character special device 94BLKTYPE = b"4" # block special device 95DIRTYPE = b"5" # directory 96FIFOTYPE = b"6" # fifo special device 97CONTTYPE = b"7" # contiguous file 98 99GNUTYPE_LONGNAME = b"L" # GNU tar longname 100GNUTYPE_LONGLINK = b"K" # GNU tar longlink 101GNUTYPE_SPARSE = b"S" # GNU tar sparse file 102 103XHDTYPE = b"x" # POSIX.1-2001 extended header 
104XGLTYPE = b"g" # POSIX.1-2001 global header 105SOLARIS_XHDTYPE = b"X" # Solaris extended header 106 107USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format 108GNU_FORMAT = 1 # GNU tar format 109PAX_FORMAT = 2 # POSIX.1-2001 (pax) format 110DEFAULT_FORMAT = PAX_FORMAT 111 112#--------------------------------------------------------- 113# tarfile constants 114#--------------------------------------------------------- 115# File types that tarfile supports: 116SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, 117 SYMTYPE, DIRTYPE, FIFOTYPE, 118 CONTTYPE, CHRTYPE, BLKTYPE, 119 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 120 GNUTYPE_SPARSE) 121 122# File types that will be treated as a regular file. 123REGULAR_TYPES = (REGTYPE, AREGTYPE, 124 CONTTYPE, GNUTYPE_SPARSE) 125 126# File types that are part of the GNU tar format. 127GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 128 GNUTYPE_SPARSE) 129 130# Fields from a pax header that override a TarInfo attribute. 131PAX_FIELDS = ("path", "linkpath", "size", "mtime", 132 "uid", "gid", "uname", "gname") 133 134# Fields from a pax header that are affected by hdrcharset. 135PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"} 136 137# Fields in a pax header that are numbers, all other fields 138# are treated as strings. 139PAX_NUMBER_FIELDS = { 140 "atime": float, 141 "ctime": float, 142 "mtime": float, 143 "uid": int, 144 "gid": int, 145 "size": int 146} 147 148#--------------------------------------------------------- 149# initialization 150#--------------------------------------------------------- 151if os.name == "nt": 152 ENCODING = "utf-8" 153else: 154 ENCODING = sys.getfilesystemencoding() 155 156#--------------------------------------------------------- 157# Some useful functions 158#--------------------------------------------------------- 159 160def stn(s, length, encoding, errors): 161 """Convert a string to a null-terminated bytes object. 
162 """ 163 if s is None: 164 raise ValueError("metadata cannot contain None") 165 s = s.encode(encoding, errors) 166 return s[:length] + (length - len(s)) * NUL 167 168def nts(s, encoding, errors): 169 """Convert a null-terminated bytes object to a string. 170 """ 171 p = s.find(b"\0") 172 if p != -1: 173 s = s[:p] 174 return s.decode(encoding, errors) 175 176def nti(s): 177 """Convert a number field to a python number. 178 """ 179 # There are two possible encodings for a number field, see 180 # itn() below. 181 if s[0] in (0o200, 0o377): 182 n = 0 183 for i in range(len(s) - 1): 184 n <<= 8 185 n += s[i + 1] 186 if s[0] == 0o377: 187 n = -(256 ** (len(s) - 1) - n) 188 else: 189 try: 190 s = nts(s, "ascii", "strict") 191 n = int(s.strip() or "0", 8) 192 except ValueError: 193 raise InvalidHeaderError("invalid header") 194 return n 195 196def itn(n, digits=8, format=DEFAULT_FORMAT): 197 """Convert a python number to a number field. 198 """ 199 # POSIX 1003.1-1988 requires numbers to be encoded as a string of 200 # octal digits followed by a null-byte, this allows values up to 201 # (8**(digits-1))-1. GNU tar allows storing numbers greater than 202 # that if necessary. A leading 0o200 or 0o377 byte indicate this 203 # particular encoding, the following digits-1 bytes are a big-endian 204 # base-256 representation. This allows values up to (256**(digits-1))-1. 205 # A 0o200 byte indicates a positive number, a 0o377 byte a negative 206 # number. 
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns a (unsigned_chksum, signed_chksum) tuple. The leading
       256 accounts for the 8-byte chksum field read as 8 spaces
       (8 * 0x20); the format strings skip those 8 bytes ("8x").
    """
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum

def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises `exception` (OSError by default) if src is exhausted
       before `length` bytes could be read.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    # Copy whole buffers first, then the trailing partial buffer,
    # verifying each read so a truncated source is detected.
    blocks, remainder = divmod(length, bufsize)
    for b in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)

def _safe_print(s):
    # Round-trip through stdout's encoding so unencodable characters
    # are shown as backslash escapes instead of raising.
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')


class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
class HeaderError(TarError):
    """Base exception for header errors."""
    pass
class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""
    pass
class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""
    pass
class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""
    pass
class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""
    pass
class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
    pass

#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object. The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise. Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # On any setup failure, close a fileobj we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits); the gzip header/trailer
        # are written by hand below and in close().
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # Honor "directory components removed" from RFC1952
        self.name = os.path.basename(self.name)
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # Gzip trailer: CRC32 and uncompressed size mod 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: skip the extra field (2-byte little-endian length).
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: skip the zero-terminated file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the zero-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seek is implemented by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream

class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # First call returns the sniffed block, then delegate directly.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        # Detect the compression format from the first block's magic bytes.
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy

#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.
        # Each entry is (is_data, start, stop, realpos-in-fileobj-or-None).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # Sparse hole: synthesize zero bytes.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile

class ExFileObject(io.BufferedReader):

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject


#-----------------------------
# extraction filters (PEP 706)
#-----------------------------

class FilterError(TarError):
    pass

class AbsolutePathError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'member {tarinfo.name!r} has an absolute path')

class OutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
                         + 'which is outside the destination')

class SpecialFileError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a special file')

class AbsoluteLinkError(FilterError):
    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
class LinkOutsideDestinationError(FilterError):
    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
                         + 'which is outside the destination')

def _get_filtered_attrs(member, dest_path, for_data=True):
    # Compute the attribute overrides the 'tar'/'data' filters apply to
    # `member`, raising a FilterError subclass for disallowed members.
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        if member.uid is not None:
            new_attrs['uid'] = None
        if member.gid is not None:
            new_attrs['gid'] = None
        if member.uname is not None:
            new_attrs['uname'] = None
        if member.gname is not None:
            new_attrs['gname'] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs

def fully_trusted_filter(member, dest_path):
    return member

def tar_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, False)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

def data_filter(member, dest_path):
    new_attrs = _get_filtered_attrs(member, dest_path, True)
    if new_attrs:
        return member.replace(**new_attrs, deep=False)
    return member

_NAMED_FILTERS = {
    "fully_trusted": fully_trusted_filter,
    "tar": tar_filter,
    "data": data_filter,
}

#------------------
# Exported Classes
#------------------

# Sentinel for replace() defaults, meaning "don't change the attribute"
_KEEP = object()

class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = dict(
        name = 'Name of the archive member.',
        mode = 'Permission bits.',
        uid = 'User ID of the user who originally stored this member.',
        gid = 'Group ID of the user who originally stored this member.',
        size = 'Size in bytes.',
        mtime = 'Time of last modification.',
        chksum = 'Header checksum.',
        type = ('File type. type is usually one of these constants: '
                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
        linkname = ('Name of the target file name, which is only present '
                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
        uname = 'User name.',
        gname = 'Group name.',
        devmajor = 'Device major number.',
        devminor = 'Device minor number.',
        offset = 'The tar header starts here.',
        offset_data = "The file's data starts here.",
        pax_headers = ('A dictionary containing key-value pairs of an '
                       'associated pax extended header.'),
        sparse = 'Sparse member information.',
        tarfile = None,
        _sparse_structs = None,
        _link_target = None,
        )
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    @property
    def path(self):
        'In pax headers, "name" is called "path".'
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        'In pax headers, "linkname" is called "linkpath".'
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname
    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def replace(self, *,
                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
                deep=True, _KEEP=_KEEP):
        """Return a deep copy of self with the given attributes replaced.
        """
        if deep:
            result = copy.deepcopy(self)
        else:
            result = copy.copy(self)
        if name is not _KEEP:
            result.name = name
        if mtime is not _KEEP:
            result.mtime = mtime
        if mode is not _KEEP:
            result.mode = mode
        if linkname is not _KEEP:
            result.linkname = linkname
        if uid is not _KEEP:
            result.uid = uid
        if gid is not _KEEP:
            result.gid = gid
        if uname is not _KEEP:
            result.uname = uname
        if gname is not _KEEP:
            result.gname = gname
        return result

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.
        """
        if self.mode is None:
            mode = None
        else:
            # Only the permission bits are stored in the header field.
            mode = self.mode & 0o7777
        info = {
            "name": self.name,
            "mode": mode,
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size,
            "mtime": self.mtime,
            "chksum": self.chksum,
            "type": self.type,
            "linkname": self.linkname,
            "uname": self.uname,
            "gname": self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        # Directory entries conventionally carry a trailing slash.
        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.
        """
        info = self.get_info()
        for name, value in info.items():
            if value is None:
                raise ValueError("%s may not be None" % name)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")
    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # Long names may be split into a prefix and a name part.
        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.
        """
        info["magic"] = GNU_MAGIC

        # Over-long names/linknames get dedicated longname/longlink
        # pseudo-members prepended to the real header.
        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            needs_pax = False

            val = info[name]
            val_is_float = isinstance(val, float)
            val_int = round(val) if val_is_float else val
            if not 0 <= val_int < 8 ** (digits - 1):
                # Avoid overflow.
                info[name] = 0
                needs_pax = True
            elif val_is_float:
                # Put rounded value in ustar header, and full
                # precision value in pax header.
                info[name] = val_int
                needs_pax = True

            # The existing pax header has priority.
            if needs_pax and name not in pax_headers:
                pax_headers[name] = str(val)

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.
        """
        # Try every possible split point on "/" until both halves fit
        # their respective ustar fields.
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name
1084 """ 1085 components = name.split("/") 1086 for i in range(1, len(components)): 1087 prefix = "/".join(components[:i]) 1088 name = "/".join(components[i:]) 1089 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \ 1090 len(name.encode(encoding, errors)) <= LENGTH_NAME: 1091 break 1092 else: 1093 raise ValueError("name is too long") 1094 1095 return prefix, name 1096 1097 @staticmethod 1098 def _create_header(info, format, encoding, errors): 1099 """Return a header block. info is a dictionary with file 1100 information, format must be one of the *_FORMAT constants. 1101 """ 1102 has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE) 1103 if has_device_fields: 1104 devmajor = itn(info.get("devmajor", 0), 8, format) 1105 devminor = itn(info.get("devminor", 0), 8, format) 1106 else: 1107 devmajor = stn("", 8, encoding, errors) 1108 devminor = stn("", 8, encoding, errors) 1109 1110 # None values in metadata should cause ValueError. 1111 # itn()/stn() do this for all fields except type. 
1112 filetype = info.get("type", REGTYPE) 1113 if filetype is None: 1114 raise ValueError("TarInfo.type must not be None") 1115 1116 parts = [ 1117 stn(info.get("name", ""), 100, encoding, errors), 1118 itn(info.get("mode", 0) & 0o7777, 8, format), 1119 itn(info.get("uid", 0), 8, format), 1120 itn(info.get("gid", 0), 8, format), 1121 itn(info.get("size", 0), 12, format), 1122 itn(info.get("mtime", 0), 12, format), 1123 b" ", # checksum field 1124 filetype, 1125 stn(info.get("linkname", ""), 100, encoding, errors), 1126 info.get("magic", POSIX_MAGIC), 1127 stn(info.get("uname", ""), 32, encoding, errors), 1128 stn(info.get("gname", ""), 32, encoding, errors), 1129 devmajor, 1130 devminor, 1131 stn(info.get("prefix", ""), 155, encoding, errors) 1132 ] 1133 1134 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) 1135 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] 1136 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:] 1137 return buf 1138 1139 @staticmethod 1140 def _create_payload(payload): 1141 """Return the string payload filled with zero bytes 1142 up to the next 512 byte border. 1143 """ 1144 blocks, remainder = divmod(len(payload), BLOCKSIZE) 1145 if remainder > 0: 1146 payload += (BLOCKSIZE - remainder) * NUL 1147 return payload 1148 1149 @classmethod 1150 def _create_gnu_long_header(cls, name, type, encoding, errors): 1151 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence 1152 for name. 1153 """ 1154 name = name.encode(encoding, errors) + NUL 1155 1156 info = {} 1157 info["name"] = "././@LongLink" 1158 info["type"] = type 1159 info["size"] = len(name) 1160 info["magic"] = GNU_MAGIC 1161 1162 # create extended header + name blocks. 1163 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ 1164 cls._create_payload(name) 1165 1166 @classmethod 1167 def _create_pax_generic_header(cls, pax_headers, type, encoding): 1168 """Return a POSIX.1-2008 extended or global header sequence 1169 that contains a list of keyword, value pairs. 
The values 1170 must be strings. 1171 """ 1172 # Check if one of the fields contains surrogate characters and thereby 1173 # forces hdrcharset=BINARY, see _proc_pax() for more information. 1174 binary = False 1175 for keyword, value in pax_headers.items(): 1176 try: 1177 value.encode("utf-8", "strict") 1178 except UnicodeEncodeError: 1179 binary = True 1180 break 1181 1182 records = b"" 1183 if binary: 1184 # Put the hdrcharset field at the beginning of the header. 1185 records += b"21 hdrcharset=BINARY\n" 1186 1187 for keyword, value in pax_headers.items(): 1188 keyword = keyword.encode("utf-8") 1189 if binary: 1190 # Try to restore the original byte representation of `value'. 1191 # Needless to say, that the encoding must match the string. 1192 value = value.encode(encoding, "surrogateescape") 1193 else: 1194 value = value.encode("utf-8") 1195 1196 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' 1197 n = p = 0 1198 while True: 1199 n = l + len(str(p)) 1200 if n == p: 1201 break 1202 p = n 1203 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" 1204 1205 # We use a hardcoded "././@PaxHeader" name like star does 1206 # instead of the one that POSIX recommends. 1207 info = {} 1208 info["name"] = "././@PaxHeader" 1209 info["type"] = type 1210 info["size"] = len(records) 1211 info["magic"] = POSIX_MAGIC 1212 1213 # Create pax header + record blocks. 1214 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ 1215 cls._create_payload(records) 1216 1217 @classmethod 1218 def frombuf(cls, buf, encoding, errors): 1219 """Construct a TarInfo object from a 512 byte bytes object. 
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            # An all-zero block marks the end of the archive.
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Slice offsets below follow the ustar header layout.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        if self.isdir():
            self.name = self.name.rstrip("/")

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        # Remove redundant slashes from directories. This is to be consistent
        # with frombuf().
        # NOTE(review): removesuffix("/") drops at most ONE trailing slash,
        # while frombuf()/_proc_builtin() use rstrip("/") which drops all of
        # them -- confirm whether this difference is intentional.
        if next.isdir():
            next.name = next.name.removesuffix("/")

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        # Each extension block holds up to 21 (offset, numbytes) pairs of
        # 24 bytes each (21 * 24 == 504); the byte at offset 504 flags
        # whether another extension block follows.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        # size in the header is the on-disk (holes elided) size; expose the
        # original logical file size instead.
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value).
        # length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length would make pos stop advancing and loop forever.
                raise InvalidHeaderError("invalid header")
            # value spans from just past "keyword=" to just before the
            # trailing newline of the record.
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError as e:
            raise SubsequentHeaderError(str(e)) from None

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The (offset, numbytes) pairs are scraped from the raw record
           buffer because this old format repeats the same two keywords.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           GNU.sparse.map is a single comma-separated list of alternating
           offset and numbytes values.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The sparse map is stored as newline-separated decimal numbers in
           the data blocks that precede the member's payload: first a count
           of (offset, numbytes) pairs, then the numbers themselves.
        """
        fields = None
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        # Malformed numbers in pax records degrade to 0
                        # rather than aborting the whole member.
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           Try `encoding' strictly first; on failure fall back to the
           user-supplied encoding and error handler.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.type in REGULAR_TYPES

    def isfile(self):
        'Return True if the Tarinfo object is a regular file.'
        return self.isreg()

    def isdir(self):
        'Return True if it is a directory.'
        return self.type == DIRTYPE

    def issym(self):
        'Return True if it is a symbolic link.'
        return self.type == SYMTYPE

    def islnk(self):
        'Return True if it is a hard link.'
        return self.type == LNKTYPE

    def ischr(self):
        'Return True if it is a character device.'
        return self.type == CHRTYPE

    def isblk(self):
        'Return True if it is a block device.'
        return self.type == BLKTYPE

    def isfifo(self):
        'Return True if it is a FIFO.'
        return self.type == FIFOTYPE

    def issparse(self):
        'Return True if the member is a sparse file.'
        return self.sparse is not None

    def isdev(self):
        'Return True if it is one of character device, block device or FIFO.'
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo

class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().

    extraction_filter = None    # The default filter for extraction.

    def __init__(self, name=None, mode="r", fileobj=None, format=None,
            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
            errors="surrogateescape", pax_headers=None, debug=None,
            errorlevel=None, copybufsize=None):
        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
           read from an existing archive, 'a' to append data to an existing
           file or 'w' to create a new file overwriting an existing one. `mode'
           defaults to 'r'.
           If `fileobj' is given, it is used for reading or writing data. If it
           can be determined, `mode' is overridden by `fileobj's mode.
           `fileobj' is not closed, when TarFile is closed.
        """
        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
        if mode not in modes:
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        self.mode = mode
        self._mode = modes[mode]

        if not fileobj:
            if self.mode == "a" and not os.path.exists(name):
                # Create nonexistent files in append mode.
                self.mode = "w"
                self._mode = "wb"
            fileobj = bltn_open(name, self._mode)
            self._extfileobj = False
        else:
            if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
                name = fileobj.name
            if hasattr(fileobj, "mode"):
                self._mode = fileobj.mode
            # An externally supplied file object is never closed by us.
            self._extfileobj = True
        self.name = os.path.abspath(name) if name else None
        self.fileobj = fileobj

        # Init attributes.
        # Only override the class-level defaults for arguments that were
        # actually passed.
        if format is not None:
            self.format = format
        if tarinfo is not None:
            self.tarinfo = tarinfo
        if dereference is not None:
            self.dereference = dereference
        if ignore_zeros is not None:
            self.ignore_zeros = ignore_zeros
        if encoding is not None:
            self.encoding = encoding
        self.errors = errors

        # pax_headers only apply when writing PAX format archives.
        if pax_headers is not None and self.format == PAX_FORMAT:
            self.pax_headers = pax_headers
        else:
            self.pax_headers = {}

        if debug is not None:
            self.debug = debug
        if errorlevel is not None:
            self.errorlevel = errorlevel

        # Init datastructures.
        self.copybufsize = copybufsize
        self.closed = False
        self.members = []       # list of members as TarInfo objects
        self._loaded = False    # flag if all members have been read
        self.offset = self.fileobj.tell()
                                # current position in the archive file
        self.inodes = {}        # dictionary caching the inodes of
                                # archive members already added

        try:
            if self.mode == "r":
                self.firstmember = None
                self.firstmember = self.next()

            if self.mode == "a":
                # Move to the end of the archive,
                # before the first empty block.
                while True:
                    self.fileobj.seek(self.offset)
                    try:
                        tarinfo = self.tarinfo.fromtarfile(self)
                        self.members.append(tarinfo)
                    except EOFHeaderError:
                        self.fileobj.seek(self.offset)
                        break
                    except HeaderError as e:
                        raise ReadError(str(e)) from None

            if self.mode in ("a", "w", "x"):
                self._loaded = True

                if self.pax_headers:
                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                    self.fileobj.write(buf)
                    self.offset += len(buf)
        except:
            # On any failure, release the file we opened ourselves and mark
            # the object closed before re-raising.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    #--------------------------------------------------------------------------
    # Below are the classmethods which act as alternate constructors to the
    # TarFile class. The open() method is the only one that is needed for
    # public use; it is the "super"-constructor and is able to select an
    # adequate "sub"-constructor for a particular compression using the mapping
    # from OPEN_METH.
    #
    # This concept allows one to subclass TarFile without losing the comfort of
    # the super-constructor. A sub-constructor is registered and made available
    # by adding it to the mapping in OPEN_METH.

    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'r:xz'       open for reading with lzma compression
           'a' or 'a:'  open for appending, creating the file if necessary
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression
           'w:xz'       open for writing with lzma compression

           'x' or 'x:'  create a tarfile exclusively without compression, raise
                        an exception if the file is already created
           'x:gz'       create a gzip compressed tarfile, raise an exception
                        if the file is already created
           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                        if the file is already created
           'x:xz'       create an lzma compressed tarfile, raise an exception
                        if the file is already created

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'r|xz'       open an lzma compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
           'w|xz'       open an lzma compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Try the compressed openers first (sorted key puts taropen last)
            # so an uncompressed reader never misinterprets compressed data.
            def not_compressed(comptype):
                return cls.OPEN_METH[comptype] == 'taropen'
            error_msgs = []
            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj, **kwargs)
                except (ReadError, CompressionError) as e:
                    error_msgs.append(f'- method {comptype}: {e!r}')
                    if fileobj is not None:
                        # Rewind so the next opener sees the stream start.
                        fileobj.seek(saved_pos)
                    continue
            error_msgs_summary = '\n'.join(error_msgs)
            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")

        elif ":" in mode:
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj, **kwargs)

        elif "|" in mode:
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in ("r", "w"):
                raise ValueError("mode must be 'r' or 'w'")

            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
            try:
                t = cls(name, filemode, stream, **kwargs)
            except:
                stream.close()
                raise
            # The stream is ours, so close() must close it.
            t._extfileobj = False
            return t

        elif mode in ("a", "w", "x"):
            return cls.taropen(name, mode, fileobj, **kwargs)

        raise ValueError("undiscernible mode")

    @classmethod
    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
        """Open uncompressed tar archive name for reading or writing.
        """
        if mode not in ("r", "a", "w", "x"):
            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
        return cls(name, mode, fileobj, **kwargs)

    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open gzip compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from gzip import GzipFile
        except ImportError:
            raise CompressionError("gzip module is not available") from None

        try:
            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
        except OSError as e:
            if fileobj is not None and mode == 'r':
                raise ReadError("not a gzip file") from e
            raise

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except OSError as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a gzip file") from e
            raise
        except:
            fileobj.close()
            raise
        # The GzipFile wrapper belongs to us, close it with the archive.
        t._extfileobj = False
        return t

    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
        """Open bzip2 compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from bz2 import BZ2File
        except ImportError:
            raise CompressionError("bz2 module is not available") from None

        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (OSError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not a bzip2 file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    @classmethod
    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
        """Open lzma compressed tar archive name for reading or writing.
           Appending is not allowed.
        """
        if mode not in ("r", "w", "x"):
            raise ValueError("mode must be 'r', 'w' or 'x'")

        try:
            from lzma import LZMAFile, LZMAError
        except ImportError:
            raise CompressionError("lzma module is not available") from None

        fileobj = LZMAFile(fileobj or name, mode, preset=preset)

        try:
            t = cls.taropen(name, mode, fileobj, **kwargs)
        except (LZMAError, EOFError) as e:
            fileobj.close()
            if mode == 'r':
                raise ReadError("not an lzma file") from e
            raise
        except:
            fileobj.close()
            raise
        t._extfileobj = False
        return t

    # All *open() methods are registered here.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }

    #--------------------------------------------------------------------------
    # The public methods which TarFile provides:

    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
           appended to the archive.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                self.fileobj.write(NUL * (BLOCKSIZE * 2))
                self.offset += (BLOCKSIZE * 2)
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                blocks, remainder = divmod(self.offset, RECORDSIZE)
                if remainder > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
        finally:
            # Only close file objects we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()

    def getmember(self, name):
        """Return a TarInfo object for member `name'. If `name' can not be
           found in the archive, KeyError is raised. If a member occurs more
           than once in the archive, its last occurrence is assumed to be the
           most up-to-date version.
        """
        tarinfo = self._getmember(name.rstrip('/'))
        if tarinfo is None:
            raise KeyError("filename %r not found" % name)
        return tarinfo

    def getmembers(self):
        """Return the members of the archive as a list of TarInfo objects. The
           list has the same order as the members in the archive.
        """
        self._check()
        if not self._loaded:    # if we want to obtain a list of
            self._load()        # all members, we first have to
                                # scan the whole archive.
        return self.members

    def getnames(self):
        """Return the members of the archive as a list of their names. It has
           the same order as the list returned by getmembers().
        """
        return [tarinfo.name for tarinfo in self.getmembers()]

    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object from the result of os.stat or equivalent
           on an existing file. The file is either named by `name', or
           specified as a file object `fileobj' with a file descriptor. If
           given, `arcname' specifies an alternative name for the file in the
           archive, otherwise, the name is taken from the 'name' attribute of
           'fileobj', or the 'name' argument. The name should be a text
           string.
        """
        self._check("awx")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        drv, arcname = os.path.splitdrive(arcname)
        arcname = arcname.replace(os.sep, "/")
        arcname = arcname.lstrip("/")

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = self.tarinfo()
        tarinfo.tarfile = self  # Not needed

        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
        if fileobj is None:
            if not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and statres.st_nlink > 1 and \
                    inode in self.inodes and arcname != self.inodes[inode]:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Sockets and other unarchivable kinds yield no member.
            return None

        # Fill the TarInfo object with all
        # information we can get.
2068 tarinfo.name = arcname 2069 tarinfo.mode = stmd 2070 tarinfo.uid = statres.st_uid 2071 tarinfo.gid = statres.st_gid 2072 if type == REGTYPE: 2073 tarinfo.size = statres.st_size 2074 else: 2075 tarinfo.size = 0 2076 tarinfo.mtime = statres.st_mtime 2077 tarinfo.type = type 2078 tarinfo.linkname = linkname 2079 if pwd: 2080 try: 2081 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] 2082 except KeyError: 2083 pass 2084 if grp: 2085 try: 2086 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] 2087 except KeyError: 2088 pass 2089 2090 if type in (CHRTYPE, BLKTYPE): 2091 if hasattr(os, "major") and hasattr(os, "minor"): 2092 tarinfo.devmajor = os.major(statres.st_rdev) 2093 tarinfo.devminor = os.minor(statres.st_rdev) 2094 return tarinfo 2095 2096 def list(self, verbose=True, *, members=None): 2097 """Print a table of contents to sys.stdout. If `verbose' is False, only 2098 the names of the members are printed. If it is True, an `ls -l'-like 2099 output is produced. `members' is optional and must be a subset of the 2100 list returned by getmembers(). 2101 """ 2102 self._check() 2103 2104 if members is None: 2105 members = self 2106 for tarinfo in members: 2107 if verbose: 2108 if tarinfo.mode is None: 2109 _safe_print("??????????") 2110 else: 2111 _safe_print(stat.filemode(tarinfo.mode)) 2112 _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid, 2113 tarinfo.gname or tarinfo.gid)) 2114 if tarinfo.ischr() or tarinfo.isblk(): 2115 _safe_print("%10s" % 2116 ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor))) 2117 else: 2118 _safe_print("%10d" % tarinfo.size) 2119 if tarinfo.mtime is None: 2120 _safe_print("????-??-?? 
??:??:??") 2121 else: 2122 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \ 2123 % time.localtime(tarinfo.mtime)[:6]) 2124 2125 _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else "")) 2126 2127 if verbose: 2128 if tarinfo.issym(): 2129 _safe_print("-> " + tarinfo.linkname) 2130 if tarinfo.islnk(): 2131 _safe_print("link to " + tarinfo.linkname) 2132 print() 2133 2134 def add(self, name, arcname=None, recursive=True, *, filter=None): 2135 """Add the file `name' to the archive. `name' may be any type of file 2136 (directory, fifo, symbolic link, etc.). If given, `arcname' 2137 specifies an alternative name for the file in the archive. 2138 Directories are added recursively by default. This can be avoided by 2139 setting `recursive' to False. `filter' is a function 2140 that expects a TarInfo object argument and returns the changed 2141 TarInfo object, if it returns None the TarInfo object will be 2142 excluded from the archive. 2143 """ 2144 self._check("awx") 2145 2146 if arcname is None: 2147 arcname = name 2148 2149 # Skip if somebody tries to archive the archive... 2150 if self.name is not None and os.path.abspath(name) == self.name: 2151 self._dbg(2, "tarfile: Skipped %r" % name) 2152 return 2153 2154 self._dbg(1, name) 2155 2156 # Create a TarInfo object from the file. 2157 tarinfo = self.gettarinfo(name, arcname) 2158 2159 if tarinfo is None: 2160 self._dbg(1, "tarfile: Unsupported type %r" % name) 2161 return 2162 2163 # Change or exclude the TarInfo object. 2164 if filter is not None: 2165 tarinfo = filter(tarinfo) 2166 if tarinfo is None: 2167 self._dbg(2, "tarfile: Excluded %r" % name) 2168 return 2169 2170 # Append the tar header and data to the archive. 
2171 if tarinfo.isreg(): 2172 with bltn_open(name, "rb") as f: 2173 self.addfile(tarinfo, f) 2174 2175 elif tarinfo.isdir(): 2176 self.addfile(tarinfo) 2177 if recursive: 2178 for f in sorted(os.listdir(name)): 2179 self.add(os.path.join(name, f), os.path.join(arcname, f), 2180 recursive, filter=filter) 2181 2182 else: 2183 self.addfile(tarinfo) 2184 2185 def addfile(self, tarinfo, fileobj=None): 2186 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 2187 given, it should be a binary file, and tarinfo.size bytes are read 2188 from it and added to the archive. You can create TarInfo objects 2189 directly, or by using gettarinfo(). 2190 """ 2191 self._check("awx") 2192 2193 tarinfo = copy.copy(tarinfo) 2194 2195 buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 2196 self.fileobj.write(buf) 2197 self.offset += len(buf) 2198 bufsize=self.copybufsize 2199 # If there's data to follow, append it. 2200 if fileobj is not None: 2201 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize) 2202 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 2203 if remainder > 0: 2204 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 2205 blocks += 1 2206 self.offset += blocks * BLOCKSIZE 2207 2208 self.members.append(tarinfo) 2209 2210 def _get_filter_function(self, filter): 2211 if filter is None: 2212 filter = self.extraction_filter 2213 if filter is None: 2214 return fully_trusted_filter 2215 if isinstance(filter, str): 2216 raise TypeError( 2217 'String names are not supported for ' 2218 + 'TarFile.extraction_filter. 
Use a function such as ' 2219 + 'tarfile.data_filter directly.') 2220 return filter 2221 if callable(filter): 2222 return filter 2223 try: 2224 return _NAMED_FILTERS[filter] 2225 except KeyError: 2226 raise ValueError(f"filter {filter!r} not found") from None 2227 2228 def extractall(self, path=".", members=None, *, numeric_owner=False, 2229 filter=None): 2230 """Extract all members from the archive to the current working 2231 directory and set owner, modification time and permissions on 2232 directories afterwards. `path' specifies a different directory 2233 to extract to. `members' is optional and must be a subset of the 2234 list returned by getmembers(). If `numeric_owner` is True, only 2235 the numbers for user/group names are used and not the names. 2236 2237 The `filter` function will be called on each member just 2238 before extraction. 2239 It can return a changed TarInfo or None to skip the member. 2240 String names of common filters are accepted. 2241 """ 2242 directories = [] 2243 2244 filter_function = self._get_filter_function(filter) 2245 if members is None: 2246 members = self 2247 2248 for member in members: 2249 tarinfo = self._get_extract_tarinfo(member, filter_function, path) 2250 if tarinfo is None: 2251 continue 2252 if tarinfo.isdir(): 2253 # For directories, delay setting attributes until later, 2254 # since permissions can interfere with extraction and 2255 # extracting contents can reset mtime. 2256 directories.append(tarinfo) 2257 self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), 2258 numeric_owner=numeric_owner) 2259 2260 # Reverse sort directories. 2261 directories.sort(key=lambda a: a.name, reverse=True) 2262 2263 # Set correct owner, mtime and filemode on directories. 
2264 for tarinfo in directories: 2265 dirpath = os.path.join(path, tarinfo.name) 2266 try: 2267 self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) 2268 self.utime(tarinfo, dirpath) 2269 self.chmod(tarinfo, dirpath) 2270 except ExtractError as e: 2271 self._handle_nonfatal_error(e) 2272 2273 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, 2274 filter=None): 2275 """Extract a member from the archive to the current working directory, 2276 using its full name. Its file information is extracted as accurately 2277 as possible. `member' may be a filename or a TarInfo object. You can 2278 specify a different directory using `path'. File attributes (owner, 2279 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` 2280 is True, only the numbers for user/group names are used and not 2281 the names. 2282 2283 The `filter` function will be called before extraction. 2284 It can return a changed TarInfo or None to skip the member. 2285 String names of common filters are accepted. 2286 """ 2287 filter_function = self._get_filter_function(filter) 2288 tarinfo = self._get_extract_tarinfo(member, filter_function, path) 2289 if tarinfo is not None: 2290 self._extract_one(tarinfo, path, set_attrs, numeric_owner) 2291 2292 def _get_extract_tarinfo(self, member, filter_function, path): 2293 """Get filtered TarInfo (or None) from member, which might be a str""" 2294 if isinstance(member, str): 2295 tarinfo = self.getmember(member) 2296 else: 2297 tarinfo = member 2298 2299 unfiltered = tarinfo 2300 try: 2301 tarinfo = filter_function(tarinfo, path) 2302 except (OSError, FilterError) as e: 2303 self._handle_fatal_error(e) 2304 except ExtractError as e: 2305 self._handle_nonfatal_error(e) 2306 if tarinfo is None: 2307 self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) 2308 return None 2309 # Prepare the link target for makelink(). 
2310 if tarinfo.islnk(): 2311 tarinfo = copy.copy(tarinfo) 2312 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 2313 return tarinfo 2314 2315 def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): 2316 """Extract from filtered tarinfo to disk""" 2317 self._check("r") 2318 2319 try: 2320 self._extract_member(tarinfo, os.path.join(path, tarinfo.name), 2321 set_attrs=set_attrs, 2322 numeric_owner=numeric_owner) 2323 except OSError as e: 2324 self._handle_fatal_error(e) 2325 except ExtractError as e: 2326 self._handle_nonfatal_error(e) 2327 2328 def _handle_nonfatal_error(self, e): 2329 """Handle non-fatal error (ExtractError) according to errorlevel""" 2330 if self.errorlevel > 1: 2331 raise 2332 else: 2333 self._dbg(1, "tarfile: %s" % e) 2334 2335 def _handle_fatal_error(self, e): 2336 """Handle "fatal" error according to self.errorlevel""" 2337 if self.errorlevel > 0: 2338 raise 2339 elif isinstance(e, OSError): 2340 if e.filename is None: 2341 self._dbg(1, "tarfile: %s" % e.strerror) 2342 else: 2343 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 2344 else: 2345 self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e)) 2346 2347 def extractfile(self, member): 2348 """Extract a member from the archive as a file object. `member' may be 2349 a filename or a TarInfo object. If `member' is a regular file or 2350 a link, an io.BufferedReader object is returned. For all other 2351 existing members, None is returned. If `member' does not appear 2352 in the archive, KeyError is raised. 2353 """ 2354 self._check("r") 2355 2356 if isinstance(member, str): 2357 tarinfo = self.getmember(member) 2358 else: 2359 tarinfo = member 2360 2361 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: 2362 # Members with unknown types are treated as regular files. 
2363 return self.fileobject(self, tarinfo) 2364 2365 elif tarinfo.islnk() or tarinfo.issym(): 2366 if isinstance(self.fileobj, _Stream): 2367 # A small but ugly workaround for the case that someone tries 2368 # to extract a (sym)link as a file-object from a non-seekable 2369 # stream of tar blocks. 2370 raise StreamError("cannot extract (sym)link as file object") 2371 else: 2372 # A (sym)link's file object is its target's file object. 2373 return self.extractfile(self._find_link_target(tarinfo)) 2374 else: 2375 # If there's no data associated with the member (directory, chrdev, 2376 # blkdev, etc.), return None instead of a file object. 2377 return None 2378 2379 def _extract_member(self, tarinfo, targetpath, set_attrs=True, 2380 numeric_owner=False): 2381 """Extract the TarInfo object tarinfo to a physical 2382 file called targetpath. 2383 """ 2384 # Fetch the TarInfo object for the given name 2385 # and build the destination pathname, replacing 2386 # forward slashes to platform specific separators. 2387 targetpath = targetpath.rstrip("/") 2388 targetpath = targetpath.replace("/", os.sep) 2389 2390 # Create all upper directories. 2391 upperdirs = os.path.dirname(targetpath) 2392 if upperdirs and not os.path.exists(upperdirs): 2393 # Create directories that are not part of the archive with 2394 # default permissions. 
2395 os.makedirs(upperdirs) 2396 2397 if tarinfo.islnk() or tarinfo.issym(): 2398 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 2399 else: 2400 self._dbg(1, tarinfo.name) 2401 2402 if tarinfo.isreg(): 2403 self.makefile(tarinfo, targetpath) 2404 elif tarinfo.isdir(): 2405 self.makedir(tarinfo, targetpath) 2406 elif tarinfo.isfifo(): 2407 self.makefifo(tarinfo, targetpath) 2408 elif tarinfo.ischr() or tarinfo.isblk(): 2409 self.makedev(tarinfo, targetpath) 2410 elif tarinfo.islnk() or tarinfo.issym(): 2411 self.makelink(tarinfo, targetpath) 2412 elif tarinfo.type not in SUPPORTED_TYPES: 2413 self.makeunknown(tarinfo, targetpath) 2414 else: 2415 self.makefile(tarinfo, targetpath) 2416 2417 if set_attrs: 2418 self.chown(tarinfo, targetpath, numeric_owner) 2419 if not tarinfo.issym(): 2420 self.chmod(tarinfo, targetpath) 2421 self.utime(tarinfo, targetpath) 2422 2423 #-------------------------------------------------------------------------- 2424 # Below are the different file methods. They are called via 2425 # _extract_member() when extract() is called. They can be replaced in a 2426 # subclass to implement other functionality. 2427 2428 def makedir(self, tarinfo, targetpath): 2429 """Make a directory called targetpath. 2430 """ 2431 try: 2432 if tarinfo.mode is None: 2433 # Use the system's default mode 2434 os.mkdir(targetpath) 2435 else: 2436 # Use a safe mode for the directory, the real mode is set 2437 # later in _extract_member(). 2438 os.mkdir(targetpath, 0o700) 2439 except FileExistsError: 2440 pass 2441 2442 def makefile(self, tarinfo, targetpath): 2443 """Make a file called targetpath. 
2444 """ 2445 source = self.fileobj 2446 source.seek(tarinfo.offset_data) 2447 bufsize = self.copybufsize 2448 with bltn_open(targetpath, "wb") as target: 2449 if tarinfo.sparse is not None: 2450 for offset, size in tarinfo.sparse: 2451 target.seek(offset) 2452 copyfileobj(source, target, size, ReadError, bufsize) 2453 target.seek(tarinfo.size) 2454 target.truncate() 2455 else: 2456 copyfileobj(source, target, tarinfo.size, ReadError, bufsize) 2457 2458 def makeunknown(self, tarinfo, targetpath): 2459 """Make a file from a TarInfo object with an unknown type 2460 at targetpath. 2461 """ 2462 self.makefile(tarinfo, targetpath) 2463 self._dbg(1, "tarfile: Unknown file type %r, " \ 2464 "extracted as regular file." % tarinfo.type) 2465 2466 def makefifo(self, tarinfo, targetpath): 2467 """Make a fifo called targetpath. 2468 """ 2469 if hasattr(os, "mkfifo"): 2470 os.mkfifo(targetpath) 2471 else: 2472 raise ExtractError("fifo not supported by system") 2473 2474 def makedev(self, tarinfo, targetpath): 2475 """Make a character or block device called targetpath. 2476 """ 2477 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 2478 raise ExtractError("special devices not supported by system") 2479 2480 mode = tarinfo.mode 2481 if mode is None: 2482 # Use mknod's default 2483 mode = 0o600 2484 if tarinfo.isblk(): 2485 mode |= stat.S_IFBLK 2486 else: 2487 mode |= stat.S_IFCHR 2488 2489 os.mknod(targetpath, mode, 2490 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 2491 2492 def makelink(self, tarinfo, targetpath): 2493 """Make a (symbolic) link called targetpath. If it cannot be created 2494 (platform limitation), we try to make a copy of the referenced file 2495 instead of a link. 2496 """ 2497 try: 2498 # For systems that support symbolic and hard links. 2499 if tarinfo.issym(): 2500 if os.path.lexists(targetpath): 2501 # Avoid FileExistsError on following os.symlink. 
2502 os.unlink(targetpath) 2503 os.symlink(tarinfo.linkname, targetpath) 2504 else: 2505 if os.path.exists(tarinfo._link_target): 2506 os.link(tarinfo._link_target, targetpath) 2507 else: 2508 self._extract_member(self._find_link_target(tarinfo), 2509 targetpath) 2510 except symlink_exception: 2511 try: 2512 self._extract_member(self._find_link_target(tarinfo), 2513 targetpath) 2514 except KeyError: 2515 raise ExtractError("unable to resolve link inside archive") from None 2516 2517 def chown(self, tarinfo, targetpath, numeric_owner): 2518 """Set owner of targetpath according to tarinfo. If numeric_owner 2519 is True, use .gid/.uid instead of .gname/.uname. If numeric_owner 2520 is False, fall back to .gid/.uid when the search based on name 2521 fails. 2522 """ 2523 if hasattr(os, "geteuid") and os.geteuid() == 0: 2524 # We have to be root to do so. 2525 g = tarinfo.gid 2526 u = tarinfo.uid 2527 if not numeric_owner: 2528 try: 2529 if grp and tarinfo.gname: 2530 g = grp.getgrnam(tarinfo.gname)[2] 2531 except KeyError: 2532 pass 2533 try: 2534 if pwd and tarinfo.uname: 2535 u = pwd.getpwnam(tarinfo.uname)[2] 2536 except KeyError: 2537 pass 2538 if g is None: 2539 g = -1 2540 if u is None: 2541 u = -1 2542 try: 2543 if tarinfo.issym() and hasattr(os, "lchown"): 2544 os.lchown(targetpath, u, g) 2545 else: 2546 os.chown(targetpath, u, g) 2547 except OSError as e: 2548 raise ExtractError("could not change owner") from e 2549 2550 def chmod(self, tarinfo, targetpath): 2551 """Set file permissions of targetpath according to tarinfo. 2552 """ 2553 if tarinfo.mode is None: 2554 return 2555 try: 2556 os.chmod(targetpath, tarinfo.mode) 2557 except OSError as e: 2558 raise ExtractError("could not change mode") from e 2559 2560 def utime(self, tarinfo, targetpath): 2561 """Set modification time of targetpath according to tarinfo. 
2562 """ 2563 mtime = tarinfo.mtime 2564 if mtime is None: 2565 return 2566 if not hasattr(os, 'utime'): 2567 return 2568 try: 2569 os.utime(targetpath, (mtime, mtime)) 2570 except OSError as e: 2571 raise ExtractError("could not change modification time") from e 2572 2573 #-------------------------------------------------------------------------- 2574 def next(self): 2575 """Return the next member of the archive as a TarInfo object, when 2576 TarFile is opened for reading. Return None if there is no more 2577 available. 2578 """ 2579 self._check("ra") 2580 if self.firstmember is not None: 2581 m = self.firstmember 2582 self.firstmember = None 2583 return m 2584 2585 # Advance the file pointer. 2586 if self.offset != self.fileobj.tell(): 2587 if self.offset == 0: 2588 return None 2589 self.fileobj.seek(self.offset - 1) 2590 if not self.fileobj.read(1): 2591 raise ReadError("unexpected end of data") 2592 2593 # Read the next block. 2594 tarinfo = None 2595 while True: 2596 try: 2597 tarinfo = self.tarinfo.fromtarfile(self) 2598 except EOFHeaderError as e: 2599 if self.ignore_zeros: 2600 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2601 self.offset += BLOCKSIZE 2602 continue 2603 except InvalidHeaderError as e: 2604 if self.ignore_zeros: 2605 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2606 self.offset += BLOCKSIZE 2607 continue 2608 elif self.offset == 0: 2609 raise ReadError(str(e)) from None 2610 except EmptyHeaderError: 2611 if self.offset == 0: 2612 raise ReadError("empty file") from None 2613 except TruncatedHeaderError as e: 2614 if self.offset == 0: 2615 raise ReadError(str(e)) from None 2616 except SubsequentHeaderError as e: 2617 raise ReadError(str(e)) from None 2618 except Exception as e: 2619 try: 2620 import zlib 2621 if isinstance(e, zlib.error): 2622 raise ReadError(f'zlib error: {e}') from None 2623 else: 2624 raise e 2625 except ImportError: 2626 raise e 2627 break 2628 2629 if tarinfo is not None: 2630 self.members.append(tarinfo) 2631 else: 2632 
self._loaded = True 2633 2634 return tarinfo 2635 2636 #-------------------------------------------------------------------------- 2637 # Little helper methods: 2638 2639 def _getmember(self, name, tarinfo=None, normalize=False): 2640 """Find an archive member by name from bottom to top. 2641 If tarinfo is given, it is used as the starting point. 2642 """ 2643 # Ensure that all members have been loaded. 2644 members = self.getmembers() 2645 2646 # Limit the member search list up to tarinfo. 2647 skipping = False 2648 if tarinfo is not None: 2649 try: 2650 index = members.index(tarinfo) 2651 except ValueError: 2652 # The given starting point might be a (modified) copy. 2653 # We'll later skip members until we find an equivalent. 2654 skipping = True 2655 else: 2656 # Happy fast path 2657 members = members[:index] 2658 2659 if normalize: 2660 name = os.path.normpath(name) 2661 2662 for member in reversed(members): 2663 if skipping: 2664 if tarinfo.offset == member.offset: 2665 skipping = False 2666 continue 2667 if normalize: 2668 member_name = os.path.normpath(member.name) 2669 else: 2670 member_name = member.name 2671 2672 if name == member_name: 2673 return member 2674 2675 if skipping: 2676 # Starting point was not found 2677 raise ValueError(tarinfo) 2678 2679 def _load(self): 2680 """Read through the entire archive file and look for readable 2681 members. 2682 """ 2683 while True: 2684 tarinfo = self.next() 2685 if tarinfo is None: 2686 break 2687 self._loaded = True 2688 2689 def _check(self, mode=None): 2690 """Check if TarFile is still open, and if the operation's mode 2691 corresponds to TarFile's mode. 2692 """ 2693 if self.closed: 2694 raise OSError("%s is closed" % self.__class__.__name__) 2695 if mode is not None and self.mode not in mode: 2696 raise OSError("bad operation for mode %r" % self.mode) 2697 2698 def _find_link_target(self, tarinfo): 2699 """Find the target member of a symlink or hardlink member in the 2700 archive. 
2701 """ 2702 if tarinfo.issym(): 2703 # Always search the entire archive. 2704 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname))) 2705 limit = None 2706 else: 2707 # Search the archive before the link, because a hard link is 2708 # just a reference to an already archived file. 2709 linkname = tarinfo.linkname 2710 limit = tarinfo 2711 2712 member = self._getmember(linkname, tarinfo=limit, normalize=True) 2713 if member is None: 2714 raise KeyError("linkname %r not found" % linkname) 2715 return member 2716 2717 def __iter__(self): 2718 """Provide an iterator object. 2719 """ 2720 if self._loaded: 2721 yield from self.members 2722 return 2723 2724 # Yield items using TarFile's next() method. 2725 # When all members have been read, set TarFile as _loaded. 2726 index = 0 2727 # Fix for SF #1100429: Under rare circumstances it can 2728 # happen that getmembers() is called during iteration, 2729 # which will have already exhausted the next() method. 2730 if self.firstmember is not None: 2731 tarinfo = self.next() 2732 index += 1 2733 yield tarinfo 2734 2735 while True: 2736 if index < len(self.members): 2737 tarinfo = self.members[index] 2738 elif not self._loaded: 2739 tarinfo = self.next() 2740 if not tarinfo: 2741 self._loaded = True 2742 return 2743 else: 2744 return 2745 index += 1 2746 yield tarinfo 2747 2748 def _dbg(self, level, msg): 2749 """Write debugging output to sys.stderr. 2750 """ 2751 if level <= self.debug: 2752 print(msg, file=sys.stderr) 2753 2754 def __enter__(self): 2755 self._check() 2756 return self 2757 2758 def __exit__(self, type, value, traceback): 2759 if type is None: 2760 self.close() 2761 else: 2762 # An exception occurred. We must not call close() because 2763 # it would try to write end-of-archive blocks and padding. 
2764 if not self._extfileobj: 2765 self.fileobj.close() 2766 self.closed = True 2767 2768#-------------------- 2769# exported functions 2770#-------------------- 2771 2772def is_tarfile(name): 2773 """Return True if name points to a tar archive that we 2774 are able to handle, else return False. 2775 2776 'name' should be a string, file, or file-like object. 2777 """ 2778 try: 2779 if hasattr(name, "read"): 2780 pos = name.tell() 2781 t = open(fileobj=name) 2782 name.seek(pos) 2783 else: 2784 t = open(name) 2785 t.close() 2786 return True 2787 except TarError: 2788 return False 2789 2790open = TarFile.open 2791 2792 2793def main(): 2794 import argparse 2795 2796 description = 'A simple command-line interface for tarfile module.' 2797 parser = argparse.ArgumentParser(description=description) 2798 parser.add_argument('-v', '--verbose', action='store_true', default=False, 2799 help='Verbose output') 2800 parser.add_argument('--filter', metavar='<filtername>', 2801 choices=_NAMED_FILTERS, 2802 help='Filter for extraction') 2803 2804 group = parser.add_mutually_exclusive_group(required=True) 2805 group.add_argument('-l', '--list', metavar='<tarfile>', 2806 help='Show listing of a tarfile') 2807 group.add_argument('-e', '--extract', nargs='+', 2808 metavar=('<tarfile>', '<output_dir>'), 2809 help='Extract tarfile into target dir') 2810 group.add_argument('-c', '--create', nargs='+', 2811 metavar=('<name>', '<file>'), 2812 help='Create tarfile from sources') 2813 group.add_argument('-t', '--test', metavar='<tarfile>', 2814 help='Test if a tarfile is valid') 2815 2816 args = parser.parse_args() 2817 2818 if args.filter and args.extract is None: 2819 parser.exit(1, '--filter is only valid for extraction\n') 2820 2821 if args.test is not None: 2822 src = args.test 2823 if is_tarfile(src): 2824 with open(src, 'r') as tar: 2825 tar.getmembers() 2826 print(tar.getmembers(), file=sys.stderr) 2827 if args.verbose: 2828 print('{!r} is a tar archive.'.format(src)) 2829 else: 2830 
parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2831 2832 elif args.list is not None: 2833 src = args.list 2834 if is_tarfile(src): 2835 with TarFile.open(src, 'r:*') as tf: 2836 tf.list(verbose=args.verbose) 2837 else: 2838 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2839 2840 elif args.extract is not None: 2841 if len(args.extract) == 1: 2842 src = args.extract[0] 2843 curdir = os.curdir 2844 elif len(args.extract) == 2: 2845 src, curdir = args.extract 2846 else: 2847 parser.exit(1, parser.format_help()) 2848 2849 if is_tarfile(src): 2850 with TarFile.open(src, 'r:*') as tf: 2851 tf.extractall(path=curdir, filter=args.filter) 2852 if args.verbose: 2853 if curdir == '.': 2854 msg = '{!r} file is extracted.'.format(src) 2855 else: 2856 msg = ('{!r} file is extracted ' 2857 'into {!r} directory.').format(src, curdir) 2858 print(msg) 2859 else: 2860 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2861 2862 elif args.create is not None: 2863 tar_name = args.create.pop(0) 2864 _, ext = os.path.splitext(tar_name) 2865 compressions = { 2866 # gz 2867 '.gz': 'gz', 2868 '.tgz': 'gz', 2869 # xz 2870 '.xz': 'xz', 2871 '.txz': 'xz', 2872 # bz2 2873 '.bz2': 'bz2', 2874 '.tbz': 'bz2', 2875 '.tbz2': 'bz2', 2876 '.tb2': 'bz2', 2877 } 2878 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' 2879 tar_files = args.create 2880 2881 with TarFile.open(tar_name, tar_mode) as tf: 2882 for file_name in tar_files: 2883 tf.add(file_name) 2884 2885 if args.verbose: 2886 print('{!r} file created.'.format(tar_name)) 2887 2888if __name__ == '__main__': 2889 main() 2890