1#!/usr/bin/env python3
2#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
5# Copyright (C) 2002 Lars Gustaebel <[email protected]>
6# All rights reserved.
7#
8# Permission  is  hereby granted,  free  of charge,  to  any person
9# obtaining a  copy of  this software  and associated documentation
10# files  (the  "Software"),  to   deal  in  the  Software   without
11# restriction,  including  without limitation  the  rights to  use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies  of  the  Software,  and to  permit  persons  to  whom the
14# Software  is  furnished  to  do  so,  subject  to  the  following
15# conditions:
16#
17# The above copyright  notice and this  permission notice shall  be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
32version     = "0.9.0"
33__author__  = "Lars Gust\u00e4bel ([email protected])"
34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
35
36#---------
37# Imports
38#---------
39from builtins import open as bltn_open
40import sys
41import os
42import io
43import shutil
44import stat
45import time
46import struct
47import copy
48import re
49import warnings
50
51try:
52    import pwd
53except ImportError:
54    pwd = None
55try:
56    import grp
57except ImportError:
58    grp = None
59
60# os.symlink on Windows prior to 6.0 raises NotImplementedError
61symlink_exception = (AttributeError, NotImplementedError)
62try:
63    # OSError (winerror=1314) will be raised if the caller does not hold the
64    # SeCreateSymbolicLinkPrivilege privilege
65    symlink_exception += (OSError,)
66except NameError:
67    pass
68
69# from tarfile import *
70__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
71           "CompressionError", "StreamError", "ExtractError", "HeaderError",
72           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
73           "DEFAULT_FORMAT", "open"]
74
75
76#---------------------------------------------------------
77# tar constants
78#---------------------------------------------------------
79NUL = b"\0"                     # the null character
80BLOCKSIZE = 512                 # length of processing blocks
81RECORDSIZE = BLOCKSIZE * 20     # length of records
82GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
83POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
84
85LENGTH_NAME = 100               # maximum length of a filename
86LENGTH_LINK = 100               # maximum length of a linkname
87LENGTH_PREFIX = 155             # maximum length of the prefix field
88
89REGTYPE = b"0"                  # regular file
90AREGTYPE = b"\0"                # regular file
91LNKTYPE = b"1"                  # link (inside tarfile)
92SYMTYPE = b"2"                  # symbolic link
93CHRTYPE = b"3"                  # character special device
94BLKTYPE = b"4"                  # block special device
95DIRTYPE = b"5"                  # directory
96FIFOTYPE = b"6"                 # fifo special device
97CONTTYPE = b"7"                 # contiguous file
98
99GNUTYPE_LONGNAME = b"L"         # GNU tar longname
100GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
101GNUTYPE_SPARSE = b"S"           # GNU tar sparse file
102
103XHDTYPE = b"x"                  # POSIX.1-2001 extended header
104XGLTYPE = b"g"                  # POSIX.1-2001 global header
105SOLARIS_XHDTYPE = b"X"          # Solaris extended header
106
107USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
108GNU_FORMAT = 1                  # GNU tar format
109PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
110DEFAULT_FORMAT = PAX_FORMAT
111
112#---------------------------------------------------------
113# tarfile constants
114#---------------------------------------------------------
115# File types that tarfile supports:
116SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
117                   SYMTYPE, DIRTYPE, FIFOTYPE,
118                   CONTTYPE, CHRTYPE, BLKTYPE,
119                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
120                   GNUTYPE_SPARSE)
121
122# File types that will be treated as a regular file.
123REGULAR_TYPES = (REGTYPE, AREGTYPE,
124                 CONTTYPE, GNUTYPE_SPARSE)
125
126# File types that are part of the GNU tar format.
127GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
128             GNUTYPE_SPARSE)
129
130# Fields from a pax header that override a TarInfo attribute.
131PAX_FIELDS = ("path", "linkpath", "size", "mtime",
132              "uid", "gid", "uname", "gname")
133
134# Fields from a pax header that are affected by hdrcharset.
135PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}
136
137# Fields in a pax header that are numbers, all other fields
138# are treated as strings.
139PAX_NUMBER_FIELDS = {
140    "atime": float,
141    "ctime": float,
142    "mtime": float,
143    "uid": int,
144    "gid": int,
145    "size": int
146}
147
148#---------------------------------------------------------
149# initialization
150#---------------------------------------------------------
151if os.name == "nt":
152    ENCODING = "utf-8"
153else:
154    ENCODING = sys.getfilesystemencoding()
155
156#---------------------------------------------------------
157# Some useful functions
158#---------------------------------------------------------
159
160def stn(s, length, encoding, errors):
161    """Convert a string to a null-terminated bytes object.
162    """
163    if s is None:
164        raise ValueError("metadata cannot contain None")
165    s = s.encode(encoding, errors)
166    return s[:length] + (length - len(s)) * NUL
167
168def nts(s, encoding, errors):
169    """Convert a null-terminated bytes object to a string.
170    """
171    p = s.find(b"\0")
172    if p != -1:
173        s = s[:p]
174    return s.decode(encoding, errors)
175
176def nti(s):
177    """Convert a number field to a python number.
178    """
179    # There are two possible encodings for a number field, see
180    # itn() below.
181    if s[0] in (0o200, 0o377):
182        n = 0
183        for i in range(len(s) - 1):
184            n <<= 8
185            n += s[i + 1]
186        if s[0] == 0o377:
187            n = -(256 ** (len(s) - 1) - n)
188    else:
189        try:
190            s = nts(s, "ascii", "strict")
191            n = int(s.strip() or "0", 8)
192        except ValueError:
193            raise InvalidHeaderError("invalid header")
194    return n
195
196def itn(n, digits=8, format=DEFAULT_FORMAT):
197    """Convert a python number to a number field.
198    """
199    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
200    # octal digits followed by a null-byte, this allows values up to
201    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
202    # that if necessary. A leading 0o200 or 0o377 byte indicate this
203    # particular encoding, the following digits-1 bytes are a big-endian
204    # base-256 representation. This allows values up to (256**(digits-1))-1.
205    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
206    # number.
207    original_n = n
208    n = int(n)
209    if 0 <= n < 8 ** (digits - 1):
210        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
211    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
212        if n >= 0:
213            s = bytearray([0o200])
214        else:
215            s = bytearray([0o377])
216            n = 256 ** digits + n
217
218        for i in range(digits - 1):
219            s.insert(1, n & 0o377)
220            n >>= 8
221    else:
222        raise ValueError("overflow in number field")
223
224    return s
225
226def calc_chksums(buf):
227    """Calculate the checksum for a member's header by summing up all
228       characters except for the chksum field which is treated as if
229       it was filled with spaces. According to the GNU tar sources,
230       some tars (Sun and NeXT) calculate chksum with signed char,
231       which will be different if there are chars in the buffer with
232       the high bit set. So we calculate two checksums, unsigned and
233       signed.
234    """
235    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
236    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
237    return unsigned_chksum, signed_chksum
238
239def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
240    """Copy length bytes from fileobj src to fileobj dst.
241       If length is None, copy the entire content.
242    """
243    bufsize = bufsize or 16 * 1024
244    if length == 0:
245        return
246    if length is None:
247        shutil.copyfileobj(src, dst, bufsize)
248        return
249
250    blocks, remainder = divmod(length, bufsize)
251    for b in range(blocks):
252        buf = src.read(bufsize)
253        if len(buf) < bufsize:
254            raise exception("unexpected end of data")
255        dst.write(buf)
256
257    if remainder != 0:
258        buf = src.read(remainder)
259        if len(buf) < remainder:
260            raise exception("unexpected end of data")
261        dst.write(buf)
262    return
263
264def _safe_print(s):
265    encoding = getattr(sys.stdout, 'encoding', None)
266    if encoding is not None:
267        s = s.encode(encoding, 'backslashreplace').decode(encoding)
268    print(s, end=' ')
269
270
271class TarError(Exception):
272    """Base exception."""
273    pass
274class ExtractError(TarError):
275    """General exception for extract errors."""
276    pass
277class ReadError(TarError):
278    """Exception for unreadable tar archives."""
279    pass
280class CompressionError(TarError):
281    """Exception for unavailable compression methods."""
282    pass
283class StreamError(TarError):
284    """Exception for unsupported operations on stream-like TarFiles."""
285    pass
286class HeaderError(TarError):
287    """Base exception for header errors."""
288    pass
289class EmptyHeaderError(HeaderError):
290    """Exception for empty headers."""
291    pass
292class TruncatedHeaderError(HeaderError):
293    """Exception for truncated headers."""
294    pass
295class EOFHeaderError(HeaderError):
296    """Exception for end of file headers."""
297    pass
298class InvalidHeaderError(HeaderError):
299    """Exception for invalid headers."""
300    pass
301class SubsequentHeaderError(HeaderError):
302    """Exception for missing and invalid extended headers."""
303    pass
304
305#---------------------------
306# internal stream interface
307#---------------------------
308class _LowLevelFile:
309    """Low-level file object. Supports reading and writing.
310       It is used instead of a regular file object for streaming
311       access.
312    """
313
314    def __init__(self, name, mode):
315        mode = {
316            "r": os.O_RDONLY,
317            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
318        }[mode]
319        if hasattr(os, "O_BINARY"):
320            mode |= os.O_BINARY
321        self.fd = os.open(name, mode, 0o666)
322
323    def close(self):
324        os.close(self.fd)
325
326    def read(self, size):
327        return os.read(self.fd, size)
328
329    def write(self, s):
330        os.write(self.fd, s)
331
332class _Stream:
333    """Class that serves as an adapter between TarFile and
334       a stream-like object.  The stream-like object only
335       needs to have a read() or write() method and is accessed
336       blockwise.  Use of gzip or bzip2 compression is possible.
337       A stream-like object could be for example: sys.stdin,
338       sys.stdout, a socket, a tape device etc.
339
340       _Stream is intended to be used only internally.
341    """
342
343    def __init__(self, name, mode, comptype, fileobj, bufsize):
344        """Construct a _Stream object.
345        """
346        self._extfileobj = True
347        if fileobj is None:
348            fileobj = _LowLevelFile(name, mode)
349            self._extfileobj = False
350
351        if comptype == '*':
352            # Enable transparent compression detection for the
353            # stream interface
354            fileobj = _StreamProxy(fileobj)
355            comptype = fileobj.getcomptype()
356
357        self.name     = name or ""
358        self.mode     = mode
359        self.comptype = comptype
360        self.fileobj  = fileobj
361        self.bufsize  = bufsize
362        self.buf      = b""
363        self.pos      = 0
364        self.closed   = False
365
366        try:
367            if comptype == "gz":
368                try:
369                    import zlib
370                except ImportError:
371                    raise CompressionError("zlib module is not available") from None
372                self.zlib = zlib
373                self.crc = zlib.crc32(b"")
374                if mode == "r":
375                    self._init_read_gz()
376                    self.exception = zlib.error
377                else:
378                    self._init_write_gz()
379
380            elif comptype == "bz2":
381                try:
382                    import bz2
383                except ImportError:
384                    raise CompressionError("bz2 module is not available") from None
385                if mode == "r":
386                    self.dbuf = b""
387                    self.cmp = bz2.BZ2Decompressor()
388                    self.exception = OSError
389                else:
390                    self.cmp = bz2.BZ2Compressor()
391
392            elif comptype == "xz":
393                try:
394                    import lzma
395                except ImportError:
396                    raise CompressionError("lzma module is not available") from None
397                if mode == "r":
398                    self.dbuf = b""
399                    self.cmp = lzma.LZMADecompressor()
400                    self.exception = lzma.LZMAError
401                else:
402                    self.cmp = lzma.LZMACompressor()
403
404            elif comptype != "tar":
405                raise CompressionError("unknown compression type %r" % comptype)
406
407        except:
408            if not self._extfileobj:
409                self.fileobj.close()
410            self.closed = True
411            raise
412
413    def __del__(self):
414        if hasattr(self, "closed") and not self.closed:
415            self.close()
416
417    def _init_write_gz(self):
418        """Initialize for writing with gzip compression.
419        """
420        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
421                                            -self.zlib.MAX_WBITS,
422                                            self.zlib.DEF_MEM_LEVEL,
423                                            0)
424        timestamp = struct.pack("<L", int(time.time()))
425        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
426        if self.name.endswith(".gz"):
427            self.name = self.name[:-3]
428        # Honor "directory components removed" from RFC1952
429        self.name = os.path.basename(self.name)
430        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
431        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
432
433    def write(self, s):
434        """Write string s to the stream.
435        """
436        if self.comptype == "gz":
437            self.crc = self.zlib.crc32(s, self.crc)
438        self.pos += len(s)
439        if self.comptype != "tar":
440            s = self.cmp.compress(s)
441        self.__write(s)
442
443    def __write(self, s):
444        """Write string s to the stream if a whole new block
445           is ready to be written.
446        """
447        self.buf += s
448        while len(self.buf) > self.bufsize:
449            self.fileobj.write(self.buf[:self.bufsize])
450            self.buf = self.buf[self.bufsize:]
451
452    def close(self):
453        """Close the _Stream object. No operation should be
454           done on it afterwards.
455        """
456        if self.closed:
457            return
458
459        self.closed = True
460        try:
461            if self.mode == "w" and self.comptype != "tar":
462                self.buf += self.cmp.flush()
463
464            if self.mode == "w" and self.buf:
465                self.fileobj.write(self.buf)
466                self.buf = b""
467                if self.comptype == "gz":
468                    self.fileobj.write(struct.pack("<L", self.crc))
469                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
470        finally:
471            if not self._extfileobj:
472                self.fileobj.close()
473
474    def _init_read_gz(self):
475        """Initialize for reading a gzip compressed fileobj.
476        """
477        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
478        self.dbuf = b""
479
480        # taken from gzip.GzipFile with some alterations
481        if self.__read(2) != b"\037\213":
482            raise ReadError("not a gzip file")
483        if self.__read(1) != b"\010":
484            raise CompressionError("unsupported compression method")
485
486        flag = ord(self.__read(1))
487        self.__read(6)
488
489        if flag & 4:
490            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491            self.read(xlen)
492        if flag & 8:
493            while True:
494                s = self.__read(1)
495                if not s or s == NUL:
496                    break
497        if flag & 16:
498            while True:
499                s = self.__read(1)
500                if not s or s == NUL:
501                    break
502        if flag & 2:
503            self.__read(2)
504
505    def tell(self):
506        """Return the stream's file pointer position.
507        """
508        return self.pos
509
510    def seek(self, pos=0):
511        """Set the stream's file pointer to pos. Negative seeking
512           is forbidden.
513        """
514        if pos - self.pos >= 0:
515            blocks, remainder = divmod(pos - self.pos, self.bufsize)
516            for i in range(blocks):
517                self.read(self.bufsize)
518            self.read(remainder)
519        else:
520            raise StreamError("seeking backwards is not allowed")
521        return self.pos
522
523    def read(self, size):
524        """Return the next size number of bytes from the stream."""
525        assert size is not None
526        buf = self._read(size)
527        self.pos += len(buf)
528        return buf
529
530    def _read(self, size):
531        """Return size bytes from the stream.
532        """
533        if self.comptype == "tar":
534            return self.__read(size)
535
536        c = len(self.dbuf)
537        t = [self.dbuf]
538        while c < size:
539            # Skip underlying buffer to avoid unaligned double buffering.
540            if self.buf:
541                buf = self.buf
542                self.buf = b""
543            else:
544                buf = self.fileobj.read(self.bufsize)
545                if not buf:
546                    break
547            try:
548                buf = self.cmp.decompress(buf)
549            except self.exception as e:
550                raise ReadError("invalid compressed data") from e
551            t.append(buf)
552            c += len(buf)
553        t = b"".join(t)
554        self.dbuf = t[size:]
555        return t[:size]
556
557    def __read(self, size):
558        """Return size bytes from stream. If internal buffer is empty,
559           read another block from the stream.
560        """
561        c = len(self.buf)
562        t = [self.buf]
563        while c < size:
564            buf = self.fileobj.read(self.bufsize)
565            if not buf:
566                break
567            t.append(buf)
568            c += len(buf)
569        t = b"".join(t)
570        self.buf = t[size:]
571        return t[:size]
572# class _Stream
573
574class _StreamProxy(object):
575    """Small proxy class that enables transparent compression
576       detection for the Stream interface (mode 'r|*').
577    """
578
579    def __init__(self, fileobj):
580        self.fileobj = fileobj
581        self.buf = self.fileobj.read(BLOCKSIZE)
582
583    def read(self, size):
584        self.read = self.fileobj.read
585        return self.buf
586
587    def getcomptype(self):
588        if self.buf.startswith(b"\x1f\x8b\x08"):
589            return "gz"
590        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
591            return "bz2"
592        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
593            return "xz"
594        else:
595            return "tar"
596
597    def close(self):
598        self.fileobj.close()
599# class StreamProxy
600
601#------------------------
602# Extraction file object
603#------------------------
604class _FileInFile(object):
605    """A thin wrapper around an existing file object that
606       provides a part of its data as an individual file
607       object.
608    """
609
610    def __init__(self, fileobj, offset, size, blockinfo=None):
611        self.fileobj = fileobj
612        self.offset = offset
613        self.size = size
614        self.position = 0
615        self.name = getattr(fileobj, "name", None)
616        self.closed = False
617
618        if blockinfo is None:
619            blockinfo = [(0, size)]
620
621        # Construct a map with data and zero blocks.
622        self.map_index = 0
623        self.map = []
624        lastpos = 0
625        realpos = self.offset
626        for offset, size in blockinfo:
627            if offset > lastpos:
628                self.map.append((False, lastpos, offset, None))
629            self.map.append((True, offset, offset + size, realpos))
630            realpos += size
631            lastpos = offset + size
632        if lastpos < self.size:
633            self.map.append((False, lastpos, self.size, None))
634
635    def flush(self):
636        pass
637
638    def readable(self):
639        return True
640
641    def writable(self):
642        return False
643
644    def seekable(self):
645        return self.fileobj.seekable()
646
647    def tell(self):
648        """Return the current file position.
649        """
650        return self.position
651
652    def seek(self, position, whence=io.SEEK_SET):
653        """Seek to a position in the file.
654        """
655        if whence == io.SEEK_SET:
656            self.position = min(max(position, 0), self.size)
657        elif whence == io.SEEK_CUR:
658            if position < 0:
659                self.position = max(self.position + position, 0)
660            else:
661                self.position = min(self.position + position, self.size)
662        elif whence == io.SEEK_END:
663            self.position = max(min(self.size + position, self.size), 0)
664        else:
665            raise ValueError("Invalid argument")
666        return self.position
667
668    def read(self, size=None):
669        """Read data from the file.
670        """
671        if size is None:
672            size = self.size - self.position
673        else:
674            size = min(size, self.size - self.position)
675
676        buf = b""
677        while size > 0:
678            while True:
679                data, start, stop, offset = self.map[self.map_index]
680                if start <= self.position < stop:
681                    break
682                else:
683                    self.map_index += 1
684                    if self.map_index == len(self.map):
685                        self.map_index = 0
686            length = min(size, stop - self.position)
687            if data:
688                self.fileobj.seek(offset + (self.position - start))
689                b = self.fileobj.read(length)
690                if len(b) != length:
691                    raise ReadError("unexpected end of data")
692                buf += b
693            else:
694                buf += NUL * length
695            size -= length
696            self.position += length
697        return buf
698
699    def readinto(self, b):
700        buf = self.read(len(b))
701        b[:len(buf)] = buf
702        return len(buf)
703
704    def close(self):
705        self.closed = True
706#class _FileInFile
707
708class ExFileObject(io.BufferedReader):
709
710    def __init__(self, tarfile, tarinfo):
711        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
712                tarinfo.size, tarinfo.sparse)
713        super().__init__(fileobj)
714#class ExFileObject
715
716
717#-----------------------------
718# extraction filters (PEP 706)
719#-----------------------------
720
721class FilterError(TarError):
722    pass
723
724class AbsolutePathError(FilterError):
725    def __init__(self, tarinfo):
726        self.tarinfo = tarinfo
727        super().__init__(f'member {tarinfo.name!r} has an absolute path')
728
729class OutsideDestinationError(FilterError):
730    def __init__(self, tarinfo, path):
731        self.tarinfo = tarinfo
732        self._path = path
733        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '
734                         + 'which is outside the destination')
735
736class SpecialFileError(FilterError):
737    def __init__(self, tarinfo):
738        self.tarinfo = tarinfo
739        super().__init__(f'{tarinfo.name!r} is a special file')
740
741class AbsoluteLinkError(FilterError):
742    def __init__(self, tarinfo):
743        self.tarinfo = tarinfo
744        super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745
746class LinkOutsideDestinationError(FilterError):
747    def __init__(self, tarinfo, path):
748        self.tarinfo = tarinfo
749        self._path = path
750        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '
751                         + 'which is outside the destination')
752
753def _get_filtered_attrs(member, dest_path, for_data=True):
754    new_attrs = {}
755    name = member.name
756    dest_path = os.path.realpath(dest_path)
757    # Strip leading / (tar's directory separator) from filenames.
758    # Include os.sep (target OS directory separator) as well.
759    if name.startswith(('/', os.sep)):
760        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
761    if os.path.isabs(name):
762        # Path is absolute even after stripping.
763        # For example, 'C:/foo' on Windows.
764        raise AbsolutePathError(member)
765    # Ensure we stay in the destination
766    target_path = os.path.realpath(os.path.join(dest_path, name))
767    if os.path.commonpath([target_path, dest_path]) != dest_path:
768        raise OutsideDestinationError(member, target_path)
769    # Limit permissions (no high bits, and go-w)
770    mode = member.mode
771    if mode is not None:
772        # Strip high bits & group/other write bits
773        mode = mode & 0o755
774        if for_data:
775            # For data, handle permissions & file types
776            if member.isreg() or member.islnk():
777                if not mode & 0o100:
778                    # Clear executable bits if not executable by user
779                    mode &= ~0o111
780                # Ensure owner can read & write
781                mode |= 0o600
782            elif member.isdir() or member.issym():
783                # Ignore mode for directories & symlinks
784                mode = None
785            else:
786                # Reject special files
787                raise SpecialFileError(member)
788        if mode != member.mode:
789            new_attrs['mode'] = mode
790    if for_data:
791        # Ignore ownership for 'data'
792        if member.uid is not None:
793            new_attrs['uid'] = None
794        if member.gid is not None:
795            new_attrs['gid'] = None
796        if member.uname is not None:
797            new_attrs['uname'] = None
798        if member.gname is not None:
799            new_attrs['gname'] = None
800        # Check link destination for 'data'
801        if member.islnk() or member.issym():
802            if os.path.isabs(member.linkname):
803                raise AbsoluteLinkError(member)
804            target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805            if os.path.commonpath([target_path, dest_path]) != dest_path:
806                raise LinkOutsideDestinationError(member, target_path)
807    return new_attrs
808
809def fully_trusted_filter(member, dest_path):
810    return member
811
812def tar_filter(member, dest_path):
813    new_attrs = _get_filtered_attrs(member, dest_path, False)
814    if new_attrs:
815        return member.replace(**new_attrs, deep=False)
816    return member
817
818def data_filter(member, dest_path):
819    new_attrs = _get_filtered_attrs(member, dest_path, True)
820    if new_attrs:
821        return member.replace(**new_attrs, deep=False)
822    return member
823
824_NAMED_FILTERS = {
825    "fully_trusted": fully_trusted_filter,
826    "tar": tar_filter,
827    "data": data_filter,
828}
829
830#------------------
831# Exported Classes
832#------------------
833
834# Sentinel for replace() defaults, meaning "don't change the attribute"
835_KEEP = object()
836
837class TarInfo(object):
838    """Informational class which holds the details about an
839       archive member given by a tar header block.
840       TarInfo objects are returned by TarFile.getmember(),
841       TarFile.getmembers() and TarFile.gettarinfo() and are
842       usually created internally.
843    """
844
845    __slots__ = dict(
846        name = 'Name of the archive member.',
847        mode = 'Permission bits.',
848        uid = 'User ID of the user who originally stored this member.',
849        gid = 'Group ID of the user who originally stored this member.',
850        size = 'Size in bytes.',
851        mtime = 'Time of last modification.',
852        chksum = 'Header checksum.',
853        type = ('File type. type is usually one of these constants: '
854                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
855                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
856        linkname = ('Name of the target file name, which is only present '
857                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
858        uname = 'User name.',
859        gname = 'Group name.',
860        devmajor = 'Device major number.',
861        devminor = 'Device minor number.',
862        offset = 'The tar header starts here.',
863        offset_data = "The file's data starts here.",
864        pax_headers = ('A dictionary containing key-value pairs of an '
865                       'associated pax extended header.'),
866        sparse = 'Sparse member information.',
867        tarfile = None,
868        _sparse_structs = None,
869        _link_target = None,
870        )
871
872    def __init__(self, name=""):
873        """Construct a TarInfo object. name is the optional name
874           of the member.
875        """
876        self.name = name        # member name
877        self.mode = 0o644       # file permissions
878        self.uid = 0            # user id
879        self.gid = 0            # group id
880        self.size = 0           # file size
881        self.mtime = 0          # modification time
882        self.chksum = 0         # header checksum
883        self.type = REGTYPE     # member type
884        self.linkname = ""      # link name
885        self.uname = ""         # user name
886        self.gname = ""         # group name
887        self.devmajor = 0       # device major number
888        self.devminor = 0       # device minor number
889
890        self.offset = 0         # the tar header starts here
891        self.offset_data = 0    # the file's data starts here
892
893        self.sparse = None      # sparse member information
894        self.pax_headers = {}   # pax header information
895
896    @property
897    def path(self):
898        'In pax headers, "name" is called "path".'
899        return self.name
900
901    @path.setter
902    def path(self, name):
903        self.name = name
904
905    @property
906    def linkpath(self):
907        'In pax headers, "linkname" is called "linkpath".'
908        return self.linkname
909
910    @linkpath.setter
911    def linkpath(self, linkname):
912        self.linkname = linkname
913
914    def __repr__(self):
915        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
916
917    def replace(self, *,
918                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
919                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
920                deep=True, _KEEP=_KEEP):
921        """Return a deep copy of self with the given attributes replaced.
922        """
923        if deep:
924            result = copy.deepcopy(self)
925        else:
926            result = copy.copy(self)
927        if name is not _KEEP:
928            result.name = name
929        if mtime is not _KEEP:
930            result.mtime = mtime
931        if mode is not _KEEP:
932            result.mode = mode
933        if linkname is not _KEEP:
934            result.linkname = linkname
935        if uid is not _KEEP:
936            result.uid = uid
937        if gid is not _KEEP:
938            result.gid = gid
939        if uname is not _KEEP:
940            result.uname = uname
941        if gname is not _KEEP:
942            result.gname = gname
943        return result
944
945    def get_info(self):
946        """Return the TarInfo's attributes as a dictionary.
947        """
948        if self.mode is None:
949            mode = None
950        else:
951            mode = self.mode & 0o7777
952        info = {
953            "name":     self.name,
954            "mode":     mode,
955            "uid":      self.uid,
956            "gid":      self.gid,
957            "size":     self.size,
958            "mtime":    self.mtime,
959            "chksum":   self.chksum,
960            "type":     self.type,
961            "linkname": self.linkname,
962            "uname":    self.uname,
963            "gname":    self.gname,
964            "devmajor": self.devmajor,
965            "devminor": self.devminor
966        }
967
968        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
969            info["name"] += "/"
970
971        return info
972
973    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
974        """Return a tar header as a string of 512 byte blocks.
975        """
976        info = self.get_info()
977        for name, value in info.items():
978            if value is None:
979                raise ValueError("%s may not be None" % name)
980
981        if format == USTAR_FORMAT:
982            return self.create_ustar_header(info, encoding, errors)
983        elif format == GNU_FORMAT:
984            return self.create_gnu_header(info, encoding, errors)
985        elif format == PAX_FORMAT:
986            return self.create_pax_header(info, encoding)
987        else:
988            raise ValueError("invalid format")
989
990    def create_ustar_header(self, info, encoding, errors):
991        """Return the object as a ustar header block.
992        """
993        info["magic"] = POSIX_MAGIC
994
995        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
996            raise ValueError("linkname is too long")
997
998        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
999            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
1000
1001        return self._create_header(info, USTAR_FORMAT, encoding, errors)
1002
1003    def create_gnu_header(self, info, encoding, errors):
1004        """Return the object as a GNU header block sequence.
1005        """
1006        info["magic"] = GNU_MAGIC
1007
1008        buf = b""
1009        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
1010            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
1011
1012        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
1013            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
1014
1015        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
1016
1017    def create_pax_header(self, info, encoding):
1018        """Return the object as a ustar header block. If it cannot be
1019           represented this way, prepend a pax extended header sequence
1020           with supplement information.
1021        """
1022        info["magic"] = POSIX_MAGIC
1023        pax_headers = self.pax_headers.copy()
1024
1025        # Test string fields for values that exceed the field length or cannot
1026        # be represented in ASCII encoding.
1027        for name, hname, length in (
1028                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1029                ("uname", "uname", 32), ("gname", "gname", 32)):
1030
1031            if hname in pax_headers:
1032                # The pax header has priority.
1033                continue
1034
1035            # Try to encode the string as ASCII.
1036            try:
1037                info[name].encode("ascii", "strict")
1038            except UnicodeEncodeError:
1039                pax_headers[hname] = info[name]
1040                continue
1041
1042            if len(info[name]) > length:
1043                pax_headers[hname] = info[name]
1044
1045        # Test number fields for values that exceed the field limit or values
1046        # that like to be stored as float.
1047        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1048            needs_pax = False
1049
1050            val = info[name]
1051            val_is_float = isinstance(val, float)
1052            val_int = round(val) if val_is_float else val
1053            if not 0 <= val_int < 8 ** (digits - 1):
1054                # Avoid overflow.
1055                info[name] = 0
1056                needs_pax = True
1057            elif val_is_float:
1058                # Put rounded value in ustar header, and full
1059                # precision value in pax header.
1060                info[name] = val_int
1061                needs_pax = True
1062
1063            # The existing pax header has priority.
1064            if needs_pax and name not in pax_headers:
1065                pax_headers[name] = str(val)
1066
1067        # Create a pax extended header if necessary.
1068        if pax_headers:
1069            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
1070        else:
1071            buf = b""
1072
1073        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
1074
1075    @classmethod
1076    def create_pax_global_header(cls, pax_headers):
1077        """Return the object as a pax global header block sequence.
1078        """
1079        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
1080
1081    def _posix_split_name(self, name, encoding, errors):
1082        """Split a name longer than 100 chars into a prefix
1083           and a name part.
1084        """
1085        components = name.split("/")
1086        for i in range(1, len(components)):
1087            prefix = "/".join(components[:i])
1088            name = "/".join(components[i:])
1089            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
1090                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
1091                break
1092        else:
1093            raise ValueError("name is too long")
1094
1095        return prefix, name
1096
1097    @staticmethod
1098    def _create_header(info, format, encoding, errors):
1099        """Return a header block. info is a dictionary with file
1100           information, format must be one of the *_FORMAT constants.
1101        """
1102        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
1103        if has_device_fields:
1104            devmajor = itn(info.get("devmajor", 0), 8, format)
1105            devminor = itn(info.get("devminor", 0), 8, format)
1106        else:
1107            devmajor = stn("", 8, encoding, errors)
1108            devminor = stn("", 8, encoding, errors)
1109
1110        # None values in metadata should cause ValueError.
1111        # itn()/stn() do this for all fields except type.
1112        filetype = info.get("type", REGTYPE)
1113        if filetype is None:
1114            raise ValueError("TarInfo.type must not be None")
1115
1116        parts = [
1117            stn(info.get("name", ""), 100, encoding, errors),
1118            itn(info.get("mode", 0) & 0o7777, 8, format),
1119            itn(info.get("uid", 0), 8, format),
1120            itn(info.get("gid", 0), 8, format),
1121            itn(info.get("size", 0), 12, format),
1122            itn(info.get("mtime", 0), 12, format),
1123            b"        ", # checksum field
1124            filetype,
1125            stn(info.get("linkname", ""), 100, encoding, errors),
1126            info.get("magic", POSIX_MAGIC),
1127            stn(info.get("uname", ""), 32, encoding, errors),
1128            stn(info.get("gname", ""), 32, encoding, errors),
1129            devmajor,
1130            devminor,
1131            stn(info.get("prefix", ""), 155, encoding, errors)
1132        ]
1133
1134        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
1135        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1136        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
1137        return buf
1138
1139    @staticmethod
1140    def _create_payload(payload):
1141        """Return the string payload filled with zero bytes
1142           up to the next 512 byte border.
1143        """
1144        blocks, remainder = divmod(len(payload), BLOCKSIZE)
1145        if remainder > 0:
1146            payload += (BLOCKSIZE - remainder) * NUL
1147        return payload
1148
1149    @classmethod
1150    def _create_gnu_long_header(cls, name, type, encoding, errors):
1151        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1152           for name.
1153        """
1154        name = name.encode(encoding, errors) + NUL
1155
1156        info = {}
1157        info["name"] = "././@LongLink"
1158        info["type"] = type
1159        info["size"] = len(name)
1160        info["magic"] = GNU_MAGIC
1161
1162        # create extended header + name blocks.
1163        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
1164                cls._create_payload(name)
1165
1166    @classmethod
1167    def _create_pax_generic_header(cls, pax_headers, type, encoding):
1168        """Return a POSIX.1-2008 extended or global header sequence
1169           that contains a list of keyword, value pairs. The values
1170           must be strings.
1171        """
1172        # Check if one of the fields contains surrogate characters and thereby
1173        # forces hdrcharset=BINARY, see _proc_pax() for more information.
1174        binary = False
1175        for keyword, value in pax_headers.items():
1176            try:
1177                value.encode("utf-8", "strict")
1178            except UnicodeEncodeError:
1179                binary = True
1180                break
1181
1182        records = b""
1183        if binary:
1184            # Put the hdrcharset field at the beginning of the header.
1185            records += b"21 hdrcharset=BINARY\n"
1186
1187        for keyword, value in pax_headers.items():
1188            keyword = keyword.encode("utf-8")
1189            if binary:
1190                # Try to restore the original byte representation of `value'.
1191                # Needless to say, that the encoding must match the string.
1192                value = value.encode(encoding, "surrogateescape")
1193            else:
1194                value = value.encode("utf-8")
1195
1196            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
1197            n = p = 0
1198            while True:
1199                n = l + len(str(p))
1200                if n == p:
1201                    break
1202                p = n
1203            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
1204
1205        # We use a hardcoded "././@PaxHeader" name like star does
1206        # instead of the one that POSIX recommends.
1207        info = {}
1208        info["name"] = "././@PaxHeader"
1209        info["type"] = type
1210        info["size"] = len(records)
1211        info["magic"] = POSIX_MAGIC
1212
1213        # Create pax header + record blocks.
1214        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
1215                cls._create_payload(records)
1216
1217    @classmethod
1218    def frombuf(cls, buf, encoding, errors):
1219        """Construct a TarInfo object from a 512 byte bytes object.
1220        """
1221        if len(buf) == 0:
1222            raise EmptyHeaderError("empty header")
1223        if len(buf) != BLOCKSIZE:
1224            raise TruncatedHeaderError("truncated header")
1225        if buf.count(NUL) == BLOCKSIZE:
1226            raise EOFHeaderError("end of file header")
1227
1228        chksum = nti(buf[148:156])
1229        if chksum not in calc_chksums(buf):
1230            raise InvalidHeaderError("bad checksum")
1231
1232        obj = cls()
1233        obj.name = nts(buf[0:100], encoding, errors)
1234        obj.mode = nti(buf[100:108])
1235        obj.uid = nti(buf[108:116])
1236        obj.gid = nti(buf[116:124])
1237        obj.size = nti(buf[124:136])
1238        obj.mtime = nti(buf[136:148])
1239        obj.chksum = chksum
1240        obj.type = buf[156:157]
1241        obj.linkname = nts(buf[157:257], encoding, errors)
1242        obj.uname = nts(buf[265:297], encoding, errors)
1243        obj.gname = nts(buf[297:329], encoding, errors)
1244        obj.devmajor = nti(buf[329:337])
1245        obj.devminor = nti(buf[337:345])
1246        prefix = nts(buf[345:500], encoding, errors)
1247
1248        # Old V7 tar format represents a directory as a regular
1249        # file with a trailing slash.
1250        if obj.type == AREGTYPE and obj.name.endswith("/"):
1251            obj.type = DIRTYPE
1252
1253        # The old GNU sparse format occupies some of the unused
1254        # space in the buffer for up to 4 sparse structures.
1255        # Save them for later processing in _proc_sparse().
1256        if obj.type == GNUTYPE_SPARSE:
1257            pos = 386
1258            structs = []
1259            for i in range(4):
1260                try:
1261                    offset = nti(buf[pos:pos + 12])
1262                    numbytes = nti(buf[pos + 12:pos + 24])
1263                except ValueError:
1264                    break
1265                structs.append((offset, numbytes))
1266                pos += 24
1267            isextended = bool(buf[482])
1268            origsize = nti(buf[483:495])
1269            obj._sparse_structs = (structs, isextended, origsize)
1270
1271        # Remove redundant slashes from directories.
1272        if obj.isdir():
1273            obj.name = obj.name.rstrip("/")
1274
1275        # Reconstruct a ustar longname.
1276        if prefix and obj.type not in GNU_TYPES:
1277            obj.name = prefix + "/" + obj.name
1278        return obj
1279
1280    @classmethod
1281    def fromtarfile(cls, tarfile):
1282        """Return the next TarInfo object from TarFile object
1283           tarfile.
1284        """
1285        buf = tarfile.fileobj.read(BLOCKSIZE)
1286        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1287        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1288        return obj._proc_member(tarfile)
1289
1290    #--------------------------------------------------------------------------
1291    # The following are methods that are called depending on the type of a
1292    # member. The entry point is _proc_member() which can be overridden in a
1293    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1294    # implement the following
1295    # operations:
1296    # 1. Set self.offset_data to the position where the data blocks begin,
1297    #    if there is data that follows.
1298    # 2. Set tarfile.offset to the position where the next member's header will
1299    #    begin.
1300    # 3. Return self or another valid TarInfo object.
1301    def _proc_member(self, tarfile):
1302        """Choose the right processing method depending on
1303           the type and call it.
1304        """
1305        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1306            return self._proc_gnulong(tarfile)
1307        elif self.type == GNUTYPE_SPARSE:
1308            return self._proc_sparse(tarfile)
1309        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1310            return self._proc_pax(tarfile)
1311        else:
1312            return self._proc_builtin(tarfile)
1313
1314    def _proc_builtin(self, tarfile):
1315        """Process a builtin type or an unknown type which
1316           will be treated as a regular file.
1317        """
1318        self.offset_data = tarfile.fileobj.tell()
1319        offset = self.offset_data
1320        if self.isreg() or self.type not in SUPPORTED_TYPES:
1321            # Skip the following data blocks.
1322            offset += self._block(self.size)
1323        tarfile.offset = offset
1324
1325        # Patch the TarInfo object with saved global
1326        # header information.
1327        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1328
1329        # Remove redundant slashes from directories. This is to be consistent
1330        # with frombuf().
1331        if self.isdir():
1332            self.name = self.name.rstrip("/")
1333
1334        return self
1335
1336    def _proc_gnulong(self, tarfile):
1337        """Process the blocks that hold a GNU longname
1338           or longlink member.
1339        """
1340        buf = tarfile.fileobj.read(self._block(self.size))
1341
1342        # Fetch the next header and process it.
1343        try:
1344            next = self.fromtarfile(tarfile)
1345        except HeaderError as e:
1346            raise SubsequentHeaderError(str(e)) from None
1347
1348        # Patch the TarInfo object from the next header with
1349        # the longname information.
1350        next.offset = self.offset
1351        if self.type == GNUTYPE_LONGNAME:
1352            next.name = nts(buf, tarfile.encoding, tarfile.errors)
1353        elif self.type == GNUTYPE_LONGLINK:
1354            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
1355
1356        # Remove redundant slashes from directories. This is to be consistent
1357        # with frombuf().
1358        if next.isdir():
1359            next.name = next.name.removesuffix("/")
1360
1361        return next
1362
1363    def _proc_sparse(self, tarfile):
1364        """Process a GNU sparse header plus extra headers.
1365        """
1366        # We already collected some sparse structures in frombuf().
1367        structs, isextended, origsize = self._sparse_structs
1368        del self._sparse_structs
1369
1370        # Collect sparse structures from extended header blocks.
1371        while isextended:
1372            buf = tarfile.fileobj.read(BLOCKSIZE)
1373            pos = 0
1374            for i in range(21):
1375                try:
1376                    offset = nti(buf[pos:pos + 12])
1377                    numbytes = nti(buf[pos + 12:pos + 24])
1378                except ValueError:
1379                    break
1380                if offset and numbytes:
1381                    structs.append((offset, numbytes))
1382                pos += 24
1383            isextended = bool(buf[504])
1384        self.sparse = structs
1385
1386        self.offset_data = tarfile.fileobj.tell()
1387        tarfile.offset = self.offset_data + self._block(self.size)
1388        self.size = origsize
1389        return self
1390
1391    def _proc_pax(self, tarfile):
1392        """Process an extended or global header as described in
1393           POSIX.1-2008.
1394        """
1395        # Read the header information.
1396        buf = tarfile.fileobj.read(self._block(self.size))
1397
1398        # A pax header stores supplemental information for either
1399        # the following file (extended) or all following files
1400        # (global).
1401        if self.type == XGLTYPE:
1402            pax_headers = tarfile.pax_headers
1403        else:
1404            pax_headers = tarfile.pax_headers.copy()
1405
1406        # Check if the pax header contains a hdrcharset field. This tells us
1407        # the encoding of the path, linkpath, uname and gname fields. Normally,
1408        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1409        # implementations are allowed to store them as raw binary strings if
1410        # the translation to UTF-8 fails.
1411        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1412        if match is not None:
1413            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
1414
1415        # For the time being, we don't care about anything other than "BINARY".
1416        # The only other value that is currently allowed by the standard is
1417        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1418        hdrcharset = pax_headers.get("hdrcharset")
1419        if hdrcharset == "BINARY":
1420            encoding = tarfile.encoding
1421        else:
1422            encoding = "utf-8"
1423
1424        # Parse pax header information. A record looks like that:
1425        # "%d %s=%s\n" % (length, keyword, value). length is the size
1426        # of the complete record including the length field itself and
1427        # the newline. keyword and value are both UTF-8 encoded strings.
1428        regex = re.compile(br"(\d+) ([^=]+)=")
1429        pos = 0
1430        while True:
1431            match = regex.match(buf, pos)
1432            if not match:
1433                break
1434
1435            length, keyword = match.groups()
1436            length = int(length)
1437            if length == 0:
1438                raise InvalidHeaderError("invalid header")
1439            value = buf[match.end(2) + 1:match.start(1) + length - 1]
1440
1441            # Normally, we could just use "utf-8" as the encoding and "strict"
1442            # as the error handler, but we better not take the risk. For
1443            # example, GNU tar <= 1.23 is known to store filenames it cannot
1444            # translate to UTF-8 as raw strings (unfortunately without a
1445            # hdrcharset=BINARY header).
1446            # We first try the strict standard encoding, and if that fails we
1447            # fall back on the user's encoding and error handler.
1448            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
1449                    tarfile.errors)
1450            if keyword in PAX_NAME_FIELDS:
1451                value = self._decode_pax_field(value, encoding, tarfile.encoding,
1452                        tarfile.errors)
1453            else:
1454                value = self._decode_pax_field(value, "utf-8", "utf-8",
1455                        tarfile.errors)
1456
1457            pax_headers[keyword] = value
1458            pos += length
1459
1460        # Fetch the next header.
1461        try:
1462            next = self.fromtarfile(tarfile)
1463        except HeaderError as e:
1464            raise SubsequentHeaderError(str(e)) from None
1465
1466        # Process GNU sparse information.
1467        if "GNU.sparse.map" in pax_headers:
1468            # GNU extended sparse format version 0.1.
1469            self._proc_gnusparse_01(next, pax_headers)
1470
1471        elif "GNU.sparse.size" in pax_headers:
1472            # GNU extended sparse format version 0.0.
1473            self._proc_gnusparse_00(next, pax_headers, buf)
1474
1475        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1476            # GNU extended sparse format version 1.0.
1477            self._proc_gnusparse_10(next, pax_headers, tarfile)
1478
1479        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1480            # Patch the TarInfo object with the extended header info.
1481            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1482            next.offset = self.offset
1483
1484            if "size" in pax_headers:
1485                # If the extended header replaces the size field,
1486                # we need to recalculate the offset where the next
1487                # header starts.
1488                offset = next.offset_data
1489                if next.isreg() or next.type not in SUPPORTED_TYPES:
1490                    offset += next._block(next.size)
1491                tarfile.offset = offset
1492
1493        return next
1494
1495    def _proc_gnusparse_00(self, next, pax_headers, buf):
1496        """Process a GNU tar extended sparse header, version 0.0.
1497        """
1498        offsets = []
1499        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1500            offsets.append(int(match.group(1)))
1501        numbytes = []
1502        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1503            numbytes.append(int(match.group(1)))
1504        next.sparse = list(zip(offsets, numbytes))
1505
1506    def _proc_gnusparse_01(self, next, pax_headers):
1507        """Process a GNU tar extended sparse header, version 0.1.
1508        """
1509        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1510        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1511
1512    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1513        """Process a GNU tar extended sparse header, version 1.0.
1514        """
1515        fields = None
1516        sparse = []
1517        buf = tarfile.fileobj.read(BLOCKSIZE)
1518        fields, buf = buf.split(b"\n", 1)
1519        fields = int(fields)
1520        while len(sparse) < fields * 2:
1521            if b"\n" not in buf:
1522                buf += tarfile.fileobj.read(BLOCKSIZE)
1523            number, buf = buf.split(b"\n", 1)
1524            sparse.append(int(number))
1525        next.offset_data = tarfile.fileobj.tell()
1526        next.sparse = list(zip(sparse[::2], sparse[1::2]))
1527
1528    def _apply_pax_info(self, pax_headers, encoding, errors):
1529        """Replace fields with supplemental information from a previous
1530           pax extended or global header.
1531        """
1532        for keyword, value in pax_headers.items():
1533            if keyword == "GNU.sparse.name":
1534                setattr(self, "path", value)
1535            elif keyword == "GNU.sparse.size":
1536                setattr(self, "size", int(value))
1537            elif keyword == "GNU.sparse.realsize":
1538                setattr(self, "size", int(value))
1539            elif keyword in PAX_FIELDS:
1540                if keyword in PAX_NUMBER_FIELDS:
1541                    try:
1542                        value = PAX_NUMBER_FIELDS[keyword](value)
1543                    except ValueError:
1544                        value = 0
1545                if keyword == "path":
1546                    value = value.rstrip("/")
1547                setattr(self, keyword, value)
1548
1549        self.pax_headers = pax_headers.copy()
1550
1551    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1552        """Decode a single field from a pax record.
1553        """
1554        try:
1555            return value.decode(encoding, "strict")
1556        except UnicodeDecodeError:
1557            return value.decode(fallback_encoding, fallback_errors)
1558
1559    def _block(self, count):
1560        """Round up a byte count by BLOCKSIZE and return it,
1561           e.g. _block(834) => 1024.
1562        """
1563        blocks, remainder = divmod(count, BLOCKSIZE)
1564        if remainder:
1565            blocks += 1
1566        return blocks * BLOCKSIZE
1567
1568    def isreg(self):
1569        'Return True if the Tarinfo object is a regular file.'
1570        return self.type in REGULAR_TYPES
1571
1572    def isfile(self):
1573        'Return True if the Tarinfo object is a regular file.'
1574        return self.isreg()
1575
1576    def isdir(self):
1577        'Return True if it is a directory.'
1578        return self.type == DIRTYPE
1579
1580    def issym(self):
1581        'Return True if it is a symbolic link.'
1582        return self.type == SYMTYPE
1583
1584    def islnk(self):
1585        'Return True if it is a hard link.'
1586        return self.type == LNKTYPE
1587
1588    def ischr(self):
1589        'Return True if it is a character device.'
1590        return self.type == CHRTYPE
1591
1592    def isblk(self):
1593        'Return True if it is a block device.'
1594        return self.type == BLKTYPE
1595
1596    def isfifo(self):
1597        'Return True if it is a FIFO.'
1598        return self.type == FIFOTYPE
1599
1600    def issparse(self):
1601        return self.sparse is not None
1602
1603    def isdev(self):
1604        'Return True if it is one of character device, block device or FIFO.'
1605        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1606# class TarInfo
1607
1608class TarFile(object):
1609    """The TarFile Class provides an interface to tar archives.
1610    """
1611
1612    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1613
1614    dereference = False         # If true, add content of linked file to the
1615                                # tar file, else the link.
1616
1617    ignore_zeros = False        # If true, skips empty or invalid blocks and
1618                                # continues processing.
1619
1620    errorlevel = 1              # If 0, fatal errors only appear in debug
1621                                # messages (if debug >= 0). If > 0, errors
1622                                # are passed to the caller as exceptions.
1623
1624    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1625
1626    encoding = ENCODING         # Encoding for 8-bit character strings.
1627
1628    errors = None               # Error handler for unicode conversion.
1629
1630    tarinfo = TarInfo           # The default TarInfo class to use.
1631
1632    fileobject = ExFileObject   # The file-object for extractfile().
1633
1634    extraction_filter = None    # The default filter for extraction.
1635
1636    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1637            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1638            errors="surrogateescape", pax_headers=None, debug=None,
1639            errorlevel=None, copybufsize=None):
1640        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1641           read from an existing archive, 'a' to append data to an existing
1642           file or 'w' to create a new file overwriting an existing one. `mode'
1643           defaults to 'r'.
1644           If `fileobj' is given, it is used for reading or writing data. If it
1645           can be determined, `mode' is overridden by `fileobj's mode.
1646           `fileobj' is not closed, when TarFile is closed.
1647        """
1648        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1649        if mode not in modes:
1650            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1651        self.mode = mode
1652        self._mode = modes[mode]
1653
1654        if not fileobj:
1655            if self.mode == "a" and not os.path.exists(name):
1656                # Create nonexistent files in append mode.
1657                self.mode = "w"
1658                self._mode = "wb"
1659            fileobj = bltn_open(name, self._mode)
1660            self._extfileobj = False
1661        else:
1662            if (name is None and hasattr(fileobj, "name") and
1663                isinstance(fileobj.name, (str, bytes))):
1664                name = fileobj.name
1665            if hasattr(fileobj, "mode"):
1666                self._mode = fileobj.mode
1667            self._extfileobj = True
1668        self.name = os.path.abspath(name) if name else None
1669        self.fileobj = fileobj
1670
1671        # Init attributes.
1672        if format is not None:
1673            self.format = format
1674        if tarinfo is not None:
1675            self.tarinfo = tarinfo
1676        if dereference is not None:
1677            self.dereference = dereference
1678        if ignore_zeros is not None:
1679            self.ignore_zeros = ignore_zeros
1680        if encoding is not None:
1681            self.encoding = encoding
1682        self.errors = errors
1683
1684        if pax_headers is not None and self.format == PAX_FORMAT:
1685            self.pax_headers = pax_headers
1686        else:
1687            self.pax_headers = {}
1688
1689        if debug is not None:
1690            self.debug = debug
1691        if errorlevel is not None:
1692            self.errorlevel = errorlevel
1693
1694        # Init datastructures.
1695        self.copybufsize = copybufsize
1696        self.closed = False
1697        self.members = []       # list of members as TarInfo objects
1698        self._loaded = False    # flag if all members have been read
1699        self.offset = self.fileobj.tell()
1700                                # current position in the archive file
1701        self.inodes = {}        # dictionary caching the inodes of
1702                                # archive members already added
1703
1704        try:
1705            if self.mode == "r":
1706                self.firstmember = None
1707                self.firstmember = self.next()
1708
1709            if self.mode == "a":
1710                # Move to the end of the archive,
1711                # before the first empty block.
1712                while True:
1713                    self.fileobj.seek(self.offset)
1714                    try:
1715                        tarinfo = self.tarinfo.fromtarfile(self)
1716                        self.members.append(tarinfo)
1717                    except EOFHeaderError:
1718                        self.fileobj.seek(self.offset)
1719                        break
1720                    except HeaderError as e:
1721                        raise ReadError(str(e)) from None
1722
1723            if self.mode in ("a", "w", "x"):
1724                self._loaded = True
1725
1726                if self.pax_headers:
1727                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1728                    self.fileobj.write(buf)
1729                    self.offset += len(buf)
1730        except:
1731            if not self._extfileobj:
1732                self.fileobj.close()
1733            self.closed = True
1734            raise
1735
1736    #--------------------------------------------------------------------------
1737    # Below are the classmethods which act as alternate constructors to the
1738    # TarFile class. The open() method is the only one that is needed for
1739    # public use; it is the "super"-constructor and is able to select an
1740    # adequate "sub"-constructor for a particular compression using the mapping
1741    # from OPEN_METH.
1742    #
1743    # This concept allows one to subclass TarFile without losing the comfort of
1744    # the super-constructor. A sub-constructor is registered and made available
1745    # by adding it to the mapping in OPEN_METH.
1746
1747    @classmethod
1748    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1749        """Open a tar archive for reading, writing or appending. Return
1750           an appropriate TarFile class.
1751
1752           mode:
1753           'r' or 'r:*' open for reading with transparent compression
1754           'r:'         open for reading exclusively uncompressed
1755           'r:gz'       open for reading with gzip compression
1756           'r:bz2'      open for reading with bzip2 compression
1757           'r:xz'       open for reading with lzma compression
1758           'a' or 'a:'  open for appending, creating the file if necessary
1759           'w' or 'w:'  open for writing without compression
1760           'w:gz'       open for writing with gzip compression
1761           'w:bz2'      open for writing with bzip2 compression
1762           'w:xz'       open for writing with lzma compression
1763
1764           'x' or 'x:'  create a tarfile exclusively without compression, raise
1765                        an exception if the file is already created
1766           'x:gz'       create a gzip compressed tarfile, raise an exception
1767                        if the file is already created
1768           'x:bz2'      create a bzip2 compressed tarfile, raise an exception
1769                        if the file is already created
1770           'x:xz'       create an lzma compressed tarfile, raise an exception
1771                        if the file is already created
1772
1773           'r|*'        open a stream of tar blocks with transparent compression
1774           'r|'         open an uncompressed stream of tar blocks for reading
1775           'r|gz'       open a gzip compressed stream of tar blocks
1776           'r|bz2'      open a bzip2 compressed stream of tar blocks
1777           'r|xz'       open an lzma compressed stream of tar blocks
1778           'w|'         open an uncompressed stream for writing
1779           'w|gz'       open a gzip compressed stream for writing
1780           'w|bz2'      open a bzip2 compressed stream for writing
1781           'w|xz'       open an lzma compressed stream for writing
1782        """
1783
1784        if not name and not fileobj:
1785            raise ValueError("nothing to open")
1786
1787        if mode in ("r", "r:*"):
1788            # Find out which *open() is appropriate for opening the file.
1789            def not_compressed(comptype):
1790                return cls.OPEN_METH[comptype] == 'taropen'
1791            error_msgs = []
1792            for comptype in sorted(cls.OPEN_METH, key=not_compressed):
1793                func = getattr(cls, cls.OPEN_METH[comptype])
1794                if fileobj is not None:
1795                    saved_pos = fileobj.tell()
1796                try:
1797                    return func(name, "r", fileobj, **kwargs)
1798                except (ReadError, CompressionError) as e:
1799                    error_msgs.append(f'- method {comptype}: {e!r}')
1800                    if fileobj is not None:
1801                        fileobj.seek(saved_pos)
1802                    continue
1803            error_msgs_summary = '\n'.join(error_msgs)
1804            raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
1805
1806        elif ":" in mode:
1807            filemode, comptype = mode.split(":", 1)
1808            filemode = filemode or "r"
1809            comptype = comptype or "tar"
1810
1811            # Select the *open() function according to
1812            # given compression.
1813            if comptype in cls.OPEN_METH:
1814                func = getattr(cls, cls.OPEN_METH[comptype])
1815            else:
1816                raise CompressionError("unknown compression type %r" % comptype)
1817            return func(name, filemode, fileobj, **kwargs)
1818
1819        elif "|" in mode:
1820            filemode, comptype = mode.split("|", 1)
1821            filemode = filemode or "r"
1822            comptype = comptype or "tar"
1823
1824            if filemode not in ("r", "w"):
1825                raise ValueError("mode must be 'r' or 'w'")
1826
1827            stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1828            try:
1829                t = cls(name, filemode, stream, **kwargs)
1830            except:
1831                stream.close()
1832                raise
1833            t._extfileobj = False
1834            return t
1835
1836        elif mode in ("a", "w", "x"):
1837            return cls.taropen(name, mode, fileobj, **kwargs)
1838
1839        raise ValueError("undiscernible mode")
1840
1841    @classmethod
1842    def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1843        """Open uncompressed tar archive name for reading or writing.
1844        """
1845        if mode not in ("r", "a", "w", "x"):
1846            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1847        return cls(name, mode, fileobj, **kwargs)
1848
1849    @classmethod
1850    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1851        """Open gzip compressed tar archive name for reading or writing.
1852           Appending is not allowed.
1853        """
1854        if mode not in ("r", "w", "x"):
1855            raise ValueError("mode must be 'r', 'w' or 'x'")
1856
1857        try:
1858            from gzip import GzipFile
1859        except ImportError:
1860            raise CompressionError("gzip module is not available") from None
1861
1862        try:
1863            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
1864        except OSError as e:
1865            if fileobj is not None and mode == 'r':
1866                raise ReadError("not a gzip file") from e
1867            raise
1868
1869        try:
1870            t = cls.taropen(name, mode, fileobj, **kwargs)
1871        except OSError as e:
1872            fileobj.close()
1873            if mode == 'r':
1874                raise ReadError("not a gzip file") from e
1875            raise
1876        except:
1877            fileobj.close()
1878            raise
1879        t._extfileobj = False
1880        return t
1881
1882    @classmethod
1883    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1884        """Open bzip2 compressed tar archive name for reading or writing.
1885           Appending is not allowed.
1886        """
1887        if mode not in ("r", "w", "x"):
1888            raise ValueError("mode must be 'r', 'w' or 'x'")
1889
1890        try:
1891            from bz2 import BZ2File
1892        except ImportError:
1893            raise CompressionError("bz2 module is not available") from None
1894
1895        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)
1896
1897        try:
1898            t = cls.taropen(name, mode, fileobj, **kwargs)
1899        except (OSError, EOFError) as e:
1900            fileobj.close()
1901            if mode == 'r':
1902                raise ReadError("not a bzip2 file") from e
1903            raise
1904        except:
1905            fileobj.close()
1906            raise
1907        t._extfileobj = False
1908        return t
1909
1910    @classmethod
1911    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1912        """Open lzma compressed tar archive name for reading or writing.
1913           Appending is not allowed.
1914        """
1915        if mode not in ("r", "w", "x"):
1916            raise ValueError("mode must be 'r', 'w' or 'x'")
1917
1918        try:
1919            from lzma import LZMAFile, LZMAError
1920        except ImportError:
1921            raise CompressionError("lzma module is not available") from None
1922
1923        fileobj = LZMAFile(fileobj or name, mode, preset=preset)
1924
1925        try:
1926            t = cls.taropen(name, mode, fileobj, **kwargs)
1927        except (LZMAError, EOFError) as e:
1928            fileobj.close()
1929            if mode == 'r':
1930                raise ReadError("not an lzma file") from e
1931            raise
1932        except:
1933            fileobj.close()
1934            raise
1935        t._extfileobj = False
1936        return t
1937
1938    # All *open() methods are registered here.
1939    OPEN_METH = {
1940        "tar": "taropen",   # uncompressed tar
1941        "gz":  "gzopen",    # gzip compressed tar
1942        "bz2": "bz2open",   # bzip2 compressed tar
1943        "xz":  "xzopen"     # lzma compressed tar
1944    }
1945
1946    #--------------------------------------------------------------------------
1947    # The public methods which TarFile provides:
1948
1949    def close(self):
1950        """Close the TarFile. In write-mode, two finishing zero blocks are
1951           appended to the archive.
1952        """
1953        if self.closed:
1954            return
1955
1956        self.closed = True
1957        try:
1958            if self.mode in ("a", "w", "x"):
1959                self.fileobj.write(NUL * (BLOCKSIZE * 2))
1960                self.offset += (BLOCKSIZE * 2)
1961                # fill up the end with zero-blocks
1962                # (like option -b20 for tar does)
1963                blocks, remainder = divmod(self.offset, RECORDSIZE)
1964                if remainder > 0:
1965                    self.fileobj.write(NUL * (RECORDSIZE - remainder))
1966        finally:
1967            if not self._extfileobj:
1968                self.fileobj.close()
1969
1970    def getmember(self, name):
1971        """Return a TarInfo object for member `name'. If `name' can not be
1972           found in the archive, KeyError is raised. If a member occurs more
1973           than once in the archive, its last occurrence is assumed to be the
1974           most up-to-date version.
1975        """
1976        tarinfo = self._getmember(name.rstrip('/'))
1977        if tarinfo is None:
1978            raise KeyError("filename %r not found" % name)
1979        return tarinfo
1980
1981    def getmembers(self):
1982        """Return the members of the archive as a list of TarInfo objects. The
1983           list has the same order as the members in the archive.
1984        """
1985        self._check()
1986        if not self._loaded:    # if we want to obtain a list of
1987            self._load()        # all members, we first have to
1988                                # scan the whole archive.
1989        return self.members
1990
1991    def getnames(self):
1992        """Return the members of the archive as a list of their names. It has
1993           the same order as the list returned by getmembers().
1994        """
1995        return [tarinfo.name for tarinfo in self.getmembers()]
1996
1997    def gettarinfo(self, name=None, arcname=None, fileobj=None):
1998        """Create a TarInfo object from the result of os.stat or equivalent
1999           on an existing file. The file is either named by `name', or
2000           specified as a file object `fileobj' with a file descriptor. If
2001           given, `arcname' specifies an alternative name for the file in the
2002           archive, otherwise, the name is taken from the 'name' attribute of
2003           'fileobj', or the 'name' argument. The name should be a text
2004           string.
2005        """
2006        self._check("awx")
2007
2008        # When fileobj is given, replace name by
2009        # fileobj's real name.
2010        if fileobj is not None:
2011            name = fileobj.name
2012
2013        # Building the name of the member in the archive.
2014        # Backward slashes are converted to forward slashes,
2015        # Absolute paths are turned to relative paths.
2016        if arcname is None:
2017            arcname = name
2018        drv, arcname = os.path.splitdrive(arcname)
2019        arcname = arcname.replace(os.sep, "/")
2020        arcname = arcname.lstrip("/")
2021
2022        # Now, fill the TarInfo object with
2023        # information specific for the file.
2024        tarinfo = self.tarinfo()
2025        tarinfo.tarfile = self  # Not needed
2026
2027        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
2028        if fileobj is None:
2029            if not self.dereference:
2030                statres = os.lstat(name)
2031            else:
2032                statres = os.stat(name)
2033        else:
2034            statres = os.fstat(fileobj.fileno())
2035        linkname = ""
2036
2037        stmd = statres.st_mode
2038        if stat.S_ISREG(stmd):
2039            inode = (statres.st_ino, statres.st_dev)
2040            if not self.dereference and statres.st_nlink > 1 and \
2041                    inode in self.inodes and arcname != self.inodes[inode]:
2042                # Is it a hardlink to an already
2043                # archived file?
2044                type = LNKTYPE
2045                linkname = self.inodes[inode]
2046            else:
2047                # The inode is added only if its valid.
2048                # For win32 it is always 0.
2049                type = REGTYPE
2050                if inode[0]:
2051                    self.inodes[inode] = arcname
2052        elif stat.S_ISDIR(stmd):
2053            type = DIRTYPE
2054        elif stat.S_ISFIFO(stmd):
2055            type = FIFOTYPE
2056        elif stat.S_ISLNK(stmd):
2057            type = SYMTYPE
2058            linkname = os.readlink(name)
2059        elif stat.S_ISCHR(stmd):
2060            type = CHRTYPE
2061        elif stat.S_ISBLK(stmd):
2062            type = BLKTYPE
2063        else:
2064            return None
2065
2066        # Fill the TarInfo object with all
2067        # information we can get.
2068        tarinfo.name = arcname
2069        tarinfo.mode = stmd
2070        tarinfo.uid = statres.st_uid
2071        tarinfo.gid = statres.st_gid
2072        if type == REGTYPE:
2073            tarinfo.size = statres.st_size
2074        else:
2075            tarinfo.size = 0
2076        tarinfo.mtime = statres.st_mtime
2077        tarinfo.type = type
2078        tarinfo.linkname = linkname
2079        if pwd:
2080            try:
2081                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
2082            except KeyError:
2083                pass
2084        if grp:
2085            try:
2086                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
2087            except KeyError:
2088                pass
2089
2090        if type in (CHRTYPE, BLKTYPE):
2091            if hasattr(os, "major") and hasattr(os, "minor"):
2092                tarinfo.devmajor = os.major(statres.st_rdev)
2093                tarinfo.devminor = os.minor(statres.st_rdev)
2094        return tarinfo
2095
2096    def list(self, verbose=True, *, members=None):
2097        """Print a table of contents to sys.stdout. If `verbose' is False, only
2098           the names of the members are printed. If it is True, an `ls -l'-like
2099           output is produced. `members' is optional and must be a subset of the
2100           list returned by getmembers().
2101        """
2102        self._check()
2103
2104        if members is None:
2105            members = self
2106        for tarinfo in members:
2107            if verbose:
2108                if tarinfo.mode is None:
2109                    _safe_print("??????????")
2110                else:
2111                    _safe_print(stat.filemode(tarinfo.mode))
2112                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
2113                                       tarinfo.gname or tarinfo.gid))
2114                if tarinfo.ischr() or tarinfo.isblk():
2115                    _safe_print("%10s" %
2116                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
2117                else:
2118                    _safe_print("%10d" % tarinfo.size)
2119                if tarinfo.mtime is None:
2120                    _safe_print("????-??-?? ??:??:??")
2121                else:
2122                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
2123                                % time.localtime(tarinfo.mtime)[:6])
2124
2125            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
2126
2127            if verbose:
2128                if tarinfo.issym():
2129                    _safe_print("-> " + tarinfo.linkname)
2130                if tarinfo.islnk():
2131                    _safe_print("link to " + tarinfo.linkname)
2132            print()
2133
2134    def add(self, name, arcname=None, recursive=True, *, filter=None):
2135        """Add the file `name' to the archive. `name' may be any type of file
2136           (directory, fifo, symbolic link, etc.). If given, `arcname'
2137           specifies an alternative name for the file in the archive.
2138           Directories are added recursively by default. This can be avoided by
2139           setting `recursive' to False. `filter' is a function
2140           that expects a TarInfo object argument and returns the changed
2141           TarInfo object, if it returns None the TarInfo object will be
2142           excluded from the archive.
2143        """
2144        self._check("awx")
2145
2146        if arcname is None:
2147            arcname = name
2148
2149        # Skip if somebody tries to archive the archive...
2150        if self.name is not None and os.path.abspath(name) == self.name:
2151            self._dbg(2, "tarfile: Skipped %r" % name)
2152            return
2153
2154        self._dbg(1, name)
2155
2156        # Create a TarInfo object from the file.
2157        tarinfo = self.gettarinfo(name, arcname)
2158
2159        if tarinfo is None:
2160            self._dbg(1, "tarfile: Unsupported type %r" % name)
2161            return
2162
2163        # Change or exclude the TarInfo object.
2164        if filter is not None:
2165            tarinfo = filter(tarinfo)
2166            if tarinfo is None:
2167                self._dbg(2, "tarfile: Excluded %r" % name)
2168                return
2169
2170        # Append the tar header and data to the archive.
2171        if tarinfo.isreg():
2172            with bltn_open(name, "rb") as f:
2173                self.addfile(tarinfo, f)
2174
2175        elif tarinfo.isdir():
2176            self.addfile(tarinfo)
2177            if recursive:
2178                for f in sorted(os.listdir(name)):
2179                    self.add(os.path.join(name, f), os.path.join(arcname, f),
2180                            recursive, filter=filter)
2181
2182        else:
2183            self.addfile(tarinfo)
2184
2185    def addfile(self, tarinfo, fileobj=None):
2186        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
2187           given, it should be a binary file, and tarinfo.size bytes are read
2188           from it and added to the archive. You can create TarInfo objects
2189           directly, or by using gettarinfo().
2190        """
2191        self._check("awx")
2192
2193        tarinfo = copy.copy(tarinfo)
2194
2195        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2196        self.fileobj.write(buf)
2197        self.offset += len(buf)
2198        bufsize=self.copybufsize
2199        # If there's data to follow, append it.
2200        if fileobj is not None:
2201            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
2202            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2203            if remainder > 0:
2204                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2205                blocks += 1
2206            self.offset += blocks * BLOCKSIZE
2207
2208        self.members.append(tarinfo)
2209
2210    def _get_filter_function(self, filter):
2211        if filter is None:
2212            filter = self.extraction_filter
2213            if filter is None:
2214                return fully_trusted_filter
2215            if isinstance(filter, str):
2216                raise TypeError(
2217                    'String names are not supported for '
2218                    + 'TarFile.extraction_filter. Use a function such as '
2219                    + 'tarfile.data_filter directly.')
2220            return filter
2221        if callable(filter):
2222            return filter
2223        try:
2224            return _NAMED_FILTERS[filter]
2225        except KeyError:
2226            raise ValueError(f"filter {filter!r} not found") from None
2227
2228    def extractall(self, path=".", members=None, *, numeric_owner=False,
2229                   filter=None):
2230        """Extract all members from the archive to the current working
2231           directory and set owner, modification time and permissions on
2232           directories afterwards. `path' specifies a different directory
2233           to extract to. `members' is optional and must be a subset of the
2234           list returned by getmembers(). If `numeric_owner` is True, only
2235           the numbers for user/group names are used and not the names.
2236
2237           The `filter` function will be called on each member just
2238           before extraction.
2239           It can return a changed TarInfo or None to skip the member.
2240           String names of common filters are accepted.
2241        """
2242        directories = []
2243
2244        filter_function = self._get_filter_function(filter)
2245        if members is None:
2246            members = self
2247
2248        for member in members:
2249            tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2250            if tarinfo is None:
2251                continue
2252            if tarinfo.isdir():
2253                # For directories, delay setting attributes until later,
2254                # since permissions can interfere with extraction and
2255                # extracting contents can reset mtime.
2256                directories.append(tarinfo)
2257            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
2258                              numeric_owner=numeric_owner)
2259
2260        # Reverse sort directories.
2261        directories.sort(key=lambda a: a.name, reverse=True)
2262
2263        # Set correct owner, mtime and filemode on directories.
2264        for tarinfo in directories:
2265            dirpath = os.path.join(path, tarinfo.name)
2266            try:
2267                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
2268                self.utime(tarinfo, dirpath)
2269                self.chmod(tarinfo, dirpath)
2270            except ExtractError as e:
2271                self._handle_nonfatal_error(e)
2272
2273    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
2274                filter=None):
2275        """Extract a member from the archive to the current working directory,
2276           using its full name. Its file information is extracted as accurately
2277           as possible. `member' may be a filename or a TarInfo object. You can
2278           specify a different directory using `path'. File attributes (owner,
2279           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2280           is True, only the numbers for user/group names are used and not
2281           the names.
2282
2283           The `filter` function will be called before extraction.
2284           It can return a changed TarInfo or None to skip the member.
2285           String names of common filters are accepted.
2286        """
2287        filter_function = self._get_filter_function(filter)
2288        tarinfo = self._get_extract_tarinfo(member, filter_function, path)
2289        if tarinfo is not None:
2290            self._extract_one(tarinfo, path, set_attrs, numeric_owner)
2291
2292    def _get_extract_tarinfo(self, member, filter_function, path):
2293        """Get filtered TarInfo (or None) from member, which might be a str"""
2294        if isinstance(member, str):
2295            tarinfo = self.getmember(member)
2296        else:
2297            tarinfo = member
2298
2299        unfiltered = tarinfo
2300        try:
2301            tarinfo = filter_function(tarinfo, path)
2302        except (OSError, FilterError) as e:
2303            self._handle_fatal_error(e)
2304        except ExtractError as e:
2305            self._handle_nonfatal_error(e)
2306        if tarinfo is None:
2307            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
2308            return None
2309        # Prepare the link target for makelink().
2310        if tarinfo.islnk():
2311            tarinfo = copy.copy(tarinfo)
2312            tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2313        return tarinfo
2314
2315    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
2316        """Extract from filtered tarinfo to disk"""
2317        self._check("r")
2318
2319        try:
2320            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2321                                 set_attrs=set_attrs,
2322                                 numeric_owner=numeric_owner)
2323        except OSError as e:
2324            self._handle_fatal_error(e)
2325        except ExtractError as e:
2326            self._handle_nonfatal_error(e)
2327
2328    def _handle_nonfatal_error(self, e):
2329        """Handle non-fatal error (ExtractError) according to errorlevel"""
2330        if self.errorlevel > 1:
2331            raise
2332        else:
2333            self._dbg(1, "tarfile: %s" % e)
2334
2335    def _handle_fatal_error(self, e):
2336        """Handle "fatal" error according to self.errorlevel"""
2337        if self.errorlevel > 0:
2338            raise
2339        elif isinstance(e, OSError):
2340            if e.filename is None:
2341                self._dbg(1, "tarfile: %s" % e.strerror)
2342            else:
2343                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2344        else:
2345            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
2346
2347    def extractfile(self, member):
2348        """Extract a member from the archive as a file object. `member' may be
2349           a filename or a TarInfo object. If `member' is a regular file or
2350           a link, an io.BufferedReader object is returned. For all other
2351           existing members, None is returned. If `member' does not appear
2352           in the archive, KeyError is raised.
2353        """
2354        self._check("r")
2355
2356        if isinstance(member, str):
2357            tarinfo = self.getmember(member)
2358        else:
2359            tarinfo = member
2360
2361        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
2362            # Members with unknown types are treated as regular files.
2363            return self.fileobject(self, tarinfo)
2364
2365        elif tarinfo.islnk() or tarinfo.issym():
2366            if isinstance(self.fileobj, _Stream):
2367                # A small but ugly workaround for the case that someone tries
2368                # to extract a (sym)link as a file-object from a non-seekable
2369                # stream of tar blocks.
2370                raise StreamError("cannot extract (sym)link as file object")
2371            else:
2372                # A (sym)link's file object is its target's file object.
2373                return self.extractfile(self._find_link_target(tarinfo))
2374        else:
2375            # If there's no data associated with the member (directory, chrdev,
2376            # blkdev, etc.), return None instead of a file object.
2377            return None
2378
2379    def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2380                        numeric_owner=False):
2381        """Extract the TarInfo object tarinfo to a physical
2382           file called targetpath.
2383        """
2384        # Fetch the TarInfo object for the given name
2385        # and build the destination pathname, replacing
2386        # forward slashes to platform specific separators.
2387        targetpath = targetpath.rstrip("/")
2388        targetpath = targetpath.replace("/", os.sep)
2389
2390        # Create all upper directories.
2391        upperdirs = os.path.dirname(targetpath)
2392        if upperdirs and not os.path.exists(upperdirs):
2393            # Create directories that are not part of the archive with
2394            # default permissions.
2395            os.makedirs(upperdirs)
2396
2397        if tarinfo.islnk() or tarinfo.issym():
2398            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2399        else:
2400            self._dbg(1, tarinfo.name)
2401
2402        if tarinfo.isreg():
2403            self.makefile(tarinfo, targetpath)
2404        elif tarinfo.isdir():
2405            self.makedir(tarinfo, targetpath)
2406        elif tarinfo.isfifo():
2407            self.makefifo(tarinfo, targetpath)
2408        elif tarinfo.ischr() or tarinfo.isblk():
2409            self.makedev(tarinfo, targetpath)
2410        elif tarinfo.islnk() or tarinfo.issym():
2411            self.makelink(tarinfo, targetpath)
2412        elif tarinfo.type not in SUPPORTED_TYPES:
2413            self.makeunknown(tarinfo, targetpath)
2414        else:
2415            self.makefile(tarinfo, targetpath)
2416
2417        if set_attrs:
2418            self.chown(tarinfo, targetpath, numeric_owner)
2419            if not tarinfo.issym():
2420                self.chmod(tarinfo, targetpath)
2421                self.utime(tarinfo, targetpath)
2422
2423    #--------------------------------------------------------------------------
2424    # Below are the different file methods. They are called via
2425    # _extract_member() when extract() is called. They can be replaced in a
2426    # subclass to implement other functionality.
2427
2428    def makedir(self, tarinfo, targetpath):
2429        """Make a directory called targetpath.
2430        """
2431        try:
2432            if tarinfo.mode is None:
2433                # Use the system's default mode
2434                os.mkdir(targetpath)
2435            else:
2436                # Use a safe mode for the directory, the real mode is set
2437                # later in _extract_member().
2438                os.mkdir(targetpath, 0o700)
2439        except FileExistsError:
2440            pass
2441
2442    def makefile(self, tarinfo, targetpath):
2443        """Make a file called targetpath.
2444        """
2445        source = self.fileobj
2446        source.seek(tarinfo.offset_data)
2447        bufsize = self.copybufsize
2448        with bltn_open(targetpath, "wb") as target:
2449            if tarinfo.sparse is not None:
2450                for offset, size in tarinfo.sparse:
2451                    target.seek(offset)
2452                    copyfileobj(source, target, size, ReadError, bufsize)
2453                target.seek(tarinfo.size)
2454                target.truncate()
2455            else:
2456                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
2457
2458    def makeunknown(self, tarinfo, targetpath):
2459        """Make a file from a TarInfo object with an unknown type
2460           at targetpath.
2461        """
2462        self.makefile(tarinfo, targetpath)
2463        self._dbg(1, "tarfile: Unknown file type %r, " \
2464                     "extracted as regular file." % tarinfo.type)
2465
2466    def makefifo(self, tarinfo, targetpath):
2467        """Make a fifo called targetpath.
2468        """
2469        if hasattr(os, "mkfifo"):
2470            os.mkfifo(targetpath)
2471        else:
2472            raise ExtractError("fifo not supported by system")
2473
2474    def makedev(self, tarinfo, targetpath):
2475        """Make a character or block device called targetpath.
2476        """
2477        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2478            raise ExtractError("special devices not supported by system")
2479
2480        mode = tarinfo.mode
2481        if mode is None:
2482            # Use mknod's default
2483            mode = 0o600
2484        if tarinfo.isblk():
2485            mode |= stat.S_IFBLK
2486        else:
2487            mode |= stat.S_IFCHR
2488
2489        os.mknod(targetpath, mode,
2490                 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2491
2492    def makelink(self, tarinfo, targetpath):
2493        """Make a (symbolic) link called targetpath. If it cannot be created
2494          (platform limitation), we try to make a copy of the referenced file
2495          instead of a link.
2496        """
2497        try:
2498            # For systems that support symbolic and hard links.
2499            if tarinfo.issym():
2500                if os.path.lexists(targetpath):
2501                    # Avoid FileExistsError on following os.symlink.
2502                    os.unlink(targetpath)
2503                os.symlink(tarinfo.linkname, targetpath)
2504            else:
2505                if os.path.exists(tarinfo._link_target):
2506                    os.link(tarinfo._link_target, targetpath)
2507                else:
2508                    self._extract_member(self._find_link_target(tarinfo),
2509                                         targetpath)
2510        except symlink_exception:
2511            try:
2512                self._extract_member(self._find_link_target(tarinfo),
2513                                     targetpath)
2514            except KeyError:
2515                raise ExtractError("unable to resolve link inside archive") from None
2516
2517    def chown(self, tarinfo, targetpath, numeric_owner):
2518        """Set owner of targetpath according to tarinfo. If numeric_owner
2519           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
2520           is False, fall back to .gid/.uid when the search based on name
2521           fails.
2522        """
2523        if hasattr(os, "geteuid") and os.geteuid() == 0:
2524            # We have to be root to do so.
2525            g = tarinfo.gid
2526            u = tarinfo.uid
2527            if not numeric_owner:
2528                try:
2529                    if grp and tarinfo.gname:
2530                        g = grp.getgrnam(tarinfo.gname)[2]
2531                except KeyError:
2532                    pass
2533                try:
2534                    if pwd and tarinfo.uname:
2535                        u = pwd.getpwnam(tarinfo.uname)[2]
2536                except KeyError:
2537                    pass
2538            if g is None:
2539                g = -1
2540            if u is None:
2541                u = -1
2542            try:
2543                if tarinfo.issym() and hasattr(os, "lchown"):
2544                    os.lchown(targetpath, u, g)
2545                else:
2546                    os.chown(targetpath, u, g)
2547            except OSError as e:
2548                raise ExtractError("could not change owner") from e
2549
2550    def chmod(self, tarinfo, targetpath):
2551        """Set file permissions of targetpath according to tarinfo.
2552        """
2553        if tarinfo.mode is None:
2554            return
2555        try:
2556            os.chmod(targetpath, tarinfo.mode)
2557        except OSError as e:
2558            raise ExtractError("could not change mode") from e
2559
2560    def utime(self, tarinfo, targetpath):
2561        """Set modification time of targetpath according to tarinfo.
2562        """
2563        mtime = tarinfo.mtime
2564        if mtime is None:
2565            return
2566        if not hasattr(os, 'utime'):
2567            return
2568        try:
2569            os.utime(targetpath, (mtime, mtime))
2570        except OSError as e:
2571            raise ExtractError("could not change modification time") from e
2572
2573    #--------------------------------------------------------------------------
2574    def next(self):
2575        """Return the next member of the archive as a TarInfo object, when
2576           TarFile is opened for reading. Return None if there is no more
2577           available.
2578        """
2579        self._check("ra")
2580        if self.firstmember is not None:
2581            m = self.firstmember
2582            self.firstmember = None
2583            return m
2584
2585        # Advance the file pointer.
2586        if self.offset != self.fileobj.tell():
2587            if self.offset == 0:
2588                return None
2589            self.fileobj.seek(self.offset - 1)
2590            if not self.fileobj.read(1):
2591                raise ReadError("unexpected end of data")
2592
2593        # Read the next block.
2594        tarinfo = None
2595        while True:
2596            try:
2597                tarinfo = self.tarinfo.fromtarfile(self)
2598            except EOFHeaderError as e:
2599                if self.ignore_zeros:
2600                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2601                    self.offset += BLOCKSIZE
2602                    continue
2603            except InvalidHeaderError as e:
2604                if self.ignore_zeros:
2605                    self._dbg(2, "0x%X: %s" % (self.offset, e))
2606                    self.offset += BLOCKSIZE
2607                    continue
2608                elif self.offset == 0:
2609                    raise ReadError(str(e)) from None
2610            except EmptyHeaderError:
2611                if self.offset == 0:
2612                    raise ReadError("empty file") from None
2613            except TruncatedHeaderError as e:
2614                if self.offset == 0:
2615                    raise ReadError(str(e)) from None
2616            except SubsequentHeaderError as e:
2617                raise ReadError(str(e)) from None
2618            except Exception as e:
2619                try:
2620                    import zlib
2621                    if isinstance(e, zlib.error):
2622                        raise ReadError(f'zlib error: {e}') from None
2623                    else:
2624                        raise e
2625                except ImportError:
2626                    raise e
2627            break
2628
2629        if tarinfo is not None:
2630            self.members.append(tarinfo)
2631        else:
2632            self._loaded = True
2633
2634        return tarinfo
2635
2636    #--------------------------------------------------------------------------
2637    # Little helper methods:
2638
2639    def _getmember(self, name, tarinfo=None, normalize=False):
2640        """Find an archive member by name from bottom to top.
2641           If tarinfo is given, it is used as the starting point.
2642        """
2643        # Ensure that all members have been loaded.
2644        members = self.getmembers()
2645
2646        # Limit the member search list up to tarinfo.
2647        skipping = False
2648        if tarinfo is not None:
2649            try:
2650                index = members.index(tarinfo)
2651            except ValueError:
2652                # The given starting point might be a (modified) copy.
2653                # We'll later skip members until we find an equivalent.
2654                skipping = True
2655            else:
2656                # Happy fast path
2657                members = members[:index]
2658
2659        if normalize:
2660            name = os.path.normpath(name)
2661
2662        for member in reversed(members):
2663            if skipping:
2664                if tarinfo.offset == member.offset:
2665                    skipping = False
2666                continue
2667            if normalize:
2668                member_name = os.path.normpath(member.name)
2669            else:
2670                member_name = member.name
2671
2672            if name == member_name:
2673                return member
2674
2675        if skipping:
2676            # Starting point was not found
2677            raise ValueError(tarinfo)
2678
2679    def _load(self):
2680        """Read through the entire archive file and look for readable
2681           members.
2682        """
2683        while True:
2684            tarinfo = self.next()
2685            if tarinfo is None:
2686                break
2687        self._loaded = True
2688
2689    def _check(self, mode=None):
2690        """Check if TarFile is still open, and if the operation's mode
2691           corresponds to TarFile's mode.
2692        """
2693        if self.closed:
2694            raise OSError("%s is closed" % self.__class__.__name__)
2695        if mode is not None and self.mode not in mode:
2696            raise OSError("bad operation for mode %r" % self.mode)
2697
2698    def _find_link_target(self, tarinfo):
2699        """Find the target member of a symlink or hardlink member in the
2700           archive.
2701        """
2702        if tarinfo.issym():
2703            # Always search the entire archive.
2704            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
2705            limit = None
2706        else:
2707            # Search the archive before the link, because a hard link is
2708            # just a reference to an already archived file.
2709            linkname = tarinfo.linkname
2710            limit = tarinfo
2711
2712        member = self._getmember(linkname, tarinfo=limit, normalize=True)
2713        if member is None:
2714            raise KeyError("linkname %r not found" % linkname)
2715        return member
2716
2717    def __iter__(self):
2718        """Provide an iterator object.
2719        """
2720        if self._loaded:
2721            yield from self.members
2722            return
2723
2724        # Yield items using TarFile's next() method.
2725        # When all members have been read, set TarFile as _loaded.
2726        index = 0
2727        # Fix for SF #1100429: Under rare circumstances it can
2728        # happen that getmembers() is called during iteration,
2729        # which will have already exhausted the next() method.
2730        if self.firstmember is not None:
2731            tarinfo = self.next()
2732            index += 1
2733            yield tarinfo
2734
2735        while True:
2736            if index < len(self.members):
2737                tarinfo = self.members[index]
2738            elif not self._loaded:
2739                tarinfo = self.next()
2740                if not tarinfo:
2741                    self._loaded = True
2742                    return
2743            else:
2744                return
2745            index += 1
2746            yield tarinfo
2747
2748    def _dbg(self, level, msg):
2749        """Write debugging output to sys.stderr.
2750        """
2751        if level <= self.debug:
2752            print(msg, file=sys.stderr)
2753
2754    def __enter__(self):
2755        self._check()
2756        return self
2757
2758    def __exit__(self, type, value, traceback):
2759        if type is None:
2760            self.close()
2761        else:
2762            # An exception occurred. We must not call close() because
2763            # it would try to write end-of-archive blocks and padding.
2764            if not self._extfileobj:
2765                self.fileobj.close()
2766            self.closed = True
2767
2768#--------------------
2769# exported functions
2770#--------------------
2771
2772def is_tarfile(name):
2773    """Return True if name points to a tar archive that we
2774       are able to handle, else return False.
2775
2776       'name' should be a string, file, or file-like object.
2777    """
2778    try:
2779        if hasattr(name, "read"):
2780            pos = name.tell()
2781            t = open(fileobj=name)
2782            name.seek(pos)
2783        else:
2784            t = open(name)
2785        t.close()
2786        return True
2787    except TarError:
2788        return False
2789
2790open = TarFile.open
2791
2792
2793def main():
2794    import argparse
2795
2796    description = 'A simple command-line interface for tarfile module.'
2797    parser = argparse.ArgumentParser(description=description)
2798    parser.add_argument('-v', '--verbose', action='store_true', default=False,
2799                        help='Verbose output')
2800    parser.add_argument('--filter', metavar='<filtername>',
2801                        choices=_NAMED_FILTERS,
2802                        help='Filter for extraction')
2803
2804    group = parser.add_mutually_exclusive_group(required=True)
2805    group.add_argument('-l', '--list', metavar='<tarfile>',
2806                       help='Show listing of a tarfile')
2807    group.add_argument('-e', '--extract', nargs='+',
2808                       metavar=('<tarfile>', '<output_dir>'),
2809                       help='Extract tarfile into target dir')
2810    group.add_argument('-c', '--create', nargs='+',
2811                       metavar=('<name>', '<file>'),
2812                       help='Create tarfile from sources')
2813    group.add_argument('-t', '--test', metavar='<tarfile>',
2814                       help='Test if a tarfile is valid')
2815
2816    args = parser.parse_args()
2817
2818    if args.filter and args.extract is None:
2819        parser.exit(1, '--filter is only valid for extraction\n')
2820
2821    if args.test is not None:
2822        src = args.test
2823        if is_tarfile(src):
2824            with open(src, 'r') as tar:
2825                tar.getmembers()
2826                print(tar.getmembers(), file=sys.stderr)
2827            if args.verbose:
2828                print('{!r} is a tar archive.'.format(src))
2829        else:
2830            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2831
2832    elif args.list is not None:
2833        src = args.list
2834        if is_tarfile(src):
2835            with TarFile.open(src, 'r:*') as tf:
2836                tf.list(verbose=args.verbose)
2837        else:
2838            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2839
2840    elif args.extract is not None:
2841        if len(args.extract) == 1:
2842            src = args.extract[0]
2843            curdir = os.curdir
2844        elif len(args.extract) == 2:
2845            src, curdir = args.extract
2846        else:
2847            parser.exit(1, parser.format_help())
2848
2849        if is_tarfile(src):
2850            with TarFile.open(src, 'r:*') as tf:
2851                tf.extractall(path=curdir, filter=args.filter)
2852            if args.verbose:
2853                if curdir == '.':
2854                    msg = '{!r} file is extracted.'.format(src)
2855                else:
2856                    msg = ('{!r} file is extracted '
2857                           'into {!r} directory.').format(src, curdir)
2858                print(msg)
2859        else:
2860            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2861
2862    elif args.create is not None:
2863        tar_name = args.create.pop(0)
2864        _, ext = os.path.splitext(tar_name)
2865        compressions = {
2866            # gz
2867            '.gz': 'gz',
2868            '.tgz': 'gz',
2869            # xz
2870            '.xz': 'xz',
2871            '.txz': 'xz',
2872            # bz2
2873            '.bz2': 'bz2',
2874            '.tbz': 'bz2',
2875            '.tbz2': 'bz2',
2876            '.tb2': 'bz2',
2877        }
2878        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2879        tar_files = args.create
2880
2881        with TarFile.open(tar_name, tar_mode) as tf:
2882            for file_name in tar_files:
2883                tf.add(file_name)
2884
2885        if args.verbose:
2886            print('{!r} file created.'.format(tar_name))
2887
2888if __name__ == '__main__':
2889    main()
2890