1"""
2Read and write ZIP files.
3
4XXX references to utf-8 need further investigation.
5"""
6import binascii
7import importlib.util
8import io
9import itertools
10import os
11import posixpath
12import shutil
13import stat
14import struct
15import sys
16import threading
17import time
18import contextlib
19import pathlib
20
21try:
22    import zlib # We may need its compression method
23    crc32 = zlib.crc32
24except ImportError:
25    zlib = None
26    crc32 = binascii.crc32
27
28try:
29    import bz2 # We may need its compression method
30except ImportError:
31    bz2 = None
32
33try:
34    import lzma # We may need its compression method
35except ImportError:
36    lzma = None
37
38__all__ = ["BadZipFile", "BadZipfile", "error",
39           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA",
40           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
41           "Path"]
42
43class BadZipFile(Exception):
44    pass
45
46
47class LargeZipFile(Exception):
48    """
49    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
50    and those extensions are disabled.
51    """

error = BadZipfile = BadZipFile      # Pre-3.2 compatibility names


ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = (1 << 16) - 1
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
ZIP_LZMA = 14
# Other ZIP compression methods not supported

DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
LZMA_VERSION = 63
# we recognize (but not necessarily support) all features up to that version
MAX_EXTRACT_VERSION = 63

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT = 8
_ECD_LOCATION = 9

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18

# General purpose bit flags
# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes)
_MASK_ENCRYPTED = 1 << 0
# Bits 1 and 2 have different meanings depending on the compression used.
_MASK_COMPRESS_OPTION_1 = 1 << 1
# _MASK_COMPRESS_OPTION_2 = 1 << 2
# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed
# size are zero in the local header and the real values are written in the data
# descriptor immediately following the compressed data.
_MASK_USE_DATA_DESCRIPTOR = 1 << 3
# Bit 4: Reserved for use with compression method 8, for enhanced deflating.
# _MASK_RESERVED_BIT_4 = 1 << 4
_MASK_COMPRESSED_PATCH = 1 << 5
_MASK_STRONG_ENCRYPTION = 1 << 6
# _MASK_UNUSED_BIT_7 = 1 << 7
# _MASK_UNUSED_BIT_8 = 1 << 8
# _MASK_UNUSED_BIT_9 = 1 << 9
# _MASK_UNUSED_BIT_10 = 1 << 10
_MASK_UTF_FILENAME = 1 << 11
# Bit 12: Reserved by PKWARE for enhanced compression.
# _MASK_RESERVED_BIT_12 = 1 << 12
# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13
# Bit 14, 15: Reserved by PKWARE
# _MASK_RESERVED_BIT_14 = 1 << 14
# _MASK_RESERVED_BIT_15 = 1 << 15

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9

_DD_SIGNATURE = 0x08074b50

_EXTRA_FIELD_STRUCT = struct.Struct('<HH')

def _strip_extra(extra, xids):
    # Remove Extra Fields with specified IDs.
    unpack = _EXTRA_FIELD_STRUCT.unpack
    modified = False
    buffer = []
    start = i = 0
    while i + 4 <= len(extra):
        xid, xlen = unpack(extra[i : i + 4])
        j = i + 4 + xlen
        if xid in xids:
            if i != start:
                buffer.append(extra[start : i])
            start = j
            modified = True
        i = j
    if not modified:
        return extra
    if start != len(extra):
        buffer.append(extra[start:])
    return b''.join(buffer)

def _check_zipfile(fp):
    try:
        if _EndRecData(fp):
            return True         # file has correct magic number
    except OSError:
        pass
    return False

def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    """
    result = False
    try:
        if hasattr(filename, "read"):
            result = _check_zipfile(fp=filename)
        else:
            with open(filename, "rb") as fp:
                result = _check_zipfile(fp)
    except OSError:
        pass
    return result
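
# Illustrative sketch (not part of the module): a common pattern is to test a
# path with is_zipfile() before opening it.  The path 'example.zip' below is a
# placeholder.
#
#     if is_zipfile("example.zip"):
#         with ZipFile("example.zip") as zf:
#             print(zf.namelist())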

def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except OSError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks > 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec


def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is a ZIP file with no archive comment (the
296    # "end of central directory" structure should be the last item in the
297    # file if this is the case).
298    try:
299        fpin.seek(-sizeEndCentDir, 2)
300    except OSError:
301        return None
302    data = fpin.read()
303    if (len(data) == sizeEndCentDir and
304        data[0:4] == stringEndArchive and
305        data[-2:] == b"\000\000"):
306        # the signature is correct and there's no comment, unpack structure
307        endrec = struct.unpack(structEndArchive, data)
308        endrec=list(endrec)
309
310        # Append a blank comment and record start offset
311        endrec.append(b"")
312        endrec.append(filesize - sizeEndCentDir)
313
314        # Try to read the "Zip64 end of central directory" structure
315        return _EndRecData64(fpin, -sizeEndCentDir, endrec)
316
317    # Either this is not a ZIP file, or it is a ZIP file with an archive
318    # comment.  Search the end of the file for the "end of central directory"
319    # record signature. The comment is the last item in the ZIP file and may be
320    # up to 64K long.  It is assumed that the "end of central directory" magic
321    # number does not appear in the comment.
322    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
323    fpin.seek(maxCommentStart, 0)
324    data = fpin.read()
325    start = data.rfind(stringEndArchive)
326    if start >= 0:
327        # found the magic number; attempt to unpack and interpret
328        recData = data[start:start+sizeEndCentDir]
329        if len(recData) != sizeEndCentDir:
330            # Zip file is corrupted.
331            return None
332        endrec = list(struct.unpack(structEndArchive, recData))
333        commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
334        comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
335        endrec.append(comment)
336        endrec.append(maxCommentStart + start)
337
338        # Try to read the "Zip64 end of central directory" structure
339        return _EndRecData64(fpin, maxCommentStart + start - filesize,
340                             endrec)
341
342    # Unable to find a valid end of central directory structure
343    return None
344
345
346class ZipInfo (object):
347    """Class with attributes describing each file in the ZIP archive."""
348
349    __slots__ = (
350        'orig_filename',
351        'filename',
352        'date_time',
353        'compress_type',
354        '_compresslevel',
355        'comment',
356        'extra',
357        'create_system',
358        'create_version',
359        'extract_version',
360        'reserved',
361        'flag_bits',
362        'volume',
363        'internal_attr',
364        'external_attr',
365        'header_offset',
366        'CRC',
367        'compress_size',
368        'file_size',
369        '_raw_time',
370    )
371
372    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
373        self.orig_filename = filename   # Original file name in archive
374
375        # Terminate the file name at the first null byte.  Null bytes in file
376        # names are used as tricks by viruses in archives.
377        null_byte = filename.find(chr(0))
378        if null_byte >= 0:
379            filename = filename[0:null_byte]
380        # This is used to ensure paths in generated ZIP files always use
381        # forward slashes as the directory separator, as required by the
382        # ZIP format specification.
383        if os.sep != "/" and os.sep in filename:
384            filename = filename.replace(os.sep, "/")
385
386        self.filename = filename        # Normalized file name
387        self.date_time = date_time      # year, month, day, hour, min, sec
388
389        if date_time[0] < 1980:
390            raise ValueError('ZIP does not support timestamps before 1980')
391
392        # Standard values:
393        self.compress_type = ZIP_STORED # Type of compression for the file
394        self._compresslevel = None      # Level for the compressor
395        self.comment = b""              # Comment for each file
396        self.extra = b""                # ZIP extra data
397        if sys.platform == 'win32':
398            self.create_system = 0          # System which created ZIP archive
399        else:
400            # Assume everything else is unix-y
401            self.create_system = 3          # System which created ZIP archive
402        self.create_version = DEFAULT_VERSION  # Version which created ZIP archive
403        self.extract_version = DEFAULT_VERSION # Version needed to extract archive
404        self.reserved = 0               # Must be zero
405        self.flag_bits = 0              # ZIP flag bits
406        self.volume = 0                 # Volume number of file header
407        self.internal_attr = 0          # Internal attributes
408        self.external_attr = 0          # External file attributes
409        self.compress_size = 0          # Size of the compressed file
410        self.file_size = 0              # Size of the uncompressed file
411        # Other attributes are set by class ZipFile:
412        # header_offset         Byte offset to the file header
413        # CRC                   CRC-32 of the uncompressed file
414
415    def __repr__(self):
416        result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)]
417        if self.compress_type != ZIP_STORED:
418            result.append(' compress_type=%s' %
419                          compressor_names.get(self.compress_type,
420                                               self.compress_type))
421        hi = self.external_attr >> 16
422        lo = self.external_attr & 0xFFFF
423        if hi:
424            result.append(' filemode=%r' % stat.filemode(hi))
425        if lo:
426            result.append(' external_attr=%#x' % lo)
427        isdir = self.is_dir()
428        if not isdir or self.file_size:
429            result.append(' file_size=%r' % self.file_size)
430        if ((not isdir or self.compress_size) and
431            (self.compress_type != ZIP_STORED or
432             self.file_size != self.compress_size)):
433            result.append(' compress_size=%r' % self.compress_size)
434        result.append('>')
435        return ''.join(result)
436
437    def FileHeader(self, zip64=None):
438        """Return the per-file header as a bytes object.
439
440        When the optional zip64 arg is None rather than a bool, we will
441        decide based upon the file_size and compress_size, if known,
442        False otherwise.
443        """
444        dt = self.date_time
445        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
446        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
447        if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
448            # Set these to zero because we write them after the file data
449            CRC = compress_size = file_size = 0
450        else:
451            CRC = self.CRC
452            compress_size = self.compress_size
453            file_size = self.file_size
454
455        extra = self.extra
456
457        min_version = 0
458        if zip64 is None:
459            # We always explicitly pass zip64 within this module.... This
460            # remains for anyone using ZipInfo.FileHeader as a public API.
461            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
462        if zip64:
463            fmt = '<HHQQ'
464            extra = extra + struct.pack(fmt,
465                                        1, struct.calcsize(fmt)-4, file_size, compress_size)
466            file_size = 0xffffffff
467            compress_size = 0xffffffff
468            min_version = ZIP64_VERSION
469
470        if self.compress_type == ZIP_BZIP2:
471            min_version = max(BZIP2_VERSION, min_version)
472        elif self.compress_type == ZIP_LZMA:
473            min_version = max(LZMA_VERSION, min_version)
474
475        self.extract_version = max(min_version, self.extract_version)
476        self.create_version = max(min_version, self.create_version)
477        filename, flag_bits = self._encodeFilenameFlags()
478        header = struct.pack(structFileHeader, stringFileHeader,
479                             self.extract_version, self.reserved, flag_bits,
480                             self.compress_type, dostime, dosdate, CRC,
481                             compress_size, file_size,
482                             len(filename), len(extra))
483        return header + filename + extra
484
485    def _encodeFilenameFlags(self):
486        try:
487            return self.filename.encode('ascii'), self.flag_bits
488        except UnicodeEncodeError:
489            return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME
490
491    def _decodeExtra(self):
492        # Try to decode the extra field.
493        extra = self.extra
494        unpack = struct.unpack
495        while len(extra) >= 4:
496            tp, ln = unpack('<HH', extra[:4])
497            if ln+4 > len(extra):
498                raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln))
499            if tp == 0x0001:
500                data = extra[4:ln+4]
501                # ZIP64 extension (large files and/or large archives)
502                try:
503                    if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF):
504                        field = "File size"
505                        self.file_size, = unpack('<Q', data[:8])
506                        data = data[8:]
507                    if self.compress_size == 0xFFFF_FFFF:
508                        field = "Compress size"
509                        self.compress_size, = unpack('<Q', data[:8])
510                        data = data[8:]
511                    if self.header_offset == 0xFFFF_FFFF:
512                        field = "Header offset"
513                        self.header_offset, = unpack('<Q', data[:8])
514                except struct.error:
515                    raise BadZipFile(f"Corrupt zip64 extra field. "
516                                     f"{field} not found.") from None
517
518            extra = extra[ln+4:]
519
520    @classmethod
521    def from_file(cls, filename, arcname=None, *, strict_timestamps=True):
522        """Construct an appropriate ZipInfo for a file on the filesystem.
523
524        filename should be the path to a file or directory on the filesystem.
525
526        arcname is the name which it will have within the archive (by default,
527        this will be the same as filename, but without a drive letter and with
528        leading path separators removed).
529        """
530        if isinstance(filename, os.PathLike):
531            filename = os.fspath(filename)
532        st = os.stat(filename)
533        isdir = stat.S_ISDIR(st.st_mode)
534        mtime = time.localtime(st.st_mtime)
535        date_time = mtime[0:6]
536        if not strict_timestamps and date_time[0] < 1980:
537            date_time = (1980, 1, 1, 0, 0, 0)
538        elif not strict_timestamps and date_time[0] > 2107:
539            date_time = (2107, 12, 31, 23, 59, 59)
540        # Create ZipInfo instance to store file information
541        if arcname is None:
542            arcname = filename
543        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
544        while arcname[0] in (os.sep, os.altsep):
545            arcname = arcname[1:]
546        if isdir:
547            arcname += '/'
548        zinfo = cls(arcname, date_time)
549        zinfo.external_attr = (st.st_mode & 0xFFFF) << 16  # Unix attributes
550        if isdir:
551            zinfo.file_size = 0
552            zinfo.external_attr |= 0x10  # MS-DOS directory flag
553        else:
554            zinfo.file_size = st.st_size
555
556        return zinfo
557
558    def is_dir(self):
559        """Return True if this archive member is a directory."""
560        return self.filename[-1] == '/'
561
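# Illustrative sketch (not part of the module): ZipInfo.from_file() builds the
# metadata for a filesystem path, which can then be passed to ZipFile.open(...,
# mode='w') to stream the data in.  The paths below are placeholders.
#
#     zinfo = ZipInfo.from_file("data/report.txt", arcname="report.txt")
#     with ZipFile("out.zip", "w") as zf:
#         with open("data/report.txt", "rb") as src, zf.open(zinfo, "w") as dest:
#             shutil.copyfileobj(src, dest)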

# ZIP encryption uses the CRC32 one-byte primitive for scrambling some
# internal keys. We noticed that a direct implementation is faster than
# relying on binascii.crc32().

_crctable = None
def _gen_crc(crc):
    for j in range(8):
        if crc & 1:
            crc = (crc >> 1) ^ 0xEDB88320
        else:
            crc >>= 1
    return crc

# ZIP supports a password-based form of encryption. Even though known
# plaintext attacks have been found against it, it is still useful
# to be able to get data out of such a file.
#
# Usage:
#     zd = _ZipDecrypter(mypwd)
#     plain_bytes = zd(cypher_bytes)

def _ZipDecrypter(pwd):
    key0 = 305419896
    key1 = 591751049
    key2 = 878082192

    global _crctable
    if _crctable is None:
        _crctable = list(map(_gen_crc, range(256)))
    crctable = _crctable

    def crc32(ch, crc):
        """Compute the CRC32 primitive on one byte."""
        return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF]

    def update_keys(c):
        nonlocal key0, key1, key2
        key0 = crc32(c, key0)
        key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF
        key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF
        key2 = crc32(key1 >> 24, key2)

    for p in pwd:
        update_keys(p)

    def decrypter(data):
        """Decrypt a bytes object."""
        result = bytearray()
        append = result.append
        for c in data:
            k = key2 | 2
            c ^= ((k * (k^1)) >> 8) & 0xFF
            update_keys(c)
            append(c)
        return bytes(result)

    return decrypter


class LZMACompressor:

    def __init__(self):
        self._comp = None

    def _init(self):
        props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1})
        self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[
            lzma._decode_filter_properties(lzma.FILTER_LZMA1, props)
        ])
        return struct.pack('<BBH', 9, 4, len(props)) + props

    def compress(self, data):
        if self._comp is None:
            return self._init() + self._comp.compress(data)
        return self._comp.compress(data)

    def flush(self):
        if self._comp is None:
            return self._init() + self._comp.flush()
        return self._comp.flush()


class LZMADecompressor:

    def __init__(self):
        self._decomp = None
        self._unconsumed = b''
        self.eof = False

    def decompress(self, data):
        if self._decomp is None:
            self._unconsumed += data
            if len(self._unconsumed) <= 4:
                return b''
            psize, = struct.unpack('<H', self._unconsumed[2:4])
            if len(self._unconsumed) <= 4 + psize:
                return b''

            self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[
                lzma._decode_filter_properties(lzma.FILTER_LZMA1,
                                               self._unconsumed[4:4 + psize])
            ])
            data = self._unconsumed[4 + psize:]
            del self._unconsumed

        result = self._decomp.decompress(data)
        self.eof = self._decomp.eof
        return result


compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce',
    3: 'reduce',
    4: 'reduce',
    5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}

def _check_compression(compression):
    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression == ZIP_LZMA:
        if not lzma:
            raise RuntimeError(
                "Compression requires the (missing) lzma module")
    else:
        raise NotImplementedError("That compression method is not supported")


def _get_compressor(compress_type, compresslevel=None):
    if compress_type == ZIP_DEFLATED:
        if compresslevel is not None:
            return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
        return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        if compresslevel is not None:
            return bz2.BZ2Compressor(compresslevel)
        return bz2.BZ2Compressor()
    # compresslevel is ignored for ZIP_LZMA
    elif compress_type == ZIP_LZMA:
        return LZMACompressor()
    else:
        return None


def _get_decompressor(compress_type):
    _check_compression(compress_type)
    if compress_type == ZIP_STORED:
        return None
    elif compress_type == ZIP_DEFLATED:
        return zlib.decompressobj(-15)
    elif compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    elif compress_type == ZIP_LZMA:
        return LZMADecompressor()
    else:
        descr = compressor_names.get(compress_type)
        if descr:
            raise NotImplementedError("compression type %d (%s)" % (compress_type, descr))
        else:
            raise NotImplementedError("compression type %d" % (compress_type,))


class _SharedFile:
    def __init__(self, file, pos, close, lock, writing):
        self._file = file
        self._pos = pos
        self._close = close
        self._lock = lock
        self._writing = writing
        self.seekable = file.seekable

    def tell(self):
        return self._pos

    def seek(self, offset, whence=0):
        with self._lock:
            if self._writing():
                raise ValueError("Can't reposition in the ZIP file while "
                        "there is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(offset, whence)
            self._pos = self._file.tell()
            return self._pos

    def read(self, n=-1):
        with self._lock:
            if self._writing():
                raise ValueError("Can't read from the ZIP file while there "
                        "is an open writing handle on it. "
                        "Close the writing handle before trying to read.")
            self._file.seek(self._pos)
            data = self._file.read(n)
            self._pos = self._file.tell()
            return data

    def close(self):
        if self._file is not None:
            fileobj = self._file
            self._file = None
            self._close(fileobj)

# Provide the tell method for unseekable streams
class _Tellable:
    def __init__(self, fp):
        self.fp = fp
        self.offset = 0

    def write(self, data):
        n = self.fp.write(data)
        self.offset += n
        return n

    def tell(self):
        return self.offset

    def flush(self):
        self.fp.flush()

    def close(self):
        self.fp.close()


class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    MAX_N = 1 << 31 - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Chunk size to read during seek
    MAX_SEEK_READ = 1 << 24

    def __init__(self, fileobj, mode, zipinfo, pwd=None,
                 close_fileobj=False):
        self._fileobj = fileobj
        self._pwd = pwd
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_left = zipinfo.compress_size
        self._left = zipinfo.file_size

        self._decompressor = _get_decompressor(self._compress_type)

        self._eof = False
        self._readbuffer = b''
        self._offset = 0

        self.newlines = None

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'')
        else:
            self._expected_crc = None

        self._seekable = False
        try:
            if fileobj.seekable():
                self._orig_compress_start = fileobj.tell()
                self._orig_compress_size = zipinfo.compress_size
                self._orig_file_size = zipinfo.file_size
                self._orig_start_crc = self._running_crc
                self._seekable = True
        except AttributeError:
            pass

        self._decrypter = None
        if pwd:
            if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # compare against the file type from extended local headers
                check_byte = (zipinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zipinfo.CRC >> 24) & 0xff
            h = self._init_decrypter()
            if h != check_byte:
                raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename)


    def _init_decrypter(self):
        self._decrypter = _ZipDecrypter(self._pwd)
        # The first 12 bytes in the cypher stream is an encryption header
        #  used to strengthen the algorithm. The first 11 bytes are
        #  completely random, while the 12th contains the MSB of the CRC,
        #  or the MSB of the file time depending on the header type
        #  and is used to check the correctness of the password.
        header = self._fileobj.read(12)
        self._compress_left -= 12
        return self._decrypter(header)[11]

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if not self.closed:
            result.append(' name=%r mode=%r' % (self.name, self.mode))
            if self._compress_type != ZIP_STORED:
                result.append(' compress_type=%s' %
                              compressor_names.get(self._compress_type,
                                                   self._compress_type))
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        return io.BufferedIOBase.readline(self, limit)

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        if self.closed:
            raise ValueError("read from closed file.")
        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                buf += self._read1(self.MAX_N)
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        while n > 0 and not self._eof:
            data = self._read1(n)
            if n < len(data):
                self._readbuffer = data
                self._offset = n
                buf += data[:n]
                break
            buf += data
            n -= len(data)
        return buf

    def _update_crc(self, newdata):
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc)
        # Check the CRC if we're at the end of the file
        if self._eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        if n is None or n < 0:
            buf = self._readbuffer[self._offset:]
            self._readbuffer = b''
            self._offset = 0
            while not self._eof:
                data = self._read1(self.MAX_N)
                if data:
                    buf += data
                    break
            return buf

        end = n + self._offset
        if end < len(self._readbuffer):
            buf = self._readbuffer[self._offset:end]
            self._offset = end
            return buf

        n = end - len(self._readbuffer)
        buf = self._readbuffer[self._offset:]
        self._readbuffer = b''
        self._offset = 0
        if n > 0:
            while not self._eof:
                data = self._read1(n)
                if n < len(data):
                    self._readbuffer = data
                    self._offset = n
                    buf += data[:n]
                    break
                if data:
                    buf += data
                    break
        return buf

    def _read1(self, n):
        # Read up to n compressed bytes with at most one read() system call,
        # decrypt and decompress them.
        if self._eof or n <= 0:
            return b''

        # Read from file.
        if self._compress_type == ZIP_DEFLATED:
            ## Handle unconsumed data.
            data = self._decompressor.unconsumed_tail
            if n > len(data):
                data += self._read2(n - len(data))
        else:
            data = self._read2(n)

        if self._compress_type == ZIP_STORED:
            self._eof = self._compress_left <= 0
        elif self._compress_type == ZIP_DEFLATED:
            n = max(n, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(data, n)
            self._eof = (self._decompressor.eof or
                         self._compress_left <= 0 and
                         not self._decompressor.unconsumed_tail)
            if self._eof:
                data += self._decompressor.flush()
        else:
            data = self._decompressor.decompress(data)
            self._eof = self._decompressor.eof or self._compress_left <= 0

        data = data[:self._left]
        self._left -= len(data)
        if self._left <= 0:
            self._eof = True
        self._update_crc(data)
        return data

    def _read2(self, n):
        if self._compress_left <= 0:
            return b''

        n = max(n, self.MIN_READ_SIZE)
        n = min(n, self._compress_left)

        data = self._fileobj.read(n)
        self._compress_left -= len(data)
        if not data:
            raise EOFError

        if self._decrypter is not None:
            data = self._decrypter(data)
        return data

    def close(self):
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()

    def seekable(self):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return self._seekable

    def seek(self, offset, whence=0):
        if self.closed:
            raise ValueError("seek on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        curr_pos = self.tell()
        if whence == 0: # Seek from start of file
            new_pos = offset
        elif whence == 1: # Seek from current position
            new_pos = curr_pos + offset
        elif whence == 2: # Seek from EOF
            new_pos = self._orig_file_size + offset
        else:
            raise ValueError("whence must be os.SEEK_SET (0), "
                             "os.SEEK_CUR (1), or os.SEEK_END (2)")

        if new_pos > self._orig_file_size:
            new_pos = self._orig_file_size

        if new_pos < 0:
            new_pos = 0

        read_offset = new_pos - curr_pos
        buff_offset = read_offset + self._offset

        if buff_offset >= 0 and buff_offset < len(self._readbuffer):
            # Just move the _offset index if the new position is in the _readbuffer
            self._offset = buff_offset
            read_offset = 0
        elif read_offset < 0:
            # Position is before the current position. Reset the ZipExtFile
            self._fileobj.seek(self._orig_compress_start)
            self._running_crc = self._orig_start_crc
            self._compress_left = self._orig_compress_size
            self._left = self._orig_file_size
            self._readbuffer = b''
            self._offset = 0
            self._decompressor = _get_decompressor(self._compress_type)
            self._eof = False
            read_offset = new_pos
            if self._decrypter is not None:
                self._init_decrypter()

        while read_offset > 0:
            read_len = min(self.MAX_SEEK_READ, read_offset)
            self.read(read_len)
            read_offset -= read_len

        return self.tell()

    def tell(self):
        if self.closed:
            raise ValueError("tell on closed file.")
        if not self._seekable:
            raise io.UnsupportedOperation("underlying stream is not seekable")
        filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset
        return filepos

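# Illustrative sketch (not part of the module): the ZipExtFile objects returned
# by ZipFile.open() support seek() and tell() when the underlying file object
# is seekable, so a member can be read with random access.  The archive and
# member names below are placeholders.
#
#     with ZipFile("example.zip") as zf:
#         with zf.open("member.bin") as f:
#             f.seek(128)        # skip the first 128 decompressed bytes
#             chunk = f.read(16)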

class _ZipWriteFile(io.BufferedIOBase):
    def __init__(self, zf, zinfo, zip64):
        self._zinfo = zinfo
        self._zip64 = zip64
        self._zipfile = zf
        self._compressor = _get_compressor(zinfo.compress_type,
                                           zinfo._compresslevel)
        self._file_size = 0
        self._compress_size = 0
        self._crc = 0

    @property
    def _fileobj(self):
        return self._zipfile.fp

    def writable(self):
        return True

    def write(self, data):
        if self.closed:
            raise ValueError('I/O operation on closed file.')

        # Accept any data that supports the buffer protocol
        if isinstance(data, (bytes, bytearray)):
            nbytes = len(data)
        else:
            data = memoryview(data)
            nbytes = data.nbytes
        self._file_size += nbytes

        self._crc = crc32(data, self._crc)
        if self._compressor:
            data = self._compressor.compress(data)
            self._compress_size += len(data)
        self._fileobj.write(data)
        return nbytes

    def close(self):
        if self.closed:
            return
        try:
            super().close()
            # Flush any data from the compressor, and update header info
            if self._compressor:
                buf = self._compressor.flush()
                self._compress_size += len(buf)
                self._fileobj.write(buf)
                self._zinfo.compress_size = self._compress_size
            else:
                self._zinfo.compress_size = self._file_size
            self._zinfo.CRC = self._crc
            self._zinfo.file_size = self._file_size

            if not self._zip64:
                if self._file_size > ZIP64_LIMIT:
                    raise RuntimeError("File size too large, try using force_zip64")
                if self._compress_size > ZIP64_LIMIT:
                    raise RuntimeError("Compressed size too large, try using force_zip64")

            # Write updated header info
            if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
                # Write CRC and file sizes after the file data
                fmt = '<LLQQ' if self._zip64 else '<LLLL'
                self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC,
                    self._zinfo.compress_size, self._zinfo.file_size))
                self._zipfile.start_dir = self._fileobj.tell()
            else:
                # Seek backwards and write file header (which will now include
                # correct CRC and file sizes)

                # Preserve current position in file
                self._zipfile.start_dir = self._fileobj.tell()
                self._fileobj.seek(self._zinfo.header_offset)
                self._fileobj.write(self._zinfo.FileHeader(self._zip64))
                self._fileobj.seek(self._zipfile.start_dir)

            # Successfully written: Add file to our caches
            self._zipfile.filelist.append(self._zinfo)
            self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
        finally:
            self._zipfile._writing = False



class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
                compresslevel=None)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read 'r', write 'w', exclusive create 'x',
          or append 'a'.
    compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
                 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.
    compresslevel: None (default for the given compression type) or an integer
                   specifying the level to pass to the compressor.
                   When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
                   When using ZIP_DEFLATED integers 0 through 9 are accepted.
                   When using ZIP_BZIP2 integers 1 through 9 are accepted.

    """

    fp = None                   # Set here since __del__ checks it
    _windows_illegal_name_trans_table = None

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
                 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
        """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
        or append 'a'."""
        if mode not in ('r', 'w', 'x', 'a'):
            raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'")

        _check_compression(compression)

        self._allowZip64 = allowZip64
        self._didModify = False
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.compresslevel = compresslevel
        self.mode = mode
        self.pwd = None
        self._comment = b''
        self._strict_timestamps = strict_timestamps
        self.metadata_encoding = metadata_encoding

        # Check that we don't try to write with nonconforming codecs
        if self.metadata_encoding and mode != 'r':
            raise ValueError(
                "metadata_encoding is only supported for reading files")

        # Check if we were passed a file-like object
        if isinstance(file, os.PathLike):
            file = os.fspath(file)
        if isinstance(file, str):
            # No, it's a filename
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b',
                        'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'}
            filemode = modeDict[mode]
            while True:
                try:
                    self.fp = io.open(file, filemode)
                except OSError:
                    if filemode in modeDict:
                        filemode = modeDict[filemode]
                        continue
                    raise
                break
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)
        self._fileRefCnt = 1
        self._lock = threading.RLock()
        self._seekable = True
        self._writing = False

        try:
            if mode == 'r':
                self._RealGetContents()
            elif mode in ('w', 'x'):
                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
                try:
                    self.start_dir = self.fp.tell()
                except (AttributeError, OSError):
                    self.fp = _Tellable(self.fp)
                    self.start_dir = 0
                    self._seekable = False
                else:
                    # Some file-like objects can provide tell() but not seek()
                    try:
                        self.fp.seek(self.start_dir)
                    except (AttributeError, OSError):
                        self._seekable = False
            elif mode == 'a':
                try:
                    # See if file is a zip file
                    self._RealGetContents()
                    # seek to start of directory and overwrite
                    self.fp.seek(self.start_dir)
                except BadZipFile:
                    # file is not a zip file, just append
                    self.fp.seek(0, 2)

                    # set the modified flag so central directory gets written
                    # even if no files are added to the archive
                    self._didModify = True
                    self.start_dir = self.fp.tell()
            else:
                raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
        except:
            fp = self.fp
            self.fp = None
            self._fpclose(fp)
            raise

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __repr__(self):
        result = ['<%s.%s' % (self.__class__.__module__,
                              self.__class__.__qualname__)]
        if self.fp is not None:
            if self._filePassed:
                result.append(' file=%r' % self.fp)
            elif self.filename is not None:
                result.append(' filename=%r' % self.filename)
            result.append(' mode=%r' % self.mode)
        else:
            result.append(' [closed]')
        result.append('>')
        return ''.join(result)

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
        self._comment = endrec[_ECD_COMMENT]    # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print("given, inferred, offset", offset_cd, inferred, concat)
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        if self.start_dir < 0:
            raise BadZipFile("Bad offset for central directory")
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = io.BytesIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[_CD_FLAG_BITS]
            if flags & _MASK_UTF_FILENAME:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode(self.metadata_encoding or 'cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                            t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            if self.debug > 2:
                print("total", total)


    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self, file=None):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"),
              file=file)
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
                  file=file)

    def testzip(self):
        """Read all the files and check the CRC."""
        chunk_size = 2 ** 20
        for zinfo in self.filelist:
            try:
                # Read by chunks, to avoid an OverflowError or a
                # MemoryError with very large embedded files.
                with self.open(zinfo.filename, "r") as f:
                    while f.read(chunk_size):     # Check CRC-32
                        pass
            except BadZipFile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'."""
        info = self.NameToInfo.get(name)
        if info is None:
            raise KeyError(
                'There is no item named %r in the archive' % name)

        return info

    def setpassword(self, pwd):
        """Set default password for encrypted files."""
        if pwd and not isinstance(pwd, bytes):
            raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
        if pwd:
            self.pwd = pwd
        else:
            self.pwd = None

    @property
    def comment(self):
        """The comment text associated with the ZIP file."""
        return self._comment

    @comment.setter
    def comment(self, comment):
        if not isinstance(comment, bytes):
            raise TypeError("comment: expected bytes, got %s" % type(comment).__name__)
        # check for valid comment length
        if len(comment) > ZIP_MAX_COMMENT:
            import warnings
            warnings.warn('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT, stacklevel=2)
            comment = comment[:ZIP_MAX_COMMENT]
        self._comment = comment
        self._didModify = True

    def read(self, name, pwd=None):
        """Return file bytes for name."""
        with self.open(name, "r", pwd) as fp:
            return fp.read()

1513    def open(self, name, mode="r", pwd=None, *, force_zip64=False):
1514        """Return file-like object for 'name'.
1515
1516        name is a string for the file name within the ZIP file, or a ZipInfo
1517        object.
1518
1519        mode should be 'r' to read a file already in the ZIP file, or 'w' to
1520        write to a file newly added to the archive.
1521
1522        pwd is the password to decrypt files (only used for reading).
1523
1524        When writing, if the file size is not known in advance but may exceed
1525        2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
1526        files.  If the size is known in advance, it is best to pass a ZipInfo
1527        instance for name, with zinfo.file_size set.
1528        """
1529        if mode not in {"r", "w"}:
1530            raise ValueError('open() requires mode "r" or "w"')
1531        if pwd and (mode == "w"):
1532            raise ValueError("pwd is only supported for reading files")
1533        if not self.fp:
1534            raise ValueError(
1535                "Attempt to use ZIP archive that was already closed")
1536
1537        # Make sure we have an info object
1538        if isinstance(name, ZipInfo):
1539            # 'name' is already an info object
1540            zinfo = name
1541        elif mode == 'w':
1542            zinfo = ZipInfo(name)
1543            zinfo.compress_type = self.compression
1544            zinfo._compresslevel = self.compresslevel
1545        else:
1546            # Get info object for name
1547            zinfo = self.getinfo(name)
1548
1549        if mode == 'w':
1550            return self._open_to_write(zinfo, force_zip64=force_zip64)
1551
1552        if self._writing:
1553            raise ValueError("Can't read from the ZIP file while there "
1554                    "is an open writing handle on it. "
1555                    "Close the writing handle before trying to read.")
1556
1557        # Open for reading:
1558        self._fileRefCnt += 1
1559        zef_file = _SharedFile(self.fp, zinfo.header_offset,
1560                               self._fpclose, self._lock, lambda: self._writing)
1561        try:
1562            # Skip the file header:
1563            fheader = zef_file.read(sizeFileHeader)
1564            if len(fheader) != sizeFileHeader:
1565                raise BadZipFile("Truncated file header")
1566            fheader = struct.unpack(structFileHeader, fheader)
1567            if fheader[_FH_SIGNATURE] != stringFileHeader:
1568                raise BadZipFile("Bad magic number for file header")
1569
1570            fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
1571            if fheader[_FH_EXTRA_FIELD_LENGTH]:
1572                zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
1573
1574            if zinfo.flag_bits & _MASK_COMPRESSED_PATCH:
1575                # Zip 2.7: compressed patched data
1576                raise NotImplementedError("compressed patched data (flag bit 5)")
1577
1578            if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION:
1579                # strong encryption
1580                raise NotImplementedError("strong encryption (flag bit 6)")
1581
1582            if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME:
1583                # UTF-8 filename
1584                fname_str = fname.decode("utf-8")
1585            else:
1586                fname_str = fname.decode(self.metadata_encoding or "cp437")
1587
1588            if fname_str != zinfo.orig_filename:
1589                raise BadZipFile(
1590                    'File name in directory %r and header %r differ.'
1591                    % (zinfo.orig_filename, fname))
1592
1593            # check for encrypted flag & handle password
1594            is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED
1595            if is_encrypted:
1596                if not pwd:
1597                    pwd = self.pwd
1598                if pwd and not isinstance(pwd, bytes):
1599                    raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
1600                if not pwd:
1601                    raise RuntimeError("File %r is encrypted, password "
1602                                       "required for extraction" % name)
1603            else:
1604                pwd = None
1605
1606            return ZipExtFile(zef_file, mode, zinfo, pwd, True)
1607        except:
1608            zef_file.close()
1609            raise
1610
1611    def _open_to_write(self, zinfo, force_zip64=False):
1612        if force_zip64 and not self._allowZip64:
1613            raise ValueError(
1614                "force_zip64 is True, but allowZip64 was False when opening "
1615                "the ZIP file."
1616            )
1617        if self._writing:
1618            raise ValueError("Can't write to the ZIP file while there is "
1619                             "another write handle open on it. "
1620                             "Close the first handle before opening another.")
1621
1622        # Size and CRC are overwritten with correct data after processing the file
1623        zinfo.compress_size = 0
1624        zinfo.CRC = 0
1625
1626        zinfo.flag_bits = 0x00
1627        if zinfo.compress_type == ZIP_LZMA:
1628            # Compressed data includes an end-of-stream (EOS) marker
1629            zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1630        if not self._seekable:
1631            zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR
1632
1633        if not zinfo.external_attr:
1634            zinfo.external_attr = 0o600 << 16  # permissions: ?rw-------
1635
        # Compressed data can be slightly larger than the uncompressed data,
        # so allow a 5% margin when deciding whether ZIP64 is needed.
1637        zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1638        if not self._allowZip64 and zip64:
1639            raise LargeZipFile("Filesize would require ZIP64 extensions")
1640
1641        if self._seekable:
1642            self.fp.seek(self.start_dir)
1643        zinfo.header_offset = self.fp.tell()
1644
1645        self._writecheck(zinfo)
1646        self._didModify = True
1647
1648        self.fp.write(zinfo.FileHeader(zip64))
1649
1650        self._writing = True
1651        return _ZipWriteFile(self, zinfo, zip64)
1652
1653    def extract(self, member, path=None, pwd=None):
1654        """Extract a member from the archive to the current working directory,
1655           using its full name. Its file information is extracted as accurately
1656           as possible. `member' may be a filename or a ZipInfo object. You can
1657           specify a different directory using `path'.
1658        """
1659        if path is None:
1660            path = os.getcwd()
1661        else:
1662            path = os.fspath(path)
1663
1664        return self._extract_member(member, path, pwd)
1665
1666    def extractall(self, path=None, members=None, pwd=None):
1667        """Extract all members from the archive to the current working
1668           directory. `path' specifies a different directory to extract to.
1669           `members' is optional and must be a subset of the list returned
1670           by namelist().
1671        """
1672        if members is None:
1673            members = self.namelist()
1674
1675        if path is None:
1676            path = os.getcwd()
1677        else:
1678            path = os.fspath(path)
1679
1680        for zipinfo in members:
1681            self._extract_member(zipinfo, path, pwd)
1682
1683    @classmethod
1684    def _sanitize_windows_name(cls, arcname, pathsep):
1685        """Replace bad characters and remove trailing dots from parts."""
1686        table = cls._windows_illegal_name_trans_table
1687        if not table:
1688            illegal = ':<>|"?*'
1689            table = str.maketrans(illegal, '_' * len(illegal))
1690            cls._windows_illegal_name_trans_table = table
1691        arcname = arcname.translate(table)
1692        # remove trailing dots
1693        arcname = (x.rstrip('.') for x in arcname.split(pathsep))
1694        # rejoin, removing empty parts.
1695        arcname = pathsep.join(x for x in arcname if x)
1696        return arcname
1697
1698    def _extract_member(self, member, targetpath, pwd):
1699        """Extract the ZipInfo object 'member' to a physical
1700           file on the path targetpath.
1701        """
1702        if not isinstance(member, ZipInfo):
1703            member = self.getinfo(member)
1704
        # build the destination pathname, replacing
        # forward slashes with platform-specific separators.
1707        arcname = member.filename.replace('/', os.path.sep)
1708
1709        if os.path.altsep:
1710            arcname = arcname.replace(os.path.altsep, os.path.sep)
1711        # interpret absolute pathname as relative, remove drive letter or
1712        # UNC path, redundant separators, "." and ".." components.
1713        arcname = os.path.splitdrive(arcname)[1]
1714        invalid_path_parts = ('', os.path.curdir, os.path.pardir)
1715        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1716                                   if x not in invalid_path_parts)
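        # At this point path traversal components are gone, e.g. (with a
        # POSIX separator) '../../etc/passwd' has become 'etc/passwd'.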
1717        if os.path.sep == '\\':
1718            # filter illegal characters on Windows
1719            arcname = self._sanitize_windows_name(arcname, os.path.sep)
1720
1721        targetpath = os.path.join(targetpath, arcname)
1722        targetpath = os.path.normpath(targetpath)
1723
        # Create all parent directories if necessary.
1725        upperdirs = os.path.dirname(targetpath)
1726        if upperdirs and not os.path.exists(upperdirs):
1727            os.makedirs(upperdirs)
1728
1729        if member.is_dir():
1730            if not os.path.isdir(targetpath):
1731                os.mkdir(targetpath)
1732            return targetpath
1733
1734        with self.open(member, pwd=pwd) as source, \
1735             open(targetpath, "wb") as target:
1736            shutil.copyfileobj(source, target)
1737
1738        return targetpath
1739
1740    def _writecheck(self, zinfo):
1741        """Check for errors before writing a file to the archive."""
1742        if zinfo.filename in self.NameToInfo:
1743            import warnings
1744            warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3)
1745        if self.mode not in ('w', 'x', 'a'):
1746            raise ValueError("write() requires mode 'w', 'x', or 'a'")
1747        if not self.fp:
1748            raise ValueError(
1749                "Attempt to write ZIP archive that was already closed")
1750        _check_compression(zinfo.compress_type)
1751        if not self._allowZip64:
1752            requires_zip64 = None
1753            if len(self.filelist) >= ZIP_FILECOUNT_LIMIT:
1754                requires_zip64 = "Files count"
1755            elif zinfo.file_size > ZIP64_LIMIT:
1756                requires_zip64 = "Filesize"
1757            elif zinfo.header_offset > ZIP64_LIMIT:
1758                requires_zip64 = "Zipfile size"
1759            if requires_zip64:
1760                raise LargeZipFile(requires_zip64 +
1761                                   " would require ZIP64 extensions")
1762
1763    def write(self, filename, arcname=None,
1764              compress_type=None, compresslevel=None):
1765        """Put the bytes from filename into the archive under the name
1766        arcname."""
1767        if not self.fp:
1768            raise ValueError(
1769                "Attempt to write to ZIP archive that was already closed")
1770        if self._writing:
1771            raise ValueError(
1772                "Can't write to ZIP archive while an open writing handle exists"
1773            )
1774
1775        zinfo = ZipInfo.from_file(filename, arcname,
1776                                  strict_timestamps=self._strict_timestamps)
1777
1778        if zinfo.is_dir():
1779            zinfo.compress_size = 0
1780            zinfo.CRC = 0
1781            self.mkdir(zinfo)
1782        else:
1783            if compress_type is not None:
1784                zinfo.compress_type = compress_type
1785            else:
1786                zinfo.compress_type = self.compression
1787
1788            if compresslevel is not None:
1789                zinfo._compresslevel = compresslevel
1790            else:
1791                zinfo._compresslevel = self.compresslevel
1792
1793            with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
1794                shutil.copyfileobj(src, dest, 1024*8)
1795
1796    def writestr(self, zinfo_or_arcname, data,
1797                 compress_type=None, compresslevel=None):
1798        """Write a file into the archive.  The contents is 'data', which
1799        may be either a 'str' or a 'bytes' instance; if it is a 'str',
1800        it is encoded as UTF-8 first.
1801        'zinfo_or_arcname' is either a ZipInfo instance or
1802        the name of the file in the archive."""
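        # Illustrative sketch ('archive.zip' and the member names are
        # placeholders):
        #
        #     with ZipFile('archive.zip', 'w') as zf:
        #         zf.writestr('notes.txt', 'text is encoded as UTF-8')
        #         zf.writestr('raw.bin', b'\x00\x01', compress_type=ZIP_STORED)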
1803        if isinstance(data, str):
1804            data = data.encode("utf-8")
1805        if not isinstance(zinfo_or_arcname, ZipInfo):
1806            zinfo = ZipInfo(filename=zinfo_or_arcname,
1807                            date_time=time.localtime(time.time())[:6])
1808            zinfo.compress_type = self.compression
1809            zinfo._compresslevel = self.compresslevel
1810            if zinfo.filename[-1] == '/':
1811                zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
1812                zinfo.external_attr |= 0x10           # MS-DOS directory flag
1813            else:
1814                zinfo.external_attr = 0o600 << 16     # ?rw-------
1815        else:
1816            zinfo = zinfo_or_arcname
1817
1818        if not self.fp:
1819            raise ValueError(
1820                "Attempt to write to ZIP archive that was already closed")
1821        if self._writing:
1822            raise ValueError(
1823                "Can't write to ZIP archive while an open writing handle exists."
1824            )
1825
1826        if compress_type is not None:
1827            zinfo.compress_type = compress_type
1828
1829        if compresslevel is not None:
1830            zinfo._compresslevel = compresslevel
1831
1832        zinfo.file_size = len(data)            # Uncompressed size
1833        with self._lock:
1834            with self.open(zinfo, mode='w') as dest:
1835                dest.write(data)
1836
    def mkdir(self, zinfo_or_directory_name, mode=0o777):
        """Create a directory inside the zip archive."""
1839        if isinstance(zinfo_or_directory_name, ZipInfo):
1840            zinfo = zinfo_or_directory_name
1841            if not zinfo.is_dir():
1842                raise ValueError("The given ZipInfo does not describe a directory")
1843        elif isinstance(zinfo_or_directory_name, str):
1844            directory_name = zinfo_or_directory_name
1845            if not directory_name.endswith("/"):
1846                directory_name += "/"
1847            zinfo = ZipInfo(directory_name)
1848            zinfo.compress_size = 0
1849            zinfo.CRC = 0
1850            zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16
1851            zinfo.file_size = 0
1852            zinfo.external_attr |= 0x10
1853        else:
1854            raise TypeError("Expected type str or ZipInfo")
1855
1856        with self._lock:
1857            if self._seekable:
1858                self.fp.seek(self.start_dir)
1859            zinfo.header_offset = self.fp.tell()  # Start of header bytes
            if zinfo.compress_type == ZIP_LZMA:
                # Compressed data includes an end-of-stream (EOS) marker
                zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1
1863
1864            self._writecheck(zinfo)
1865            self._didModify = True
1866
1867            self.filelist.append(zinfo)
1868            self.NameToInfo[zinfo.filename] = zinfo
1869            self.fp.write(zinfo.FileHeader(False))
1870            self.start_dir = self.fp.tell()
1871
1872    def __del__(self):
1873        """Call the "close()" method in case the user forgot."""
1874        self.close()
1875
1876    def close(self):
1877        """Close the file, and for mode 'w', 'x' and 'a' write the ending
1878        records."""
1879        if self.fp is None:
1880            return
1881
1882        if self._writing:
1883            raise ValueError("Can't close the ZIP file while there is "
1884                             "an open writing handle on it. "
1885                             "Close the writing handle before closing the zip.")
1886
1887        try:
1888            if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
1889                with self._lock:
1890                    if self._seekable:
1891                        self.fp.seek(self.start_dir)
1892                    self._write_end_record()
1893        finally:
1894            fp = self.fp
1895            self.fp = None
1896            self._fpclose(fp)
1897
1898    def _write_end_record(self):
1899        for zinfo in self.filelist:         # write central directory
1900            dt = zinfo.date_time
1901            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
1902            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
1903            extra = []
1904            if zinfo.file_size > ZIP64_LIMIT \
1905               or zinfo.compress_size > ZIP64_LIMIT:
1906                extra.append(zinfo.file_size)
1907                extra.append(zinfo.compress_size)
1908                file_size = 0xffffffff
1909                compress_size = 0xffffffff
1910            else:
1911                file_size = zinfo.file_size
1912                compress_size = zinfo.compress_size
1913
1914            if zinfo.header_offset > ZIP64_LIMIT:
1915                extra.append(zinfo.header_offset)
1916                header_offset = 0xffffffff
1917            else:
1918                header_offset = zinfo.header_offset
1919
1920            extra_data = zinfo.extra
1921            min_version = 0
1922            if extra:
                # Append a ZIP64 field to the extra data
1924                extra_data = _strip_extra(extra_data, (1,))
1925                extra_data = struct.pack(
1926                    '<HH' + 'Q'*len(extra),
1927                    1, 8*len(extra), *extra) + extra_data
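                # The extra block now begins with the ZIP64 record: header
                # id 0x0001, a 2-byte data size (8 bytes per value), then
                # the overflowed 64-bit values in the order file size,
                # compressed size, header offset.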
1928
1929                min_version = ZIP64_VERSION
1930
1931            if zinfo.compress_type == ZIP_BZIP2:
1932                min_version = max(BZIP2_VERSION, min_version)
1933            elif zinfo.compress_type == ZIP_LZMA:
1934                min_version = max(LZMA_VERSION, min_version)
1935
1936            extract_version = max(min_version, zinfo.extract_version)
1937            create_version = max(min_version, zinfo.create_version)
1938            filename, flag_bits = zinfo._encodeFilenameFlags()
1939            centdir = struct.pack(structCentralDir,
1940                                  stringCentralDir, create_version,
1941                                  zinfo.create_system, extract_version, zinfo.reserved,
1942                                  flag_bits, zinfo.compress_type, dostime, dosdate,
1943                                  zinfo.CRC, compress_size, file_size,
1944                                  len(filename), len(extra_data), len(zinfo.comment),
1945                                  0, zinfo.internal_attr, zinfo.external_attr,
1946                                  header_offset)
1947            self.fp.write(centdir)
1948            self.fp.write(filename)
1949            self.fp.write(extra_data)
1950            self.fp.write(zinfo.comment)
1951
1952        pos2 = self.fp.tell()
1953        # Write end-of-zip-archive record
1954        centDirCount = len(self.filelist)
1955        centDirSize = pos2 - self.start_dir
1956        centDirOffset = self.start_dir
1957        requires_zip64 = None
1958        if centDirCount > ZIP_FILECOUNT_LIMIT:
1959            requires_zip64 = "Files count"
1960        elif centDirOffset > ZIP64_LIMIT:
1961            requires_zip64 = "Central directory offset"
1962        elif centDirSize > ZIP64_LIMIT:
1963            requires_zip64 = "Central directory size"
1964        if requires_zip64:
1965            # Need to write the ZIP64 end-of-archive records
1966            if not self._allowZip64:
1967                raise LargeZipFile(requires_zip64 +
1968                                   " would require ZIP64 extensions")
1969            zip64endrec = struct.pack(
1970                structEndArchive64, stringEndArchive64,
1971                44, 45, 45, 0, 0, centDirCount, centDirCount,
1972                centDirSize, centDirOffset)
1973            self.fp.write(zip64endrec)
1974
1975            zip64locrec = struct.pack(
1976                structEndArchive64Locator,
1977                stringEndArchive64Locator, 0, pos2, 1)
1978            self.fp.write(zip64locrec)
1979            centDirCount = min(centDirCount, 0xFFFF)
1980            centDirSize = min(centDirSize, 0xFFFFFFFF)
1981            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
1982
1983        endrec = struct.pack(structEndArchive, stringEndArchive,
1984                             0, 0, centDirCount, centDirCount,
1985                             centDirSize, centDirOffset, len(self._comment))
1986        self.fp.write(endrec)
1987        self.fp.write(self._comment)
1988        if self.mode == "a":
1989            self.fp.truncate()
1990        self.fp.flush()
1991
1992    def _fpclose(self, fp):
1993        assert self._fileRefCnt > 0
1994        self._fileRefCnt -= 1
1995        if not self._fileRefCnt and not self._filePassed:
1996            fp.close()
1997
1998
1999class PyZipFile(ZipFile):
2000    """Class to create ZIP archives with Python library files and packages."""
2001
2002    def __init__(self, file, mode="r", compression=ZIP_STORED,
2003                 allowZip64=True, optimize=-1):
2004        ZipFile.__init__(self, file, mode=mode, compression=compression,
2005                         allowZip64=allowZip64)
2006        self._optimize = optimize
2007
2008    def writepy(self, pathname, basename="", filterfunc=None):
2009        """Add all files from "pathname" to the ZIP archive.
2010
        If pathname is a package directory, search it and all package
        subdirectories recursively for *.py files and add the
        corresponding modules to the archive.  If pathname is a plain
        directory, add the modules for the *.py files it contains
        (subdirectories are not searched).  Otherwise, pathname must be
        a single Python *.py file, and that module is added to the
        archive.  Added modules are always stored as module.pyc; the
        module.py source is compiled to module.pyc if necessary.
        If filterfunc is given, it is called with each file or directory
        path; when it returns a false value, that file or directory is
        skipped.
2021        """
2022        pathname = os.fspath(pathname)
2023        if filterfunc and not filterfunc(pathname):
2024            if self.debug:
2025                label = 'path' if os.path.isdir(pathname) else 'file'
2026                print('%s %r skipped by filterfunc' % (label, pathname))
2027            return
2028        dir, name = os.path.split(pathname)
2029        if os.path.isdir(pathname):
2030            initname = os.path.join(pathname, "__init__.py")
2031            if os.path.isfile(initname):
2032                # This is a package directory, add it
2033                if basename:
2034                    basename = "%s/%s" % (basename, name)
2035                else:
2036                    basename = name
2037                if self.debug:
2038                    print("Adding package in", pathname, "as", basename)
2039                fname, arcname = self._get_codename(initname[0:-3], basename)
2040                if self.debug:
2041                    print("Adding", arcname)
2042                self.write(fname, arcname)
2043                dirlist = sorted(os.listdir(pathname))
2044                dirlist.remove("__init__.py")
2045                # Add all *.py files and package subdirectories
2046                for filename in dirlist:
2047                    path = os.path.join(pathname, filename)
2048                    root, ext = os.path.splitext(filename)
2049                    if os.path.isdir(path):
2050                        if os.path.isfile(os.path.join(path, "__init__.py")):
2051                            # This is a package directory, add it
2052                            self.writepy(path, basename,
2053                                         filterfunc=filterfunc)  # Recursive call
2054                    elif ext == ".py":
2055                        if filterfunc and not filterfunc(path):
2056                            if self.debug:
2057                                print('file %r skipped by filterfunc' % path)
2058                            continue
2059                        fname, arcname = self._get_codename(path[0:-3],
2060                                                            basename)
2061                        if self.debug:
2062                            print("Adding", arcname)
2063                        self.write(fname, arcname)
2064            else:
2065                # This is NOT a package directory, add its files at top level
2066                if self.debug:
2067                    print("Adding files from directory", pathname)
2068                for filename in sorted(os.listdir(pathname)):
2069                    path = os.path.join(pathname, filename)
2070                    root, ext = os.path.splitext(filename)
2071                    if ext == ".py":
2072                        if filterfunc and not filterfunc(path):
2073                            if self.debug:
2074                                print('file %r skipped by filterfunc' % path)
2075                            continue
2076                        fname, arcname = self._get_codename(path[0:-3],
2077                                                            basename)
2078                        if self.debug:
2079                            print("Adding", arcname)
2080                        self.write(fname, arcname)
2081        else:
2082            if pathname[-3:] != ".py":
2083                raise RuntimeError(
2084                    'Files added with writepy() must end with ".py"')
2085            fname, arcname = self._get_codename(pathname[0:-3], basename)
2086            if self.debug:
2087                print("Adding file", arcname)
2088            self.write(fname, arcname)
2089
2090    def _get_codename(self, pathname, basename):
2091        """Return (filename, archivename) for the path.
2092
2093        Given a module name path, return the correct file path and
2094        archive name, compiling if necessary.  For example, given
2095        /python/lib/string, return (/python/lib/string.pyc, string).
2096        """
2097        def _compile(file, optimize=-1):
2098            import py_compile
2099            if self.debug:
2100                print("Compiling", file)
2101            try:
2102                py_compile.compile(file, doraise=True, optimize=optimize)
2103            except py_compile.PyCompileError as err:
2104                print(err.msg)
2105                return False
2106            return True
2107
2108        file_py  = pathname + ".py"
2109        file_pyc = pathname + ".pyc"
2110        pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='')
2111        pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1)
2112        pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2)
2113        if self._optimize == -1:
2114            # legacy mode: use whatever file is present
2115            if (os.path.isfile(file_pyc) and
2116                  os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
2117                # Use .pyc file.
2118                arcname = fname = file_pyc
2119            elif (os.path.isfile(pycache_opt0) and
2120                  os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime):
2121                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2122                # file name in the archive.
2123                fname = pycache_opt0
2124                arcname = file_pyc
2125            elif (os.path.isfile(pycache_opt1) and
2126                  os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime):
2127                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2128                # file name in the archive.
2129                fname = pycache_opt1
2130                arcname = file_pyc
2131            elif (os.path.isfile(pycache_opt2) and
2132                  os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime):
2133                # Use the __pycache__/*.pyc file, but write it to the legacy pyc
2134                # file name in the archive.
2135                fname = pycache_opt2
2136                arcname = file_pyc
2137            else:
2138                # Compile py into PEP 3147 pyc file.
2139                if _compile(file_py):
2140                    if sys.flags.optimize == 0:
2141                        fname = pycache_opt0
2142                    elif sys.flags.optimize == 1:
2143                        fname = pycache_opt1
2144                    else:
2145                        fname = pycache_opt2
2146                    arcname = file_pyc
2147                else:
2148                    fname = arcname = file_py
2149        else:
2150            # new mode: use given optimization level
2151            if self._optimize == 0:
2152                fname = pycache_opt0
2153                arcname = file_pyc
2154            else:
2155                arcname = file_pyc
2156                if self._optimize == 1:
2157                    fname = pycache_opt1
2158                elif self._optimize == 2:
2159                    fname = pycache_opt2
2160                else:
2161                    msg = "invalid value for 'optimize': {!r}".format(self._optimize)
2162                    raise ValueError(msg)
2163            if not (os.path.isfile(fname) and
2164                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
2165                if not _compile(file_py, optimize=self._optimize):
2166                    fname = arcname = file_py
2167        archivename = os.path.split(arcname)[1]
2168        if basename:
2169            archivename = "%s/%s" % (basename, archivename)
2170        return (fname, archivename)
2171
2172
2173def _parents(path):
2174    """
2175    Given a path with elements separated by
2176    posixpath.sep, generate all parents of that path.
2177
2178    >>> list(_parents('b/d'))
2179    ['b']
2180    >>> list(_parents('/b/d/'))
2181    ['/b']
2182    >>> list(_parents('b/d/f/'))
2183    ['b/d', 'b']
2184    >>> list(_parents('b'))
2185    []
2186    >>> list(_parents(''))
2187    []
2188    """
2189    return itertools.islice(_ancestry(path), 1, None)
2190
2191
2192def _ancestry(path):
2193    """
2194    Given a path with elements separated by
2195    posixpath.sep, generate all elements of that path
2196
2197    >>> list(_ancestry('b/d'))
2198    ['b/d', 'b']
2199    >>> list(_ancestry('/b/d/'))
2200    ['/b/d', '/b']
2201    >>> list(_ancestry('b/d/f/'))
2202    ['b/d/f', 'b/d', 'b']
2203    >>> list(_ancestry('b'))
2204    ['b']
2205    >>> list(_ancestry(''))
2206    []
2207    """
2208    path = path.rstrip(posixpath.sep)
2209    while path and path != posixpath.sep:
2210        yield path
2211        path, tail = posixpath.split(path)
2212
2213
2214_dedupe = dict.fromkeys
2215"""Deduplicate an iterable in original order"""
2216
2217
2218def _difference(minuend, subtrahend):
2219    """
2220    Return items in minuend not in subtrahend, retaining order
2221    with O(1) lookup.
2222    """
2223    return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2224
2225
2226class CompleteDirs(ZipFile):
2227    """
2228    A ZipFile subclass that ensures that implied directories
2229    are always included in the namelist.
2230    """
2231
2232    @staticmethod
2233    def _implied_dirs(names):
2234        parents = itertools.chain.from_iterable(map(_parents, names))
2235        as_dirs = (p + posixpath.sep for p in parents)
2236        return _dedupe(_difference(as_dirs, names))
2237
2238    def namelist(self):
2239        names = super(CompleteDirs, self).namelist()
2240        return names + list(self._implied_dirs(names))
2241
2242    def _name_set(self):
2243        return set(self.namelist())
2244
2245    def resolve_dir(self, name):
2246        """
2247        If the name represents a directory, return that name
2248        as a directory (with the trailing slash).
2249        """
2250        names = self._name_set()
2251        dirname = name + '/'
2252        dir_match = name not in names and dirname in names
2253        return dirname if dir_match else name
2254
2255    def getinfo(self, name):
2256        """
2257        Supplement getinfo for implied dirs.
2258        """
2259        try:
2260            return super().getinfo(name)
2261        except KeyError:
2262            if not name.endswith('/') or name not in self._name_set():
2263                raise
2264            return ZipInfo(filename=name)
2265
2266    @classmethod
2267    def make(cls, source):
2268        """
2269        Given a source (filename or zipfile), return an
2270        appropriate CompleteDirs subclass.
2271        """
2272        if isinstance(source, CompleteDirs):
2273            return source
2274
2275        if not isinstance(source, ZipFile):
2276            return cls(source)
2277
        # Only allow FastLookup when the supplied zipfile is read-only
2279        if 'r' not in source.mode:
2280            cls = CompleteDirs
2281
2282        source.__class__ = cls
2283        return source
2284
2285
2286class FastLookup(CompleteDirs):
2287    """
2288    ZipFile subclass to ensure implicit
2289    dirs exist and are resolved rapidly.
2290    """
2291
2292    def namelist(self):
2293        with contextlib.suppress(AttributeError):
2294            return self.__names
2295        self.__names = super(FastLookup, self).namelist()
2296        return self.__names
2297
2298    def _name_set(self):
2299        with contextlib.suppress(AttributeError):
2300            return self.__lookup
2301        self.__lookup = super(FastLookup, self)._name_set()
2302        return self.__lookup
2303
2304
2305def _extract_text_encoding(encoding=None, *args, **kwargs):
    # stacklevel=3 so that the caller of the caller sees any warning.
2307    return io.text_encoding(encoding, 3), args, kwargs
2308
2309
2310class Path:
2311    """
2312    A pathlib-compatible interface for zip files.
2313
2314    Consider a zip file with this structure::
2315
2316        .
2317        ├── a.txt
2318        └── b
2319            ├── c.txt
2320            └── d
2321                └── e.txt
2322
2323    >>> data = io.BytesIO()
2324    >>> zf = ZipFile(data, 'w')
2325    >>> zf.writestr('a.txt', 'content of a')
2326    >>> zf.writestr('b/c.txt', 'content of c')
2327    >>> zf.writestr('b/d/e.txt', 'content of e')
2328    >>> zf.filename = 'mem/abcde.zip'
2329
2330    Path accepts the zipfile object itself or a filename
2331
2332    >>> root = Path(zf)
2333
2334    From there, several path operations are available.
2335
2336    Directory iteration (including the zip file itself):
2337
2338    >>> a, b = root.iterdir()
2339    >>> a
2340    Path('mem/abcde.zip', 'a.txt')
2341    >>> b
2342    Path('mem/abcde.zip', 'b/')
2343
2344    name property:
2345
2346    >>> b.name
2347    'b'
2348
2349    join with divide operator:
2350
2351    >>> c = b / 'c.txt'
2352    >>> c
2353    Path('mem/abcde.zip', 'b/c.txt')
2354    >>> c.name
2355    'c.txt'
2356
2357    Read text:
2358
2359    >>> c.read_text()
2360    'content of c'
2361
2362    existence:
2363
2364    >>> c.exists()
2365    True
2366    >>> (b / 'missing.txt').exists()
2367    False
2368
2369    Coercion to string:
2370
2371    >>> import os
2372    >>> str(c).replace(os.sep, posixpath.sep)
2373    'mem/abcde.zip/b/c.txt'
2374
2375    At the root, ``name``, ``filename``, and ``parent``
2376    resolve to the zipfile. Note these attributes are not
2377    valid and will raise a ``ValueError`` if the zipfile
2378    has no filename.
2379
2380    >>> root.name
2381    'abcde.zip'
2382    >>> str(root.filename).replace(os.sep, posixpath.sep)
2383    'mem/abcde.zip'
2384    >>> str(root.parent)
2385    'mem'
2386    """
2387
2388    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
2389
2390    def __init__(self, root, at=""):
2391        """
2392        Construct a Path from a ZipFile or filename.
2393
2394        Note: When the source is an existing ZipFile object,
2395        its type (__class__) will be mutated to a
2396        specialized type. If the caller wishes to retain the
2397        original type, the caller should either create a
2398        separate ZipFile object or pass a filename.
2399        """
2400        self.root = FastLookup.make(root)
2401        self.at = at
2402
2403    def open(self, mode='r', *args, pwd=None, **kwargs):
2404        """
2405        Open this entry as text or binary following the semantics
2406        of ``pathlib.Path.open()`` by passing arguments through
2407        to io.TextIOWrapper().
2408        """
2409        if self.is_dir():
2410            raise IsADirectoryError(self)
2411        zip_mode = mode[0]
2412        if not self.exists() and zip_mode == 'r':
2413            raise FileNotFoundError(self)
2414        stream = self.root.open(self.at, zip_mode, pwd=pwd)
2415        if 'b' in mode:
2416            if args or kwargs:
2417                raise ValueError("encoding args invalid for binary operation")
2418            return stream
2419        # Text mode:
2420        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2421        return io.TextIOWrapper(stream, encoding, *args, **kwargs)
2422
2423    @property
2424    def name(self):
2425        return pathlib.Path(self.at).name or self.filename.name
2426
2427    @property
2428    def suffix(self):
2429        return pathlib.Path(self.at).suffix or self.filename.suffix
2430
2431    @property
2432    def suffixes(self):
2433        return pathlib.Path(self.at).suffixes or self.filename.suffixes
2434
2435    @property
2436    def stem(self):
2437        return pathlib.Path(self.at).stem or self.filename.stem
2438
2439    @property
2440    def filename(self):
2441        return pathlib.Path(self.root.filename).joinpath(self.at)
2442
2443    def read_text(self, *args, **kwargs):
2444        encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2445        with self.open('r', encoding, *args, **kwargs) as strm:
2446            return strm.read()
2447
2448    def read_bytes(self):
2449        with self.open('rb') as strm:
2450            return strm.read()
2451
2452    def _is_child(self, path):
2453        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
2454
2455    def _next(self, at):
2456        return self.__class__(self.root, at)
2457
2458    def is_dir(self):
2459        return not self.at or self.at.endswith("/")
2460
2461    def is_file(self):
2462        return self.exists() and not self.is_dir()
2463
2464    def exists(self):
2465        return self.at in self.root._name_set()
2466
2467    def iterdir(self):
2468        if not self.is_dir():
2469            raise ValueError("Can't listdir a file")
2470        subs = map(self._next, self.root.namelist())
2471        return filter(self._is_child, subs)
2472
2473    def __str__(self):
2474        return posixpath.join(self.root.filename, self.at)
2475
2476    def __repr__(self):
2477        return self.__repr.format(self=self)
2478
2479    def joinpath(self, *other):
2480        next = posixpath.join(self.at, *other)
2481        return self._next(self.root.resolve_dir(next))
2482
2483    __truediv__ = joinpath
2484
2485    @property
2486    def parent(self):
2487        if not self.at:
2488            return self.filename.parent
2489        parent_at = posixpath.dirname(self.at.rstrip('/'))
2490        if parent_at:
2491            parent_at += '/'
2492        return self._next(parent_at)
2493
2494
2495def main(args=None):
2496    import argparse
2497
    description = 'A simple command-line interface for the zipfile module.'
2499    parser = argparse.ArgumentParser(description=description)
2500    group = parser.add_mutually_exclusive_group(required=True)
2501    group.add_argument('-l', '--list', metavar='<zipfile>',
2502                       help='Show listing of a zipfile')
2503    group.add_argument('-e', '--extract', nargs=2,
2504                       metavar=('<zipfile>', '<output_dir>'),
2505                       help='Extract zipfile into target dir')
2506    group.add_argument('-c', '--create', nargs='+',
2507                       metavar=('<name>', '<file>'),
2508                       help='Create zipfile from sources')
2509    group.add_argument('-t', '--test', metavar='<zipfile>',
2510                       help='Test if a zipfile is valid')
2511    parser.add_argument('--metadata-encoding', metavar='<encoding>',
2512                        help='Specify encoding of member names for -l, -e and -t')
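    # Typical invocations (archive and directory names are placeholders):
    #
    #     python -m zipfile -l archive.zip
    #     python -m zipfile -e archive.zip unpacked/
    #     python -m zipfile -c archive.zip file1.txt some_dir/
    #     python -m zipfile -t archive.zip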
2513    args = parser.parse_args(args)
2514
2515    encoding = args.metadata_encoding
2516
2517    if args.test is not None:
2518        src = args.test
2519        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2520            badfile = zf.testzip()
2521        if badfile:
2522            print("The following enclosed file is corrupted: {!r}".format(badfile))
2523        print("Done testing")
2524
2525    elif args.list is not None:
2526        src = args.list
2527        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2528            zf.printdir()
2529
2530    elif args.extract is not None:
2531        src, curdir = args.extract
2532        with ZipFile(src, 'r', metadata_encoding=encoding) as zf:
2533            zf.extractall(curdir)
2534
2535    elif args.create is not None:
2536        if encoding:
2537            print("Non-conforming encodings not supported with -c.",
2538                  file=sys.stderr)
2539            sys.exit(1)
2540
2541        zip_name = args.create.pop(0)
2542        files = args.create
2543
2544        def addToZip(zf, path, zippath):
2545            if os.path.isfile(path):
2546                zf.write(path, zippath, ZIP_DEFLATED)
2547            elif os.path.isdir(path):
2548                if zippath:
2549                    zf.write(path, zippath)
2550                for nm in sorted(os.listdir(path)):
2551                    addToZip(zf,
2552                             os.path.join(path, nm), os.path.join(zippath, nm))
2553            # else: ignore
2554
2555        with ZipFile(zip_name, 'w') as zf:
2556            for path in files:
2557                zippath = os.path.basename(path)
2558                if not zippath:
2559                    zippath = os.path.basename(os.path.dirname(path))
2560                if zippath in ('', os.curdir, os.pardir):
2561                    zippath = ''
2562                addToZip(zf, path, zippath)
2563
2564
2565if __name__ == "__main__":
2566    main()
2567