1""" 2Read and write ZIP files. 3 4XXX references to utf-8 need further investigation. 5""" 6import binascii 7import importlib.util 8import io 9import itertools 10import os 11import posixpath 12import shutil 13import stat 14import struct 15import sys 16import threading 17import time 18import contextlib 19import pathlib 20 21try: 22 import zlib # We may need its compression method 23 crc32 = zlib.crc32 24except ImportError: 25 zlib = None 26 crc32 = binascii.crc32 27 28try: 29 import bz2 # We may need its compression method 30except ImportError: 31 bz2 = None 32 33try: 34 import lzma # We may need its compression method 35except ImportError: 36 lzma = None 37 38__all__ = ["BadZipFile", "BadZipfile", "error", 39 "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", 40 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", 41 "Path"] 42 43class BadZipFile(Exception): 44 pass 45 46 47class LargeZipFile(Exception): 48 """ 49 Raised when writing a zipfile, the zipfile requires ZIP64 extensions 50 and those extensions are disabled. 51 """ 52 53error = BadZipfile = BadZipFile # Pre-3.2 compatibility names 54 55 56ZIP64_LIMIT = (1 << 31) - 1 57ZIP_FILECOUNT_LIMIT = (1 << 16) - 1 58ZIP_MAX_COMMENT = (1 << 16) - 1 59 60# constants for Zip file compression methods 61ZIP_STORED = 0 62ZIP_DEFLATED = 8 63ZIP_BZIP2 = 12 64ZIP_LZMA = 14 65# Other ZIP compression methods not supported 66 67DEFAULT_VERSION = 20 68ZIP64_VERSION = 45 69BZIP2_VERSION = 46 70LZMA_VERSION = 63 71# we recognize (but not necessarily support) all features up to that version 72MAX_EXTRACT_VERSION = 63 73 74# Below are some formats and associated data for reading/writing headers using 75# the struct module. The names and structures of headers/records are those used 76# in the PKWARE description of the ZIP file format: 77# http://www.pkware.com/documents/casestudies/APPNOTE.TXT 78# (URL valid as of January 2008) 79 80# The "end of central directory" structure, magic number, size, and indices 81# (section V.I in the format document) 82structEndArchive = b"<4s4H2LH" 83stringEndArchive = b"PK\005\006" 84sizeEndCentDir = struct.calcsize(structEndArchive) 85 86_ECD_SIGNATURE = 0 87_ECD_DISK_NUMBER = 1 88_ECD_DISK_START = 2 89_ECD_ENTRIES_THIS_DISK = 3 90_ECD_ENTRIES_TOTAL = 4 91_ECD_SIZE = 5 92_ECD_OFFSET = 6 93_ECD_COMMENT_SIZE = 7 94# These last two indices are not part of the structure as defined in the 95# spec, but they are used internally by this module as a convenience 96_ECD_COMMENT = 8 97_ECD_LOCATION = 9 98 99# The "central directory" structure, magic number, size, and indices 100# of entries in the structure (section V.F in the format document) 101structCentralDir = "<4s4B4HL2L5H2L" 102stringCentralDir = b"PK\001\002" 103sizeCentralDir = struct.calcsize(structCentralDir) 104 105# indexes of entries in the central directory structure 106_CD_SIGNATURE = 0 107_CD_CREATE_VERSION = 1 108_CD_CREATE_SYSTEM = 2 109_CD_EXTRACT_VERSION = 3 110_CD_EXTRACT_SYSTEM = 4 111_CD_FLAG_BITS = 5 112_CD_COMPRESS_TYPE = 6 113_CD_TIME = 7 114_CD_DATE = 8 115_CD_CRC = 9 116_CD_COMPRESSED_SIZE = 10 117_CD_UNCOMPRESSED_SIZE = 11 118_CD_FILENAME_LENGTH = 12 119_CD_EXTRA_FIELD_LENGTH = 13 120_CD_COMMENT_LENGTH = 14 121_CD_DISK_NUMBER_START = 15 122_CD_INTERNAL_FILE_ATTRIBUTES = 16 123_CD_EXTERNAL_FILE_ATTRIBUTES = 17 124_CD_LOCAL_HEADER_OFFSET = 18 125 126# General purpose bit flags 127# Zip Appnote: 4.4.4 general purpose bit flag: (2 bytes) 128_MASK_ENCRYPTED = 1 << 0 129# Bits 1 and 2 have different meanings depending on the compression used. 130_MASK_COMPRESS_OPTION_1 = 1 << 1 131# _MASK_COMPRESS_OPTION_2 = 1 << 2 132# _MASK_USE_DATA_DESCRIPTOR: If set, crc-32, compressed size and uncompressed 133# size are zero in the local header and the real values are written in the data 134# descriptor immediately following the compressed data. 135_MASK_USE_DATA_DESCRIPTOR = 1 << 3 136# Bit 4: Reserved for use with compression method 8, for enhanced deflating. 137# _MASK_RESERVED_BIT_4 = 1 << 4 138_MASK_COMPRESSED_PATCH = 1 << 5 139_MASK_STRONG_ENCRYPTION = 1 << 6 140# _MASK_UNUSED_BIT_7 = 1 << 7 141# _MASK_UNUSED_BIT_8 = 1 << 8 142# _MASK_UNUSED_BIT_9 = 1 << 9 143# _MASK_UNUSED_BIT_10 = 1 << 10 144_MASK_UTF_FILENAME = 1 << 11 145# Bit 12: Reserved by PKWARE for enhanced compression. 146# _MASK_RESERVED_BIT_12 = 1 << 12 147# _MASK_ENCRYPTED_CENTRAL_DIR = 1 << 13 148# Bit 14, 15: Reserved by PKWARE 149# _MASK_RESERVED_BIT_14 = 1 << 14 150# _MASK_RESERVED_BIT_15 = 1 << 15 151 152# The "local file header" structure, magic number, size, and indices 153# (section V.A in the format document) 154structFileHeader = "<4s2B4HL2L2H" 155stringFileHeader = b"PK\003\004" 156sizeFileHeader = struct.calcsize(structFileHeader) 157 158_FH_SIGNATURE = 0 159_FH_EXTRACT_VERSION = 1 160_FH_EXTRACT_SYSTEM = 2 161_FH_GENERAL_PURPOSE_FLAG_BITS = 3 162_FH_COMPRESSION_METHOD = 4 163_FH_LAST_MOD_TIME = 5 164_FH_LAST_MOD_DATE = 6 165_FH_CRC = 7 166_FH_COMPRESSED_SIZE = 8 167_FH_UNCOMPRESSED_SIZE = 9 168_FH_FILENAME_LENGTH = 10 169_FH_EXTRA_FIELD_LENGTH = 11 170 171# The "Zip64 end of central directory locator" structure, magic number, and size 172structEndArchive64Locator = "<4sLQL" 173stringEndArchive64Locator = b"PK\x06\x07" 174sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) 175 176# The "Zip64 end of central directory" record, magic number, size, and indices 177# (section V.G in the format document) 178structEndArchive64 = "<4sQ2H2L4Q" 179stringEndArchive64 = b"PK\x06\x06" 180sizeEndCentDir64 = struct.calcsize(structEndArchive64) 181 182_CD64_SIGNATURE = 0 183_CD64_DIRECTORY_RECSIZE = 1 184_CD64_CREATE_VERSION = 2 185_CD64_EXTRACT_VERSION = 3 186_CD64_DISK_NUMBER = 4 187_CD64_DISK_NUMBER_START = 5 188_CD64_NUMBER_ENTRIES_THIS_DISK = 6 189_CD64_NUMBER_ENTRIES_TOTAL = 7 190_CD64_DIRECTORY_SIZE = 8 191_CD64_OFFSET_START_CENTDIR = 9 192 193_DD_SIGNATURE = 0x08074b50 194 195_EXTRA_FIELD_STRUCT = struct.Struct('<HH') 196 197def _strip_extra(extra, xids): 198 # Remove Extra Fields with specified IDs. 199 unpack = _EXTRA_FIELD_STRUCT.unpack 200 modified = False 201 buffer = [] 202 start = i = 0 203 while i + 4 <= len(extra): 204 xid, xlen = unpack(extra[i : i + 4]) 205 j = i + 4 + xlen 206 if xid in xids: 207 if i != start: 208 buffer.append(extra[start : i]) 209 start = j 210 modified = True 211 i = j 212 if not modified: 213 return extra 214 if start != len(extra): 215 buffer.append(extra[start:]) 216 return b''.join(buffer) 217 218def _check_zipfile(fp): 219 try: 220 if _EndRecData(fp): 221 return True # file has correct magic number 222 except OSError: 223 pass 224 return False 225 226def is_zipfile(filename): 227 """Quickly see if a file is a ZIP file by checking the magic number. 228 229 The filename argument may be a file or file-like object too. 230 """ 231 result = False 232 try: 233 if hasattr(filename, "read"): 234 result = _check_zipfile(fp=filename) 235 else: 236 with open(filename, "rb") as fp: 237 result = _check_zipfile(fp) 238 except OSError: 239 pass 240 return result 241 242def _EndRecData64(fpin, offset, endrec): 243 """ 244 Read the ZIP64 end-of-archive records and use that to update endrec 245 """ 246 try: 247 fpin.seek(offset - sizeEndCentDir64Locator, 2) 248 except OSError: 249 # If the seek fails, the file is not large enough to contain a ZIP64 250 # end-of-archive record, so just return the end record we were given. 251 return endrec 252 253 data = fpin.read(sizeEndCentDir64Locator) 254 if len(data) != sizeEndCentDir64Locator: 255 return endrec 256 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 257 if sig != stringEndArchive64Locator: 258 return endrec 259 260 if diskno != 0 or disks > 1: 261 raise BadZipFile("zipfiles that span multiple disks are not supported") 262 263 # Assume no 'zip64 extensible data' 264 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 265 data = fpin.read(sizeEndCentDir64) 266 if len(data) != sizeEndCentDir64: 267 return endrec 268 sig, sz, create_version, read_version, disk_num, disk_dir, \ 269 dircount, dircount2, dirsize, diroffset = \ 270 struct.unpack(structEndArchive64, data) 271 if sig != stringEndArchive64: 272 return endrec 273 274 # Update the original endrec using data from the ZIP64 record 275 endrec[_ECD_SIGNATURE] = sig 276 endrec[_ECD_DISK_NUMBER] = disk_num 277 endrec[_ECD_DISK_START] = disk_dir 278 endrec[_ECD_ENTRIES_THIS_DISK] = dircount 279 endrec[_ECD_ENTRIES_TOTAL] = dircount2 280 endrec[_ECD_SIZE] = dirsize 281 endrec[_ECD_OFFSET] = diroffset 282 return endrec 283 284 285def _EndRecData(fpin): 286 """Return data from the "End of Central Directory" record, or None. 287 288 The data is a list of the nine items in the ZIP "End of central dir" 289 record followed by a tenth item, the file seek offset of this record.""" 290 291 # Determine file size 292 fpin.seek(0, 2) 293 filesize = fpin.tell() 294 295 # Check to see if this is ZIP file with no archive comment (the 296 # "end of central directory" structure should be the last item in the 297 # file if this is the case). 298 try: 299 fpin.seek(-sizeEndCentDir, 2) 300 except OSError: 301 return None 302 data = fpin.read() 303 if (len(data) == sizeEndCentDir and 304 data[0:4] == stringEndArchive and 305 data[-2:] == b"\000\000"): 306 # the signature is correct and there's no comment, unpack structure 307 endrec = struct.unpack(structEndArchive, data) 308 endrec=list(endrec) 309 310 # Append a blank comment and record start offset 311 endrec.append(b"") 312 endrec.append(filesize - sizeEndCentDir) 313 314 # Try to read the "Zip64 end of central directory" structure 315 return _EndRecData64(fpin, -sizeEndCentDir, endrec) 316 317 # Either this is not a ZIP file, or it is a ZIP file with an archive 318 # comment. Search the end of the file for the "end of central directory" 319 # record signature. The comment is the last item in the ZIP file and may be 320 # up to 64K long. It is assumed that the "end of central directory" magic 321 # number does not appear in the comment. 322 maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0) 323 fpin.seek(maxCommentStart, 0) 324 data = fpin.read() 325 start = data.rfind(stringEndArchive) 326 if start >= 0: 327 # found the magic number; attempt to unpack and interpret 328 recData = data[start:start+sizeEndCentDir] 329 if len(recData) != sizeEndCentDir: 330 # Zip file is corrupted. 331 return None 332 endrec = list(struct.unpack(structEndArchive, recData)) 333 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 334 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 335 endrec.append(comment) 336 endrec.append(maxCommentStart + start) 337 338 # Try to read the "Zip64 end of central directory" structure 339 return _EndRecData64(fpin, maxCommentStart + start - filesize, 340 endrec) 341 342 # Unable to find a valid end of central directory structure 343 return None 344 345 346class ZipInfo (object): 347 """Class with attributes describing each file in the ZIP archive.""" 348 349 __slots__ = ( 350 'orig_filename', 351 'filename', 352 'date_time', 353 'compress_type', 354 '_compresslevel', 355 'comment', 356 'extra', 357 'create_system', 358 'create_version', 359 'extract_version', 360 'reserved', 361 'flag_bits', 362 'volume', 363 'internal_attr', 364 'external_attr', 365 'header_offset', 366 'CRC', 367 'compress_size', 368 'file_size', 369 '_raw_time', 370 ) 371 372 def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): 373 self.orig_filename = filename # Original file name in archive 374 375 # Terminate the file name at the first null byte. Null bytes in file 376 # names are used as tricks by viruses in archives. 377 null_byte = filename.find(chr(0)) 378 if null_byte >= 0: 379 filename = filename[0:null_byte] 380 # This is used to ensure paths in generated ZIP files always use 381 # forward slashes as the directory separator, as required by the 382 # ZIP format specification. 383 if os.sep != "/" and os.sep in filename: 384 filename = filename.replace(os.sep, "/") 385 386 self.filename = filename # Normalized file name 387 self.date_time = date_time # year, month, day, hour, min, sec 388 389 if date_time[0] < 1980: 390 raise ValueError('ZIP does not support timestamps before 1980') 391 392 # Standard values: 393 self.compress_type = ZIP_STORED # Type of compression for the file 394 self._compresslevel = None # Level for the compressor 395 self.comment = b"" # Comment for each file 396 self.extra = b"" # ZIP extra data 397 if sys.platform == 'win32': 398 self.create_system = 0 # System which created ZIP archive 399 else: 400 # Assume everything else is unix-y 401 self.create_system = 3 # System which created ZIP archive 402 self.create_version = DEFAULT_VERSION # Version which created ZIP archive 403 self.extract_version = DEFAULT_VERSION # Version needed to extract archive 404 self.reserved = 0 # Must be zero 405 self.flag_bits = 0 # ZIP flag bits 406 self.volume = 0 # Volume number of file header 407 self.internal_attr = 0 # Internal attributes 408 self.external_attr = 0 # External file attributes 409 self.compress_size = 0 # Size of the compressed file 410 self.file_size = 0 # Size of the uncompressed file 411 # Other attributes are set by class ZipFile: 412 # header_offset Byte offset to the file header 413 # CRC CRC-32 of the uncompressed file 414 415 def __repr__(self): 416 result = ['<%s filename=%r' % (self.__class__.__name__, self.filename)] 417 if self.compress_type != ZIP_STORED: 418 result.append(' compress_type=%s' % 419 compressor_names.get(self.compress_type, 420 self.compress_type)) 421 hi = self.external_attr >> 16 422 lo = self.external_attr & 0xFFFF 423 if hi: 424 result.append(' filemode=%r' % stat.filemode(hi)) 425 if lo: 426 result.append(' external_attr=%#x' % lo) 427 isdir = self.is_dir() 428 if not isdir or self.file_size: 429 result.append(' file_size=%r' % self.file_size) 430 if ((not isdir or self.compress_size) and 431 (self.compress_type != ZIP_STORED or 432 self.file_size != self.compress_size)): 433 result.append(' compress_size=%r' % self.compress_size) 434 result.append('>') 435 return ''.join(result) 436 437 def FileHeader(self, zip64=None): 438 """Return the per-file header as a bytes object. 439 440 When the optional zip64 arg is None rather than a bool, we will 441 decide based upon the file_size and compress_size, if known, 442 False otherwise. 443 """ 444 dt = self.date_time 445 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 446 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 447 if self.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 448 # Set these to zero because we write them after the file data 449 CRC = compress_size = file_size = 0 450 else: 451 CRC = self.CRC 452 compress_size = self.compress_size 453 file_size = self.file_size 454 455 extra = self.extra 456 457 min_version = 0 458 if zip64 is None: 459 # We always explicitly pass zip64 within this module.... This 460 # remains for anyone using ZipInfo.FileHeader as a public API. 461 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 462 if zip64: 463 fmt = '<HHQQ' 464 extra = extra + struct.pack(fmt, 465 1, struct.calcsize(fmt)-4, file_size, compress_size) 466 file_size = 0xffffffff 467 compress_size = 0xffffffff 468 min_version = ZIP64_VERSION 469 470 if self.compress_type == ZIP_BZIP2: 471 min_version = max(BZIP2_VERSION, min_version) 472 elif self.compress_type == ZIP_LZMA: 473 min_version = max(LZMA_VERSION, min_version) 474 475 self.extract_version = max(min_version, self.extract_version) 476 self.create_version = max(min_version, self.create_version) 477 filename, flag_bits = self._encodeFilenameFlags() 478 header = struct.pack(structFileHeader, stringFileHeader, 479 self.extract_version, self.reserved, flag_bits, 480 self.compress_type, dostime, dosdate, CRC, 481 compress_size, file_size, 482 len(filename), len(extra)) 483 return header + filename + extra 484 485 def _encodeFilenameFlags(self): 486 try: 487 return self.filename.encode('ascii'), self.flag_bits 488 except UnicodeEncodeError: 489 return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME 490 491 def _decodeExtra(self): 492 # Try to decode the extra field. 493 extra = self.extra 494 unpack = struct.unpack 495 while len(extra) >= 4: 496 tp, ln = unpack('<HH', extra[:4]) 497 if ln+4 > len(extra): 498 raise BadZipFile("Corrupt extra field %04x (size=%d)" % (tp, ln)) 499 if tp == 0x0001: 500 data = extra[4:ln+4] 501 # ZIP64 extension (large files and/or large archives) 502 try: 503 if self.file_size in (0xFFFF_FFFF_FFFF_FFFF, 0xFFFF_FFFF): 504 field = "File size" 505 self.file_size, = unpack('<Q', data[:8]) 506 data = data[8:] 507 if self.compress_size == 0xFFFF_FFFF: 508 field = "Compress size" 509 self.compress_size, = unpack('<Q', data[:8]) 510 data = data[8:] 511 if self.header_offset == 0xFFFF_FFFF: 512 field = "Header offset" 513 self.header_offset, = unpack('<Q', data[:8]) 514 except struct.error: 515 raise BadZipFile(f"Corrupt zip64 extra field. " 516 f"{field} not found.") from None 517 518 extra = extra[ln+4:] 519 520 @classmethod 521 def from_file(cls, filename, arcname=None, *, strict_timestamps=True): 522 """Construct an appropriate ZipInfo for a file on the filesystem. 523 524 filename should be the path to a file or directory on the filesystem. 525 526 arcname is the name which it will have within the archive (by default, 527 this will be the same as filename, but without a drive letter and with 528 leading path separators removed). 529 """ 530 if isinstance(filename, os.PathLike): 531 filename = os.fspath(filename) 532 st = os.stat(filename) 533 isdir = stat.S_ISDIR(st.st_mode) 534 mtime = time.localtime(st.st_mtime) 535 date_time = mtime[0:6] 536 if not strict_timestamps and date_time[0] < 1980: 537 date_time = (1980, 1, 1, 0, 0, 0) 538 elif not strict_timestamps and date_time[0] > 2107: 539 date_time = (2107, 12, 31, 23, 59, 59) 540 # Create ZipInfo instance to store file information 541 if arcname is None: 542 arcname = filename 543 arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) 544 while arcname[0] in (os.sep, os.altsep): 545 arcname = arcname[1:] 546 if isdir: 547 arcname += '/' 548 zinfo = cls(arcname, date_time) 549 zinfo.external_attr = (st.st_mode & 0xFFFF) << 16 # Unix attributes 550 if isdir: 551 zinfo.file_size = 0 552 zinfo.external_attr |= 0x10 # MS-DOS directory flag 553 else: 554 zinfo.file_size = st.st_size 555 556 return zinfo 557 558 def is_dir(self): 559 """Return True if this archive member is a directory.""" 560 return self.filename[-1] == '/' 561 562 563# ZIP encryption uses the CRC32 one-byte primitive for scrambling some 564# internal keys. We noticed that a direct implementation is faster than 565# relying on binascii.crc32(). 566 567_crctable = None 568def _gen_crc(crc): 569 for j in range(8): 570 if crc & 1: 571 crc = (crc >> 1) ^ 0xEDB88320 572 else: 573 crc >>= 1 574 return crc 575 576# ZIP supports a password-based form of encryption. Even though known 577# plaintext attacks have been found against it, it is still useful 578# to be able to get data out of such a file. 579# 580# Usage: 581# zd = _ZipDecrypter(mypwd) 582# plain_bytes = zd(cypher_bytes) 583 584def _ZipDecrypter(pwd): 585 key0 = 305419896 586 key1 = 591751049 587 key2 = 878082192 588 589 global _crctable 590 if _crctable is None: 591 _crctable = list(map(_gen_crc, range(256))) 592 crctable = _crctable 593 594 def crc32(ch, crc): 595 """Compute the CRC32 primitive on one byte.""" 596 return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] 597 598 def update_keys(c): 599 nonlocal key0, key1, key2 600 key0 = crc32(c, key0) 601 key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF 602 key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF 603 key2 = crc32(key1 >> 24, key2) 604 605 for p in pwd: 606 update_keys(p) 607 608 def decrypter(data): 609 """Decrypt a bytes object.""" 610 result = bytearray() 611 append = result.append 612 for c in data: 613 k = key2 | 2 614 c ^= ((k * (k^1)) >> 8) & 0xFF 615 update_keys(c) 616 append(c) 617 return bytes(result) 618 619 return decrypter 620 621 622class LZMACompressor: 623 624 def __init__(self): 625 self._comp = None 626 627 def _init(self): 628 props = lzma._encode_filter_properties({'id': lzma.FILTER_LZMA1}) 629 self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[ 630 lzma._decode_filter_properties(lzma.FILTER_LZMA1, props) 631 ]) 632 return struct.pack('<BBH', 9, 4, len(props)) + props 633 634 def compress(self, data): 635 if self._comp is None: 636 return self._init() + self._comp.compress(data) 637 return self._comp.compress(data) 638 639 def flush(self): 640 if self._comp is None: 641 return self._init() + self._comp.flush() 642 return self._comp.flush() 643 644 645class LZMADecompressor: 646 647 def __init__(self): 648 self._decomp = None 649 self._unconsumed = b'' 650 self.eof = False 651 652 def decompress(self, data): 653 if self._decomp is None: 654 self._unconsumed += data 655 if len(self._unconsumed) <= 4: 656 return b'' 657 psize, = struct.unpack('<H', self._unconsumed[2:4]) 658 if len(self._unconsumed) <= 4 + psize: 659 return b'' 660 661 self._decomp = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=[ 662 lzma._decode_filter_properties(lzma.FILTER_LZMA1, 663 self._unconsumed[4:4 + psize]) 664 ]) 665 data = self._unconsumed[4 + psize:] 666 del self._unconsumed 667 668 result = self._decomp.decompress(data) 669 self.eof = self._decomp.eof 670 return result 671 672 673compressor_names = { 674 0: 'store', 675 1: 'shrink', 676 2: 'reduce', 677 3: 'reduce', 678 4: 'reduce', 679 5: 'reduce', 680 6: 'implode', 681 7: 'tokenize', 682 8: 'deflate', 683 9: 'deflate64', 684 10: 'implode', 685 12: 'bzip2', 686 14: 'lzma', 687 18: 'terse', 688 19: 'lz77', 689 97: 'wavpack', 690 98: 'ppmd', 691} 692 693def _check_compression(compression): 694 if compression == ZIP_STORED: 695 pass 696 elif compression == ZIP_DEFLATED: 697 if not zlib: 698 raise RuntimeError( 699 "Compression requires the (missing) zlib module") 700 elif compression == ZIP_BZIP2: 701 if not bz2: 702 raise RuntimeError( 703 "Compression requires the (missing) bz2 module") 704 elif compression == ZIP_LZMA: 705 if not lzma: 706 raise RuntimeError( 707 "Compression requires the (missing) lzma module") 708 else: 709 raise NotImplementedError("That compression method is not supported") 710 711 712def _get_compressor(compress_type, compresslevel=None): 713 if compress_type == ZIP_DEFLATED: 714 if compresslevel is not None: 715 return zlib.compressobj(compresslevel, zlib.DEFLATED, -15) 716 return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15) 717 elif compress_type == ZIP_BZIP2: 718 if compresslevel is not None: 719 return bz2.BZ2Compressor(compresslevel) 720 return bz2.BZ2Compressor() 721 # compresslevel is ignored for ZIP_LZMA 722 elif compress_type == ZIP_LZMA: 723 return LZMACompressor() 724 else: 725 return None 726 727 728def _get_decompressor(compress_type): 729 _check_compression(compress_type) 730 if compress_type == ZIP_STORED: 731 return None 732 elif compress_type == ZIP_DEFLATED: 733 return zlib.decompressobj(-15) 734 elif compress_type == ZIP_BZIP2: 735 return bz2.BZ2Decompressor() 736 elif compress_type == ZIP_LZMA: 737 return LZMADecompressor() 738 else: 739 descr = compressor_names.get(compress_type) 740 if descr: 741 raise NotImplementedError("compression type %d (%s)" % (compress_type, descr)) 742 else: 743 raise NotImplementedError("compression type %d" % (compress_type,)) 744 745 746class _SharedFile: 747 def __init__(self, file, pos, close, lock, writing): 748 self._file = file 749 self._pos = pos 750 self._close = close 751 self._lock = lock 752 self._writing = writing 753 self.seekable = file.seekable 754 755 def tell(self): 756 return self._pos 757 758 def seek(self, offset, whence=0): 759 with self._lock: 760 if self._writing(): 761 raise ValueError("Can't reposition in the ZIP file while " 762 "there is an open writing handle on it. " 763 "Close the writing handle before trying to read.") 764 self._file.seek(offset, whence) 765 self._pos = self._file.tell() 766 return self._pos 767 768 def read(self, n=-1): 769 with self._lock: 770 if self._writing(): 771 raise ValueError("Can't read from the ZIP file while there " 772 "is an open writing handle on it. " 773 "Close the writing handle before trying to read.") 774 self._file.seek(self._pos) 775 data = self._file.read(n) 776 self._pos = self._file.tell() 777 return data 778 779 def close(self): 780 if self._file is not None: 781 fileobj = self._file 782 self._file = None 783 self._close(fileobj) 784 785# Provide the tell method for unseekable stream 786class _Tellable: 787 def __init__(self, fp): 788 self.fp = fp 789 self.offset = 0 790 791 def write(self, data): 792 n = self.fp.write(data) 793 self.offset += n 794 return n 795 796 def tell(self): 797 return self.offset 798 799 def flush(self): 800 self.fp.flush() 801 802 def close(self): 803 self.fp.close() 804 805 806class ZipExtFile(io.BufferedIOBase): 807 """File-like object for reading an archive member. 808 Is returned by ZipFile.open(). 809 """ 810 811 # Max size supported by decompressor. 812 MAX_N = 1 << 31 - 1 813 814 # Read from compressed files in 4k blocks. 815 MIN_READ_SIZE = 4096 816 817 # Chunk size to read during seek 818 MAX_SEEK_READ = 1 << 24 819 820 def __init__(self, fileobj, mode, zipinfo, pwd=None, 821 close_fileobj=False): 822 self._fileobj = fileobj 823 self._pwd = pwd 824 self._close_fileobj = close_fileobj 825 826 self._compress_type = zipinfo.compress_type 827 self._compress_left = zipinfo.compress_size 828 self._left = zipinfo.file_size 829 830 self._decompressor = _get_decompressor(self._compress_type) 831 832 self._eof = False 833 self._readbuffer = b'' 834 self._offset = 0 835 836 self.newlines = None 837 838 self.mode = mode 839 self.name = zipinfo.filename 840 841 if hasattr(zipinfo, 'CRC'): 842 self._expected_crc = zipinfo.CRC 843 self._running_crc = crc32(b'') 844 else: 845 self._expected_crc = None 846 847 self._seekable = False 848 try: 849 if fileobj.seekable(): 850 self._orig_compress_start = fileobj.tell() 851 self._orig_compress_size = zipinfo.compress_size 852 self._orig_file_size = zipinfo.file_size 853 self._orig_start_crc = self._running_crc 854 self._seekable = True 855 except AttributeError: 856 pass 857 858 self._decrypter = None 859 if pwd: 860 if zipinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 861 # compare against the file type from extended local headers 862 check_byte = (zipinfo._raw_time >> 8) & 0xff 863 else: 864 # compare against the CRC otherwise 865 check_byte = (zipinfo.CRC >> 24) & 0xff 866 h = self._init_decrypter() 867 if h != check_byte: 868 raise RuntimeError("Bad password for file %r" % zipinfo.orig_filename) 869 870 871 def _init_decrypter(self): 872 self._decrypter = _ZipDecrypter(self._pwd) 873 # The first 12 bytes in the cypher stream is an encryption header 874 # used to strengthen the algorithm. The first 11 bytes are 875 # completely random, while the 12th contains the MSB of the CRC, 876 # or the MSB of the file time depending on the header type 877 # and is used to check the correctness of the password. 878 header = self._fileobj.read(12) 879 self._compress_left -= 12 880 return self._decrypter(header)[11] 881 882 def __repr__(self): 883 result = ['<%s.%s' % (self.__class__.__module__, 884 self.__class__.__qualname__)] 885 if not self.closed: 886 result.append(' name=%r mode=%r' % (self.name, self.mode)) 887 if self._compress_type != ZIP_STORED: 888 result.append(' compress_type=%s' % 889 compressor_names.get(self._compress_type, 890 self._compress_type)) 891 else: 892 result.append(' [closed]') 893 result.append('>') 894 return ''.join(result) 895 896 def readline(self, limit=-1): 897 """Read and return a line from the stream. 898 899 If limit is specified, at most limit bytes will be read. 900 """ 901 902 if limit < 0: 903 # Shortcut common case - newline found in buffer. 904 i = self._readbuffer.find(b'\n', self._offset) + 1 905 if i > 0: 906 line = self._readbuffer[self._offset: i] 907 self._offset = i 908 return line 909 910 return io.BufferedIOBase.readline(self, limit) 911 912 def peek(self, n=1): 913 """Returns buffered bytes without advancing the position.""" 914 if n > len(self._readbuffer) - self._offset: 915 chunk = self.read(n) 916 if len(chunk) > self._offset: 917 self._readbuffer = chunk + self._readbuffer[self._offset:] 918 self._offset = 0 919 else: 920 self._offset -= len(chunk) 921 922 # Return up to 512 bytes to reduce allocation overhead for tight loops. 923 return self._readbuffer[self._offset: self._offset + 512] 924 925 def readable(self): 926 if self.closed: 927 raise ValueError("I/O operation on closed file.") 928 return True 929 930 def read(self, n=-1): 931 """Read and return up to n bytes. 932 If the argument is omitted, None, or negative, data is read and returned until EOF is reached. 933 """ 934 if self.closed: 935 raise ValueError("read from closed file.") 936 if n is None or n < 0: 937 buf = self._readbuffer[self._offset:] 938 self._readbuffer = b'' 939 self._offset = 0 940 while not self._eof: 941 buf += self._read1(self.MAX_N) 942 return buf 943 944 end = n + self._offset 945 if end < len(self._readbuffer): 946 buf = self._readbuffer[self._offset:end] 947 self._offset = end 948 return buf 949 950 n = end - len(self._readbuffer) 951 buf = self._readbuffer[self._offset:] 952 self._readbuffer = b'' 953 self._offset = 0 954 while n > 0 and not self._eof: 955 data = self._read1(n) 956 if n < len(data): 957 self._readbuffer = data 958 self._offset = n 959 buf += data[:n] 960 break 961 buf += data 962 n -= len(data) 963 return buf 964 965 def _update_crc(self, newdata): 966 # Update the CRC using the given data. 967 if self._expected_crc is None: 968 # No need to compute the CRC if we don't have a reference value 969 return 970 self._running_crc = crc32(newdata, self._running_crc) 971 # Check the CRC if we're at the end of the file 972 if self._eof and self._running_crc != self._expected_crc: 973 raise BadZipFile("Bad CRC-32 for file %r" % self.name) 974 975 def read1(self, n): 976 """Read up to n bytes with at most one read() system call.""" 977 978 if n is None or n < 0: 979 buf = self._readbuffer[self._offset:] 980 self._readbuffer = b'' 981 self._offset = 0 982 while not self._eof: 983 data = self._read1(self.MAX_N) 984 if data: 985 buf += data 986 break 987 return buf 988 989 end = n + self._offset 990 if end < len(self._readbuffer): 991 buf = self._readbuffer[self._offset:end] 992 self._offset = end 993 return buf 994 995 n = end - len(self._readbuffer) 996 buf = self._readbuffer[self._offset:] 997 self._readbuffer = b'' 998 self._offset = 0 999 if n > 0: 1000 while not self._eof: 1001 data = self._read1(n) 1002 if n < len(data): 1003 self._readbuffer = data 1004 self._offset = n 1005 buf += data[:n] 1006 break 1007 if data: 1008 buf += data 1009 break 1010 return buf 1011 1012 def _read1(self, n): 1013 # Read up to n compressed bytes with at most one read() system call, 1014 # decrypt and decompress them. 1015 if self._eof or n <= 0: 1016 return b'' 1017 1018 # Read from file. 1019 if self._compress_type == ZIP_DEFLATED: 1020 ## Handle unconsumed data. 1021 data = self._decompressor.unconsumed_tail 1022 if n > len(data): 1023 data += self._read2(n - len(data)) 1024 else: 1025 data = self._read2(n) 1026 1027 if self._compress_type == ZIP_STORED: 1028 self._eof = self._compress_left <= 0 1029 elif self._compress_type == ZIP_DEFLATED: 1030 n = max(n, self.MIN_READ_SIZE) 1031 data = self._decompressor.decompress(data, n) 1032 self._eof = (self._decompressor.eof or 1033 self._compress_left <= 0 and 1034 not self._decompressor.unconsumed_tail) 1035 if self._eof: 1036 data += self._decompressor.flush() 1037 else: 1038 data = self._decompressor.decompress(data) 1039 self._eof = self._decompressor.eof or self._compress_left <= 0 1040 1041 data = data[:self._left] 1042 self._left -= len(data) 1043 if self._left <= 0: 1044 self._eof = True 1045 self._update_crc(data) 1046 return data 1047 1048 def _read2(self, n): 1049 if self._compress_left <= 0: 1050 return b'' 1051 1052 n = max(n, self.MIN_READ_SIZE) 1053 n = min(n, self._compress_left) 1054 1055 data = self._fileobj.read(n) 1056 self._compress_left -= len(data) 1057 if not data: 1058 raise EOFError 1059 1060 if self._decrypter is not None: 1061 data = self._decrypter(data) 1062 return data 1063 1064 def close(self): 1065 try: 1066 if self._close_fileobj: 1067 self._fileobj.close() 1068 finally: 1069 super().close() 1070 1071 def seekable(self): 1072 if self.closed: 1073 raise ValueError("I/O operation on closed file.") 1074 return self._seekable 1075 1076 def seek(self, offset, whence=0): 1077 if self.closed: 1078 raise ValueError("seek on closed file.") 1079 if not self._seekable: 1080 raise io.UnsupportedOperation("underlying stream is not seekable") 1081 curr_pos = self.tell() 1082 if whence == 0: # Seek from start of file 1083 new_pos = offset 1084 elif whence == 1: # Seek from current position 1085 new_pos = curr_pos + offset 1086 elif whence == 2: # Seek from EOF 1087 new_pos = self._orig_file_size + offset 1088 else: 1089 raise ValueError("whence must be os.SEEK_SET (0), " 1090 "os.SEEK_CUR (1), or os.SEEK_END (2)") 1091 1092 if new_pos > self._orig_file_size: 1093 new_pos = self._orig_file_size 1094 1095 if new_pos < 0: 1096 new_pos = 0 1097 1098 read_offset = new_pos - curr_pos 1099 buff_offset = read_offset + self._offset 1100 1101 if buff_offset >= 0 and buff_offset < len(self._readbuffer): 1102 # Just move the _offset index if the new position is in the _readbuffer 1103 self._offset = buff_offset 1104 read_offset = 0 1105 elif read_offset < 0: 1106 # Position is before the current position. Reset the ZipExtFile 1107 self._fileobj.seek(self._orig_compress_start) 1108 self._running_crc = self._orig_start_crc 1109 self._compress_left = self._orig_compress_size 1110 self._left = self._orig_file_size 1111 self._readbuffer = b'' 1112 self._offset = 0 1113 self._decompressor = _get_decompressor(self._compress_type) 1114 self._eof = False 1115 read_offset = new_pos 1116 if self._decrypter is not None: 1117 self._init_decrypter() 1118 1119 while read_offset > 0: 1120 read_len = min(self.MAX_SEEK_READ, read_offset) 1121 self.read(read_len) 1122 read_offset -= read_len 1123 1124 return self.tell() 1125 1126 def tell(self): 1127 if self.closed: 1128 raise ValueError("tell on closed file.") 1129 if not self._seekable: 1130 raise io.UnsupportedOperation("underlying stream is not seekable") 1131 filepos = self._orig_file_size - self._left - len(self._readbuffer) + self._offset 1132 return filepos 1133 1134 1135class _ZipWriteFile(io.BufferedIOBase): 1136 def __init__(self, zf, zinfo, zip64): 1137 self._zinfo = zinfo 1138 self._zip64 = zip64 1139 self._zipfile = zf 1140 self._compressor = _get_compressor(zinfo.compress_type, 1141 zinfo._compresslevel) 1142 self._file_size = 0 1143 self._compress_size = 0 1144 self._crc = 0 1145 1146 @property 1147 def _fileobj(self): 1148 return self._zipfile.fp 1149 1150 def writable(self): 1151 return True 1152 1153 def write(self, data): 1154 if self.closed: 1155 raise ValueError('I/O operation on closed file.') 1156 1157 # Accept any data that supports the buffer protocol 1158 if isinstance(data, (bytes, bytearray)): 1159 nbytes = len(data) 1160 else: 1161 data = memoryview(data) 1162 nbytes = data.nbytes 1163 self._file_size += nbytes 1164 1165 self._crc = crc32(data, self._crc) 1166 if self._compressor: 1167 data = self._compressor.compress(data) 1168 self._compress_size += len(data) 1169 self._fileobj.write(data) 1170 return nbytes 1171 1172 def close(self): 1173 if self.closed: 1174 return 1175 try: 1176 super().close() 1177 # Flush any data from the compressor, and update header info 1178 if self._compressor: 1179 buf = self._compressor.flush() 1180 self._compress_size += len(buf) 1181 self._fileobj.write(buf) 1182 self._zinfo.compress_size = self._compress_size 1183 else: 1184 self._zinfo.compress_size = self._file_size 1185 self._zinfo.CRC = self._crc 1186 self._zinfo.file_size = self._file_size 1187 1188 if not self._zip64: 1189 if self._file_size > ZIP64_LIMIT: 1190 raise RuntimeError("File size too large, try using force_zip64") 1191 if self._compress_size > ZIP64_LIMIT: 1192 raise RuntimeError("Compressed size too large, try using force_zip64") 1193 1194 # Write updated header info 1195 if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR: 1196 # Write CRC and file sizes after the file data 1197 fmt = '<LLQQ' if self._zip64 else '<LLLL' 1198 self._fileobj.write(struct.pack(fmt, _DD_SIGNATURE, self._zinfo.CRC, 1199 self._zinfo.compress_size, self._zinfo.file_size)) 1200 self._zipfile.start_dir = self._fileobj.tell() 1201 else: 1202 # Seek backwards and write file header (which will now include 1203 # correct CRC and file sizes) 1204 1205 # Preserve current position in file 1206 self._zipfile.start_dir = self._fileobj.tell() 1207 self._fileobj.seek(self._zinfo.header_offset) 1208 self._fileobj.write(self._zinfo.FileHeader(self._zip64)) 1209 self._fileobj.seek(self._zipfile.start_dir) 1210 1211 # Successfully written: Add file to our caches 1212 self._zipfile.filelist.append(self._zinfo) 1213 self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo 1214 finally: 1215 self._zipfile._writing = False 1216 1217 1218 1219class ZipFile: 1220 """ Class with methods to open, read, write, close, list zip files. 1221 1222 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True, 1223 compresslevel=None) 1224 1225 file: Either the path to the file, or a file-like object. 1226 If it is a path, the file will be opened and closed by ZipFile. 1227 mode: The mode can be either read 'r', write 'w', exclusive create 'x', 1228 or append 'a'. 1229 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), 1230 ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). 1231 allowZip64: if True ZipFile will create files with ZIP64 extensions when 1232 needed, otherwise it will raise an exception when this would 1233 be necessary. 1234 compresslevel: None (default for the given compression type) or an integer 1235 specifying the level to pass to the compressor. 1236 When using ZIP_STORED or ZIP_LZMA this keyword has no effect. 1237 When using ZIP_DEFLATED integers 0 through 9 are accepted. 1238 When using ZIP_BZIP2 integers 1 through 9 are accepted. 1239 1240 """ 1241 1242 fp = None # Set here since __del__ checks it 1243 _windows_illegal_name_trans_table = None 1244 1245 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, 1246 compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): 1247 """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x', 1248 or append 'a'.""" 1249 if mode not in ('r', 'w', 'x', 'a'): 1250 raise ValueError("ZipFile requires mode 'r', 'w', 'x', or 'a'") 1251 1252 _check_compression(compression) 1253 1254 self._allowZip64 = allowZip64 1255 self._didModify = False 1256 self.debug = 0 # Level of printing: 0 through 3 1257 self.NameToInfo = {} # Find file info given name 1258 self.filelist = [] # List of ZipInfo instances for archive 1259 self.compression = compression # Method of compression 1260 self.compresslevel = compresslevel 1261 self.mode = mode 1262 self.pwd = None 1263 self._comment = b'' 1264 self._strict_timestamps = strict_timestamps 1265 self.metadata_encoding = metadata_encoding 1266 1267 # Check that we don't try to write with nonconforming codecs 1268 if self.metadata_encoding and mode != 'r': 1269 raise ValueError( 1270 "metadata_encoding is only supported for reading files") 1271 1272 # Check if we were passed a file-like object 1273 if isinstance(file, os.PathLike): 1274 file = os.fspath(file) 1275 if isinstance(file, str): 1276 # No, it's a filename 1277 self._filePassed = 0 1278 self.filename = file 1279 modeDict = {'r' : 'rb', 'w': 'w+b', 'x': 'x+b', 'a' : 'r+b', 1280 'r+b': 'w+b', 'w+b': 'wb', 'x+b': 'xb'} 1281 filemode = modeDict[mode] 1282 while True: 1283 try: 1284 self.fp = io.open(file, filemode) 1285 except OSError: 1286 if filemode in modeDict: 1287 filemode = modeDict[filemode] 1288 continue 1289 raise 1290 break 1291 else: 1292 self._filePassed = 1 1293 self.fp = file 1294 self.filename = getattr(file, 'name', None) 1295 self._fileRefCnt = 1 1296 self._lock = threading.RLock() 1297 self._seekable = True 1298 self._writing = False 1299 1300 try: 1301 if mode == 'r': 1302 self._RealGetContents() 1303 elif mode in ('w', 'x'): 1304 # set the modified flag so central directory gets written 1305 # even if no files are added to the archive 1306 self._didModify = True 1307 try: 1308 self.start_dir = self.fp.tell() 1309 except (AttributeError, OSError): 1310 self.fp = _Tellable(self.fp) 1311 self.start_dir = 0 1312 self._seekable = False 1313 else: 1314 # Some file-like objects can provide tell() but not seek() 1315 try: 1316 self.fp.seek(self.start_dir) 1317 except (AttributeError, OSError): 1318 self._seekable = False 1319 elif mode == 'a': 1320 try: 1321 # See if file is a zip file 1322 self._RealGetContents() 1323 # seek to start of directory and overwrite 1324 self.fp.seek(self.start_dir) 1325 except BadZipFile: 1326 # file is not a zip file, just append 1327 self.fp.seek(0, 2) 1328 1329 # set the modified flag so central directory gets written 1330 # even if no files are added to the archive 1331 self._didModify = True 1332 self.start_dir = self.fp.tell() 1333 else: 1334 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'") 1335 except: 1336 fp = self.fp 1337 self.fp = None 1338 self._fpclose(fp) 1339 raise 1340 1341 def __enter__(self): 1342 return self 1343 1344 def __exit__(self, type, value, traceback): 1345 self.close() 1346 1347 def __repr__(self): 1348 result = ['<%s.%s' % (self.__class__.__module__, 1349 self.__class__.__qualname__)] 1350 if self.fp is not None: 1351 if self._filePassed: 1352 result.append(' file=%r' % self.fp) 1353 elif self.filename is not None: 1354 result.append(' filename=%r' % self.filename) 1355 result.append(' mode=%r' % self.mode) 1356 else: 1357 result.append(' [closed]') 1358 result.append('>') 1359 return ''.join(result) 1360 1361 def _RealGetContents(self): 1362 """Read in the table of contents for the ZIP file.""" 1363 fp = self.fp 1364 try: 1365 endrec = _EndRecData(fp) 1366 except OSError: 1367 raise BadZipFile("File is not a zip file") 1368 if not endrec: 1369 raise BadZipFile("File is not a zip file") 1370 if self.debug > 1: 1371 print(endrec) 1372 size_cd = endrec[_ECD_SIZE] # bytes in central directory 1373 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 1374 self._comment = endrec[_ECD_COMMENT] # archive comment 1375 1376 # "concat" is zero, unless zip was concatenated to another file 1377 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd 1378 if endrec[_ECD_SIGNATURE] == stringEndArchive64: 1379 # If Zip64 extension structures are present, account for them 1380 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) 1381 1382 if self.debug > 2: 1383 inferred = concat + offset_cd 1384 print("given, inferred, offset", offset_cd, inferred, concat) 1385 # self.start_dir: Position of start of central directory 1386 self.start_dir = offset_cd + concat 1387 if self.start_dir < 0: 1388 raise BadZipFile("Bad offset for central directory") 1389 fp.seek(self.start_dir, 0) 1390 data = fp.read(size_cd) 1391 fp = io.BytesIO(data) 1392 total = 0 1393 while total < size_cd: 1394 centdir = fp.read(sizeCentralDir) 1395 if len(centdir) != sizeCentralDir: 1396 raise BadZipFile("Truncated central directory") 1397 centdir = struct.unpack(structCentralDir, centdir) 1398 if centdir[_CD_SIGNATURE] != stringCentralDir: 1399 raise BadZipFile("Bad magic number for central directory") 1400 if self.debug > 2: 1401 print(centdir) 1402 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) 1403 flags = centdir[_CD_FLAG_BITS] 1404 if flags & _MASK_UTF_FILENAME: 1405 # UTF-8 file names extension 1406 filename = filename.decode('utf-8') 1407 else: 1408 # Historical ZIP filename encoding 1409 filename = filename.decode(self.metadata_encoding or 'cp437') 1410 # Create ZipInfo instance to store file information 1411 x = ZipInfo(filename) 1412 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) 1413 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) 1414 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] 1415 (x.create_version, x.create_system, x.extract_version, x.reserved, 1416 x.flag_bits, x.compress_type, t, d, 1417 x.CRC, x.compress_size, x.file_size) = centdir[1:12] 1418 if x.extract_version > MAX_EXTRACT_VERSION: 1419 raise NotImplementedError("zip file version %.1f" % 1420 (x.extract_version / 10)) 1421 x.volume, x.internal_attr, x.external_attr = centdir[15:18] 1422 # Convert date/time code to (year, month, day, hour, min, sec) 1423 x._raw_time = t 1424 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, 1425 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) 1426 1427 x._decodeExtra() 1428 x.header_offset = x.header_offset + concat 1429 self.filelist.append(x) 1430 self.NameToInfo[x.filename] = x 1431 1432 # update total bytes read from central directory 1433 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] 1434 + centdir[_CD_EXTRA_FIELD_LENGTH] 1435 + centdir[_CD_COMMENT_LENGTH]) 1436 1437 if self.debug > 2: 1438 print("total", total) 1439 1440 1441 def namelist(self): 1442 """Return a list of file names in the archive.""" 1443 return [data.filename for data in self.filelist] 1444 1445 def infolist(self): 1446 """Return a list of class ZipInfo instances for files in the 1447 archive.""" 1448 return self.filelist 1449 1450 def printdir(self, file=None): 1451 """Print a table of contents for the zip file.""" 1452 print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"), 1453 file=file) 1454 for zinfo in self.filelist: 1455 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6] 1456 print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size), 1457 file=file) 1458 1459 def testzip(self): 1460 """Read all the files and check the CRC.""" 1461 chunk_size = 2 ** 20 1462 for zinfo in self.filelist: 1463 try: 1464 # Read by chunks, to avoid an OverflowError or a 1465 # MemoryError with very large embedded files. 1466 with self.open(zinfo.filename, "r") as f: 1467 while f.read(chunk_size): # Check CRC-32 1468 pass 1469 except BadZipFile: 1470 return zinfo.filename 1471 1472 def getinfo(self, name): 1473 """Return the instance of ZipInfo given 'name'.""" 1474 info = self.NameToInfo.get(name) 1475 if info is None: 1476 raise KeyError( 1477 'There is no item named %r in the archive' % name) 1478 1479 return info 1480 1481 def setpassword(self, pwd): 1482 """Set default password for encrypted files.""" 1483 if pwd and not isinstance(pwd, bytes): 1484 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1485 if pwd: 1486 self.pwd = pwd 1487 else: 1488 self.pwd = None 1489 1490 @property 1491 def comment(self): 1492 """The comment text associated with the ZIP file.""" 1493 return self._comment 1494 1495 @comment.setter 1496 def comment(self, comment): 1497 if not isinstance(comment, bytes): 1498 raise TypeError("comment: expected bytes, got %s" % type(comment).__name__) 1499 # check for valid comment length 1500 if len(comment) > ZIP_MAX_COMMENT: 1501 import warnings 1502 warnings.warn('Archive comment is too long; truncating to %d bytes' 1503 % ZIP_MAX_COMMENT, stacklevel=2) 1504 comment = comment[:ZIP_MAX_COMMENT] 1505 self._comment = comment 1506 self._didModify = True 1507 1508 def read(self, name, pwd=None): 1509 """Return file bytes for name.""" 1510 with self.open(name, "r", pwd) as fp: 1511 return fp.read() 1512 1513 def open(self, name, mode="r", pwd=None, *, force_zip64=False): 1514 """Return file-like object for 'name'. 1515 1516 name is a string for the file name within the ZIP file, or a ZipInfo 1517 object. 1518 1519 mode should be 'r' to read a file already in the ZIP file, or 'w' to 1520 write to a file newly added to the archive. 1521 1522 pwd is the password to decrypt files (only used for reading). 1523 1524 When writing, if the file size is not known in advance but may exceed 1525 2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large 1526 files. If the size is known in advance, it is best to pass a ZipInfo 1527 instance for name, with zinfo.file_size set. 1528 """ 1529 if mode not in {"r", "w"}: 1530 raise ValueError('open() requires mode "r" or "w"') 1531 if pwd and (mode == "w"): 1532 raise ValueError("pwd is only supported for reading files") 1533 if not self.fp: 1534 raise ValueError( 1535 "Attempt to use ZIP archive that was already closed") 1536 1537 # Make sure we have an info object 1538 if isinstance(name, ZipInfo): 1539 # 'name' is already an info object 1540 zinfo = name 1541 elif mode == 'w': 1542 zinfo = ZipInfo(name) 1543 zinfo.compress_type = self.compression 1544 zinfo._compresslevel = self.compresslevel 1545 else: 1546 # Get info object for name 1547 zinfo = self.getinfo(name) 1548 1549 if mode == 'w': 1550 return self._open_to_write(zinfo, force_zip64=force_zip64) 1551 1552 if self._writing: 1553 raise ValueError("Can't read from the ZIP file while there " 1554 "is an open writing handle on it. " 1555 "Close the writing handle before trying to read.") 1556 1557 # Open for reading: 1558 self._fileRefCnt += 1 1559 zef_file = _SharedFile(self.fp, zinfo.header_offset, 1560 self._fpclose, self._lock, lambda: self._writing) 1561 try: 1562 # Skip the file header: 1563 fheader = zef_file.read(sizeFileHeader) 1564 if len(fheader) != sizeFileHeader: 1565 raise BadZipFile("Truncated file header") 1566 fheader = struct.unpack(structFileHeader, fheader) 1567 if fheader[_FH_SIGNATURE] != stringFileHeader: 1568 raise BadZipFile("Bad magic number for file header") 1569 1570 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 1571 if fheader[_FH_EXTRA_FIELD_LENGTH]: 1572 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 1573 1574 if zinfo.flag_bits & _MASK_COMPRESSED_PATCH: 1575 # Zip 2.7: compressed patched data 1576 raise NotImplementedError("compressed patched data (flag bit 5)") 1577 1578 if zinfo.flag_bits & _MASK_STRONG_ENCRYPTION: 1579 # strong encryption 1580 raise NotImplementedError("strong encryption (flag bit 6)") 1581 1582 if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & _MASK_UTF_FILENAME: 1583 # UTF-8 filename 1584 fname_str = fname.decode("utf-8") 1585 else: 1586 fname_str = fname.decode(self.metadata_encoding or "cp437") 1587 1588 if fname_str != zinfo.orig_filename: 1589 raise BadZipFile( 1590 'File name in directory %r and header %r differ.' 1591 % (zinfo.orig_filename, fname)) 1592 1593 # check for encrypted flag & handle password 1594 is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED 1595 if is_encrypted: 1596 if not pwd: 1597 pwd = self.pwd 1598 if pwd and not isinstance(pwd, bytes): 1599 raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__) 1600 if not pwd: 1601 raise RuntimeError("File %r is encrypted, password " 1602 "required for extraction" % name) 1603 else: 1604 pwd = None 1605 1606 return ZipExtFile(zef_file, mode, zinfo, pwd, True) 1607 except: 1608 zef_file.close() 1609 raise 1610 1611 def _open_to_write(self, zinfo, force_zip64=False): 1612 if force_zip64 and not self._allowZip64: 1613 raise ValueError( 1614 "force_zip64 is True, but allowZip64 was False when opening " 1615 "the ZIP file." 1616 ) 1617 if self._writing: 1618 raise ValueError("Can't write to the ZIP file while there is " 1619 "another write handle open on it. " 1620 "Close the first handle before opening another.") 1621 1622 # Size and CRC are overwritten with correct data after processing the file 1623 zinfo.compress_size = 0 1624 zinfo.CRC = 0 1625 1626 zinfo.flag_bits = 0x00 1627 if zinfo.compress_type == ZIP_LZMA: 1628 # Compressed data includes an end-of-stream (EOS) marker 1629 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1630 if not self._seekable: 1631 zinfo.flag_bits |= _MASK_USE_DATA_DESCRIPTOR 1632 1633 if not zinfo.external_attr: 1634 zinfo.external_attr = 0o600 << 16 # permissions: ?rw------- 1635 1636 # Compressed size can be larger than uncompressed size 1637 zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT) 1638 if not self._allowZip64 and zip64: 1639 raise LargeZipFile("Filesize would require ZIP64 extensions") 1640 1641 if self._seekable: 1642 self.fp.seek(self.start_dir) 1643 zinfo.header_offset = self.fp.tell() 1644 1645 self._writecheck(zinfo) 1646 self._didModify = True 1647 1648 self.fp.write(zinfo.FileHeader(zip64)) 1649 1650 self._writing = True 1651 return _ZipWriteFile(self, zinfo, zip64) 1652 1653 def extract(self, member, path=None, pwd=None): 1654 """Extract a member from the archive to the current working directory, 1655 using its full name. Its file information is extracted as accurately 1656 as possible. `member' may be a filename or a ZipInfo object. You can 1657 specify a different directory using `path'. 1658 """ 1659 if path is None: 1660 path = os.getcwd() 1661 else: 1662 path = os.fspath(path) 1663 1664 return self._extract_member(member, path, pwd) 1665 1666 def extractall(self, path=None, members=None, pwd=None): 1667 """Extract all members from the archive to the current working 1668 directory. `path' specifies a different directory to extract to. 1669 `members' is optional and must be a subset of the list returned 1670 by namelist(). 1671 """ 1672 if members is None: 1673 members = self.namelist() 1674 1675 if path is None: 1676 path = os.getcwd() 1677 else: 1678 path = os.fspath(path) 1679 1680 for zipinfo in members: 1681 self._extract_member(zipinfo, path, pwd) 1682 1683 @classmethod 1684 def _sanitize_windows_name(cls, arcname, pathsep): 1685 """Replace bad characters and remove trailing dots from parts.""" 1686 table = cls._windows_illegal_name_trans_table 1687 if not table: 1688 illegal = ':<>|"?*' 1689 table = str.maketrans(illegal, '_' * len(illegal)) 1690 cls._windows_illegal_name_trans_table = table 1691 arcname = arcname.translate(table) 1692 # remove trailing dots 1693 arcname = (x.rstrip('.') for x in arcname.split(pathsep)) 1694 # rejoin, removing empty parts. 1695 arcname = pathsep.join(x for x in arcname if x) 1696 return arcname 1697 1698 def _extract_member(self, member, targetpath, pwd): 1699 """Extract the ZipInfo object 'member' to a physical 1700 file on the path targetpath. 1701 """ 1702 if not isinstance(member, ZipInfo): 1703 member = self.getinfo(member) 1704 1705 # build the destination pathname, replacing 1706 # forward slashes to platform specific separators. 1707 arcname = member.filename.replace('/', os.path.sep) 1708 1709 if os.path.altsep: 1710 arcname = arcname.replace(os.path.altsep, os.path.sep) 1711 # interpret absolute pathname as relative, remove drive letter or 1712 # UNC path, redundant separators, "." and ".." components. 1713 arcname = os.path.splitdrive(arcname)[1] 1714 invalid_path_parts = ('', os.path.curdir, os.path.pardir) 1715 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1716 if x not in invalid_path_parts) 1717 if os.path.sep == '\\': 1718 # filter illegal characters on Windows 1719 arcname = self._sanitize_windows_name(arcname, os.path.sep) 1720 1721 targetpath = os.path.join(targetpath, arcname) 1722 targetpath = os.path.normpath(targetpath) 1723 1724 # Create all upper directories if necessary. 1725 upperdirs = os.path.dirname(targetpath) 1726 if upperdirs and not os.path.exists(upperdirs): 1727 os.makedirs(upperdirs) 1728 1729 if member.is_dir(): 1730 if not os.path.isdir(targetpath): 1731 os.mkdir(targetpath) 1732 return targetpath 1733 1734 with self.open(member, pwd=pwd) as source, \ 1735 open(targetpath, "wb") as target: 1736 shutil.copyfileobj(source, target) 1737 1738 return targetpath 1739 1740 def _writecheck(self, zinfo): 1741 """Check for errors before writing a file to the archive.""" 1742 if zinfo.filename in self.NameToInfo: 1743 import warnings 1744 warnings.warn('Duplicate name: %r' % zinfo.filename, stacklevel=3) 1745 if self.mode not in ('w', 'x', 'a'): 1746 raise ValueError("write() requires mode 'w', 'x', or 'a'") 1747 if not self.fp: 1748 raise ValueError( 1749 "Attempt to write ZIP archive that was already closed") 1750 _check_compression(zinfo.compress_type) 1751 if not self._allowZip64: 1752 requires_zip64 = None 1753 if len(self.filelist) >= ZIP_FILECOUNT_LIMIT: 1754 requires_zip64 = "Files count" 1755 elif zinfo.file_size > ZIP64_LIMIT: 1756 requires_zip64 = "Filesize" 1757 elif zinfo.header_offset > ZIP64_LIMIT: 1758 requires_zip64 = "Zipfile size" 1759 if requires_zip64: 1760 raise LargeZipFile(requires_zip64 + 1761 " would require ZIP64 extensions") 1762 1763 def write(self, filename, arcname=None, 1764 compress_type=None, compresslevel=None): 1765 """Put the bytes from filename into the archive under the name 1766 arcname.""" 1767 if not self.fp: 1768 raise ValueError( 1769 "Attempt to write to ZIP archive that was already closed") 1770 if self._writing: 1771 raise ValueError( 1772 "Can't write to ZIP archive while an open writing handle exists" 1773 ) 1774 1775 zinfo = ZipInfo.from_file(filename, arcname, 1776 strict_timestamps=self._strict_timestamps) 1777 1778 if zinfo.is_dir(): 1779 zinfo.compress_size = 0 1780 zinfo.CRC = 0 1781 self.mkdir(zinfo) 1782 else: 1783 if compress_type is not None: 1784 zinfo.compress_type = compress_type 1785 else: 1786 zinfo.compress_type = self.compression 1787 1788 if compresslevel is not None: 1789 zinfo._compresslevel = compresslevel 1790 else: 1791 zinfo._compresslevel = self.compresslevel 1792 1793 with open(filename, "rb") as src, self.open(zinfo, 'w') as dest: 1794 shutil.copyfileobj(src, dest, 1024*8) 1795 1796 def writestr(self, zinfo_or_arcname, data, 1797 compress_type=None, compresslevel=None): 1798 """Write a file into the archive. The contents is 'data', which 1799 may be either a 'str' or a 'bytes' instance; if it is a 'str', 1800 it is encoded as UTF-8 first. 1801 'zinfo_or_arcname' is either a ZipInfo instance or 1802 the name of the file in the archive.""" 1803 if isinstance(data, str): 1804 data = data.encode("utf-8") 1805 if not isinstance(zinfo_or_arcname, ZipInfo): 1806 zinfo = ZipInfo(filename=zinfo_or_arcname, 1807 date_time=time.localtime(time.time())[:6]) 1808 zinfo.compress_type = self.compression 1809 zinfo._compresslevel = self.compresslevel 1810 if zinfo.filename[-1] == '/': 1811 zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x 1812 zinfo.external_attr |= 0x10 # MS-DOS directory flag 1813 else: 1814 zinfo.external_attr = 0o600 << 16 # ?rw------- 1815 else: 1816 zinfo = zinfo_or_arcname 1817 1818 if not self.fp: 1819 raise ValueError( 1820 "Attempt to write to ZIP archive that was already closed") 1821 if self._writing: 1822 raise ValueError( 1823 "Can't write to ZIP archive while an open writing handle exists." 1824 ) 1825 1826 if compress_type is not None: 1827 zinfo.compress_type = compress_type 1828 1829 if compresslevel is not None: 1830 zinfo._compresslevel = compresslevel 1831 1832 zinfo.file_size = len(data) # Uncompressed size 1833 with self._lock: 1834 with self.open(zinfo, mode='w') as dest: 1835 dest.write(data) 1836 1837 def mkdir(self, zinfo_or_directory_name, mode=511): 1838 """Creates a directory inside the zip archive.""" 1839 if isinstance(zinfo_or_directory_name, ZipInfo): 1840 zinfo = zinfo_or_directory_name 1841 if not zinfo.is_dir(): 1842 raise ValueError("The given ZipInfo does not describe a directory") 1843 elif isinstance(zinfo_or_directory_name, str): 1844 directory_name = zinfo_or_directory_name 1845 if not directory_name.endswith("/"): 1846 directory_name += "/" 1847 zinfo = ZipInfo(directory_name) 1848 zinfo.compress_size = 0 1849 zinfo.CRC = 0 1850 zinfo.external_attr = ((0o40000 | mode) & 0xFFFF) << 16 1851 zinfo.file_size = 0 1852 zinfo.external_attr |= 0x10 1853 else: 1854 raise TypeError("Expected type str or ZipInfo") 1855 1856 with self._lock: 1857 if self._seekable: 1858 self.fp.seek(self.start_dir) 1859 zinfo.header_offset = self.fp.tell() # Start of header bytes 1860 if zinfo.compress_type == ZIP_LZMA: 1861 # Compressed data includes an end-of-stream (EOS) marker 1862 zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 1863 1864 self._writecheck(zinfo) 1865 self._didModify = True 1866 1867 self.filelist.append(zinfo) 1868 self.NameToInfo[zinfo.filename] = zinfo 1869 self.fp.write(zinfo.FileHeader(False)) 1870 self.start_dir = self.fp.tell() 1871 1872 def __del__(self): 1873 """Call the "close()" method in case the user forgot.""" 1874 self.close() 1875 1876 def close(self): 1877 """Close the file, and for mode 'w', 'x' and 'a' write the ending 1878 records.""" 1879 if self.fp is None: 1880 return 1881 1882 if self._writing: 1883 raise ValueError("Can't close the ZIP file while there is " 1884 "an open writing handle on it. " 1885 "Close the writing handle before closing the zip.") 1886 1887 try: 1888 if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records 1889 with self._lock: 1890 if self._seekable: 1891 self.fp.seek(self.start_dir) 1892 self._write_end_record() 1893 finally: 1894 fp = self.fp 1895 self.fp = None 1896 self._fpclose(fp) 1897 1898 def _write_end_record(self): 1899 for zinfo in self.filelist: # write central directory 1900 dt = zinfo.date_time 1901 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1902 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1903 extra = [] 1904 if zinfo.file_size > ZIP64_LIMIT \ 1905 or zinfo.compress_size > ZIP64_LIMIT: 1906 extra.append(zinfo.file_size) 1907 extra.append(zinfo.compress_size) 1908 file_size = 0xffffffff 1909 compress_size = 0xffffffff 1910 else: 1911 file_size = zinfo.file_size 1912 compress_size = zinfo.compress_size 1913 1914 if zinfo.header_offset > ZIP64_LIMIT: 1915 extra.append(zinfo.header_offset) 1916 header_offset = 0xffffffff 1917 else: 1918 header_offset = zinfo.header_offset 1919 1920 extra_data = zinfo.extra 1921 min_version = 0 1922 if extra: 1923 # Append a ZIP64 field to the extra's 1924 extra_data = _strip_extra(extra_data, (1,)) 1925 extra_data = struct.pack( 1926 '<HH' + 'Q'*len(extra), 1927 1, 8*len(extra), *extra) + extra_data 1928 1929 min_version = ZIP64_VERSION 1930 1931 if zinfo.compress_type == ZIP_BZIP2: 1932 min_version = max(BZIP2_VERSION, min_version) 1933 elif zinfo.compress_type == ZIP_LZMA: 1934 min_version = max(LZMA_VERSION, min_version) 1935 1936 extract_version = max(min_version, zinfo.extract_version) 1937 create_version = max(min_version, zinfo.create_version) 1938 filename, flag_bits = zinfo._encodeFilenameFlags() 1939 centdir = struct.pack(structCentralDir, 1940 stringCentralDir, create_version, 1941 zinfo.create_system, extract_version, zinfo.reserved, 1942 flag_bits, zinfo.compress_type, dostime, dosdate, 1943 zinfo.CRC, compress_size, file_size, 1944 len(filename), len(extra_data), len(zinfo.comment), 1945 0, zinfo.internal_attr, zinfo.external_attr, 1946 header_offset) 1947 self.fp.write(centdir) 1948 self.fp.write(filename) 1949 self.fp.write(extra_data) 1950 self.fp.write(zinfo.comment) 1951 1952 pos2 = self.fp.tell() 1953 # Write end-of-zip-archive record 1954 centDirCount = len(self.filelist) 1955 centDirSize = pos2 - self.start_dir 1956 centDirOffset = self.start_dir 1957 requires_zip64 = None 1958 if centDirCount > ZIP_FILECOUNT_LIMIT: 1959 requires_zip64 = "Files count" 1960 elif centDirOffset > ZIP64_LIMIT: 1961 requires_zip64 = "Central directory offset" 1962 elif centDirSize > ZIP64_LIMIT: 1963 requires_zip64 = "Central directory size" 1964 if requires_zip64: 1965 # Need to write the ZIP64 end-of-archive records 1966 if not self._allowZip64: 1967 raise LargeZipFile(requires_zip64 + 1968 " would require ZIP64 extensions") 1969 zip64endrec = struct.pack( 1970 structEndArchive64, stringEndArchive64, 1971 44, 45, 45, 0, 0, centDirCount, centDirCount, 1972 centDirSize, centDirOffset) 1973 self.fp.write(zip64endrec) 1974 1975 zip64locrec = struct.pack( 1976 structEndArchive64Locator, 1977 stringEndArchive64Locator, 0, pos2, 1) 1978 self.fp.write(zip64locrec) 1979 centDirCount = min(centDirCount, 0xFFFF) 1980 centDirSize = min(centDirSize, 0xFFFFFFFF) 1981 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1982 1983 endrec = struct.pack(structEndArchive, stringEndArchive, 1984 0, 0, centDirCount, centDirCount, 1985 centDirSize, centDirOffset, len(self._comment)) 1986 self.fp.write(endrec) 1987 self.fp.write(self._comment) 1988 if self.mode == "a": 1989 self.fp.truncate() 1990 self.fp.flush() 1991 1992 def _fpclose(self, fp): 1993 assert self._fileRefCnt > 0 1994 self._fileRefCnt -= 1 1995 if not self._fileRefCnt and not self._filePassed: 1996 fp.close() 1997 1998 1999class PyZipFile(ZipFile): 2000 """Class to create ZIP archives with Python library files and packages.""" 2001 2002 def __init__(self, file, mode="r", compression=ZIP_STORED, 2003 allowZip64=True, optimize=-1): 2004 ZipFile.__init__(self, file, mode=mode, compression=compression, 2005 allowZip64=allowZip64) 2006 self._optimize = optimize 2007 2008 def writepy(self, pathname, basename="", filterfunc=None): 2009 """Add all files from "pathname" to the ZIP archive. 2010 2011 If pathname is a package directory, search the directory and 2012 all package subdirectories recursively for all *.py and enter 2013 the modules into the archive. If pathname is a plain 2014 directory, listdir *.py and enter all modules. Else, pathname 2015 must be a Python *.py file and the module will be put into the 2016 archive. Added modules are always module.pyc. 2017 This method will compile the module.py into module.pyc if 2018 necessary. 2019 If filterfunc(pathname) is given, it is called with every argument. 2020 When it is False, the file or directory is skipped. 2021 """ 2022 pathname = os.fspath(pathname) 2023 if filterfunc and not filterfunc(pathname): 2024 if self.debug: 2025 label = 'path' if os.path.isdir(pathname) else 'file' 2026 print('%s %r skipped by filterfunc' % (label, pathname)) 2027 return 2028 dir, name = os.path.split(pathname) 2029 if os.path.isdir(pathname): 2030 initname = os.path.join(pathname, "__init__.py") 2031 if os.path.isfile(initname): 2032 # This is a package directory, add it 2033 if basename: 2034 basename = "%s/%s" % (basename, name) 2035 else: 2036 basename = name 2037 if self.debug: 2038 print("Adding package in", pathname, "as", basename) 2039 fname, arcname = self._get_codename(initname[0:-3], basename) 2040 if self.debug: 2041 print("Adding", arcname) 2042 self.write(fname, arcname) 2043 dirlist = sorted(os.listdir(pathname)) 2044 dirlist.remove("__init__.py") 2045 # Add all *.py files and package subdirectories 2046 for filename in dirlist: 2047 path = os.path.join(pathname, filename) 2048 root, ext = os.path.splitext(filename) 2049 if os.path.isdir(path): 2050 if os.path.isfile(os.path.join(path, "__init__.py")): 2051 # This is a package directory, add it 2052 self.writepy(path, basename, 2053 filterfunc=filterfunc) # Recursive call 2054 elif ext == ".py": 2055 if filterfunc and not filterfunc(path): 2056 if self.debug: 2057 print('file %r skipped by filterfunc' % path) 2058 continue 2059 fname, arcname = self._get_codename(path[0:-3], 2060 basename) 2061 if self.debug: 2062 print("Adding", arcname) 2063 self.write(fname, arcname) 2064 else: 2065 # This is NOT a package directory, add its files at top level 2066 if self.debug: 2067 print("Adding files from directory", pathname) 2068 for filename in sorted(os.listdir(pathname)): 2069 path = os.path.join(pathname, filename) 2070 root, ext = os.path.splitext(filename) 2071 if ext == ".py": 2072 if filterfunc and not filterfunc(path): 2073 if self.debug: 2074 print('file %r skipped by filterfunc' % path) 2075 continue 2076 fname, arcname = self._get_codename(path[0:-3], 2077 basename) 2078 if self.debug: 2079 print("Adding", arcname) 2080 self.write(fname, arcname) 2081 else: 2082 if pathname[-3:] != ".py": 2083 raise RuntimeError( 2084 'Files added with writepy() must end with ".py"') 2085 fname, arcname = self._get_codename(pathname[0:-3], basename) 2086 if self.debug: 2087 print("Adding file", arcname) 2088 self.write(fname, arcname) 2089 2090 def _get_codename(self, pathname, basename): 2091 """Return (filename, archivename) for the path. 2092 2093 Given a module name path, return the correct file path and 2094 archive name, compiling if necessary. For example, given 2095 /python/lib/string, return (/python/lib/string.pyc, string). 2096 """ 2097 def _compile(file, optimize=-1): 2098 import py_compile 2099 if self.debug: 2100 print("Compiling", file) 2101 try: 2102 py_compile.compile(file, doraise=True, optimize=optimize) 2103 except py_compile.PyCompileError as err: 2104 print(err.msg) 2105 return False 2106 return True 2107 2108 file_py = pathname + ".py" 2109 file_pyc = pathname + ".pyc" 2110 pycache_opt0 = importlib.util.cache_from_source(file_py, optimization='') 2111 pycache_opt1 = importlib.util.cache_from_source(file_py, optimization=1) 2112 pycache_opt2 = importlib.util.cache_from_source(file_py, optimization=2) 2113 if self._optimize == -1: 2114 # legacy mode: use whatever file is present 2115 if (os.path.isfile(file_pyc) and 2116 os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime): 2117 # Use .pyc file. 2118 arcname = fname = file_pyc 2119 elif (os.path.isfile(pycache_opt0) and 2120 os.stat(pycache_opt0).st_mtime >= os.stat(file_py).st_mtime): 2121 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2122 # file name in the archive. 2123 fname = pycache_opt0 2124 arcname = file_pyc 2125 elif (os.path.isfile(pycache_opt1) and 2126 os.stat(pycache_opt1).st_mtime >= os.stat(file_py).st_mtime): 2127 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2128 # file name in the archive. 2129 fname = pycache_opt1 2130 arcname = file_pyc 2131 elif (os.path.isfile(pycache_opt2) and 2132 os.stat(pycache_opt2).st_mtime >= os.stat(file_py).st_mtime): 2133 # Use the __pycache__/*.pyc file, but write it to the legacy pyc 2134 # file name in the archive. 2135 fname = pycache_opt2 2136 arcname = file_pyc 2137 else: 2138 # Compile py into PEP 3147 pyc file. 2139 if _compile(file_py): 2140 if sys.flags.optimize == 0: 2141 fname = pycache_opt0 2142 elif sys.flags.optimize == 1: 2143 fname = pycache_opt1 2144 else: 2145 fname = pycache_opt2 2146 arcname = file_pyc 2147 else: 2148 fname = arcname = file_py 2149 else: 2150 # new mode: use given optimization level 2151 if self._optimize == 0: 2152 fname = pycache_opt0 2153 arcname = file_pyc 2154 else: 2155 arcname = file_pyc 2156 if self._optimize == 1: 2157 fname = pycache_opt1 2158 elif self._optimize == 2: 2159 fname = pycache_opt2 2160 else: 2161 msg = "invalid value for 'optimize': {!r}".format(self._optimize) 2162 raise ValueError(msg) 2163 if not (os.path.isfile(fname) and 2164 os.stat(fname).st_mtime >= os.stat(file_py).st_mtime): 2165 if not _compile(file_py, optimize=self._optimize): 2166 fname = arcname = file_py 2167 archivename = os.path.split(arcname)[1] 2168 if basename: 2169 archivename = "%s/%s" % (basename, archivename) 2170 return (fname, archivename) 2171 2172 2173def _parents(path): 2174 """ 2175 Given a path with elements separated by 2176 posixpath.sep, generate all parents of that path. 2177 2178 >>> list(_parents('b/d')) 2179 ['b'] 2180 >>> list(_parents('/b/d/')) 2181 ['/b'] 2182 >>> list(_parents('b/d/f/')) 2183 ['b/d', 'b'] 2184 >>> list(_parents('b')) 2185 [] 2186 >>> list(_parents('')) 2187 [] 2188 """ 2189 return itertools.islice(_ancestry(path), 1, None) 2190 2191 2192def _ancestry(path): 2193 """ 2194 Given a path with elements separated by 2195 posixpath.sep, generate all elements of that path 2196 2197 >>> list(_ancestry('b/d')) 2198 ['b/d', 'b'] 2199 >>> list(_ancestry('/b/d/')) 2200 ['/b/d', '/b'] 2201 >>> list(_ancestry('b/d/f/')) 2202 ['b/d/f', 'b/d', 'b'] 2203 >>> list(_ancestry('b')) 2204 ['b'] 2205 >>> list(_ancestry('')) 2206 [] 2207 """ 2208 path = path.rstrip(posixpath.sep) 2209 while path and path != posixpath.sep: 2210 yield path 2211 path, tail = posixpath.split(path) 2212 2213 2214_dedupe = dict.fromkeys 2215"""Deduplicate an iterable in original order""" 2216 2217 2218def _difference(minuend, subtrahend): 2219 """ 2220 Return items in minuend not in subtrahend, retaining order 2221 with O(1) lookup. 2222 """ 2223 return itertools.filterfalse(set(subtrahend).__contains__, minuend) 2224 2225 2226class CompleteDirs(ZipFile): 2227 """ 2228 A ZipFile subclass that ensures that implied directories 2229 are always included in the namelist. 2230 """ 2231 2232 @staticmethod 2233 def _implied_dirs(names): 2234 parents = itertools.chain.from_iterable(map(_parents, names)) 2235 as_dirs = (p + posixpath.sep for p in parents) 2236 return _dedupe(_difference(as_dirs, names)) 2237 2238 def namelist(self): 2239 names = super(CompleteDirs, self).namelist() 2240 return names + list(self._implied_dirs(names)) 2241 2242 def _name_set(self): 2243 return set(self.namelist()) 2244 2245 def resolve_dir(self, name): 2246 """ 2247 If the name represents a directory, return that name 2248 as a directory (with the trailing slash). 2249 """ 2250 names = self._name_set() 2251 dirname = name + '/' 2252 dir_match = name not in names and dirname in names 2253 return dirname if dir_match else name 2254 2255 def getinfo(self, name): 2256 """ 2257 Supplement getinfo for implied dirs. 2258 """ 2259 try: 2260 return super().getinfo(name) 2261 except KeyError: 2262 if not name.endswith('/') or name not in self._name_set(): 2263 raise 2264 return ZipInfo(filename=name) 2265 2266 @classmethod 2267 def make(cls, source): 2268 """ 2269 Given a source (filename or zipfile), return an 2270 appropriate CompleteDirs subclass. 2271 """ 2272 if isinstance(source, CompleteDirs): 2273 return source 2274 2275 if not isinstance(source, ZipFile): 2276 return cls(source) 2277 2278 # Only allow for FastLookup when supplied zipfile is read-only 2279 if 'r' not in source.mode: 2280 cls = CompleteDirs 2281 2282 source.__class__ = cls 2283 return source 2284 2285 2286class FastLookup(CompleteDirs): 2287 """ 2288 ZipFile subclass to ensure implicit 2289 dirs exist and are resolved rapidly. 2290 """ 2291 2292 def namelist(self): 2293 with contextlib.suppress(AttributeError): 2294 return self.__names 2295 self.__names = super(FastLookup, self).namelist() 2296 return self.__names 2297 2298 def _name_set(self): 2299 with contextlib.suppress(AttributeError): 2300 return self.__lookup 2301 self.__lookup = super(FastLookup, self)._name_set() 2302 return self.__lookup 2303 2304 2305def _extract_text_encoding(encoding=None, *args, **kwargs): 2306 # stacklevel=3 so that the caller of the caller see any warning. 2307 return io.text_encoding(encoding, 3), args, kwargs 2308 2309 2310class Path: 2311 """ 2312 A pathlib-compatible interface for zip files. 2313 2314 Consider a zip file with this structure:: 2315 2316 . 2317 ├── a.txt 2318 └── b 2319 ├── c.txt 2320 └── d 2321 └── e.txt 2322 2323 >>> data = io.BytesIO() 2324 >>> zf = ZipFile(data, 'w') 2325 >>> zf.writestr('a.txt', 'content of a') 2326 >>> zf.writestr('b/c.txt', 'content of c') 2327 >>> zf.writestr('b/d/e.txt', 'content of e') 2328 >>> zf.filename = 'mem/abcde.zip' 2329 2330 Path accepts the zipfile object itself or a filename 2331 2332 >>> root = Path(zf) 2333 2334 From there, several path operations are available. 2335 2336 Directory iteration (including the zip file itself): 2337 2338 >>> a, b = root.iterdir() 2339 >>> a 2340 Path('mem/abcde.zip', 'a.txt') 2341 >>> b 2342 Path('mem/abcde.zip', 'b/') 2343 2344 name property: 2345 2346 >>> b.name 2347 'b' 2348 2349 join with divide operator: 2350 2351 >>> c = b / 'c.txt' 2352 >>> c 2353 Path('mem/abcde.zip', 'b/c.txt') 2354 >>> c.name 2355 'c.txt' 2356 2357 Read text: 2358 2359 >>> c.read_text() 2360 'content of c' 2361 2362 existence: 2363 2364 >>> c.exists() 2365 True 2366 >>> (b / 'missing.txt').exists() 2367 False 2368 2369 Coercion to string: 2370 2371 >>> import os 2372 >>> str(c).replace(os.sep, posixpath.sep) 2373 'mem/abcde.zip/b/c.txt' 2374 2375 At the root, ``name``, ``filename``, and ``parent`` 2376 resolve to the zipfile. Note these attributes are not 2377 valid and will raise a ``ValueError`` if the zipfile 2378 has no filename. 2379 2380 >>> root.name 2381 'abcde.zip' 2382 >>> str(root.filename).replace(os.sep, posixpath.sep) 2383 'mem/abcde.zip' 2384 >>> str(root.parent) 2385 'mem' 2386 """ 2387 2388 __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" 2389 2390 def __init__(self, root, at=""): 2391 """ 2392 Construct a Path from a ZipFile or filename. 2393 2394 Note: When the source is an existing ZipFile object, 2395 its type (__class__) will be mutated to a 2396 specialized type. If the caller wishes to retain the 2397 original type, the caller should either create a 2398 separate ZipFile object or pass a filename. 2399 """ 2400 self.root = FastLookup.make(root) 2401 self.at = at 2402 2403 def open(self, mode='r', *args, pwd=None, **kwargs): 2404 """ 2405 Open this entry as text or binary following the semantics 2406 of ``pathlib.Path.open()`` by passing arguments through 2407 to io.TextIOWrapper(). 2408 """ 2409 if self.is_dir(): 2410 raise IsADirectoryError(self) 2411 zip_mode = mode[0] 2412 if not self.exists() and zip_mode == 'r': 2413 raise FileNotFoundError(self) 2414 stream = self.root.open(self.at, zip_mode, pwd=pwd) 2415 if 'b' in mode: 2416 if args or kwargs: 2417 raise ValueError("encoding args invalid for binary operation") 2418 return stream 2419 # Text mode: 2420 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2421 return io.TextIOWrapper(stream, encoding, *args, **kwargs) 2422 2423 @property 2424 def name(self): 2425 return pathlib.Path(self.at).name or self.filename.name 2426 2427 @property 2428 def suffix(self): 2429 return pathlib.Path(self.at).suffix or self.filename.suffix 2430 2431 @property 2432 def suffixes(self): 2433 return pathlib.Path(self.at).suffixes or self.filename.suffixes 2434 2435 @property 2436 def stem(self): 2437 return pathlib.Path(self.at).stem or self.filename.stem 2438 2439 @property 2440 def filename(self): 2441 return pathlib.Path(self.root.filename).joinpath(self.at) 2442 2443 def read_text(self, *args, **kwargs): 2444 encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) 2445 with self.open('r', encoding, *args, **kwargs) as strm: 2446 return strm.read() 2447 2448 def read_bytes(self): 2449 with self.open('rb') as strm: 2450 return strm.read() 2451 2452 def _is_child(self, path): 2453 return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") 2454 2455 def _next(self, at): 2456 return self.__class__(self.root, at) 2457 2458 def is_dir(self): 2459 return not self.at or self.at.endswith("/") 2460 2461 def is_file(self): 2462 return self.exists() and not self.is_dir() 2463 2464 def exists(self): 2465 return self.at in self.root._name_set() 2466 2467 def iterdir(self): 2468 if not self.is_dir(): 2469 raise ValueError("Can't listdir a file") 2470 subs = map(self._next, self.root.namelist()) 2471 return filter(self._is_child, subs) 2472 2473 def __str__(self): 2474 return posixpath.join(self.root.filename, self.at) 2475 2476 def __repr__(self): 2477 return self.__repr.format(self=self) 2478 2479 def joinpath(self, *other): 2480 next = posixpath.join(self.at, *other) 2481 return self._next(self.root.resolve_dir(next)) 2482 2483 __truediv__ = joinpath 2484 2485 @property 2486 def parent(self): 2487 if not self.at: 2488 return self.filename.parent 2489 parent_at = posixpath.dirname(self.at.rstrip('/')) 2490 if parent_at: 2491 parent_at += '/' 2492 return self._next(parent_at) 2493 2494 2495def main(args=None): 2496 import argparse 2497 2498 description = 'A simple command-line interface for zipfile module.' 2499 parser = argparse.ArgumentParser(description=description) 2500 group = parser.add_mutually_exclusive_group(required=True) 2501 group.add_argument('-l', '--list', metavar='<zipfile>', 2502 help='Show listing of a zipfile') 2503 group.add_argument('-e', '--extract', nargs=2, 2504 metavar=('<zipfile>', '<output_dir>'), 2505 help='Extract zipfile into target dir') 2506 group.add_argument('-c', '--create', nargs='+', 2507 metavar=('<name>', '<file>'), 2508 help='Create zipfile from sources') 2509 group.add_argument('-t', '--test', metavar='<zipfile>', 2510 help='Test if a zipfile is valid') 2511 parser.add_argument('--metadata-encoding', metavar='<encoding>', 2512 help='Specify encoding of member names for -l, -e and -t') 2513 args = parser.parse_args(args) 2514 2515 encoding = args.metadata_encoding 2516 2517 if args.test is not None: 2518 src = args.test 2519 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2520 badfile = zf.testzip() 2521 if badfile: 2522 print("The following enclosed file is corrupted: {!r}".format(badfile)) 2523 print("Done testing") 2524 2525 elif args.list is not None: 2526 src = args.list 2527 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2528 zf.printdir() 2529 2530 elif args.extract is not None: 2531 src, curdir = args.extract 2532 with ZipFile(src, 'r', metadata_encoding=encoding) as zf: 2533 zf.extractall(curdir) 2534 2535 elif args.create is not None: 2536 if encoding: 2537 print("Non-conforming encodings not supported with -c.", 2538 file=sys.stderr) 2539 sys.exit(1) 2540 2541 zip_name = args.create.pop(0) 2542 files = args.create 2543 2544 def addToZip(zf, path, zippath): 2545 if os.path.isfile(path): 2546 zf.write(path, zippath, ZIP_DEFLATED) 2547 elif os.path.isdir(path): 2548 if zippath: 2549 zf.write(path, zippath) 2550 for nm in sorted(os.listdir(path)): 2551 addToZip(zf, 2552 os.path.join(path, nm), os.path.join(zippath, nm)) 2553 # else: ignore 2554 2555 with ZipFile(zip_name, 'w') as zf: 2556 for path in files: 2557 zippath = os.path.basename(path) 2558 if not zippath: 2559 zippath = os.path.basename(os.path.dirname(path)) 2560 if zippath in ('', os.curdir, os.pardir): 2561 zippath = '' 2562 addToZip(zf, path, zippath) 2563 2564 2565if __name__ == "__main__": 2566 main() 2567