1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package zip
6
7import (
8	"bufio"
9	"encoding/binary"
10	"errors"
11	"hash"
12	"hash/crc32"
13	"internal/godebug"
14	"io"
15	"io/fs"
16	"os"
17	"path"
18	"path/filepath"
19	"slices"
20	"strings"
21	"sync"
22	"time"
23)
24
// zipinsecurepath is the GODEBUG setting consulted while a Reader is
// initialized. When its value is "0", an archive containing a non-local
// name or a name with a backslash is reported with ErrInsecurePath.
var zipinsecurepath = godebug.New("zipinsecurepath")

// Errors reported by this file while reading an archive.
//
// ErrFormat is returned when an expected signature or field is missing or
// inconsistent. ErrAlgorithm is returned by File.Open when no decompressor
// is available for the entry's method. ErrChecksum is returned when the
// CRC-32 of the data read does not match the recorded value.
// ErrInsecurePath is returned alongside a usable reader when the
// zipinsecurepath check rejects a file name.
var (
	ErrFormat       = errors.New("zip: not a valid zip file")
	ErrAlgorithm    = errors.New("zip: unsupported compression algorithm")
	ErrChecksum     = errors.New("zip: checksum error")
	ErrInsecurePath = errors.New("zip: insecure file path")
)
33
// A Reader serves content from a ZIP archive.
//
// File holds one entry for every central directory header that was read
// when the Reader was initialized, in the order the headers were read.
// Comment is the archive comment taken from the end of central directory
// record. The unexported r is the io.ReaderAt the archive was read from,
// and decompressors holds the per-Reader overrides installed with
// RegisterDecompressor.
type Reader struct {
	r             io.ReaderAt
	File          []*File
	Comment       string
	decompressors map[uint16]Decompressor

	// Some JAR files are zip files with a prefix that is a bash script.
	// The baseOffset field is the start of the zip file proper.
	baseOffset int64

	// fileList is a list of files sorted by ename,
	// for use by the Open method.
	// It is built at most once, guarded by fileListOnce.
	fileListOnce sync.Once
	fileList     []fileListEntry
}
50
// A ReadCloser is a [Reader] that must be closed when no longer needed.
// It is produced by OpenReader, which stores the opened file in f;
// Close closes that file.
type ReadCloser struct {
	f *os.File
	Reader
}
56
// A File is a single file in a ZIP archive.
// The file information is in the embedded [FileHeader].
// The file content can be accessed by calling [File.Open].
//
// zip points back to the Reader that listed the entry (it is used to look
// up decompressors), and zipr is the io.ReaderAt from which the entry's
// local header and data are read.
type File struct {
	FileHeader
	zip          *Reader
	zipr         io.ReaderAt
	headerOffset int64 // includes overall ZIP archive baseOffset
	zip64        bool  // zip64 extended information extra field presence
}
67
68// OpenReader will open the Zip file specified by name and return a ReadCloser.
69//
70// If any file inside the archive uses a non-local name
71// (as defined by [filepath.IsLocal]) or a name containing backslashes
72// and the GODEBUG environment variable contains `zipinsecurepath=0`,
73// OpenReader returns the reader with an ErrInsecurePath error.
74// A future version of Go may introduce this behavior by default.
75// Programs that want to accept non-local names can ignore
76// the ErrInsecurePath error and use the returned reader.
77func OpenReader(name string) (*ReadCloser, error) {
78	f, err := os.Open(name)
79	if err != nil {
80		return nil, err
81	}
82	fi, err := f.Stat()
83	if err != nil {
84		f.Close()
85		return nil, err
86	}
87	r := new(ReadCloser)
88	if err = r.init(f, fi.Size()); err != nil && err != ErrInsecurePath {
89		f.Close()
90		return nil, err
91	}
92	r.f = f
93	return r, err
94}
95
96// NewReader returns a new [Reader] reading from r, which is assumed to
97// have the given size in bytes.
98//
99// If any file inside the archive uses a non-local name
100// (as defined by [filepath.IsLocal]) or a name containing backslashes
101// and the GODEBUG environment variable contains `zipinsecurepath=0`,
102// NewReader returns the reader with an [ErrInsecurePath] error.
103// A future version of Go may introduce this behavior by default.
104// Programs that want to accept non-local names can ignore
105// the [ErrInsecurePath] error and use the returned reader.
106func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
107	if size < 0 {
108		return nil, errors.New("zip: size cannot be negative")
109	}
110	zr := new(Reader)
111	var err error
112	if err = zr.init(r, size); err != nil && err != ErrInsecurePath {
113		return nil, err
114	}
115	return zr, err
116}
117
118func (r *Reader) init(rdr io.ReaderAt, size int64) error {
119	end, baseOffset, err := readDirectoryEnd(rdr, size)
120	if err != nil {
121		return err
122	}
123	r.r = rdr
124	r.baseOffset = baseOffset
125	// Since the number of directory records is not validated, it is not
126	// safe to preallocate r.File without first checking that the specified
127	// number of files is reasonable, since a malformed archive may
128	// indicate it contains up to 1 << 128 - 1 files. Since each file has a
129	// header which will be _at least_ 30 bytes we can safely preallocate
130	// if (data size / 30) >= end.directoryRecords.
131	if end.directorySize < uint64(size) && (uint64(size)-end.directorySize)/30 >= end.directoryRecords {
132		r.File = make([]*File, 0, end.directoryRecords)
133	}
134	r.Comment = end.comment
135	rs := io.NewSectionReader(rdr, 0, size)
136	if _, err = rs.Seek(r.baseOffset+int64(end.directoryOffset), io.SeekStart); err != nil {
137		return err
138	}
139	buf := bufio.NewReader(rs)
140
141	// The count of files inside a zip is truncated to fit in a uint16.
142	// Gloss over this by reading headers until we encounter
143	// a bad one, and then only report an ErrFormat or UnexpectedEOF if
144	// the file count modulo 65536 is incorrect.
145	for {
146		f := &File{zip: r, zipr: rdr}
147		err = readDirectoryHeader(f, buf)
148		if err == ErrFormat || err == io.ErrUnexpectedEOF {
149			break
150		}
151		if err != nil {
152			return err
153		}
154		f.headerOffset += r.baseOffset
155		r.File = append(r.File, f)
156	}
157	if uint16(len(r.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
158		// Return the readDirectoryHeader error if we read
159		// the wrong number of directory entries.
160		return err
161	}
162	if zipinsecurepath.Value() == "0" {
163		for _, f := range r.File {
164			if f.Name == "" {
165				// Zip permits an empty file name field.
166				continue
167			}
168			// The zip specification states that names must use forward slashes,
169			// so consider any backslashes in the name insecure.
170			if !filepath.IsLocal(f.Name) || strings.Contains(f.Name, `\`) {
171				zipinsecurepath.IncNonDefault()
172				return ErrInsecurePath
173			}
174		}
175	}
176	return nil
177}
178
179// RegisterDecompressor registers or overrides a custom decompressor for a
180// specific method ID. If a decompressor for a given method is not found,
181// [Reader] will default to looking up the decompressor at the package level.
182func (r *Reader) RegisterDecompressor(method uint16, dcomp Decompressor) {
183	if r.decompressors == nil {
184		r.decompressors = make(map[uint16]Decompressor)
185	}
186	r.decompressors[method] = dcomp
187}
188
189func (r *Reader) decompressor(method uint16) Decompressor {
190	dcomp := r.decompressors[method]
191	if dcomp == nil {
192		dcomp = decompressor(method)
193	}
194	return dcomp
195}
196
197// Close closes the Zip file, rendering it unusable for I/O.
198func (rc *ReadCloser) Close() error {
199	return rc.f.Close()
200}
201
202// DataOffset returns the offset of the file's possibly-compressed
203// data, relative to the beginning of the zip file.
204//
205// Most callers should instead use [File.Open], which transparently
206// decompresses data and verifies checksums.
207func (f *File) DataOffset() (offset int64, err error) {
208	bodyOffset, err := f.findBodyOffset()
209	if err != nil {
210		return
211	}
212	return f.headerOffset + bodyOffset, nil
213}
214
215// Open returns a [ReadCloser] that provides access to the [File]'s contents.
216// Multiple files may be read concurrently.
217func (f *File) Open() (io.ReadCloser, error) {
218	bodyOffset, err := f.findBodyOffset()
219	if err != nil {
220		return nil, err
221	}
222	if strings.HasSuffix(f.Name, "/") {
223		// The ZIP specification (APPNOTE.TXT) specifies that directories, which
224		// are technically zero-byte files, must not have any associated file
225		// data. We previously tried failing here if f.CompressedSize64 != 0,
226		// but it turns out that a number of implementations (namely, the Java
227		// jar tool) don't properly set the storage method on directories
228		// resulting in a file with compressed size > 0 but uncompressed size ==
229		// 0. We still want to fail when a directory has associated uncompressed
230		// data, but we are tolerant of cases where the uncompressed size is
231		// zero but compressed size is not.
232		if f.UncompressedSize64 != 0 {
233			return &dirReader{ErrFormat}, nil
234		} else {
235			return &dirReader{io.EOF}, nil
236		}
237	}
238	size := int64(f.CompressedSize64)
239	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
240	dcomp := f.zip.decompressor(f.Method)
241	if dcomp == nil {
242		return nil, ErrAlgorithm
243	}
244	var rc io.ReadCloser = dcomp(r)
245	var desr io.Reader
246	if f.hasDataDescriptor() {
247		desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
248	}
249	rc = &checksumReader{
250		rc:   rc,
251		hash: crc32.NewIEEE(),
252		f:    f,
253		desr: desr,
254	}
255	return rc, nil
256}
257
258// OpenRaw returns a [Reader] that provides access to the [File]'s contents without
259// decompression.
260func (f *File) OpenRaw() (io.Reader, error) {
261	bodyOffset, err := f.findBodyOffset()
262	if err != nil {
263		return nil, err
264	}
265	r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
266	return r, nil
267}
268
// dirReader is the io.ReadCloser handed out for directory entries.
// It carries no data: every Read reports the stored error.
type dirReader struct {
	err error
}

// Read never fills the buffer; it returns zero bytes and the stored error.
func (r *dirReader) Read(_ []byte) (n int, err error) {
	return 0, r.err
}

// Close is a no-op and always succeeds.
func (*dirReader) Close() error {
	return nil
}
280
// checksumReader wraps the decompressed stream of a File. As data is
// read it maintains a running hash and byte count, and when the wrapped
// stream reports io.EOF it checks them against the values recorded for
// the File.
type checksumReader struct {
	rc    io.ReadCloser
	hash  hash.Hash32
	nread uint64 // number of bytes read so far
	f     *File
	desr  io.Reader // if non-nil, where to read the data descriptor
	err   error     // sticky error
}
289
290func (r *checksumReader) Stat() (fs.FileInfo, error) {
291	return headerFileInfo{&r.f.FileHeader}, nil
292}
293
294func (r *checksumReader) Read(b []byte) (n int, err error) {
295	if r.err != nil {
296		return 0, r.err
297	}
298	n, err = r.rc.Read(b)
299	r.hash.Write(b[:n])
300	r.nread += uint64(n)
301	if r.nread > r.f.UncompressedSize64 {
302		return 0, ErrFormat
303	}
304	if err == nil {
305		return
306	}
307	if err == io.EOF {
308		if r.nread != r.f.UncompressedSize64 {
309			return 0, io.ErrUnexpectedEOF
310		}
311		if r.desr != nil {
312			if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
313				if err1 == io.EOF {
314					err = io.ErrUnexpectedEOF
315				} else {
316					err = err1
317				}
318			} else if r.hash.Sum32() != r.f.CRC32 {
319				err = ErrChecksum
320			}
321		} else {
322			// If there's not a data descriptor, we still compare
323			// the CRC32 of what we've read against the file header
324			// or TOC's CRC32, if it seems like it was set.
325			if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
326				err = ErrChecksum
327			}
328		}
329	}
330	r.err = err
331	return
332}
333
334func (r *checksumReader) Close() error { return r.rc.Close() }
335
336// findBodyOffset does the minimum work to verify the file has a header
337// and returns the file body offset.
338func (f *File) findBodyOffset() (int64, error) {
339	var buf [fileHeaderLen]byte
340	if _, err := f.zipr.ReadAt(buf[:], f.headerOffset); err != nil {
341		return 0, err
342	}
343	b := readBuf(buf[:])
344	if sig := b.uint32(); sig != fileHeaderSignature {
345		return 0, ErrFormat
346	}
347	b = b[22:] // skip over most of the header
348	filenameLen := int(b.uint16())
349	extraLen := int(b.uint16())
350	return int64(fileHeaderLen + filenameLen + extraLen), nil
351}
352
// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and ErrFormat if it doesn't find a valid header signature.
//
// On success the fields of f's FileHeader are filled in from the fixed
// part of the header, the name, extra and comment data that follow it,
// and any recognized extra fields (zip64 sizes and offset, and
// modification times). f.headerOffset is set to the offset recorded in
// the header; the caller is responsible for adding any base offset.
// ErrFormat is also returned when a size or offset is marked as stored
// in a zip64 extra field but no usable value is found there.
func readDirectoryHeader(f *File, r io.Reader) error {
	var buf [directoryHeaderLen]byte
	if _, err := io.ReadFull(r, buf[:]); err != nil {
		return err
	}
	// b is a cursor over the fixed-size header; each call below consumes
	// the next field, so the order of these statements matters.
	b := readBuf(buf[:])
	if sig := b.uint32(); sig != directoryHeaderSignature {
		return ErrFormat
	}
	f.CreatorVersion = b.uint16()
	f.ReaderVersion = b.uint16()
	f.Flags = b.uint16()
	f.Method = b.uint16()
	f.ModifiedTime = b.uint16()
	f.ModifiedDate = b.uint16()
	f.CRC32 = b.uint32()
	f.CompressedSize = b.uint32()
	f.UncompressedSize = b.uint32()
	f.CompressedSize64 = uint64(f.CompressedSize)
	f.UncompressedSize64 = uint64(f.UncompressedSize)
	filenameLen := int(b.uint16())
	extraLen := int(b.uint16())
	commentLen := int(b.uint16())
	b = b[4:] // skipped start disk number and internal attributes (2x uint16)
	f.ExternalAttrs = b.uint32()
	f.headerOffset = int64(b.uint32())
	// The variable-length name, extra and comment data follow the fixed
	// header back to back; read them with a single call.
	d := make([]byte, filenameLen+extraLen+commentLen)
	if _, err := io.ReadFull(r, d); err != nil {
		return err
	}
	f.Name = string(d[:filenameLen])
	f.Extra = d[filenameLen : filenameLen+extraLen]
	f.Comment = string(d[filenameLen+extraLen:])

	// Determine the character encoding.
	utf8Valid1, utf8Require1 := detectUTF8(f.Name)
	utf8Valid2, utf8Require2 := detectUTF8(f.Comment)
	switch {
	case !utf8Valid1 || !utf8Valid2:
		// Name and Comment definitely not UTF-8.
		f.NonUTF8 = true
	case !utf8Require1 && !utf8Require2:
		// Name and Comment use only single-byte runes that overlap with UTF-8.
		f.NonUTF8 = false
	default:
		// Might be UTF-8, might be some other encoding; preserve existing flag.
		// Some ZIP writers use UTF-8 encoding without setting the UTF-8 flag.
		// Since it is impossible to always distinguish valid UTF-8 from some
		// other encoding (e.g., GBK or Shift-JIS), we trust the flag.
		f.NonUTF8 = f.Flags&0x800 == 0
	}

	// A 32-bit field holding all ones marks a value that is expected to
	// be supplied by the zip64 extra field instead.
	needUSize := f.UncompressedSize == ^uint32(0)
	needCSize := f.CompressedSize == ^uint32(0)
	needHeaderOffset := f.headerOffset == int64(^uint32(0))

	// Best effort to find what we need.
	// Other zip authors might not even follow the basic format,
	// and we'll just ignore the Extra content in that case.
	var modified time.Time
parseExtras:
	for extra := readBuf(f.Extra); len(extra) >= 4; { // need at least tag and size
		fieldTag := extra.uint16()
		fieldSize := int(extra.uint16())
		if len(extra) < fieldSize {
			// The field claims more data than remains; stop parsing.
			break
		}
		fieldBuf := extra.sub(fieldSize)

		switch fieldTag {
		case zip64ExtraID:
			f.zip64 = true

			// update directory values from the zip64 extra block.
			// They should only be consulted if the sizes read earlier
			// are maxed out.
			// See golang.org/issue/13367.
			if needUSize {
				needUSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.UncompressedSize64 = fieldBuf.uint64()
			}
			if needCSize {
				needCSize = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.CompressedSize64 = fieldBuf.uint64()
			}
			if needHeaderOffset {
				needHeaderOffset = false
				if len(fieldBuf) < 8 {
					return ErrFormat
				}
				f.headerOffset = int64(fieldBuf.uint64())
			}
		case ntfsExtraID:
			if len(fieldBuf) < 4 {
				continue parseExtras
			}
			fieldBuf.uint32()        // reserved (ignored)
			for len(fieldBuf) >= 4 { // need at least tag and size
				attrTag := fieldBuf.uint16()
				attrSize := int(fieldBuf.uint16())
				if len(fieldBuf) < attrSize {
					continue parseExtras
				}
				attrBuf := fieldBuf.sub(attrSize)
				if attrTag != 1 || attrSize != 24 {
					continue // Ignore irrelevant attributes
				}

				const ticksPerSecond = 1e7    // Windows timestamp resolution
				ts := int64(attrBuf.uint64()) // ModTime since Windows epoch
				secs := ts / ticksPerSecond
				nsecs := (1e9 / ticksPerSecond) * (ts % ticksPerSecond)
				epoch := time.Date(1601, time.January, 1, 0, 0, 0, 0, time.UTC)
				modified = time.Unix(epoch.Unix()+secs, nsecs)
			}
		case unixExtraID, infoZipUnixExtraID:
			if len(fieldBuf) < 8 {
				continue parseExtras
			}
			fieldBuf.uint32()              // AcTime (ignored)
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		case extTimeExtraID:
			// The low bit of the leading flags byte indicates that a
			// modification time follows.
			if len(fieldBuf) < 5 || fieldBuf.uint8()&1 == 0 {
				continue parseExtras
			}
			ts := int64(fieldBuf.uint32()) // ModTime since Unix epoch
			modified = time.Unix(ts, 0)
		}
	}

	// Start from the legacy MS-DOS timestamp; an extended timestamp found
	// in the extra fields takes precedence below.
	msdosModified := msDosTimeToTime(f.ModifiedDate, f.ModifiedTime)
	f.Modified = msdosModified
	if !modified.IsZero() {
		f.Modified = modified.UTC()

		// If legacy MS-DOS timestamps are set, we can use the delta between
		// the legacy and extended versions to estimate timezone offset.
		//
		// A non-UTC timezone is always used (even if offset is zero).
		// Thus, FileHeader.Modified.Location() == time.UTC is useful for
		// determining whether extended timestamps are present.
		// This is necessary for users that need to do additional time
		// calculations when dealing with legacy ZIP formats.
		if f.ModifiedTime != 0 || f.ModifiedDate != 0 {
			f.Modified = modified.In(timeZone(msdosModified.Sub(modified)))
		}
	}

	// Assume that uncompressed size 2³²-1 could plausibly happen in
	// an old zip32 file that was sharding inputs into the largest chunks
	// possible (or is just malicious; search the web for 42.zip).
	// If needUSize is true still, it means we didn't see a zip64 extension.
	// As long as the compressed size is not also 2³²-1 (implausible)
	// and the header is not also 2³²-1 (equally implausible),
	// accept the uncompressed size 2³²-1 as valid.
	// If nothing else, this keeps archive/zip working with 42.zip.
	_ = needUSize

	// A compressed size or header offset still marked as missing means
	// the zip64 extra field that should have supplied it was absent.
	if needCSize || needHeaderOffset {
		return ErrFormat
	}

	return nil
}
527
528func readDataDescriptor(r io.Reader, f *File) error {
529	var buf [dataDescriptorLen]byte
530	// The spec says: "Although not originally assigned a
531	// signature, the value 0x08074b50 has commonly been adopted
532	// as a signature value for the data descriptor record.
533	// Implementers should be aware that ZIP files may be
534	// encountered with or without this signature marking data
535	// descriptors and should account for either case when reading
536	// ZIP files to ensure compatibility."
537	//
538	// dataDescriptorLen includes the size of the signature but
539	// first read just those 4 bytes to see if it exists.
540	if _, err := io.ReadFull(r, buf[:4]); err != nil {
541		return err
542	}
543	off := 0
544	maybeSig := readBuf(buf[:4])
545	if maybeSig.uint32() != dataDescriptorSignature {
546		// No data descriptor signature. Keep these four
547		// bytes.
548		off += 4
549	}
550	if _, err := io.ReadFull(r, buf[off:12]); err != nil {
551		return err
552	}
553	b := readBuf(buf[:12])
554	if b.uint32() != f.CRC32 {
555		return ErrChecksum
556	}
557
558	// The two sizes that follow here can be either 32 bits or 64 bits
559	// but the spec is not very clear on this and different
560	// interpretations has been made causing incompatibilities. We
561	// already have the sizes from the central directory so we can
562	// just ignore these.
563
564	return nil
565}
566
567func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, baseOffset int64, err error) {
568	// look for directoryEndSignature in the last 1k, then in the last 65k
569	var buf []byte
570	var directoryEndOffset int64
571	for i, bLen := range []int64{1024, 65 * 1024} {
572		if bLen > size {
573			bLen = size
574		}
575		buf = make([]byte, int(bLen))
576		if _, err := r.ReadAt(buf, size-bLen); err != nil && err != io.EOF {
577			return nil, 0, err
578		}
579		if p := findSignatureInBlock(buf); p >= 0 {
580			buf = buf[p:]
581			directoryEndOffset = size - bLen + int64(p)
582			break
583		}
584		if i == 1 || bLen == size {
585			return nil, 0, ErrFormat
586		}
587	}
588
589	// read header into struct
590	b := readBuf(buf[4:]) // skip signature
591	d := &directoryEnd{
592		diskNbr:            uint32(b.uint16()),
593		dirDiskNbr:         uint32(b.uint16()),
594		dirRecordsThisDisk: uint64(b.uint16()),
595		directoryRecords:   uint64(b.uint16()),
596		directorySize:      uint64(b.uint32()),
597		directoryOffset:    uint64(b.uint32()),
598		commentLen:         b.uint16(),
599	}
600	l := int(d.commentLen)
601	if l > len(b) {
602		return nil, 0, errors.New("zip: invalid comment length")
603	}
604	d.comment = string(b[:l])
605
606	// These values mean that the file can be a zip64 file
607	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
608		p, err := findDirectory64End(r, directoryEndOffset)
609		if err == nil && p >= 0 {
610			directoryEndOffset = p
611			err = readDirectory64End(r, p, d)
612		}
613		if err != nil {
614			return nil, 0, err
615		}
616	}
617
618	maxInt64 := uint64(1<<63 - 1)
619	if d.directorySize > maxInt64 || d.directoryOffset > maxInt64 {
620		return nil, 0, ErrFormat
621	}
622
623	baseOffset = directoryEndOffset - int64(d.directorySize) - int64(d.directoryOffset)
624
625	// Make sure directoryOffset points to somewhere in our file.
626	if o := baseOffset + int64(d.directoryOffset); o < 0 || o >= size {
627		return nil, 0, ErrFormat
628	}
629
630	// If the directory end data tells us to use a non-zero baseOffset,
631	// but we would find a valid directory entry if we assume that the
632	// baseOffset is 0, then just use a baseOffset of 0.
633	// We've seen files in which the directory end data gives us
634	// an incorrect baseOffset.
635	if baseOffset > 0 {
636		off := int64(d.directoryOffset)
637		rs := io.NewSectionReader(r, off, size-off)
638		if readDirectoryHeader(&File{}, rs) == nil {
639			baseOffset = 0
640		}
641	}
642
643	return d, baseOffset, nil
644}
645
646// findDirectory64End tries to read the zip64 locator just before the
647// directory end and returns the offset of the zip64 directory end if
648// found.
649func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
650	locOffset := directoryEndOffset - directory64LocLen
651	if locOffset < 0 {
652		return -1, nil // no need to look for a header outside the file
653	}
654	buf := make([]byte, directory64LocLen)
655	if _, err := r.ReadAt(buf, locOffset); err != nil {
656		return -1, err
657	}
658	b := readBuf(buf)
659	if sig := b.uint32(); sig != directory64LocSignature {
660		return -1, nil
661	}
662	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
663		return -1, nil // the file is not a valid zip64-file
664	}
665	p := b.uint64()      // relative offset of the zip64 end of central directory record
666	if b.uint32() != 1 { // total number of disks
667		return -1, nil // the file is not a valid zip64-file
668	}
669	return int64(p), nil
670}
671
672// readDirectory64End reads the zip64 directory end and updates the
673// directory end with the zip64 directory end values.
674func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
675	buf := make([]byte, directory64EndLen)
676	if _, err := r.ReadAt(buf, offset); err != nil {
677		return err
678	}
679
680	b := readBuf(buf)
681	if sig := b.uint32(); sig != directory64EndSignature {
682		return ErrFormat
683	}
684
685	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
686	d.diskNbr = b.uint32()            // number of this disk
687	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
688	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
689	d.directoryRecords = b.uint64()   // total number of entries in the central directory
690	d.directorySize = b.uint64()      // size of the central directory
691	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number
692
693	return nil
694}
695
696func findSignatureInBlock(b []byte) int {
697	for i := len(b) - directoryEndLen; i >= 0; i-- {
698		// defined from directoryEndSignature in struct.go
699		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
700			// n is length of comment
701			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
702			if n+directoryEndLen+i > len(b) {
703				// Truncated comment.
704				// Some parsers (such as Info-ZIP) ignore the truncated comment
705				// rather than treating it as a hard error.
706				return -1
707			}
708			return i
709		}
710	}
711	return -1
712}
713
// readBuf is a byte slice used as a forward-only cursor: each method
// decodes a little-endian value from the front and advances past it.
// The methods panic if the slice is too short for the requested value.
type readBuf []byte

// uint8 consumes and returns the next byte.
func (b *readBuf) uint8() uint8 {
	src := *b
	v := src[0]
	*b = src[1:]
	return v
}

// uint16 consumes and returns the next two bytes as a little-endian value.
func (b *readBuf) uint16() uint16 {
	src := *b
	v := binary.LittleEndian.Uint16(src)
	*b = src[2:]
	return v
}

// uint32 consumes and returns the next four bytes as a little-endian value.
func (b *readBuf) uint32() uint32 {
	src := *b
	v := binary.LittleEndian.Uint32(src)
	*b = src[4:]
	return v
}

// uint64 consumes and returns the next eight bytes as a little-endian value.
func (b *readBuf) uint64() uint64 {
	src := *b
	v := binary.LittleEndian.Uint64(src)
	*b = src[8:]
	return v
}

// sub consumes the next n bytes and returns them as their own readBuf,
// which shares storage with b.
func (b *readBuf) sub(n int) readBuf {
	head, tail := (*b)[:n], (*b)[n:]
	*b = tail
	return head
}
745
// A fileListEntry is a File and its ename.
// If file == nil, the fileListEntry describes a directory without metadata.
//
// name is the entry's name after cleaning for use with fs.FS, isDir
// records whether the entry is a directory, and isDup marks a name that
// occurs more than once in the archive; stat reports an error for such
// entries.
type fileListEntry struct {
	name  string
	file  *File
	isDir bool
	isDup bool
}
754
// fileInfoDirEntry combines fs.FileInfo and fs.DirEntry. It is the
// result type of fileListEntry.stat, so one value can be returned both
// from Stat and from ReadDir.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
759
760func (f *fileListEntry) stat() (fileInfoDirEntry, error) {
761	if f.isDup {
762		return nil, errors.New(f.name + ": duplicate entries in zip file")
763	}
764	if !f.isDir {
765		return headerFileInfo{&f.file.FileHeader}, nil
766	}
767	return f, nil
768}
769
770// Only used for directories.
771func (f *fileListEntry) Name() string      { _, elem, _ := split(f.name); return elem }
772func (f *fileListEntry) Size() int64       { return 0 }
773func (f *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0555 }
774func (f *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
775func (f *fileListEntry) IsDir() bool       { return true }
776func (f *fileListEntry) Sys() any          { return nil }
777
778func (f *fileListEntry) ModTime() time.Time {
779	if f.file == nil {
780		return time.Time{}
781	}
782	return f.file.FileHeader.Modified.UTC()
783}
784
785func (f *fileListEntry) Info() (fs.FileInfo, error) { return f, nil }
786
787func (f *fileListEntry) String() string {
788	return fs.FormatDirEntry(f)
789}
790
// toValidName rewrites name into a form usable with fs.FS.Open:
// backslashes become slashes, the path is cleaned, one leading slash is
// dropped, and all leading "../" elements are removed.
func toValidName(name string) string {
	cleaned := path.Clean(strings.ReplaceAll(name, `\`, `/`))
	cleaned = strings.TrimPrefix(cleaned, "/")
	for {
		rest, found := strings.CutPrefix(cleaned, "../")
		if !found {
			return cleaned
		}
		cleaned = rest
	}
}
804
805func (r *Reader) initFileList() {
806	r.fileListOnce.Do(func() {
807		// files and knownDirs map from a file/directory name
808		// to an index into the r.fileList entry that we are
809		// building. They are used to mark duplicate entries.
810		files := make(map[string]int)
811		knownDirs := make(map[string]int)
812
813		// dirs[name] is true if name is known to be a directory,
814		// because it appears as a prefix in a path.
815		dirs := make(map[string]bool)
816
817		for _, file := range r.File {
818			isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
819			name := toValidName(file.Name)
820			if name == "" {
821				continue
822			}
823
824			if idx, ok := files[name]; ok {
825				r.fileList[idx].isDup = true
826				continue
827			}
828			if idx, ok := knownDirs[name]; ok {
829				r.fileList[idx].isDup = true
830				continue
831			}
832
833			for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
834				dirs[dir] = true
835			}
836
837			idx := len(r.fileList)
838			entry := fileListEntry{
839				name:  name,
840				file:  file,
841				isDir: isDir,
842			}
843			r.fileList = append(r.fileList, entry)
844			if isDir {
845				knownDirs[name] = idx
846			} else {
847				files[name] = idx
848			}
849		}
850		for dir := range dirs {
851			if _, ok := knownDirs[dir]; !ok {
852				if idx, ok := files[dir]; ok {
853					r.fileList[idx].isDup = true
854				} else {
855					entry := fileListEntry{
856						name:  dir,
857						file:  nil,
858						isDir: true,
859					}
860					r.fileList = append(r.fileList, entry)
861				}
862			}
863		}
864
865		slices.SortFunc(r.fileList, func(a, b fileListEntry) int {
866			return fileEntryCompare(a.name, b.name)
867		})
868	})
869}
870
871func fileEntryCompare(x, y string) int {
872	xdir, xelem, _ := split(x)
873	ydir, yelem, _ := split(y)
874	if xdir != ydir {
875		return strings.Compare(xdir, ydir)
876	}
877	return strings.Compare(xelem, yelem)
878}
879
880// Open opens the named file in the ZIP archive,
881// using the semantics of fs.FS.Open:
882// paths are always slash separated, with no
883// leading / or ../ elements.
884func (r *Reader) Open(name string) (fs.File, error) {
885	r.initFileList()
886
887	if !fs.ValidPath(name) {
888		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
889	}
890	e := r.openLookup(name)
891	if e == nil {
892		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
893	}
894	if e.isDir {
895		return &openDir{e, r.openReadDir(name), 0}, nil
896	}
897	rc, err := e.file.Open()
898	if err != nil {
899		return nil, err
900	}
901	return rc.(fs.File), nil
902}
903
// split breaks name into its directory part and final element.
// A single trailing slash is removed first and reported through isDir.
// A name without any remaining slash has the directory ".".
func split(name string) (dir, elem string, isDir bool) {
	name, isDir = strings.CutSuffix(name, "/")
	slash := strings.LastIndexByte(name, '/')
	if slash < 0 {
		return ".", name, isDir
	}
	return name[:slash], name[slash+1:], isDir
}
918
// dotFile is the directory entry that openLookup returns for the name ".".
var dotFile = &fileListEntry{name: "./", isDir: true}
920
921func (r *Reader) openLookup(name string) *fileListEntry {
922	if name == "." {
923		return dotFile
924	}
925
926	dir, elem, _ := split(name)
927	files := r.fileList
928	i, _ := slices.BinarySearchFunc(files, dir, func(a fileListEntry, dir string) (ret int) {
929		idir, ielem, _ := split(a.name)
930		if dir != idir {
931			return strings.Compare(idir, dir)
932		}
933		return strings.Compare(ielem, elem)
934	})
935	if i < len(files) {
936		fname := files[i].name
937		if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
938			return &files[i]
939		}
940	}
941	return nil
942}
943
944func (r *Reader) openReadDir(dir string) []fileListEntry {
945	files := r.fileList
946	i, _ := slices.BinarySearchFunc(files, dir, func(a fileListEntry, dir string) int {
947		idir, _, _ := split(a.name)
948		if dir != idir {
949			return strings.Compare(idir, dir)
950		}
951		// find the first entry with dir
952		return +1
953	})
954	j, _ := slices.BinarySearchFunc(files, dir, func(a fileListEntry, dir string) int {
955		jdir, _, _ := split(a.name)
956		if dir != jdir {
957			return strings.Compare(jdir, dir)
958		}
959		// find the last entry with dir
960		return -1
961	})
962	return files[i:j]
963}
964
// openDir is the fs.File that Reader.Open returns for a directory.
// e is the directory's own entry, files holds the entries inside the
// directory, and offset counts how many of them ReadDir has already
// returned.
type openDir struct {
	e      *fileListEntry
	files  []fileListEntry
	offset int
}
970
971func (d *openDir) Close() error               { return nil }
972func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }
973
974func (d *openDir) Read([]byte) (int, error) {
975	return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
976}
977
978func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
979	n := len(d.files) - d.offset
980	if count > 0 && n > count {
981		n = count
982	}
983	if n == 0 {
984		if count <= 0 {
985			return nil, nil
986		}
987		return nil, io.EOF
988	}
989	list := make([]fs.DirEntry, n)
990	for i := range list {
991		s, err := d.files[d.offset+i].stat()
992		if err != nil {
993			return nil, err
994		}
995		list[i] = s
996	}
997	d.offset += n
998	return list, nil
999}
1000