1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5/*
6Package zip provides support for reading and writing ZIP archives.
7
8See the [ZIP specification] for details.
9
10This package does not support disk spanning.
11
12A note about ZIP64:
13
14To be backwards compatible the FileHeader has both 32 and 64 bit Size
15fields. The 64 bit fields will always contain the correct value and
16for normal archives both fields will be the same. For files requiring
17the ZIP64 format the 32 bit fields will be 0xffffffff and the 64 bit
18fields must be used instead.
19
20[ZIP specification]: https://support.pkware.com/pkzip/appnote
21*/
22package zip
23
24import (
25	"io/fs"
26	"path"
27	"time"
28)
29
// Compression methods.
//
// These are the values this package declares for the Method field of
// [FileHeader]. Store is the zero value, so a FileHeader whose Method is
// left unset uses Store.
const (
	Store   uint16 = 0 // no compression
	Deflate uint16 = 8 // DEFLATE compressed
)
35
// Internal constants describing the on-disk ZIP layout: record signatures,
// fixed record lengths, creator-system identifiers, format versions,
// 32-bit-format limits and extra-field header IDs.
const (
	// Record signatures (32-bit values) followed by the fixed lengths, in
	// bytes, of the corresponding records. Variable-length parts that
	// follow the fixed portion are noted as "+ ..." on each length.
	fileHeaderSignature      = 0x04034b50
	directoryHeaderSignature = 0x02014b50
	directoryEndSignature    = 0x06054b50
	directory64LocSignature  = 0x07064b50
	directory64EndSignature  = 0x06064b50
	dataDescriptorSignature  = 0x08074b50 // de-facto standard; required by OS X Finder
	fileHeaderLen            = 30         // + filename + extra
	directoryHeaderLen       = 46         // + filename + extra + comment
	directoryEndLen          = 22         // + comment
	dataDescriptorLen        = 16         // four uint32: descriptor signature, crc32, compressed size, size
	dataDescriptor64Len      = 24         // two uint32: signature, crc32 | two uint64: compressed size, size
	directory64LocLen        = 20         // zip64 directory locator record; presumably fixed-size (confirm against the ZIP specification)
	directory64EndLen        = 56         // + extra

	// Constants for the first byte in CreatorVersion.
	// FileHeader.Mode compares them against CreatorVersion >> 8 to decide
	// how ExternalAttrs is interpreted.
	creatorFAT    = 0
	creatorUnix   = 3
	creatorNTFS   = 11
	creatorVFAT   = 14
	creatorMacOSX = 19

	// Version numbers.
	zipVersion20 = 20 // 2.0
	zipVersion45 = 45 // 4.5 (reads and writes zip64 archives)

	// Limits for non zip64 files.
	// These are the largest values representable in 16 and 32 bits.
	uint16max = (1 << 16) - 1
	uint32max = (1 << 32) - 1

	// Extra header IDs.
	//
	// IDs 0..31 are reserved for official use by PKWARE.
	// IDs above that range are defined by third-party vendors.
	// Since ZIP lacked high precision timestamps (nor an official specification
	// of the timezone used for the date fields), many competing extra fields
	// have been invented. Pervasive use effectively makes them "official".
	//
	// See http://mdfs.net/Docs/Comp/Archiving/Zip/ExtraField
	zip64ExtraID       = 0x0001 // Zip64 extended information
	ntfsExtraID        = 0x000a // NTFS
	unixExtraID        = 0x000d // UNIX
	extTimeExtraID     = 0x5455 // Extended timestamp
	infoZipUnixExtraID = 0x5855 // Info-ZIP Unix extension
)
81
// FileHeader describes a file within a ZIP file.
// See the [ZIP specification] for details.
//
// [ZIP specification]: https://support.pkware.com/pkzip/appnote
type FileHeader struct {
	// Name is the name of the file.
	//
	// It must be a relative path, not start with a drive letter (such as "C:"),
	// and must use forward slashes instead of back slashes. A trailing slash
	// indicates that this file is a directory and should have no data.
	Name string

	// Comment is any arbitrary user-defined string shorter than 64KiB.
	Comment string

	// NonUTF8 indicates that Name and Comment are not encoded in UTF-8.
	//
	// By specification, the only other encoding permitted should be CP-437,
	// but historically many ZIP readers interpret Name and Comment as whatever
	// the system's local character encoding happens to be.
	//
	// This flag should only be set if the user intends to encode a non-portable
	// ZIP file for a specific localized region. Otherwise, the Writer
	// automatically sets the ZIP format's UTF-8 flag for valid UTF-8 strings.
	NonUTF8 bool

	// CreatorVersion, ReaderVersion and Flags are raw 16-bit header fields.
	//
	// The upper byte of CreatorVersion (CreatorVersion >> 8) identifies the
	// creating system; Mode uses it to choose between the Unix and MS-DOS
	// interpretation of ExternalAttrs, and SetMode sets it to the Unix
	// value while preserving the lower byte.
	//
	// ReaderVersion is presumably the minimum format version needed to read
	// the entry (compare zipVersion20 and zipVersion45) — confirm against
	// the ZIP specification.
	//
	// Bit 0x8 of Flags is what hasDataDescriptor tests.
	CreatorVersion uint16
	ReaderVersion  uint16
	Flags          uint16

	// Method is the compression method. If zero, Store is used.
	Method uint16

	// Modified is the modified time of the file.
	//
	// When reading, an extended timestamp is preferred over the legacy MS-DOS
	// date field, and the offset between the times is used as the timezone.
	// If only the MS-DOS date is present, the timezone is assumed to be UTC.
	//
	// When writing, an extended timestamp (which is timezone-agnostic) is
	// always emitted. The legacy MS-DOS date field is encoded according to the
	// location of the Modified time.
	Modified time.Time

	// ModifiedTime is an MS-DOS-encoded time.
	//
	// Deprecated: Use Modified instead.
	ModifiedTime uint16

	// ModifiedDate is an MS-DOS-encoded date.
	//
	// Deprecated: Use Modified instead.
	ModifiedDate uint16

	// CRC32 is the CRC32 checksum of the file content.
	CRC32 uint32

	// CompressedSize is the compressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, CompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use CompressedSize64 instead.
	CompressedSize uint32

	// UncompressedSize is the uncompressed size of the file in bytes.
	// If either the uncompressed or compressed size of the file
	// does not fit in 32 bits, UncompressedSize is set to ^uint32(0).
	//
	// Deprecated: Use UncompressedSize64 instead.
	UncompressedSize uint32

	// CompressedSize64 is the compressed size of the file in bytes.
	CompressedSize64 uint64

	// UncompressedSize64 is the uncompressed size of the file in bytes.
	UncompressedSize64 uint64

	// Extra holds raw extra-field bytes; presumably the encoded extra
	// headers identified by the *ExtraID constants — confirm against the
	// reader and writer.
	//
	// ExternalAttrs carries the file attributes. For Unix and Mac OS X
	// creators, Mode reads the Unix mode from the upper 16 bits; for FAT,
	// VFAT and NTFS creators it reads MS-DOS attribute bits from the value
	// as a whole.
	Extra         []byte
	ExternalAttrs uint32 // Meaning depends on CreatorVersion
}
162
163// FileInfo returns an fs.FileInfo for the [FileHeader].
164func (h *FileHeader) FileInfo() fs.FileInfo {
165	return headerFileInfo{h}
166}
167
168// headerFileInfo implements [fs.FileInfo].
169type headerFileInfo struct {
170	fh *FileHeader
171}
172
173func (fi headerFileInfo) Name() string { return path.Base(fi.fh.Name) }
174func (fi headerFileInfo) Size() int64 {
175	if fi.fh.UncompressedSize64 > 0 {
176		return int64(fi.fh.UncompressedSize64)
177	}
178	return int64(fi.fh.UncompressedSize)
179}
180func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
181func (fi headerFileInfo) ModTime() time.Time {
182	if fi.fh.Modified.IsZero() {
183		return fi.fh.ModTime()
184	}
185	return fi.fh.Modified.UTC()
186}
187func (fi headerFileInfo) Mode() fs.FileMode { return fi.fh.Mode() }
188func (fi headerFileInfo) Type() fs.FileMode { return fi.fh.Mode().Type() }
189func (fi headerFileInfo) Sys() any          { return fi.fh }
190
191func (fi headerFileInfo) Info() (fs.FileInfo, error) { return fi, nil }
192
193func (fi headerFileInfo) String() string {
194	return fs.FormatFileInfo(fi)
195}
196
197// FileInfoHeader creates a partially-populated [FileHeader] from an
198// fs.FileInfo.
199// Because fs.FileInfo's Name method returns only the base name of
200// the file it describes, it may be necessary to modify the Name field
201// of the returned header to provide the full path name of the file.
202// If compression is desired, callers should set the FileHeader.Method
203// field; it is unset by default.
204func FileInfoHeader(fi fs.FileInfo) (*FileHeader, error) {
205	size := fi.Size()
206	fh := &FileHeader{
207		Name:               fi.Name(),
208		UncompressedSize64: uint64(size),
209	}
210	fh.SetModTime(fi.ModTime())
211	fh.SetMode(fi.Mode())
212	if fh.UncompressedSize64 > uint32max {
213		fh.UncompressedSize = uint32max
214	} else {
215		fh.UncompressedSize = uint32(fh.UncompressedSize64)
216	}
217	return fh, nil
218}
219
// directoryEnd collects directory-end fields: the number of directory
// records, the directory's size and offset, and the trailing comment.
// Presumably it mirrors the end-of-central-directory record(s) of the ZIP
// format — confirm against the reader and writer. Fields marked unused are
// kept but not consulted.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
	commentLen         uint16
	comment            string
}
230
// timeZone builds an unnamed fixed-offset *time.Location from offset.
//
// The offset is first rounded to the nearest quarter hour. If the rounded
// value lies outside the range -12:00 through +14:00 it is treated as
// nonsensical and an offset of zero is used instead.
func timeZone(offset time.Duration) *time.Location {
	const (
		quarterHour = 15 * time.Minute // finest granularity in use, e.g. Nepal at +5:45
		westernmost = -12 * time.Hour  // e.g. Baker Island at -12:00
		easternmost = +14 * time.Hour  // e.g. Line Islands at +14:00
	)
	seconds := 0
	if rounded := offset.Round(quarterHour); westernmost <= rounded && rounded <= easternmost {
		seconds = int(rounded / time.Second)
	}
	return time.FixedZone("", seconds)
}
245
// msDosTimeToTime decodes an MS-DOS date/time pair into a time.Time in UTC.
// MS-DOS timestamps have a resolution of two seconds.
//
// Layout of dosDate: bits 0-4 day of month, bits 5-8 month, bits 9-15 years
// since 1980. Layout of dosTime: bits 0-4 seconds divided by two, bits 5-10
// minute, bits 11-15 hour. Out-of-range components are passed to time.Date
// unchanged.
//
// See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime
func msDosTimeToTime(dosDate, dosTime uint16) time.Time {
	year := int(dosDate>>9) + 1980
	month := time.Month(dosDate >> 5 & 0xf)
	day := int(dosDate & 0x1f)

	hour := int(dosTime >> 11)
	minute := int(dosTime >> 5 & 0x3f)
	second := int(dosTime&0x1f) * 2

	return time.Date(year, month, day, hour, minute, second, 0, time.UTC)
}
265
// timeToMsDosTime encodes t as an MS-DOS date and time pair, using the
// calendar fields of t in its own location. Odd seconds are rounded down
// because the format has a resolution of two seconds.
//
// See: https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime
func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16) {
	year, month, day := t.Date()
	hour, minute, second := t.Clock()

	// date: years since 1980 in bits 9-15, month in bits 5-8, day in bits 0-4
	date := day + int(month)<<5 + (year-1980)<<9
	// time: hour in bits 11-15, minute in bits 5-10, second/2 in bits 0-4
	clock := second/2 + minute<<5 + hour<<11

	return uint16(date), uint16(clock)
}
274
275// ModTime returns the modification time in UTC using the legacy
276// [ModifiedDate] and [ModifiedTime] fields.
277//
278// Deprecated: Use [Modified] instead.
279func (h *FileHeader) ModTime() time.Time {
280	return msDosTimeToTime(h.ModifiedDate, h.ModifiedTime)
281}
282
283// SetModTime sets the [Modified], [ModifiedTime], and [ModifiedDate] fields
284// to the given time in UTC.
285//
286// Deprecated: Use [Modified] instead.
287func (h *FileHeader) SetModTime(t time.Time) {
288	t = t.UTC() // Convert to UTC for compatibility
289	h.Modified = t
290	h.ModifiedDate, h.ModifiedTime = timeToMsDosTime(t)
291}
292
const (
	// Unix constants. The specification doesn't mention them,
	// but these seem to be the values agreed on by tools.
	//
	// s_IFMT is the mask unixModeToFileMode applies to select the
	// file-type value (one of the other s_IF* constants). s_ISUID, s_ISGID
	// and s_ISVTX are tested as independent bits and map to
	// fs.ModeSetuid, fs.ModeSetgid and fs.ModeSticky respectively.
	s_IFMT   = 0xf000
	s_IFSOCK = 0xc000
	s_IFLNK  = 0xa000
	s_IFREG  = 0x8000
	s_IFBLK  = 0x6000
	s_IFDIR  = 0x4000
	s_IFCHR  = 0x2000
	s_IFIFO  = 0x1000
	s_ISUID  = 0x800
	s_ISGID  = 0x400
	s_ISVTX  = 0x200

	// MS-DOS attribute bits. SetMode ORs them into ExternalAttrs and
	// msdosModeToFileMode tests them when decoding a mode.
	msdosDir      = 0x10
	msdosReadOnly = 0x01
)
311
312// Mode returns the permission and mode bits for the [FileHeader].
313func (h *FileHeader) Mode() (mode fs.FileMode) {
314	switch h.CreatorVersion >> 8 {
315	case creatorUnix, creatorMacOSX:
316		mode = unixModeToFileMode(h.ExternalAttrs >> 16)
317	case creatorNTFS, creatorVFAT, creatorFAT:
318		mode = msdosModeToFileMode(h.ExternalAttrs)
319	}
320	if len(h.Name) > 0 && h.Name[len(h.Name)-1] == '/' {
321		mode |= fs.ModeDir
322	}
323	return mode
324}
325
326// SetMode changes the permission and mode bits for the [FileHeader].
327func (h *FileHeader) SetMode(mode fs.FileMode) {
328	h.CreatorVersion = h.CreatorVersion&0xff | creatorUnix<<8
329	h.ExternalAttrs = fileModeToUnixMode(mode) << 16
330
331	// set MSDOS attributes too, as the original zip does.
332	if mode&fs.ModeDir != 0 {
333		h.ExternalAttrs |= msdosDir
334	}
335	if mode&0200 == 0 {
336		h.ExternalAttrs |= msdosReadOnly
337	}
338}
339
340// isZip64 reports whether the file size exceeds the 32 bit limit
341func (h *FileHeader) isZip64() bool {
342	return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
343}
344
345func (h *FileHeader) hasDataDescriptor() bool {
346	return h.Flags&0x8 != 0
347}
348
349func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
350	if m&msdosDir != 0 {
351		mode = fs.ModeDir | 0777
352	} else {
353		mode = 0666
354	}
355	if m&msdosReadOnly != 0 {
356		mode &^= 0222
357	}
358	return mode
359}
360
361func fileModeToUnixMode(mode fs.FileMode) uint32 {
362	var m uint32
363	switch mode & fs.ModeType {
364	default:
365		m = s_IFREG
366	case fs.ModeDir:
367		m = s_IFDIR
368	case fs.ModeSymlink:
369		m = s_IFLNK
370	case fs.ModeNamedPipe:
371		m = s_IFIFO
372	case fs.ModeSocket:
373		m = s_IFSOCK
374	case fs.ModeDevice:
375		m = s_IFBLK
376	case fs.ModeDevice | fs.ModeCharDevice:
377		m = s_IFCHR
378	}
379	if mode&fs.ModeSetuid != 0 {
380		m |= s_ISUID
381	}
382	if mode&fs.ModeSetgid != 0 {
383		m |= s_ISGID
384	}
385	if mode&fs.ModeSticky != 0 {
386		m |= s_ISVTX
387	}
388	return m | uint32(mode&0777)
389}
390
391func unixModeToFileMode(m uint32) fs.FileMode {
392	mode := fs.FileMode(m & 0777)
393	switch m & s_IFMT {
394	case s_IFBLK:
395		mode |= fs.ModeDevice
396	case s_IFCHR:
397		mode |= fs.ModeDevice | fs.ModeCharDevice
398	case s_IFDIR:
399		mode |= fs.ModeDir
400	case s_IFIFO:
401		mode |= fs.ModeNamedPipe
402	case s_IFLNK:
403		mode |= fs.ModeSymlink
404	case s_IFREG:
405		// nothing to do
406	case s_IFSOCK:
407		mode |= fs.ModeSocket
408	}
409	if m&s_ISGID != 0 {
410		mode |= fs.ModeSetgid
411	}
412	if m&s_ISUID != 0 {
413		mode |= fs.ModeSetuid
414	}
415	if m&s_ISVTX != 0 {
416		mode |= fs.ModeSticky
417	}
418	return mode
419}
420