1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package zip
6
7import (
8	"bufio"
9	"encoding/binary"
10	"errors"
11	"hash"
12	"hash/crc32"
13	"io"
14	"io/fs"
15	"strings"
16	"unicode/utf8"
17)
18
// Errors returned by writeHeader when a FileHeader field is longer than the
// 16-bit length field of the file header can describe.
var (
	errLongName  = errors.New("zip: FileHeader.Name too long")
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
23
// Writer implements a zip file writer.
type Writer struct {
	// cw wraps the buffered destination and counts the bytes written to it;
	// the count is used as the current offset within the archive.
	cw          *countWriter
	// dir holds one header per entry created so far; Close serializes it
	// as the central directory.
	dir         []*header
	// last is the most recently created file entry, finished by the next
	// create call or by Close. It is nil when the last entry was a directory.
	last        *fileWriter
	// closed is set by Close and makes a second Close return an error.
	closed      bool
	// compressors holds the per-Writer compressors set by RegisterCompressor,
	// consulted before the package-level lookup.
	compressors map[uint16]Compressor
	// comment is the end-of-central-directory comment set by SetComment.
	comment     string

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
37
// header is the Writer's bookkeeping record for a single archive entry.
type header struct {
	*FileHeader
	// offset is the byte count of the Writer at the time the entry was
	// created, i.e. the position of the entry's local file header.
	offset uint64
	// raw is set for entries created by CreateRaw, whose data is written
	// through unmodified rather than compressed by this package.
	raw    bool
}
43
44// NewWriter returns a new [Writer] writing a zip file to w.
45func NewWriter(w io.Writer) *Writer {
46	return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
47}
48
49// SetOffset sets the offset of the beginning of the zip data within the
50// underlying writer. It should be used when the zip data is appended to an
51// existing file, such as a binary executable.
52// It must be called before any data is written.
53func (w *Writer) SetOffset(n int64) {
54	if w.cw.count != 0 {
55		panic("zip: SetOffset called after data was written")
56	}
57	w.cw.count = n
58}
59
60// Flush flushes any buffered data to the underlying writer.
61// Calling Flush is not normally necessary; calling Close is sufficient.
62func (w *Writer) Flush() error {
63	return w.cw.w.(*bufio.Writer).Flush()
64}
65
66// SetComment sets the end-of-central-directory comment field.
67// It can only be called before [Writer.Close].
68func (w *Writer) SetComment(comment string) error {
69	if len(comment) > uint16max {
70		return errors.New("zip: Writer.Comment too long")
71	}
72	w.comment = comment
73	return nil
74}
75
// Close finishes writing the zip file by writing the central directory.
// It does not close the underlying writer.
//
// A file entry that is still open is finished first. Close then writes one
// central directory header per entry, adds a zip64 end-of-central-directory
// record and locator when the entry count, directory size, or directory
// offset reaches the limit of the classic end record, writes the classic end
// record followed by the archive comment, and flushes the buffered output.
// Calling Close a second time returns an error.
func (w *Writer) Close() error {
	// Finish the entry that is still being written, if any.
	if w.last != nil && !w.last.closed {
		if err := w.last.close(); err != nil {
			return err
		}
		w.last = nil
	}
	if w.closed {
		return errors.New("zip: writer closed twice")
	}
	w.closed = true

	// write central directory
	start := w.cw.count
	for _, h := range w.dir {
		// Fixed-size part of the central directory header; the fields
		// are written in order through b.
		var buf [directoryHeaderLen]byte
		b := writeBuf(buf[:])
		b.uint32(uint32(directoryHeaderSignature))
		b.uint16(h.CreatorVersion)
		b.uint16(h.ReaderVersion)
		b.uint16(h.Flags)
		b.uint16(h.Method)
		b.uint16(h.ModifiedTime)
		b.uint16(h.ModifiedDate)
		b.uint32(h.CRC32)
		if h.isZip64() || h.offset >= uint32max {
			// the file needs a zip64 header. store maxint in both
			// 32 bit size fields (and offset later) to signal that the
			// zip64 extra header should be used.
			b.uint32(uint32max) // compressed size
			b.uint32(uint32max) // uncompressed size

			// append a zip64 extra block to Extra
			// (this modifies the Extra field of the entry's FileHeader)
			var buf [28]byte // 2x uint16 + 3x uint64
			eb := writeBuf(buf[:])
			eb.uint16(zip64ExtraID)
			eb.uint16(24) // size = 3x uint64
			eb.uint64(h.UncompressedSize64)
			eb.uint64(h.CompressedSize64)
			eb.uint64(h.offset)
			h.Extra = append(h.Extra, buf[:]...)
		} else {
			b.uint32(h.CompressedSize)
			b.uint32(h.UncompressedSize)
		}

		// Lengths of the variable-size fields that follow the fixed part.
		b.uint16(uint16(len(h.Name)))
		b.uint16(uint16(len(h.Extra)))
		b.uint16(uint16(len(h.Comment)))
		b = b[4:] // skip disk number start and internal file attr (2x uint16)
		b.uint32(h.ExternalAttrs)
		// An offset too large for 32 bits is stored as uint32max; the
		// real value is in the zip64 extra block appended above.
		if h.offset > uint32max {
			b.uint32(uint32max)
		} else {
			b.uint32(uint32(h.offset))
		}
		// Emit the fixed part, then name, extra, and comment.
		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Name); err != nil {
			return err
		}
		if _, err := w.cw.Write(h.Extra); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
			return err
		}
	}
	end := w.cw.count

	// Values describing the central directory for the end records.
	records := uint64(len(w.dir))
	size := uint64(end - start)
	offset := uint64(start)

	if f := w.testHookCloseSizeOffset; f != nil {
		f(size, offset)
	}

	// Any value that reaches the maximum of its field in the classic end
	// record requires the zip64 end record and locator.
	if records >= uint16max || size >= uint32max || offset >= uint32max {
		var buf [directory64EndLen + directory64LocLen]byte
		b := writeBuf(buf[:])

		// zip64 end of central directory record
		b.uint32(directory64EndSignature)
		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
		b.uint16(zipVersion45)           // version made by
		b.uint16(zipVersion45)           // version needed to extract
		b.uint32(0)                      // number of this disk
		b.uint32(0)                      // number of the disk with the start of the central directory
		b.uint64(records)                // total number of entries in the central directory on this disk
		b.uint64(records)                // total number of entries in the central directory
		b.uint64(size)                   // size of the central directory
		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number

		// zip64 end of central directory locator
		b.uint32(directory64LocSignature)
		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
		b.uint32(1)           // total number of disks

		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}

		// store max values in the regular end record to signal
		// that the zip64 values should be used instead
		records = uint16max
		size = uint32max
		offset = uint32max
	}

	// write end record
	var buf [directoryEndLen]byte
	b := writeBuf(buf[:])
	b.uint32(uint32(directoryEndSignature))
	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
	b.uint16(uint16(records))        // number of entries this disk
	b.uint16(uint16(records))        // number of entries total
	b.uint32(uint32(size))           // size of directory
	b.uint32(uint32(offset))         // start of directory
	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
	if _, err := w.cw.Write(buf[:]); err != nil {
		return err
	}
	if _, err := io.WriteString(w.cw, w.comment); err != nil {
		return err
	}

	// Push everything still buffered to the underlying writer.
	return w.cw.w.(*bufio.Writer).Flush()
}
209
210// Create adds a file to the zip file using the provided name.
211// It returns a [Writer] to which the file contents should be written.
212// The file contents will be compressed using the [Deflate] method.
213// The name must be a relative path: it must not start with a drive
214// letter (e.g. C:) or leading slash, and only forward slashes are
215// allowed. To create a directory instead of a file, add a trailing
216// slash to the name. Duplicate names will not overwrite previous entries
217// and are appended to the zip file.
218// The file's contents must be written to the [io.Writer] before the next
219// call to [Writer.Create], [Writer.CreateHeader], or [Writer.Close].
220func (w *Writer) Create(name string) (io.Writer, error) {
221	header := &FileHeader{
222		Name:   name,
223		Method: Deflate,
224	}
225	return w.CreateHeader(header)
226}
227
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
func detectUTF8(s string) (valid, require bool) {
	for rest := s; len(rest) > 0; {
		r, width := utf8.DecodeRuneInString(rest)
		rest = rest[width:]

		// Officially, ZIP uses CP-437, but many readers use the system's
		// local character encoding. Most encodings are compatible with a
		// large subset of CP-437, which itself is ASCII-like.
		//
		// Runes in 0x20..0x7d are treated as portable, except 0x5c: it and
		// 0x7e are excluded since EUC-KR and Shift-JIS replace those
		// characters with localized currency and overline characters.
		if r >= 0x20 && r <= 0x7d && r != 0x5c {
			continue
		}

		// A RuneError of width 1 is an invalid byte, not an encoded U+FFFD.
		if r == utf8.RuneError && width == 1 {
			return false, false
		}
		if !utf8.ValidRune(r) {
			return false, false
		}
		require = true
	}
	return true, require
}
250
251// prepare performs the bookkeeping operations required at the start of
252// CreateHeader and CreateRaw.
253func (w *Writer) prepare(fh *FileHeader) error {
254	if w.last != nil && !w.last.closed {
255		if err := w.last.close(); err != nil {
256			return err
257		}
258	}
259	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
260		// See https://golang.org/issue/11144 confusion.
261		return errors.New("archive/zip: invalid duplicate FileHeader")
262	}
263	return nil
264}
265
266// CreateHeader adds a file to the zip archive using the provided [FileHeader]
267// for the file metadata. [Writer] takes ownership of fh and may mutate
268// its fields. The caller must not modify fh after calling [Writer.CreateHeader].
269//
270// This returns a [Writer] to which the file contents should be written.
271// The file's contents must be written to the io.Writer before the next
272// call to [Writer.Create], [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close].
273func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
274	if err := w.prepare(fh); err != nil {
275		return nil, err
276	}
277
278	// The ZIP format has a sad state of affairs regarding character encoding.
279	// Officially, the name and comment fields are supposed to be encoded
280	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
281	// flag bit is set. However, there are several problems:
282	//
283	//	* Many ZIP readers still do not support UTF-8.
284	//	* If the UTF-8 flag is cleared, several readers simply interpret the
285	//	name and comment fields as whatever the local system encoding is.
286	//
287	// In order to avoid breaking readers without UTF-8 support,
288	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
289	// However, if the strings require multibyte UTF-8 encoding and is a
290	// valid UTF-8 string, then we set the UTF-8 bit.
291	//
292	// For the case, where the user explicitly wants to specify the encoding
293	// as UTF-8, they will need to set the flag bit themselves.
294	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
295	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
296	switch {
297	case fh.NonUTF8:
298		fh.Flags &^= 0x800
299	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
300		fh.Flags |= 0x800
301	}
302
303	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
304	fh.ReaderVersion = zipVersion20
305
306	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
307	if !fh.Modified.IsZero() {
308		// Contrary to the FileHeader.SetModTime method, we intentionally
309		// do not convert to UTC, because we assume the user intends to encode
310		// the date using the specified timezone. A user may want this control
311		// because many legacy ZIP readers interpret the timestamp according
312		// to the local timezone.
313		//
314		// The timezone is only non-UTC if a user directly sets the Modified
315		// field directly themselves. All other approaches sets UTC.
316		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
317
318		// Use "extended timestamp" format since this is what Info-ZIP uses.
319		// Nearly every major ZIP implementation uses a different format,
320		// but at least most seem to be able to understand the other formats.
321		//
322		// This format happens to be identical for both local and central header
323		// if modification time is the only timestamp being encoded.
324		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
325		mt := uint32(fh.Modified.Unix())
326		eb := writeBuf(mbuf[:])
327		eb.uint16(extTimeExtraID)
328		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
329		eb.uint8(1)   // Flags: ModTime
330		eb.uint32(mt) // ModTime
331		fh.Extra = append(fh.Extra, mbuf[:]...)
332	}
333
334	var (
335		ow io.Writer
336		fw *fileWriter
337	)
338	h := &header{
339		FileHeader: fh,
340		offset:     uint64(w.cw.count),
341	}
342
343	if strings.HasSuffix(fh.Name, "/") {
344		// Set the compression method to Store to ensure data length is truly zero,
345		// which the writeHeader method always encodes for the size fields.
346		// This is necessary as most compression formats have non-zero lengths
347		// even when compressing an empty string.
348		fh.Method = Store
349		fh.Flags &^= 0x8 // we will not write a data descriptor
350
351		// Explicitly clear sizes as they have no meaning for directories.
352		fh.CompressedSize = 0
353		fh.CompressedSize64 = 0
354		fh.UncompressedSize = 0
355		fh.UncompressedSize64 = 0
356
357		ow = dirWriter{}
358	} else {
359		fh.Flags |= 0x8 // we will write a data descriptor
360
361		fw = &fileWriter{
362			zipw:      w.cw,
363			compCount: &countWriter{w: w.cw},
364			crc32:     crc32.NewIEEE(),
365		}
366		comp := w.compressor(fh.Method)
367		if comp == nil {
368			return nil, ErrAlgorithm
369		}
370		var err error
371		fw.comp, err = comp(fw.compCount)
372		if err != nil {
373			return nil, err
374		}
375		fw.rawCount = &countWriter{w: fw.comp}
376		fw.header = h
377		ow = fw
378	}
379	w.dir = append(w.dir, h)
380	if err := writeHeader(w.cw, h); err != nil {
381		return nil, err
382	}
383	// If we're creating a directory, fw is nil.
384	w.last = fw
385	return ow, nil
386}
387
388func writeHeader(w io.Writer, h *header) error {
389	const maxUint16 = 1<<16 - 1
390	if len(h.Name) > maxUint16 {
391		return errLongName
392	}
393	if len(h.Extra) > maxUint16 {
394		return errLongExtra
395	}
396
397	var buf [fileHeaderLen]byte
398	b := writeBuf(buf[:])
399	b.uint32(uint32(fileHeaderSignature))
400	b.uint16(h.ReaderVersion)
401	b.uint16(h.Flags)
402	b.uint16(h.Method)
403	b.uint16(h.ModifiedTime)
404	b.uint16(h.ModifiedDate)
405	// In raw mode (caller does the compression), the values are either
406	// written here or in the trailing data descriptor based on the header
407	// flags.
408	if h.raw && !h.hasDataDescriptor() {
409		b.uint32(h.CRC32)
410		b.uint32(uint32(min(h.CompressedSize64, uint32max)))
411		b.uint32(uint32(min(h.UncompressedSize64, uint32max)))
412	} else {
413		// When this package handle the compression, these values are
414		// always written to the trailing data descriptor.
415		b.uint32(0) // crc32
416		b.uint32(0) // compressed size
417		b.uint32(0) // uncompressed size
418	}
419	b.uint16(uint16(len(h.Name)))
420	b.uint16(uint16(len(h.Extra)))
421	if _, err := w.Write(buf[:]); err != nil {
422		return err
423	}
424	if _, err := io.WriteString(w, h.Name); err != nil {
425		return err
426	}
427	_, err := w.Write(h.Extra)
428	return err
429}
430
431// CreateRaw adds a file to the zip archive using the provided [FileHeader] and
432// returns a [Writer] to which the file contents should be written. The file's
433// contents must be written to the io.Writer before the next call to [Writer.Create],
434// [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close].
435//
436// In contrast to [Writer.CreateHeader], the bytes passed to Writer are not compressed.
437//
438// CreateRaw's argument is stored in w. If the argument is a pointer to the embedded
439// [FileHeader] in a [File] obtained from a [Reader] created from in-memory data,
440// then w will refer to all of that memory.
441func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) {
442	if err := w.prepare(fh); err != nil {
443		return nil, err
444	}
445
446	fh.CompressedSize = uint32(min(fh.CompressedSize64, uint32max))
447	fh.UncompressedSize = uint32(min(fh.UncompressedSize64, uint32max))
448
449	h := &header{
450		FileHeader: fh,
451		offset:     uint64(w.cw.count),
452		raw:        true,
453	}
454	w.dir = append(w.dir, h)
455	if err := writeHeader(w.cw, h); err != nil {
456		return nil, err
457	}
458
459	if strings.HasSuffix(fh.Name, "/") {
460		w.last = nil
461		return dirWriter{}, nil
462	}
463
464	fw := &fileWriter{
465		header: h,
466		zipw:   w.cw,
467	}
468	w.last = fw
469	return fw, nil
470}
471
472// Copy copies the file f (obtained from a [Reader]) into w. It copies the raw
473// form directly bypassing decompression, compression, and validation.
474func (w *Writer) Copy(f *File) error {
475	r, err := f.OpenRaw()
476	if err != nil {
477		return err
478	}
479	// Copy the FileHeader so w doesn't store a pointer to the data
480	// of f's entire archive. See #65499.
481	fh := f.FileHeader
482	fw, err := w.CreateRaw(&fh)
483	if err != nil {
484		return err
485	}
486	_, err = io.Copy(fw, r)
487	return err
488}
489
490// RegisterCompressor registers or overrides a custom compressor for a specific
491// method ID. If a compressor for a given method is not found, [Writer] will
492// default to looking up the compressor at the package level.
493func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
494	if w.compressors == nil {
495		w.compressors = make(map[uint16]Compressor)
496	}
497	w.compressors[method] = comp
498}
499
500// AddFS adds the files from fs.FS to the archive.
501// It walks the directory tree starting at the root of the filesystem
502// adding each file to the zip using deflate while maintaining the directory structure.
503func (w *Writer) AddFS(fsys fs.FS) error {
504	return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error {
505		if err != nil {
506			return err
507		}
508		if d.IsDir() {
509			return nil
510		}
511		info, err := d.Info()
512		if err != nil {
513			return err
514		}
515		if !info.Mode().IsRegular() {
516			return errors.New("zip: cannot add non-regular file")
517		}
518		h, err := FileInfoHeader(info)
519		if err != nil {
520			return err
521		}
522		h.Name = name
523		h.Method = Deflate
524		fw, err := w.CreateHeader(h)
525		if err != nil {
526			return err
527		}
528		f, err := fsys.Open(name)
529		if err != nil {
530			return err
531		}
532		defer f.Close()
533		_, err = io.Copy(fw, f)
534		return err
535	})
536}
537
538func (w *Writer) compressor(method uint16) Compressor {
539	comp := w.compressors[method]
540	if comp == nil {
541		comp = compressor(method)
542	}
543	return comp
544}
545
// dirWriter is the writer handed out for directory entries. It accepts
// empty writes and rejects every attempt to write actual data.
type dirWriter struct{}

// Write reports an error unless b is empty; it never consumes any bytes.
func (dirWriter) Write(b []byte) (int, error) {
	if len(b) > 0 {
		return 0, errors.New("zip: write to directory")
	}
	return 0, nil
}
554
// fileWriter is the writer returned for a file entry. For entries created
// by CreateHeader, written data flows through rawCount into comp, and the
// output of comp flows through compCount into zipw. For raw entries, data
// goes to zipw directly.
type fileWriter struct {
	*header
	// zipw is the destination within the archive.
	zipw      io.Writer
	// rawCount counts the bytes written by the caller and forwards them
	// to comp.
	rawCount  *countWriter
	// comp is the compressor for the entry; it writes to compCount.
	comp      io.WriteCloser
	// compCount counts the bytes emitted by comp and forwards them to
	// the archive.
	compCount *countWriter
	// crc32 accumulates the checksum of the data written by the caller.
	crc32     hash.Hash32
	// closed is set once close has been called.
	closed    bool
}
564
565func (w *fileWriter) Write(p []byte) (int, error) {
566	if w.closed {
567		return 0, errors.New("zip: write to closed file")
568	}
569	if w.raw {
570		return w.zipw.Write(p)
571	}
572	w.crc32.Write(p)
573	return w.rawCount.Write(p)
574}
575
576func (w *fileWriter) close() error {
577	if w.closed {
578		return errors.New("zip: file closed twice")
579	}
580	w.closed = true
581	if w.raw {
582		return w.writeDataDescriptor()
583	}
584	if err := w.comp.Close(); err != nil {
585		return err
586	}
587
588	// update FileHeader
589	fh := w.header.FileHeader
590	fh.CRC32 = w.crc32.Sum32()
591	fh.CompressedSize64 = uint64(w.compCount.count)
592	fh.UncompressedSize64 = uint64(w.rawCount.count)
593
594	if fh.isZip64() {
595		fh.CompressedSize = uint32max
596		fh.UncompressedSize = uint32max
597		fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
598	} else {
599		fh.CompressedSize = uint32(fh.CompressedSize64)
600		fh.UncompressedSize = uint32(fh.UncompressedSize64)
601	}
602
603	return w.writeDataDescriptor()
604}
605
606func (w *fileWriter) writeDataDescriptor() error {
607	if !w.hasDataDescriptor() {
608		return nil
609	}
610	// Write data descriptor. This is more complicated than one would
611	// think, see e.g. comments in zipfile.c:putextended() and
612	// https://bugs.openjdk.org/browse/JDK-7073588.
613	// The approach here is to write 8 byte sizes if needed without
614	// adding a zip64 extra in the local header (too late anyway).
615	var buf []byte
616	if w.isZip64() {
617		buf = make([]byte, dataDescriptor64Len)
618	} else {
619		buf = make([]byte, dataDescriptorLen)
620	}
621	b := writeBuf(buf)
622	b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
623	b.uint32(w.CRC32)
624	if w.isZip64() {
625		b.uint64(w.CompressedSize64)
626		b.uint64(w.UncompressedSize64)
627	} else {
628		b.uint32(w.CompressedSize)
629		b.uint32(w.UncompressedSize)
630	}
631	_, err := w.zipw.Write(buf)
632	return err
633}
634
// countWriter forwards writes to w and keeps a running total of the bytes
// that w reported as written.
type countWriter struct {
	w     io.Writer
	count int64
}

// Write passes p to the wrapped writer and adds the number of bytes it
// accepted to count, even when the write also returns an error.
func (w *countWriter) Write(p []byte) (n int, err error) {
	n, err = w.w.Write(p)
	w.count += int64(n)
	return n, err
}
645
// nopCloser adds a Close method that does nothing to an io.Writer.
type nopCloser struct {
	io.Writer
}

// Close always succeeds and leaves the wrapped writer untouched.
func (nopCloser) Close() error { return nil }
653
// writeBuf is a cursor over a byte slice. Each method stores a value in
// little-endian order at the front of the slice and then advances the
// slice past the bytes just written.
type writeBuf []byte

// uint8 stores v in the first byte and advances by one byte.
func (b *writeBuf) uint8(v uint8) {
	cur := *b
	cur[0] = v
	*b = cur[1:]
}

// uint16 stores v in the first two bytes and advances by two bytes.
func (b *writeBuf) uint16(v uint16) {
	cur := *b
	binary.LittleEndian.PutUint16(cur, v)
	*b = cur[2:]
}

// uint32 stores v in the first four bytes and advances by four bytes.
func (b *writeBuf) uint32(v uint32) {
	cur := *b
	binary.LittleEndian.PutUint32(cur, v)
	*b = cur[4:]
}

// uint64 stores v in the first eight bytes and advances by eight bytes.
func (b *writeBuf) uint64(v uint64) {
	cur := *b
	binary.LittleEndian.PutUint64(cur, v)
	*b = cur[8:]
}
675