1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package buildid
6
7import (
8	"bytes"
9	"cmd/internal/codesign"
10	"crypto/sha256"
11	"debug/macho"
12	"fmt"
13	"io"
14)
15
// FindAndHash reads all of r and returns the offsets of occurrences of id.
// While reading, FindAndHash also computes and returns
// a hash of the content of r, but with occurrences of id replaced by zeros.
// FindAndHash reads bufSize bytes from r at a time.
// If bufSize == 0, FindAndHash uses a reasonable default.
//
// The returned offsets are byte offsets from the start of r, in increasing
// order. The search resumes after the end of each match, so reported
// matches never overlap. The hash is a SHA-256 digest.
//
// FindAndHash returns an error if id is empty, if id is longer than bufSize,
// or if reading r fails with anything other than an end-of-file condition.
func FindAndHash(r io.Reader, id string, bufSize int) (matches []int64, hash [32]byte, err error) {
	if bufSize == 0 {
		bufSize = 31 * 1024 // bufSize+little will likely fit in 32 kB
	}
	if len(id) == 0 {
		return nil, [32]byte{}, fmt.Errorf("buildid.FindAndHash: no id specified")
	}
	if len(id) > bufSize {
		return nil, [32]byte{}, fmt.Errorf("buildid.FindAndHash: buffer too small")
	}
	// zeros is what gets hashed in place of each occurrence of id.
	zeros := make([]byte, len(id))
	idBytes := []byte(id)

	// For Mach-O files, we want to exclude the code signature.
	// The code signature contains hashes of the whole file (except the signature
	// itself), including the buildid. So the buildid cannot contain the signature.
	r = excludeMachoCodeSignature(r)

	// The strategy is to read the file through buf, looking for id,
	// but we need to worry about what happens if id is broken up
	// and returned in parts by two different reads.
	// We allocate a tiny buffer (at least len(id)) and a big buffer (bufSize bytes)
	// next to each other in memory and then copy the tail of
	// one read into the tiny buffer before reading new data into the big buffer.
	// The search for id is over the entire tiny+big buffer.
	tiny := (len(id) + 127) &^ 127 // round up to 128-aligned
	buf := make([]byte, tiny+bufSize)
	h := sha256.New()
	// start indexes the first byte of buf that has not yet been hashed.
	// Nothing is left over before the first read, so it begins at tiny.
	start := tiny
	for offset := int64(0); ; {
		// The file offset maintained by the loop corresponds to &buf[tiny].
		// buf[start:tiny] is left over from previous iteration.
		// After reading n bytes into buf[tiny:], we process buf[start:tiny+n].
		//
		// Note that err here is local to the loop body and shadows the named
		// result. io.EOF and io.ErrUnexpectedEOF only mean that the read came
		// up short; that case is handled by the n < bufSize check below.
		n, err := io.ReadFull(r, buf[tiny:])
		if err != io.ErrUnexpectedEOF && err != io.EOF && err != nil {
			return nil, [32]byte{}, err
		}

		// Process any matches.
		for {
			i := bytes.Index(buf[start:tiny+n], idBytes)
			if i < 0 {
				break
			}
			// start+i indexes buf; subtracting tiny makes it relative to
			// &buf[tiny], which is file offset `offset`.
			matches = append(matches, offset+int64(start+i-tiny))
			// Hash the bytes preceding the match, then zeros in place of
			// the id itself, and resume the search right after the match.
			h.Write(buf[start : start+i])
			h.Write(zeros)
			start += i + len(id)
		}
		if n < bufSize {
			// Did not fill buffer, must be at end of file.
			// Hash whatever remains unhashed and stop.
			h.Write(buf[start : tiny+n])
			break
		}

		// Process all but final tiny bytes of buf (bufSize = len(buf)-tiny).
		// Note that start > len(buf)-tiny is possible, if the search above
		// found an id ending in the final tiny fringe. That's OK.
		if start < len(buf)-tiny {
			h.Write(buf[start : len(buf)-tiny])
			start = len(buf) - tiny
		}

		// Slide ending tiny-sized fringe to beginning of buffer.
		// The fringe is kept unhashed so that an id straddling two reads
		// can still be found by the next iteration's search.
		copy(buf[0:], buf[bufSize:])
		// Everything moved down by bufSize bytes, so rebase start and
		// advance the file offset that &buf[tiny] stands for.
		start -= bufSize
		offset += int64(bufSize)
	}
	// Sum appends the digest to hash[:0], which writes it into hash itself.
	h.Sum(hash[:0])
	return matches, hash, nil
}
92
93func Rewrite(w io.WriterAt, pos []int64, id string) error {
94	b := []byte(id)
95	for _, p := range pos {
96		if _, err := w.WriteAt(b, p); err != nil {
97			return err
98		}
99	}
100
101	// Update Mach-O code signature, if any.
102	if f, cmd, ok := findMachoCodeSignature(w); ok {
103		if codesign.Size(int64(cmd.Dataoff), "a.out") == int64(cmd.Datasize) {
104			// Update the signature if the size matches, so we don't need to
105			// fix up headers. Binaries generated by the Go linker should have
106			// the expected size. Otherwise skip.
107			text := f.Segment("__TEXT")
108			cs := make([]byte, cmd.Datasize)
109			codesign.Sign(cs, w.(io.Reader), "a.out", int64(cmd.Dataoff), int64(text.Offset), int64(text.Filesz), f.Type == macho.TypeExec)
110			if _, err := w.WriteAt(cs, int64(cmd.Dataoff)); err != nil {
111				return err
112			}
113		}
114	}
115
116	return nil
117}
118
119func excludeMachoCodeSignature(r io.Reader) io.Reader {
120	_, cmd, ok := findMachoCodeSignature(r)
121	if !ok {
122		return r
123	}
124	return &excludedReader{r, 0, int64(cmd.Dataoff), int64(cmd.Dataoff + cmd.Datasize)}
125}
126
// excludedReader wraps an io.Reader. Reading from it returns the bytes from
// the underlying reader, except that bytes whose offset in the stream lies
// in the half-open range [start, end) are returned as zeros.
type excludedReader struct {
	r          io.Reader
	off        int64 // current offset
	start, end int64 // the range to be excluded (read as zero)
}

// Read reads from the underlying reader into p, zeroes the part of the
// newly read data that falls inside the excluded range, and advances the
// tracked offset by the number of bytes read. The byte count and error
// from the underlying reader are returned unchanged.
func (r *excludedReader) Read(p []byte) (int, error) {
	n, err := r.r.Read(p)
	// The n bytes just read cover stream offsets [r.off, r.off+n).
	// Only do work if that interval overlaps [r.start, r.end).
	if n > 0 && r.off+int64(n) > r.start && r.off < r.end {
		// Translate the excluded range into indexes of p, clamped to
		// the bytes actually read.
		cstart := r.start - r.off
		if cstart < 0 {
			cstart = 0
		}
		cend := r.end - r.off
		if cend > int64(n) {
			cend = int64(n)
		}
		// Zero in place instead of allocating a scratch slice of zeros
		// on every overlapping read.
		excluded := p[cstart:cend]
		for i := range excluded {
			excluded[i] = 0
		}
	}
	r.off += int64(n)
	return n, err
}
153
154func findMachoCodeSignature(r any) (*macho.File, codesign.CodeSigCmd, bool) {
155	ra, ok := r.(io.ReaderAt)
156	if !ok {
157		return nil, codesign.CodeSigCmd{}, false
158	}
159	f, err := macho.NewFile(ra)
160	if err != nil {
161		return nil, codesign.CodeSigCmd{}, false
162	}
163	cmd, ok := codesign.FindCodeSigCmd(f)
164	return f, cmd, ok
165}
166