1// Copyright 2021 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package pkgbits
6
7import (
8	"bytes"
9	"crypto/md5"
10	"encoding/binary"
11	"go/constant"
12	"io"
13	"math/big"
14	"runtime"
15	"strings"
16)
17
18// currentVersion is the current version number.
19//
20//   - v0: initial prototype
21//
22//   - v1: adds the flags uint32 word
23//
24// TODO(mdempsky): For the next version bump:
25//   - remove the legacy "has init" bool from the public root
26//   - remove obj's "derived func instance" bool
27const currentVersion uint32 = 1
28
29// A PkgEncoder provides methods for encoding a package's Unified IR
30// export data.
31type PkgEncoder struct {
32	// elems holds the bitstream for previously encoded elements.
33	elems [numRelocs][]string
34
35	// stringsIdx maps previously encoded strings to their index within
36	// the RelocString section, to allow deduplication. That is,
37	// elems[RelocString][stringsIdx[s]] == s (if present).
38	stringsIdx map[string]Index
39
40	// syncFrames is the number of frames to write at each sync
41	// marker. A negative value means sync markers are omitted.
42	syncFrames int
43}
44
45// SyncMarkers reports whether pw uses sync markers.
46func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
47
48// NewPkgEncoder returns an initialized PkgEncoder.
49//
50// syncFrames is the number of caller frames that should be serialized
51// at Sync points. Serializing additional frames results in larger
52// export data files, but can help diagnosing desync errors in
53// higher-level Unified IR reader/writer code. If syncFrames is
54// negative, then sync markers are omitted entirely.
55func NewPkgEncoder(syncFrames int) PkgEncoder {
56	return PkgEncoder{
57		stringsIdx: make(map[string]Index),
58		syncFrames: syncFrames,
59	}
60}
61
62// DumpTo writes the package's encoded data to out0 and returns the
63// package fingerprint.
64func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
65	h := md5.New()
66	out := io.MultiWriter(out0, h)
67
68	writeUint32 := func(x uint32) {
69		assert(binary.Write(out, binary.LittleEndian, x) == nil)
70	}
71
72	writeUint32(currentVersion)
73
74	var flags uint32
75	if pw.SyncMarkers() {
76		flags |= flagSyncMarkers
77	}
78	writeUint32(flags)
79
80	// Write elemEndsEnds.
81	var sum uint32
82	for _, elems := range &pw.elems {
83		sum += uint32(len(elems))
84		writeUint32(sum)
85	}
86
87	// Write elemEnds.
88	sum = 0
89	for _, elems := range &pw.elems {
90		for _, elem := range elems {
91			sum += uint32(len(elem))
92			writeUint32(sum)
93		}
94	}
95
96	// Write elemData.
97	for _, elems := range &pw.elems {
98		for _, elem := range elems {
99			_, err := io.WriteString(out, elem)
100			assert(err == nil)
101		}
102	}
103
104	// Write fingerprint.
105	copy(fingerprint[:], h.Sum(nil))
106	_, err := out0.Write(fingerprint[:])
107	assert(err == nil)
108
109	return
110}
111
112// StringIdx adds a string value to the strings section, if not
113// already present, and returns its index.
114func (pw *PkgEncoder) StringIdx(s string) Index {
115	if idx, ok := pw.stringsIdx[s]; ok {
116		assert(pw.elems[RelocString][idx] == s)
117		return idx
118	}
119
120	idx := Index(len(pw.elems[RelocString]))
121	pw.elems[RelocString] = append(pw.elems[RelocString], s)
122	pw.stringsIdx[s] = idx
123	return idx
124}
125
126// NewEncoder returns an Encoder for a new element within the given
127// section, and encodes the given SyncMarker as the start of the
128// element bitstream.
129func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
130	e := pw.NewEncoderRaw(k)
131	e.Sync(marker)
132	return e
133}
134
135// NewEncoderRaw returns an Encoder for a new element within the given
136// section.
137//
138// Most callers should use NewEncoder instead.
139func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
140	idx := Index(len(pw.elems[k]))
141	pw.elems[k] = append(pw.elems[k], "") // placeholder
142
143	return Encoder{
144		p:   pw,
145		k:   k,
146		Idx: idx,
147	}
148}
149
150// An Encoder provides methods for encoding an individual element's
151// bitstream data.
152type Encoder struct {
153	p *PkgEncoder
154
155	Relocs   []RelocEnt
156	RelocMap map[RelocEnt]uint32
157	Data     bytes.Buffer // accumulated element bitstream data
158
159	encodingRelocHeader bool
160
161	k   RelocKind
162	Idx Index // index within relocation section
163}
164
165// Flush finalizes the element's bitstream and returns its Index.
166func (w *Encoder) Flush() Index {
167	var sb strings.Builder
168
169	// Backup the data so we write the relocations at the front.
170	var tmp bytes.Buffer
171	io.Copy(&tmp, &w.Data)
172
173	// TODO(mdempsky): Consider writing these out separately so they're
174	// easier to strip, along with function bodies, so that we can prune
175	// down to just the data that's relevant to go/types.
176	if w.encodingRelocHeader {
177		panic("encodingRelocHeader already true; recursive flush?")
178	}
179	w.encodingRelocHeader = true
180	w.Sync(SyncRelocs)
181	w.Len(len(w.Relocs))
182	for _, rEnt := range w.Relocs {
183		w.Sync(SyncReloc)
184		w.Len(int(rEnt.Kind))
185		w.Len(int(rEnt.Idx))
186	}
187
188	io.Copy(&sb, &w.Data)
189	io.Copy(&sb, &tmp)
190	w.p.elems[w.k][w.Idx] = sb.String()
191
192	return w.Idx
193}
194
195func (w *Encoder) checkErr(err error) {
196	if err != nil {
197		errorf("unexpected encoding error: %v", err)
198	}
199}
200
201func (w *Encoder) rawUvarint(x uint64) {
202	var buf [binary.MaxVarintLen64]byte
203	n := binary.PutUvarint(buf[:], x)
204	_, err := w.Data.Write(buf[:n])
205	w.checkErr(err)
206}
207
208func (w *Encoder) rawVarint(x int64) {
209	// Zig-zag encode.
210	ux := uint64(x) << 1
211	if x < 0 {
212		ux = ^ux
213	}
214
215	w.rawUvarint(ux)
216}
217
218func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
219	e := RelocEnt{r, idx}
220	if w.RelocMap != nil {
221		if i, ok := w.RelocMap[e]; ok {
222			return int(i)
223		}
224	} else {
225		w.RelocMap = make(map[RelocEnt]uint32)
226	}
227
228	i := len(w.Relocs)
229	w.RelocMap[e] = uint32(i)
230	w.Relocs = append(w.Relocs, e)
231	return i
232}
233
234func (w *Encoder) Sync(m SyncMarker) {
235	if !w.p.SyncMarkers() {
236		return
237	}
238
239	// Writing out stack frame string references requires working
240	// relocations, but writing out the relocations themselves involves
241	// sync markers. To prevent infinite recursion, we simply trim the
242	// stack frame for sync markers within the relocation header.
243	var frames []string
244	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
245		pcs := make([]uintptr, w.p.syncFrames)
246		n := runtime.Callers(2, pcs)
247		frames = fmtFrames(pcs[:n]...)
248	}
249
250	// TODO(mdempsky): Save space by writing out stack frames as a
251	// linked list so we can share common stack frames.
252	w.rawUvarint(uint64(m))
253	w.rawUvarint(uint64(len(frames)))
254	for _, frame := range frames {
255		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
256	}
257}
258
259// Bool encodes and writes a bool value into the element bitstream,
260// and then returns the bool value.
261//
262// For simple, 2-alternative encodings, the idiomatic way to call Bool
263// is something like:
264//
265//	if w.Bool(x != 0) {
266//		// alternative #1
267//	} else {
268//		// alternative #2
269//	}
270//
271// For multi-alternative encodings, use Code instead.
272func (w *Encoder) Bool(b bool) bool {
273	w.Sync(SyncBool)
274	var x byte
275	if b {
276		x = 1
277	}
278	err := w.Data.WriteByte(x)
279	w.checkErr(err)
280	return b
281}
282
283// Int64 encodes and writes an int64 value into the element bitstream.
284func (w *Encoder) Int64(x int64) {
285	w.Sync(SyncInt64)
286	w.rawVarint(x)
287}
288
289// Uint64 encodes and writes a uint64 value into the element bitstream.
290func (w *Encoder) Uint64(x uint64) {
291	w.Sync(SyncUint64)
292	w.rawUvarint(x)
293}
294
295// Len encodes and writes a non-negative int value into the element bitstream.
296func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
297
298// Int encodes and writes an int value into the element bitstream.
299func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
300
301// Len encodes and writes a uint value into the element bitstream.
302func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
303
304// Reloc encodes and writes a relocation for the given (section,
305// index) pair into the element bitstream.
306//
307// Note: Only the index is formally written into the element
308// bitstream, so bitstream decoders must know from context which
309// section an encoded relocation refers to.
310func (w *Encoder) Reloc(r RelocKind, idx Index) {
311	w.Sync(SyncUseReloc)
312	w.Len(w.rawReloc(r, idx))
313}
314
315// Code encodes and writes a Code value into the element bitstream.
316func (w *Encoder) Code(c Code) {
317	w.Sync(c.Marker())
318	w.Len(c.Value())
319}
320
321// String encodes and writes a string value into the element
322// bitstream.
323//
324// Internally, strings are deduplicated by adding them to the strings
325// section (if not already present), and then writing a relocation
326// into the element bitstream.
327func (w *Encoder) String(s string) {
328	w.StringRef(w.p.StringIdx(s))
329}
330
331// StringRef writes a reference to the given index, which must be a
332// previously encoded string value.
333func (w *Encoder) StringRef(idx Index) {
334	w.Sync(SyncString)
335	w.Reloc(RelocString, idx)
336}
337
338// Strings encodes and writes a variable-length slice of strings into
339// the element bitstream.
340func (w *Encoder) Strings(ss []string) {
341	w.Len(len(ss))
342	for _, s := range ss {
343		w.String(s)
344	}
345}
346
347// Value encodes and writes a constant.Value into the element
348// bitstream.
349func (w *Encoder) Value(val constant.Value) {
350	w.Sync(SyncValue)
351	if w.Bool(val.Kind() == constant.Complex) {
352		w.scalar(constant.Real(val))
353		w.scalar(constant.Imag(val))
354	} else {
355		w.scalar(val)
356	}
357}
358
359func (w *Encoder) scalar(val constant.Value) {
360	switch v := constant.Val(val).(type) {
361	default:
362		errorf("unhandled %v (%v)", val, val.Kind())
363	case bool:
364		w.Code(ValBool)
365		w.Bool(v)
366	case string:
367		w.Code(ValString)
368		w.String(v)
369	case int64:
370		w.Code(ValInt64)
371		w.Int64(v)
372	case *big.Int:
373		w.Code(ValBigInt)
374		w.bigInt(v)
375	case *big.Rat:
376		w.Code(ValBigRat)
377		w.bigInt(v.Num())
378		w.bigInt(v.Denom())
379	case *big.Float:
380		w.Code(ValBigFloat)
381		w.bigFloat(v)
382	}
383}
384
385func (w *Encoder) bigInt(v *big.Int) {
386	b := v.Bytes()
387	w.String(string(b)) // TODO: More efficient encoding.
388	w.Bool(v.Sign() < 0)
389}
390
391func (w *Encoder) bigFloat(v *big.Float) {
392	b := v.Append(nil, 'p', -1)
393	w.String(string(b)) // TODO: More efficient encoding.
394}
395