xref: /aosp_15_r20/external/starlark-go/internal/compile/serial.go (revision 4947cdc739c985f6d86941e22894f5cefe7c9e9a)
1*4947cdc7SCole Faustpackage compile
2*4947cdc7SCole Faust
3*4947cdc7SCole Faust// This file defines functions to read and write a compile.Program to a file.
4*4947cdc7SCole Faust//
5*4947cdc7SCole Faust// It is the client's responsibility to avoid version skew between the
6*4947cdc7SCole Faust// compiler used to produce a file and the interpreter that consumes it.
7*4947cdc7SCole Faust// The version number is provided as a constant.
8*4947cdc7SCole Faust// Incompatible protocol changes should also increment the version number.
9*4947cdc7SCole Faust//
10*4947cdc7SCole Faust// Encoding
11*4947cdc7SCole Faust//
12*4947cdc7SCole Faust// Program:
//	"!sky"		[4]byte		# magic number
14*4947cdc7SCole Faust//	str		uint32le	# offset of <strings> section
15*4947cdc7SCole Faust//	version		varint		# must match Version
16*4947cdc7SCole Faust//	filename	string
17*4947cdc7SCole Faust//	numloads	varint
18*4947cdc7SCole Faust//	loads		[]Ident
19*4947cdc7SCole Faust//	numnames	varint
20*4947cdc7SCole Faust//	names		[]string
21*4947cdc7SCole Faust//	numconsts	varint
22*4947cdc7SCole Faust//	consts		[]Constant
23*4947cdc7SCole Faust//	numglobals	varint
24*4947cdc7SCole Faust//	globals		[]Ident
25*4947cdc7SCole Faust//	toplevel	Funcode
26*4947cdc7SCole Faust//	numfuncs	varint
27*4947cdc7SCole Faust//	funcs		[]Funcode
28*4947cdc7SCole Faust//	<strings>	[]byte		# concatenation of all referenced strings
29*4947cdc7SCole Faust//	EOF
30*4947cdc7SCole Faust//
31*4947cdc7SCole Faust// Funcode:
//	id		Ident
//	doc		string
//	code		[]byte
34*4947cdc7SCole Faust//	pclinetablen	varint
35*4947cdc7SCole Faust//	pclinetab	[]varint
36*4947cdc7SCole Faust//	numlocals	varint
37*4947cdc7SCole Faust//	locals		[]Ident
38*4947cdc7SCole Faust//	numcells	varint
39*4947cdc7SCole Faust//	cells		[]int
40*4947cdc7SCole Faust//	numfreevars	varint
41*4947cdc7SCole Faust//	freevar		[]Ident
42*4947cdc7SCole Faust//	maxstack	varint
43*4947cdc7SCole Faust//	numparams	varint
44*4947cdc7SCole Faust//	numkwonlyparams	varint
45*4947cdc7SCole Faust//	hasvarargs	varint (0 or 1)
46*4947cdc7SCole Faust//	haskwargs	varint (0 or 1)
47*4947cdc7SCole Faust//
48*4947cdc7SCole Faust// Ident:
//	name		string
50*4947cdc7SCole Faust//	line, col	varint
51*4947cdc7SCole Faust//
52*4947cdc7SCole Faust// Constant:                            # type      data
53*4947cdc7SCole Faust//      type            varint          # 0=string  string
54*4947cdc7SCole Faust//      data            ...             # 1=bytes   string
55*4947cdc7SCole Faust//                                      # 2=int     varint
56*4947cdc7SCole Faust//                                      # 3=float   varint (bits as uint64)
57*4947cdc7SCole Faust//                                      # 4=bigint  string (decimal ASCII text)
58*4947cdc7SCole Faust//
59*4947cdc7SCole Faust// The encoding starts with a four-byte magic number.
60*4947cdc7SCole Faust// The next four bytes are a little-endian uint32
61*4947cdc7SCole Faust// that provides the offset of the string section
62*4947cdc7SCole Faust// at the end of the file, which contains the ordered
63*4947cdc7SCole Faust// concatenation of all strings referenced by the
64*4947cdc7SCole Faust// program. This design permits the decoder to read
65*4947cdc7SCole Faust// the first and second parts of the file into different
66*4947cdc7SCole Faust// memory allocations: the first (the encoded program)
67*4947cdc7SCole Faust// is transient, but the second (the strings) persists
68*4947cdc7SCole Faust// for the life of the Program.
69*4947cdc7SCole Faust//
70*4947cdc7SCole Faust// Within the encoded program, all strings are referred
71*4947cdc7SCole Faust// to by their length. As the encoder and decoder process
72*4947cdc7SCole Faust// the entire file sequentially, they are in lock step,
73*4947cdc7SCole Faust// so the start offset of each string is implicit.
74*4947cdc7SCole Faust//
// Funcode.Code is represented as a []byte slice to permit
76*4947cdc7SCole Faust// modification when breakpoints are set. All other strings
77*4947cdc7SCole Faust// are represented as strings. They all (unsafely) share the
78*4947cdc7SCole Faust// same backing byte slice.
79*4947cdc7SCole Faust//
80*4947cdc7SCole Faust// Aside from the str field, all integers are encoded as varints.
81*4947cdc7SCole Faust
82*4947cdc7SCole Faustimport (
83*4947cdc7SCole Faust	"encoding/binary"
84*4947cdc7SCole Faust	"fmt"
85*4947cdc7SCole Faust	"math"
86*4947cdc7SCole Faust	"math/big"
87*4947cdc7SCole Faust	debugpkg "runtime/debug"
88*4947cdc7SCole Faust	"unsafe"
89*4947cdc7SCole Faust
90*4947cdc7SCole Faust	"go.starlark.net/syntax"
91*4947cdc7SCole Faust)
92*4947cdc7SCole Faust
// magic is the four-byte signature written at the start of every encoded
// program. DecodeProgram rejects data that does not begin with it.
const magic = "!sky"
94*4947cdc7SCole Faust
95*4947cdc7SCole Faust// Encode encodes a compiled Starlark program.
96*4947cdc7SCole Faustfunc (prog *Program) Encode() []byte {
97*4947cdc7SCole Faust	var e encoder
98*4947cdc7SCole Faust	e.p = append(e.p, magic...)
99*4947cdc7SCole Faust	e.p = append(e.p, "????"...) // string data offset; filled in later
100*4947cdc7SCole Faust	e.int(Version)
101*4947cdc7SCole Faust	e.string(prog.Toplevel.Pos.Filename())
102*4947cdc7SCole Faust	e.bindings(prog.Loads)
103*4947cdc7SCole Faust	e.int(len(prog.Names))
104*4947cdc7SCole Faust	for _, name := range prog.Names {
105*4947cdc7SCole Faust		e.string(name)
106*4947cdc7SCole Faust	}
107*4947cdc7SCole Faust	e.int(len(prog.Constants))
108*4947cdc7SCole Faust	for _, c := range prog.Constants {
109*4947cdc7SCole Faust		switch c := c.(type) {
110*4947cdc7SCole Faust		case string:
111*4947cdc7SCole Faust			e.int(0)
112*4947cdc7SCole Faust			e.string(c)
113*4947cdc7SCole Faust		case Bytes:
114*4947cdc7SCole Faust			e.int(1)
115*4947cdc7SCole Faust			e.string(string(c))
116*4947cdc7SCole Faust		case int64:
117*4947cdc7SCole Faust			e.int(2)
118*4947cdc7SCole Faust			e.int64(c)
119*4947cdc7SCole Faust		case float64:
120*4947cdc7SCole Faust			e.int(3)
121*4947cdc7SCole Faust			e.uint64(math.Float64bits(c))
122*4947cdc7SCole Faust		case *big.Int:
123*4947cdc7SCole Faust			e.int(4)
124*4947cdc7SCole Faust			e.string(c.Text(10))
125*4947cdc7SCole Faust		}
126*4947cdc7SCole Faust	}
127*4947cdc7SCole Faust	e.bindings(prog.Globals)
128*4947cdc7SCole Faust	e.function(prog.Toplevel)
129*4947cdc7SCole Faust	e.int(len(prog.Functions))
130*4947cdc7SCole Faust	for _, fn := range prog.Functions {
131*4947cdc7SCole Faust		e.function(fn)
132*4947cdc7SCole Faust	}
133*4947cdc7SCole Faust
134*4947cdc7SCole Faust	// Patch in the offset of the string data section.
135*4947cdc7SCole Faust	binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p)))
136*4947cdc7SCole Faust
137*4947cdc7SCole Faust	return append(e.p, e.s...)
138*4947cdc7SCole Faust}
139*4947cdc7SCole Faust
// An encoder accumulates the two sections of an encoded program
// separately so that they can be concatenated once encoding is complete.
type encoder struct {
	// p is the encoded program section.
	p []byte

	// s is the string section: the data of every string and byte slice
	// written so far, in order.
	s []byte

	// tmp is scratch space large enough to hold any single varint.
	tmp [binary.MaxVarintLen64]byte
}
145*4947cdc7SCole Faust
146*4947cdc7SCole Faustfunc (e *encoder) int(x int) {
147*4947cdc7SCole Faust	e.int64(int64(x))
148*4947cdc7SCole Faust}
149*4947cdc7SCole Faust
150*4947cdc7SCole Faustfunc (e *encoder) int64(x int64) {
151*4947cdc7SCole Faust	n := binary.PutVarint(e.tmp[:], x)
152*4947cdc7SCole Faust	e.p = append(e.p, e.tmp[:n]...)
153*4947cdc7SCole Faust}
154*4947cdc7SCole Faust
155*4947cdc7SCole Faustfunc (e *encoder) uint64(x uint64) {
156*4947cdc7SCole Faust	n := binary.PutUvarint(e.tmp[:], x)
157*4947cdc7SCole Faust	e.p = append(e.p, e.tmp[:n]...)
158*4947cdc7SCole Faust}
159*4947cdc7SCole Faust
160*4947cdc7SCole Faustfunc (e *encoder) string(s string) {
161*4947cdc7SCole Faust	e.int(len(s))
162*4947cdc7SCole Faust	e.s = append(e.s, s...)
163*4947cdc7SCole Faust}
164*4947cdc7SCole Faust
165*4947cdc7SCole Faustfunc (e *encoder) bytes(b []byte) {
166*4947cdc7SCole Faust	e.int(len(b))
167*4947cdc7SCole Faust	e.s = append(e.s, b...)
168*4947cdc7SCole Faust}
169*4947cdc7SCole Faust
170*4947cdc7SCole Faustfunc (e *encoder) binding(bind Binding) {
171*4947cdc7SCole Faust	e.string(bind.Name)
172*4947cdc7SCole Faust	e.int(int(bind.Pos.Line))
173*4947cdc7SCole Faust	e.int(int(bind.Pos.Col))
174*4947cdc7SCole Faust}
175*4947cdc7SCole Faust
176*4947cdc7SCole Faustfunc (e *encoder) bindings(binds []Binding) {
177*4947cdc7SCole Faust	e.int(len(binds))
178*4947cdc7SCole Faust	for _, bind := range binds {
179*4947cdc7SCole Faust		e.binding(bind)
180*4947cdc7SCole Faust	}
181*4947cdc7SCole Faust}
182*4947cdc7SCole Faust
183*4947cdc7SCole Faustfunc (e *encoder) function(fn *Funcode) {
184*4947cdc7SCole Faust	e.binding(Binding{fn.Name, fn.Pos})
185*4947cdc7SCole Faust	e.string(fn.Doc)
186*4947cdc7SCole Faust	e.bytes(fn.Code)
187*4947cdc7SCole Faust	e.int(len(fn.pclinetab))
188*4947cdc7SCole Faust	for _, x := range fn.pclinetab {
189*4947cdc7SCole Faust		e.int64(int64(x))
190*4947cdc7SCole Faust	}
191*4947cdc7SCole Faust	e.bindings(fn.Locals)
192*4947cdc7SCole Faust	e.int(len(fn.Cells))
193*4947cdc7SCole Faust	for _, index := range fn.Cells {
194*4947cdc7SCole Faust		e.int(index)
195*4947cdc7SCole Faust	}
196*4947cdc7SCole Faust	e.bindings(fn.Freevars)
197*4947cdc7SCole Faust	e.int(fn.MaxStack)
198*4947cdc7SCole Faust	e.int(fn.NumParams)
199*4947cdc7SCole Faust	e.int(fn.NumKwonlyParams)
200*4947cdc7SCole Faust	e.int(b2i(fn.HasVarargs))
201*4947cdc7SCole Faust	e.int(b2i(fn.HasKwargs))
202*4947cdc7SCole Faust}
203*4947cdc7SCole Faust
// b2i converts a bool to an int: 1 for true, 0 for false.
func b2i(b bool) int {
	if b {
		return 1
	}
	return 0
}
211*4947cdc7SCole Faust
212*4947cdc7SCole Faust// DecodeProgram decodes a compiled Starlark program from data.
213*4947cdc7SCole Faustfunc DecodeProgram(data []byte) (_ *Program, err error) {
214*4947cdc7SCole Faust	if len(data) < len(magic) {
215*4947cdc7SCole Faust		return nil, fmt.Errorf("not a compiled module: no magic number")
216*4947cdc7SCole Faust	}
217*4947cdc7SCole Faust	if got := string(data[:4]); got != magic {
218*4947cdc7SCole Faust		return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q",
219*4947cdc7SCole Faust			got, magic)
220*4947cdc7SCole Faust	}
221*4947cdc7SCole Faust	defer func() {
222*4947cdc7SCole Faust		if x := recover(); x != nil {
223*4947cdc7SCole Faust			debugpkg.PrintStack()
224*4947cdc7SCole Faust			err = fmt.Errorf("internal error while decoding program: %v", x)
225*4947cdc7SCole Faust		}
226*4947cdc7SCole Faust	}()
227*4947cdc7SCole Faust
228*4947cdc7SCole Faust	offset := binary.LittleEndian.Uint32(data[4:8])
229*4947cdc7SCole Faust	d := decoder{
230*4947cdc7SCole Faust		p: data[8:offset],
231*4947cdc7SCole Faust		s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist
232*4947cdc7SCole Faust	}
233*4947cdc7SCole Faust
234*4947cdc7SCole Faust	if v := d.int(); v != Version {
235*4947cdc7SCole Faust		return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version)
236*4947cdc7SCole Faust	}
237*4947cdc7SCole Faust
238*4947cdc7SCole Faust	filename := d.string()
239*4947cdc7SCole Faust	d.filename = &filename
240*4947cdc7SCole Faust
241*4947cdc7SCole Faust	loads := d.bindings()
242*4947cdc7SCole Faust
243*4947cdc7SCole Faust	names := make([]string, d.int())
244*4947cdc7SCole Faust	for i := range names {
245*4947cdc7SCole Faust		names[i] = d.string()
246*4947cdc7SCole Faust	}
247*4947cdc7SCole Faust
248*4947cdc7SCole Faust	// constants
249*4947cdc7SCole Faust	constants := make([]interface{}, d.int())
250*4947cdc7SCole Faust	for i := range constants {
251*4947cdc7SCole Faust		var c interface{}
252*4947cdc7SCole Faust		switch d.int() {
253*4947cdc7SCole Faust		case 0:
254*4947cdc7SCole Faust			c = d.string()
255*4947cdc7SCole Faust		case 1:
256*4947cdc7SCole Faust			c = Bytes(d.string())
257*4947cdc7SCole Faust		case 2:
258*4947cdc7SCole Faust			c = d.int64()
259*4947cdc7SCole Faust		case 3:
260*4947cdc7SCole Faust			c = math.Float64frombits(d.uint64())
261*4947cdc7SCole Faust		case 4:
262*4947cdc7SCole Faust			c, _ = new(big.Int).SetString(d.string(), 10)
263*4947cdc7SCole Faust		}
264*4947cdc7SCole Faust		constants[i] = c
265*4947cdc7SCole Faust	}
266*4947cdc7SCole Faust
267*4947cdc7SCole Faust	globals := d.bindings()
268*4947cdc7SCole Faust	toplevel := d.function()
269*4947cdc7SCole Faust	funcs := make([]*Funcode, d.int())
270*4947cdc7SCole Faust	for i := range funcs {
271*4947cdc7SCole Faust		funcs[i] = d.function()
272*4947cdc7SCole Faust	}
273*4947cdc7SCole Faust
274*4947cdc7SCole Faust	prog := &Program{
275*4947cdc7SCole Faust		Loads:     loads,
276*4947cdc7SCole Faust		Names:     names,
277*4947cdc7SCole Faust		Constants: constants,
278*4947cdc7SCole Faust		Globals:   globals,
279*4947cdc7SCole Faust		Functions: funcs,
280*4947cdc7SCole Faust		Toplevel:  toplevel,
281*4947cdc7SCole Faust	}
282*4947cdc7SCole Faust	toplevel.Prog = prog
283*4947cdc7SCole Faust	for _, f := range funcs {
284*4947cdc7SCole Faust		f.Prog = prog
285*4947cdc7SCole Faust	}
286*4947cdc7SCole Faust
287*4947cdc7SCole Faust	if len(d.p)+len(d.s) > 0 {
288*4947cdc7SCole Faust		return nil, fmt.Errorf("internal error: unconsumed data during decoding")
289*4947cdc7SCole Faust	}
290*4947cdc7SCole Faust
291*4947cdc7SCole Faust	return prog, nil
292*4947cdc7SCole Faust}
293*4947cdc7SCole Faust
// A decoder holds the unread remainder of each section of an encoded
// program while it is being decoded.
type decoder struct {
	// p is the unread part of the encoded program section.
	p []byte

	// s is the unread part of the string section.
	s []byte

	// filename is the file name given to every decoded position.
	// (indirect to avoid keeping decoder live)
	filename *string
}
299*4947cdc7SCole Faust
300*4947cdc7SCole Faustfunc (d *decoder) int() int {
301*4947cdc7SCole Faust	return int(d.int64())
302*4947cdc7SCole Faust}
303*4947cdc7SCole Faust
304*4947cdc7SCole Faustfunc (d *decoder) int64() int64 {
305*4947cdc7SCole Faust	x, len := binary.Varint(d.p[:])
306*4947cdc7SCole Faust	d.p = d.p[len:]
307*4947cdc7SCole Faust	return x
308*4947cdc7SCole Faust}
309*4947cdc7SCole Faust
310*4947cdc7SCole Faustfunc (d *decoder) uint64() uint64 {
311*4947cdc7SCole Faust	x, len := binary.Uvarint(d.p[:])
312*4947cdc7SCole Faust	d.p = d.p[len:]
313*4947cdc7SCole Faust	return x
314*4947cdc7SCole Faust}
315*4947cdc7SCole Faust
316*4947cdc7SCole Faustfunc (d *decoder) string() (s string) {
317*4947cdc7SCole Faust	if slice := d.bytes(); len(slice) > 0 {
318*4947cdc7SCole Faust		// Avoid a memory allocation for each string
319*4947cdc7SCole Faust		// by unsafely aliasing slice.
320*4947cdc7SCole Faust		type string struct {
321*4947cdc7SCole Faust			data *byte
322*4947cdc7SCole Faust			len  int
323*4947cdc7SCole Faust		}
324*4947cdc7SCole Faust		ptr := (*string)(unsafe.Pointer(&s))
325*4947cdc7SCole Faust		ptr.data = &slice[0]
326*4947cdc7SCole Faust		ptr.len = len(slice)
327*4947cdc7SCole Faust	}
328*4947cdc7SCole Faust	return s
329*4947cdc7SCole Faust}
330*4947cdc7SCole Faust
331*4947cdc7SCole Faustfunc (d *decoder) bytes() []byte {
332*4947cdc7SCole Faust	len := d.int()
333*4947cdc7SCole Faust	r := d.s[:len:len]
334*4947cdc7SCole Faust	d.s = d.s[len:]
335*4947cdc7SCole Faust	return r
336*4947cdc7SCole Faust}
337*4947cdc7SCole Faust
338*4947cdc7SCole Faustfunc (d *decoder) binding() Binding {
339*4947cdc7SCole Faust	name := d.string()
340*4947cdc7SCole Faust	line := int32(d.int())
341*4947cdc7SCole Faust	col := int32(d.int())
342*4947cdc7SCole Faust	return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)}
343*4947cdc7SCole Faust}
344*4947cdc7SCole Faust
345*4947cdc7SCole Faustfunc (d *decoder) bindings() []Binding {
346*4947cdc7SCole Faust	bindings := make([]Binding, d.int())
347*4947cdc7SCole Faust	for i := range bindings {
348*4947cdc7SCole Faust		bindings[i] = d.binding()
349*4947cdc7SCole Faust	}
350*4947cdc7SCole Faust	return bindings
351*4947cdc7SCole Faust}
352*4947cdc7SCole Faust
353*4947cdc7SCole Faustfunc (d *decoder) ints() []int {
354*4947cdc7SCole Faust	ints := make([]int, d.int())
355*4947cdc7SCole Faust	for i := range ints {
356*4947cdc7SCole Faust		ints[i] = d.int()
357*4947cdc7SCole Faust	}
358*4947cdc7SCole Faust	return ints
359*4947cdc7SCole Faust}
360*4947cdc7SCole Faust
361*4947cdc7SCole Faustfunc (d *decoder) bool() bool { return d.int() != 0 }
362*4947cdc7SCole Faust
363*4947cdc7SCole Faustfunc (d *decoder) function() *Funcode {
364*4947cdc7SCole Faust	id := d.binding()
365*4947cdc7SCole Faust	doc := d.string()
366*4947cdc7SCole Faust	code := d.bytes()
367*4947cdc7SCole Faust	pclinetab := make([]uint16, d.int())
368*4947cdc7SCole Faust	for i := range pclinetab {
369*4947cdc7SCole Faust		pclinetab[i] = uint16(d.int())
370*4947cdc7SCole Faust	}
371*4947cdc7SCole Faust	locals := d.bindings()
372*4947cdc7SCole Faust	cells := d.ints()
373*4947cdc7SCole Faust	freevars := d.bindings()
374*4947cdc7SCole Faust	maxStack := d.int()
375*4947cdc7SCole Faust	numParams := d.int()
376*4947cdc7SCole Faust	numKwonlyParams := d.int()
377*4947cdc7SCole Faust	hasVarargs := d.int() != 0
378*4947cdc7SCole Faust	hasKwargs := d.int() != 0
379*4947cdc7SCole Faust	return &Funcode{
380*4947cdc7SCole Faust		// Prog is filled in later.
381*4947cdc7SCole Faust		Pos:             id.Pos,
382*4947cdc7SCole Faust		Name:            id.Name,
383*4947cdc7SCole Faust		Doc:             doc,
384*4947cdc7SCole Faust		Code:            code,
385*4947cdc7SCole Faust		pclinetab:       pclinetab,
386*4947cdc7SCole Faust		Locals:          locals,
387*4947cdc7SCole Faust		Cells:           cells,
388*4947cdc7SCole Faust		Freevars:        freevars,
389*4947cdc7SCole Faust		MaxStack:        maxStack,
390*4947cdc7SCole Faust		NumParams:       numParams,
391*4947cdc7SCole Faust		NumKwonlyParams: numKwonlyParams,
392*4947cdc7SCole Faust		HasVarargs:      hasVarargs,
393*4947cdc7SCole Faust		HasKwargs:       hasKwargs,
394*4947cdc7SCole Faust	}
395*4947cdc7SCole Faust}
396