package compile

// This file defines functions to read and write a compile.Program to a file.
//
// It is the client's responsibility to avoid version skew between the
// compiler used to produce a file and the interpreter that consumes it.
// The version number is provided as a constant.
// Incompatible protocol changes should also increment the version number.
//
// Encoding
//
// Program:
//	"!sky"		[4]byte		# magic number
//	str		uint32le	# offset of <strings> section
//	version		varint		# must match Version
//	filename	string
//	numloads	varint
//	loads		[]Ident
//	numnames	varint
//	names		[]string
//	numconsts	varint
//	consts		[]Constant
//	numglobals	varint
//	globals		[]Ident
//	toplevel	Funcode
//	numfuncs	varint
//	funcs		[]Funcode
//	<strings>	[]byte		# concatenation of all referenced strings
//	EOF
//
// Funcode:
//	id		Ident
//	doc		string
//	code		[]byte
//	pclinetablen	varint
//	pclinetab	[]varint
//	numlocals	varint
//	locals		[]Ident
//	numcells	varint
//	cells		[]int
//	numfreevars	varint
//	freevar		[]Ident
//	maxstack	varint
//	numparams	varint
//	numkwonlyparams	varint
//	hasvarargs	varint (0 or 1)
//	haskwargs	varint (0 or 1)
//
// Ident:
//	name		string
//	line, col	varint
//
// Constant:			# type      data
//	type		varint	# 0=string  string
//	data		...	# 1=bytes   string
//				# 2=int     varint
//				# 3=float   varint (bits as uint64)
//				# 4=bigint  string (decimal ASCII text)
//
// The encoding starts with a four-byte magic number.
// The next four bytes are a little-endian uint32
// that provides the offset of the string section
// at the end of the file, which contains the ordered
// concatenation of all strings referenced by the
// program. This design permits the decoder to read
// the first and second parts of the file into different
// memory allocations: the first (the encoded program)
// is transient, but the second (the strings) persists
// for the life of the Program.
//
// Within the encoded program, all strings are referred
// to by their length. As the encoder and decoder process
// the entire file sequentially, they are in lock step,
// so the start offset of each string is implicit.
//
// Funcode.Code is represented as a []byte slice to permit
// modification when breakpoints are set. All other strings
// are represented as strings.
They all (unsafely) share the 78*4947cdc7SCole Faust// same backing byte slice. 79*4947cdc7SCole Faust// 80*4947cdc7SCole Faust// Aside from the str field, all integers are encoded as varints. 81*4947cdc7SCole Faust 82*4947cdc7SCole Faustimport ( 83*4947cdc7SCole Faust "encoding/binary" 84*4947cdc7SCole Faust "fmt" 85*4947cdc7SCole Faust "math" 86*4947cdc7SCole Faust "math/big" 87*4947cdc7SCole Faust debugpkg "runtime/debug" 88*4947cdc7SCole Faust "unsafe" 89*4947cdc7SCole Faust 90*4947cdc7SCole Faust "go.starlark.net/syntax" 91*4947cdc7SCole Faust) 92*4947cdc7SCole Faust 93*4947cdc7SCole Faustconst magic = "!sky" 94*4947cdc7SCole Faust 95*4947cdc7SCole Faust// Encode encodes a compiled Starlark program. 96*4947cdc7SCole Faustfunc (prog *Program) Encode() []byte { 97*4947cdc7SCole Faust var e encoder 98*4947cdc7SCole Faust e.p = append(e.p, magic...) 99*4947cdc7SCole Faust e.p = append(e.p, "????"...) // string data offset; filled in later 100*4947cdc7SCole Faust e.int(Version) 101*4947cdc7SCole Faust e.string(prog.Toplevel.Pos.Filename()) 102*4947cdc7SCole Faust e.bindings(prog.Loads) 103*4947cdc7SCole Faust e.int(len(prog.Names)) 104*4947cdc7SCole Faust for _, name := range prog.Names { 105*4947cdc7SCole Faust e.string(name) 106*4947cdc7SCole Faust } 107*4947cdc7SCole Faust e.int(len(prog.Constants)) 108*4947cdc7SCole Faust for _, c := range prog.Constants { 109*4947cdc7SCole Faust switch c := c.(type) { 110*4947cdc7SCole Faust case string: 111*4947cdc7SCole Faust e.int(0) 112*4947cdc7SCole Faust e.string(c) 113*4947cdc7SCole Faust case Bytes: 114*4947cdc7SCole Faust e.int(1) 115*4947cdc7SCole Faust e.string(string(c)) 116*4947cdc7SCole Faust case int64: 117*4947cdc7SCole Faust e.int(2) 118*4947cdc7SCole Faust e.int64(c) 119*4947cdc7SCole Faust case float64: 120*4947cdc7SCole Faust e.int(3) 121*4947cdc7SCole Faust e.uint64(math.Float64bits(c)) 122*4947cdc7SCole Faust case *big.Int: 123*4947cdc7SCole Faust e.int(4) 124*4947cdc7SCole Faust e.string(c.Text(10)) 
125*4947cdc7SCole Faust } 126*4947cdc7SCole Faust } 127*4947cdc7SCole Faust e.bindings(prog.Globals) 128*4947cdc7SCole Faust e.function(prog.Toplevel) 129*4947cdc7SCole Faust e.int(len(prog.Functions)) 130*4947cdc7SCole Faust for _, fn := range prog.Functions { 131*4947cdc7SCole Faust e.function(fn) 132*4947cdc7SCole Faust } 133*4947cdc7SCole Faust 134*4947cdc7SCole Faust // Patch in the offset of the string data section. 135*4947cdc7SCole Faust binary.LittleEndian.PutUint32(e.p[4:8], uint32(len(e.p))) 136*4947cdc7SCole Faust 137*4947cdc7SCole Faust return append(e.p, e.s...) 138*4947cdc7SCole Faust} 139*4947cdc7SCole Faust 140*4947cdc7SCole Fausttype encoder struct { 141*4947cdc7SCole Faust p []byte // encoded program 142*4947cdc7SCole Faust s []byte // strings 143*4947cdc7SCole Faust tmp [binary.MaxVarintLen64]byte 144*4947cdc7SCole Faust} 145*4947cdc7SCole Faust 146*4947cdc7SCole Faustfunc (e *encoder) int(x int) { 147*4947cdc7SCole Faust e.int64(int64(x)) 148*4947cdc7SCole Faust} 149*4947cdc7SCole Faust 150*4947cdc7SCole Faustfunc (e *encoder) int64(x int64) { 151*4947cdc7SCole Faust n := binary.PutVarint(e.tmp[:], x) 152*4947cdc7SCole Faust e.p = append(e.p, e.tmp[:n]...) 153*4947cdc7SCole Faust} 154*4947cdc7SCole Faust 155*4947cdc7SCole Faustfunc (e *encoder) uint64(x uint64) { 156*4947cdc7SCole Faust n := binary.PutUvarint(e.tmp[:], x) 157*4947cdc7SCole Faust e.p = append(e.p, e.tmp[:n]...) 158*4947cdc7SCole Faust} 159*4947cdc7SCole Faust 160*4947cdc7SCole Faustfunc (e *encoder) string(s string) { 161*4947cdc7SCole Faust e.int(len(s)) 162*4947cdc7SCole Faust e.s = append(e.s, s...) 163*4947cdc7SCole Faust} 164*4947cdc7SCole Faust 165*4947cdc7SCole Faustfunc (e *encoder) bytes(b []byte) { 166*4947cdc7SCole Faust e.int(len(b)) 167*4947cdc7SCole Faust e.s = append(e.s, b...) 
168*4947cdc7SCole Faust} 169*4947cdc7SCole Faust 170*4947cdc7SCole Faustfunc (e *encoder) binding(bind Binding) { 171*4947cdc7SCole Faust e.string(bind.Name) 172*4947cdc7SCole Faust e.int(int(bind.Pos.Line)) 173*4947cdc7SCole Faust e.int(int(bind.Pos.Col)) 174*4947cdc7SCole Faust} 175*4947cdc7SCole Faust 176*4947cdc7SCole Faustfunc (e *encoder) bindings(binds []Binding) { 177*4947cdc7SCole Faust e.int(len(binds)) 178*4947cdc7SCole Faust for _, bind := range binds { 179*4947cdc7SCole Faust e.binding(bind) 180*4947cdc7SCole Faust } 181*4947cdc7SCole Faust} 182*4947cdc7SCole Faust 183*4947cdc7SCole Faustfunc (e *encoder) function(fn *Funcode) { 184*4947cdc7SCole Faust e.binding(Binding{fn.Name, fn.Pos}) 185*4947cdc7SCole Faust e.string(fn.Doc) 186*4947cdc7SCole Faust e.bytes(fn.Code) 187*4947cdc7SCole Faust e.int(len(fn.pclinetab)) 188*4947cdc7SCole Faust for _, x := range fn.pclinetab { 189*4947cdc7SCole Faust e.int64(int64(x)) 190*4947cdc7SCole Faust } 191*4947cdc7SCole Faust e.bindings(fn.Locals) 192*4947cdc7SCole Faust e.int(len(fn.Cells)) 193*4947cdc7SCole Faust for _, index := range fn.Cells { 194*4947cdc7SCole Faust e.int(index) 195*4947cdc7SCole Faust } 196*4947cdc7SCole Faust e.bindings(fn.Freevars) 197*4947cdc7SCole Faust e.int(fn.MaxStack) 198*4947cdc7SCole Faust e.int(fn.NumParams) 199*4947cdc7SCole Faust e.int(fn.NumKwonlyParams) 200*4947cdc7SCole Faust e.int(b2i(fn.HasVarargs)) 201*4947cdc7SCole Faust e.int(b2i(fn.HasKwargs)) 202*4947cdc7SCole Faust} 203*4947cdc7SCole Faust 204*4947cdc7SCole Faustfunc b2i(b bool) int { 205*4947cdc7SCole Faust if b { 206*4947cdc7SCole Faust return 1 207*4947cdc7SCole Faust } else { 208*4947cdc7SCole Faust return 0 209*4947cdc7SCole Faust } 210*4947cdc7SCole Faust} 211*4947cdc7SCole Faust 212*4947cdc7SCole Faust// DecodeProgram decodes a compiled Starlark program from data. 
213*4947cdc7SCole Faustfunc DecodeProgram(data []byte) (_ *Program, err error) { 214*4947cdc7SCole Faust if len(data) < len(magic) { 215*4947cdc7SCole Faust return nil, fmt.Errorf("not a compiled module: no magic number") 216*4947cdc7SCole Faust } 217*4947cdc7SCole Faust if got := string(data[:4]); got != magic { 218*4947cdc7SCole Faust return nil, fmt.Errorf("not a compiled module: got magic number %q, want %q", 219*4947cdc7SCole Faust got, magic) 220*4947cdc7SCole Faust } 221*4947cdc7SCole Faust defer func() { 222*4947cdc7SCole Faust if x := recover(); x != nil { 223*4947cdc7SCole Faust debugpkg.PrintStack() 224*4947cdc7SCole Faust err = fmt.Errorf("internal error while decoding program: %v", x) 225*4947cdc7SCole Faust } 226*4947cdc7SCole Faust }() 227*4947cdc7SCole Faust 228*4947cdc7SCole Faust offset := binary.LittleEndian.Uint32(data[4:8]) 229*4947cdc7SCole Faust d := decoder{ 230*4947cdc7SCole Faust p: data[8:offset], 231*4947cdc7SCole Faust s: append([]byte(nil), data[offset:]...), // allocate a copy, which will persist 232*4947cdc7SCole Faust } 233*4947cdc7SCole Faust 234*4947cdc7SCole Faust if v := d.int(); v != Version { 235*4947cdc7SCole Faust return nil, fmt.Errorf("version mismatch: read %d, want %d", v, Version) 236*4947cdc7SCole Faust } 237*4947cdc7SCole Faust 238*4947cdc7SCole Faust filename := d.string() 239*4947cdc7SCole Faust d.filename = &filename 240*4947cdc7SCole Faust 241*4947cdc7SCole Faust loads := d.bindings() 242*4947cdc7SCole Faust 243*4947cdc7SCole Faust names := make([]string, d.int()) 244*4947cdc7SCole Faust for i := range names { 245*4947cdc7SCole Faust names[i] = d.string() 246*4947cdc7SCole Faust } 247*4947cdc7SCole Faust 248*4947cdc7SCole Faust // constants 249*4947cdc7SCole Faust constants := make([]interface{}, d.int()) 250*4947cdc7SCole Faust for i := range constants { 251*4947cdc7SCole Faust var c interface{} 252*4947cdc7SCole Faust switch d.int() { 253*4947cdc7SCole Faust case 0: 254*4947cdc7SCole Faust c = d.string() 
255*4947cdc7SCole Faust case 1: 256*4947cdc7SCole Faust c = Bytes(d.string()) 257*4947cdc7SCole Faust case 2: 258*4947cdc7SCole Faust c = d.int64() 259*4947cdc7SCole Faust case 3: 260*4947cdc7SCole Faust c = math.Float64frombits(d.uint64()) 261*4947cdc7SCole Faust case 4: 262*4947cdc7SCole Faust c, _ = new(big.Int).SetString(d.string(), 10) 263*4947cdc7SCole Faust } 264*4947cdc7SCole Faust constants[i] = c 265*4947cdc7SCole Faust } 266*4947cdc7SCole Faust 267*4947cdc7SCole Faust globals := d.bindings() 268*4947cdc7SCole Faust toplevel := d.function() 269*4947cdc7SCole Faust funcs := make([]*Funcode, d.int()) 270*4947cdc7SCole Faust for i := range funcs { 271*4947cdc7SCole Faust funcs[i] = d.function() 272*4947cdc7SCole Faust } 273*4947cdc7SCole Faust 274*4947cdc7SCole Faust prog := &Program{ 275*4947cdc7SCole Faust Loads: loads, 276*4947cdc7SCole Faust Names: names, 277*4947cdc7SCole Faust Constants: constants, 278*4947cdc7SCole Faust Globals: globals, 279*4947cdc7SCole Faust Functions: funcs, 280*4947cdc7SCole Faust Toplevel: toplevel, 281*4947cdc7SCole Faust } 282*4947cdc7SCole Faust toplevel.Prog = prog 283*4947cdc7SCole Faust for _, f := range funcs { 284*4947cdc7SCole Faust f.Prog = prog 285*4947cdc7SCole Faust } 286*4947cdc7SCole Faust 287*4947cdc7SCole Faust if len(d.p)+len(d.s) > 0 { 288*4947cdc7SCole Faust return nil, fmt.Errorf("internal error: unconsumed data during decoding") 289*4947cdc7SCole Faust } 290*4947cdc7SCole Faust 291*4947cdc7SCole Faust return prog, nil 292*4947cdc7SCole Faust} 293*4947cdc7SCole Faust 294*4947cdc7SCole Fausttype decoder struct { 295*4947cdc7SCole Faust p []byte // encoded program 296*4947cdc7SCole Faust s []byte // strings 297*4947cdc7SCole Faust filename *string // (indirect to avoid keeping decoder live) 298*4947cdc7SCole Faust} 299*4947cdc7SCole Faust 300*4947cdc7SCole Faustfunc (d *decoder) int() int { 301*4947cdc7SCole Faust return int(d.int64()) 302*4947cdc7SCole Faust} 303*4947cdc7SCole Faust 304*4947cdc7SCole 
Faustfunc (d *decoder) int64() int64 { 305*4947cdc7SCole Faust x, len := binary.Varint(d.p[:]) 306*4947cdc7SCole Faust d.p = d.p[len:] 307*4947cdc7SCole Faust return x 308*4947cdc7SCole Faust} 309*4947cdc7SCole Faust 310*4947cdc7SCole Faustfunc (d *decoder) uint64() uint64 { 311*4947cdc7SCole Faust x, len := binary.Uvarint(d.p[:]) 312*4947cdc7SCole Faust d.p = d.p[len:] 313*4947cdc7SCole Faust return x 314*4947cdc7SCole Faust} 315*4947cdc7SCole Faust 316*4947cdc7SCole Faustfunc (d *decoder) string() (s string) { 317*4947cdc7SCole Faust if slice := d.bytes(); len(slice) > 0 { 318*4947cdc7SCole Faust // Avoid a memory allocation for each string 319*4947cdc7SCole Faust // by unsafely aliasing slice. 320*4947cdc7SCole Faust type string struct { 321*4947cdc7SCole Faust data *byte 322*4947cdc7SCole Faust len int 323*4947cdc7SCole Faust } 324*4947cdc7SCole Faust ptr := (*string)(unsafe.Pointer(&s)) 325*4947cdc7SCole Faust ptr.data = &slice[0] 326*4947cdc7SCole Faust ptr.len = len(slice) 327*4947cdc7SCole Faust } 328*4947cdc7SCole Faust return s 329*4947cdc7SCole Faust} 330*4947cdc7SCole Faust 331*4947cdc7SCole Faustfunc (d *decoder) bytes() []byte { 332*4947cdc7SCole Faust len := d.int() 333*4947cdc7SCole Faust r := d.s[:len:len] 334*4947cdc7SCole Faust d.s = d.s[len:] 335*4947cdc7SCole Faust return r 336*4947cdc7SCole Faust} 337*4947cdc7SCole Faust 338*4947cdc7SCole Faustfunc (d *decoder) binding() Binding { 339*4947cdc7SCole Faust name := d.string() 340*4947cdc7SCole Faust line := int32(d.int()) 341*4947cdc7SCole Faust col := int32(d.int()) 342*4947cdc7SCole Faust return Binding{Name: name, Pos: syntax.MakePosition(d.filename, line, col)} 343*4947cdc7SCole Faust} 344*4947cdc7SCole Faust 345*4947cdc7SCole Faustfunc (d *decoder) bindings() []Binding { 346*4947cdc7SCole Faust bindings := make([]Binding, d.int()) 347*4947cdc7SCole Faust for i := range bindings { 348*4947cdc7SCole Faust bindings[i] = d.binding() 349*4947cdc7SCole Faust } 350*4947cdc7SCole Faust return 
bindings 351*4947cdc7SCole Faust} 352*4947cdc7SCole Faust 353*4947cdc7SCole Faustfunc (d *decoder) ints() []int { 354*4947cdc7SCole Faust ints := make([]int, d.int()) 355*4947cdc7SCole Faust for i := range ints { 356*4947cdc7SCole Faust ints[i] = d.int() 357*4947cdc7SCole Faust } 358*4947cdc7SCole Faust return ints 359*4947cdc7SCole Faust} 360*4947cdc7SCole Faust 361*4947cdc7SCole Faustfunc (d *decoder) bool() bool { return d.int() != 0 } 362*4947cdc7SCole Faust 363*4947cdc7SCole Faustfunc (d *decoder) function() *Funcode { 364*4947cdc7SCole Faust id := d.binding() 365*4947cdc7SCole Faust doc := d.string() 366*4947cdc7SCole Faust code := d.bytes() 367*4947cdc7SCole Faust pclinetab := make([]uint16, d.int()) 368*4947cdc7SCole Faust for i := range pclinetab { 369*4947cdc7SCole Faust pclinetab[i] = uint16(d.int()) 370*4947cdc7SCole Faust } 371*4947cdc7SCole Faust locals := d.bindings() 372*4947cdc7SCole Faust cells := d.ints() 373*4947cdc7SCole Faust freevars := d.bindings() 374*4947cdc7SCole Faust maxStack := d.int() 375*4947cdc7SCole Faust numParams := d.int() 376*4947cdc7SCole Faust numKwonlyParams := d.int() 377*4947cdc7SCole Faust hasVarargs := d.int() != 0 378*4947cdc7SCole Faust hasKwargs := d.int() != 0 379*4947cdc7SCole Faust return &Funcode{ 380*4947cdc7SCole Faust // Prog is filled in later. 381*4947cdc7SCole Faust Pos: id.Pos, 382*4947cdc7SCole Faust Name: id.Name, 383*4947cdc7SCole Faust Doc: doc, 384*4947cdc7SCole Faust Code: code, 385*4947cdc7SCole Faust pclinetab: pclinetab, 386*4947cdc7SCole Faust Locals: locals, 387*4947cdc7SCole Faust Cells: cells, 388*4947cdc7SCole Faust Freevars: freevars, 389*4947cdc7SCole Faust MaxStack: maxStack, 390*4947cdc7SCole Faust NumParams: numParams, 391*4947cdc7SCole Faust NumKwonlyParams: numKwonlyParams, 392*4947cdc7SCole Faust HasVarargs: hasVarargs, 393*4947cdc7SCole Faust HasKwargs: hasKwargs, 394*4947cdc7SCole Faust } 395*4947cdc7SCole Faust} 396