1// Copyright 2021 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package pkgbits 6 7import ( 8 "bytes" 9 "crypto/md5" 10 "encoding/binary" 11 "go/constant" 12 "io" 13 "math/big" 14 "runtime" 15 "strings" 16) 17 18// currentVersion is the current version number. 19// 20// - v0: initial prototype 21// 22// - v1: adds the flags uint32 word 23// 24// TODO(mdempsky): For the next version bump: 25// - remove the legacy "has init" bool from the public root 26// - remove obj's "derived func instance" bool 27const currentVersion uint32 = 1 28 29// A PkgEncoder provides methods for encoding a package's Unified IR 30// export data. 31type PkgEncoder struct { 32 // elems holds the bitstream for previously encoded elements. 33 elems [numRelocs][]string 34 35 // stringsIdx maps previously encoded strings to their index within 36 // the RelocString section, to allow deduplication. That is, 37 // elems[RelocString][stringsIdx[s]] == s (if present). 38 stringsIdx map[string]Index 39 40 // syncFrames is the number of frames to write at each sync 41 // marker. A negative value means sync markers are omitted. 42 syncFrames int 43} 44 45// SyncMarkers reports whether pw uses sync markers. 46func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } 47 48// NewPkgEncoder returns an initialized PkgEncoder. 49// 50// syncFrames is the number of caller frames that should be serialized 51// at Sync points. Serializing additional frames results in larger 52// export data files, but can help diagnosing desync errors in 53// higher-level Unified IR reader/writer code. If syncFrames is 54// negative, then sync markers are omitted entirely. 55func NewPkgEncoder(syncFrames int) PkgEncoder { 56 return PkgEncoder{ 57 stringsIdx: make(map[string]Index), 58 syncFrames: syncFrames, 59 } 60} 61 62// DumpTo writes the package's encoded data to out0 and returns the 63// package fingerprint. 64func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { 65 h := md5.New() 66 out := io.MultiWriter(out0, h) 67 68 writeUint32 := func(x uint32) { 69 assert(binary.Write(out, binary.LittleEndian, x) == nil) 70 } 71 72 writeUint32(currentVersion) 73 74 var flags uint32 75 if pw.SyncMarkers() { 76 flags |= flagSyncMarkers 77 } 78 writeUint32(flags) 79 80 // Write elemEndsEnds. 81 var sum uint32 82 for _, elems := range &pw.elems { 83 sum += uint32(len(elems)) 84 writeUint32(sum) 85 } 86 87 // Write elemEnds. 88 sum = 0 89 for _, elems := range &pw.elems { 90 for _, elem := range elems { 91 sum += uint32(len(elem)) 92 writeUint32(sum) 93 } 94 } 95 96 // Write elemData. 97 for _, elems := range &pw.elems { 98 for _, elem := range elems { 99 _, err := io.WriteString(out, elem) 100 assert(err == nil) 101 } 102 } 103 104 // Write fingerprint. 105 copy(fingerprint[:], h.Sum(nil)) 106 _, err := out0.Write(fingerprint[:]) 107 assert(err == nil) 108 109 return 110} 111 112// StringIdx adds a string value to the strings section, if not 113// already present, and returns its index. 114func (pw *PkgEncoder) StringIdx(s string) Index { 115 if idx, ok := pw.stringsIdx[s]; ok { 116 assert(pw.elems[RelocString][idx] == s) 117 return idx 118 } 119 120 idx := Index(len(pw.elems[RelocString])) 121 pw.elems[RelocString] = append(pw.elems[RelocString], s) 122 pw.stringsIdx[s] = idx 123 return idx 124} 125 126// NewEncoder returns an Encoder for a new element within the given 127// section, and encodes the given SyncMarker as the start of the 128// element bitstream. 129func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { 130 e := pw.NewEncoderRaw(k) 131 e.Sync(marker) 132 return e 133} 134 135// NewEncoderRaw returns an Encoder for a new element within the given 136// section. 137// 138// Most callers should use NewEncoder instead. 139func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { 140 idx := Index(len(pw.elems[k])) 141 pw.elems[k] = append(pw.elems[k], "") // placeholder 142 143 return Encoder{ 144 p: pw, 145 k: k, 146 Idx: idx, 147 } 148} 149 150// An Encoder provides methods for encoding an individual element's 151// bitstream data. 152type Encoder struct { 153 p *PkgEncoder 154 155 Relocs []RelocEnt 156 RelocMap map[RelocEnt]uint32 157 Data bytes.Buffer // accumulated element bitstream data 158 159 encodingRelocHeader bool 160 161 k RelocKind 162 Idx Index // index within relocation section 163} 164 165// Flush finalizes the element's bitstream and returns its Index. 166func (w *Encoder) Flush() Index { 167 var sb strings.Builder 168 169 // Backup the data so we write the relocations at the front. 170 var tmp bytes.Buffer 171 io.Copy(&tmp, &w.Data) 172 173 // TODO(mdempsky): Consider writing these out separately so they're 174 // easier to strip, along with function bodies, so that we can prune 175 // down to just the data that's relevant to go/types. 176 if w.encodingRelocHeader { 177 panic("encodingRelocHeader already true; recursive flush?") 178 } 179 w.encodingRelocHeader = true 180 w.Sync(SyncRelocs) 181 w.Len(len(w.Relocs)) 182 for _, rEnt := range w.Relocs { 183 w.Sync(SyncReloc) 184 w.Len(int(rEnt.Kind)) 185 w.Len(int(rEnt.Idx)) 186 } 187 188 io.Copy(&sb, &w.Data) 189 io.Copy(&sb, &tmp) 190 w.p.elems[w.k][w.Idx] = sb.String() 191 192 return w.Idx 193} 194 195func (w *Encoder) checkErr(err error) { 196 if err != nil { 197 errorf("unexpected encoding error: %v", err) 198 } 199} 200 201func (w *Encoder) rawUvarint(x uint64) { 202 var buf [binary.MaxVarintLen64]byte 203 n := binary.PutUvarint(buf[:], x) 204 _, err := w.Data.Write(buf[:n]) 205 w.checkErr(err) 206} 207 208func (w *Encoder) rawVarint(x int64) { 209 // Zig-zag encode. 210 ux := uint64(x) << 1 211 if x < 0 { 212 ux = ^ux 213 } 214 215 w.rawUvarint(ux) 216} 217 218func (w *Encoder) rawReloc(r RelocKind, idx Index) int { 219 e := RelocEnt{r, idx} 220 if w.RelocMap != nil { 221 if i, ok := w.RelocMap[e]; ok { 222 return int(i) 223 } 224 } else { 225 w.RelocMap = make(map[RelocEnt]uint32) 226 } 227 228 i := len(w.Relocs) 229 w.RelocMap[e] = uint32(i) 230 w.Relocs = append(w.Relocs, e) 231 return i 232} 233 234func (w *Encoder) Sync(m SyncMarker) { 235 if !w.p.SyncMarkers() { 236 return 237 } 238 239 // Writing out stack frame string references requires working 240 // relocations, but writing out the relocations themselves involves 241 // sync markers. To prevent infinite recursion, we simply trim the 242 // stack frame for sync markers within the relocation header. 243 var frames []string 244 if !w.encodingRelocHeader && w.p.syncFrames > 0 { 245 pcs := make([]uintptr, w.p.syncFrames) 246 n := runtime.Callers(2, pcs) 247 frames = fmtFrames(pcs[:n]...) 248 } 249 250 // TODO(mdempsky): Save space by writing out stack frames as a 251 // linked list so we can share common stack frames. 252 w.rawUvarint(uint64(m)) 253 w.rawUvarint(uint64(len(frames))) 254 for _, frame := range frames { 255 w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) 256 } 257} 258 259// Bool encodes and writes a bool value into the element bitstream, 260// and then returns the bool value. 261// 262// For simple, 2-alternative encodings, the idiomatic way to call Bool 263// is something like: 264// 265// if w.Bool(x != 0) { 266// // alternative #1 267// } else { 268// // alternative #2 269// } 270// 271// For multi-alternative encodings, use Code instead. 272func (w *Encoder) Bool(b bool) bool { 273 w.Sync(SyncBool) 274 var x byte 275 if b { 276 x = 1 277 } 278 err := w.Data.WriteByte(x) 279 w.checkErr(err) 280 return b 281} 282 283// Int64 encodes and writes an int64 value into the element bitstream. 284func (w *Encoder) Int64(x int64) { 285 w.Sync(SyncInt64) 286 w.rawVarint(x) 287} 288 289// Uint64 encodes and writes a uint64 value into the element bitstream. 290func (w *Encoder) Uint64(x uint64) { 291 w.Sync(SyncUint64) 292 w.rawUvarint(x) 293} 294 295// Len encodes and writes a non-negative int value into the element bitstream. 296func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } 297 298// Int encodes and writes an int value into the element bitstream. 299func (w *Encoder) Int(x int) { w.Int64(int64(x)) } 300 301// Len encodes and writes a uint value into the element bitstream. 302func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } 303 304// Reloc encodes and writes a relocation for the given (section, 305// index) pair into the element bitstream. 306// 307// Note: Only the index is formally written into the element 308// bitstream, so bitstream decoders must know from context which 309// section an encoded relocation refers to. 310func (w *Encoder) Reloc(r RelocKind, idx Index) { 311 w.Sync(SyncUseReloc) 312 w.Len(w.rawReloc(r, idx)) 313} 314 315// Code encodes and writes a Code value into the element bitstream. 316func (w *Encoder) Code(c Code) { 317 w.Sync(c.Marker()) 318 w.Len(c.Value()) 319} 320 321// String encodes and writes a string value into the element 322// bitstream. 323// 324// Internally, strings are deduplicated by adding them to the strings 325// section (if not already present), and then writing a relocation 326// into the element bitstream. 327func (w *Encoder) String(s string) { 328 w.StringRef(w.p.StringIdx(s)) 329} 330 331// StringRef writes a reference to the given index, which must be a 332// previously encoded string value. 333func (w *Encoder) StringRef(idx Index) { 334 w.Sync(SyncString) 335 w.Reloc(RelocString, idx) 336} 337 338// Strings encodes and writes a variable-length slice of strings into 339// the element bitstream. 340func (w *Encoder) Strings(ss []string) { 341 w.Len(len(ss)) 342 for _, s := range ss { 343 w.String(s) 344 } 345} 346 347// Value encodes and writes a constant.Value into the element 348// bitstream. 349func (w *Encoder) Value(val constant.Value) { 350 w.Sync(SyncValue) 351 if w.Bool(val.Kind() == constant.Complex) { 352 w.scalar(constant.Real(val)) 353 w.scalar(constant.Imag(val)) 354 } else { 355 w.scalar(val) 356 } 357} 358 359func (w *Encoder) scalar(val constant.Value) { 360 switch v := constant.Val(val).(type) { 361 default: 362 errorf("unhandled %v (%v)", val, val.Kind()) 363 case bool: 364 w.Code(ValBool) 365 w.Bool(v) 366 case string: 367 w.Code(ValString) 368 w.String(v) 369 case int64: 370 w.Code(ValInt64) 371 w.Int64(v) 372 case *big.Int: 373 w.Code(ValBigInt) 374 w.bigInt(v) 375 case *big.Rat: 376 w.Code(ValBigRat) 377 w.bigInt(v.Num()) 378 w.bigInt(v.Denom()) 379 case *big.Float: 380 w.Code(ValBigFloat) 381 w.bigFloat(v) 382 } 383} 384 385func (w *Encoder) bigInt(v *big.Int) { 386 b := v.Bytes() 387 w.String(string(b)) // TODO: More efficient encoding. 388 w.Bool(v.Sign() < 0) 389} 390 391func (w *Encoder) bigFloat(v *big.Float) { 392 b := v.Append(nil, 'p', -1) 393 w.String(string(b)) // TODO: More efficient encoding. 394} 395