1*1c12ee1eSDan Willemsen// Copyright 2019 The Go Authors. All rights reserved. 2*1c12ee1eSDan Willemsen// Use of this source code is governed by a BSD-style 3*1c12ee1eSDan Willemsen// license that can be found in the LICENSE file. 4*1c12ee1eSDan Willemsen 5*1c12ee1eSDan Willemsenpackage proto 6*1c12ee1eSDan Willemsen 7*1c12ee1eSDan Willemsenimport ( 8*1c12ee1eSDan Willemsen "google.golang.org/protobuf/encoding/protowire" 9*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/encoding/messageset" 10*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/order" 11*1c12ee1eSDan Willemsen "google.golang.org/protobuf/internal/pragma" 12*1c12ee1eSDan Willemsen "google.golang.org/protobuf/reflect/protoreflect" 13*1c12ee1eSDan Willemsen "google.golang.org/protobuf/runtime/protoiface" 14*1c12ee1eSDan Willemsen) 15*1c12ee1eSDan Willemsen 16*1c12ee1eSDan Willemsen// MarshalOptions configures the marshaler. 17*1c12ee1eSDan Willemsen// 18*1c12ee1eSDan Willemsen// Example usage: 19*1c12ee1eSDan Willemsen// 20*1c12ee1eSDan Willemsen// b, err := MarshalOptions{Deterministic: true}.Marshal(m) 21*1c12ee1eSDan Willemsentype MarshalOptions struct { 22*1c12ee1eSDan Willemsen pragma.NoUnkeyedLiterals 23*1c12ee1eSDan Willemsen 24*1c12ee1eSDan Willemsen // AllowPartial allows messages that have missing required fields to marshal 25*1c12ee1eSDan Willemsen // without returning an error. If AllowPartial is false (the default), 26*1c12ee1eSDan Willemsen // Marshal will return an error if there are any missing required fields. 27*1c12ee1eSDan Willemsen AllowPartial bool 28*1c12ee1eSDan Willemsen 29*1c12ee1eSDan Willemsen // Deterministic controls whether the same message will always be 30*1c12ee1eSDan Willemsen // serialized to the same bytes within the same binary. 31*1c12ee1eSDan Willemsen // 32*1c12ee1eSDan Willemsen // Setting this option guarantees that repeated serialization of 33*1c12ee1eSDan Willemsen // the same message will return the same bytes, and that different 34*1c12ee1eSDan Willemsen // processes of the same binary (which may be executing on different 35*1c12ee1eSDan Willemsen // machines) will serialize equal messages to the same bytes. 36*1c12ee1eSDan Willemsen // It has no effect on the resulting size of the encoded message compared 37*1c12ee1eSDan Willemsen // to a non-deterministic marshal. 38*1c12ee1eSDan Willemsen // 39*1c12ee1eSDan Willemsen // Note that the deterministic serialization is NOT canonical across 40*1c12ee1eSDan Willemsen // languages. It is not guaranteed to remain stable over time. It is 41*1c12ee1eSDan Willemsen // unstable across different builds with schema changes due to unknown 42*1c12ee1eSDan Willemsen // fields. Users who need canonical serialization (e.g., persistent 43*1c12ee1eSDan Willemsen // storage in a canonical form, fingerprinting, etc.) must define 44*1c12ee1eSDan Willemsen // their own canonicalization specification and implement their own 45*1c12ee1eSDan Willemsen // serializer rather than relying on this API. 46*1c12ee1eSDan Willemsen // 47*1c12ee1eSDan Willemsen // If deterministic serialization is requested, map entries will be 48*1c12ee1eSDan Willemsen // sorted by keys in lexographical order. This is an implementation 49*1c12ee1eSDan Willemsen // detail and subject to change. 50*1c12ee1eSDan Willemsen Deterministic bool 51*1c12ee1eSDan Willemsen 52*1c12ee1eSDan Willemsen // UseCachedSize indicates that the result of a previous Size call 53*1c12ee1eSDan Willemsen // may be reused. 54*1c12ee1eSDan Willemsen // 55*1c12ee1eSDan Willemsen // Setting this option asserts that: 56*1c12ee1eSDan Willemsen // 57*1c12ee1eSDan Willemsen // 1. Size has previously been called on this message with identical 58*1c12ee1eSDan Willemsen // options (except for UseCachedSize itself). 59*1c12ee1eSDan Willemsen // 60*1c12ee1eSDan Willemsen // 2. The message and all its submessages have not changed in any 61*1c12ee1eSDan Willemsen // way since the Size call. 62*1c12ee1eSDan Willemsen // 63*1c12ee1eSDan Willemsen // If either of these invariants is violated, 64*1c12ee1eSDan Willemsen // the results are undefined and may include panics or corrupted output. 65*1c12ee1eSDan Willemsen // 66*1c12ee1eSDan Willemsen // Implementations MAY take this option into account to provide 67*1c12ee1eSDan Willemsen // better performance, but there is no guarantee that they will do so. 68*1c12ee1eSDan Willemsen // There is absolutely no guarantee that Size followed by Marshal with 69*1c12ee1eSDan Willemsen // UseCachedSize set will perform equivalently to Marshal alone. 70*1c12ee1eSDan Willemsen UseCachedSize bool 71*1c12ee1eSDan Willemsen} 72*1c12ee1eSDan Willemsen 73*1c12ee1eSDan Willemsen// Marshal returns the wire-format encoding of m. 74*1c12ee1eSDan Willemsenfunc Marshal(m Message) ([]byte, error) { 75*1c12ee1eSDan Willemsen // Treat nil message interface as an empty message; nothing to output. 76*1c12ee1eSDan Willemsen if m == nil { 77*1c12ee1eSDan Willemsen return nil, nil 78*1c12ee1eSDan Willemsen } 79*1c12ee1eSDan Willemsen 80*1c12ee1eSDan Willemsen out, err := MarshalOptions{}.marshal(nil, m.ProtoReflect()) 81*1c12ee1eSDan Willemsen if len(out.Buf) == 0 && err == nil { 82*1c12ee1eSDan Willemsen out.Buf = emptyBytesForMessage(m) 83*1c12ee1eSDan Willemsen } 84*1c12ee1eSDan Willemsen return out.Buf, err 85*1c12ee1eSDan Willemsen} 86*1c12ee1eSDan Willemsen 87*1c12ee1eSDan Willemsen// Marshal returns the wire-format encoding of m. 88*1c12ee1eSDan Willemsenfunc (o MarshalOptions) Marshal(m Message) ([]byte, error) { 89*1c12ee1eSDan Willemsen // Treat nil message interface as an empty message; nothing to output. 90*1c12ee1eSDan Willemsen if m == nil { 91*1c12ee1eSDan Willemsen return nil, nil 92*1c12ee1eSDan Willemsen } 93*1c12ee1eSDan Willemsen 94*1c12ee1eSDan Willemsen out, err := o.marshal(nil, m.ProtoReflect()) 95*1c12ee1eSDan Willemsen if len(out.Buf) == 0 && err == nil { 96*1c12ee1eSDan Willemsen out.Buf = emptyBytesForMessage(m) 97*1c12ee1eSDan Willemsen } 98*1c12ee1eSDan Willemsen return out.Buf, err 99*1c12ee1eSDan Willemsen} 100*1c12ee1eSDan Willemsen 101*1c12ee1eSDan Willemsen// emptyBytesForMessage returns a nil buffer if and only if m is invalid, 102*1c12ee1eSDan Willemsen// otherwise it returns a non-nil empty buffer. 103*1c12ee1eSDan Willemsen// 104*1c12ee1eSDan Willemsen// This is to assist the edge-case where user-code does the following: 105*1c12ee1eSDan Willemsen// 106*1c12ee1eSDan Willemsen// m1.OptionalBytes, _ = proto.Marshal(m2) 107*1c12ee1eSDan Willemsen// 108*1c12ee1eSDan Willemsen// where they expect the proto2 "optional_bytes" field to be populated 109*1c12ee1eSDan Willemsen// if any only if m2 is a valid message. 110*1c12ee1eSDan Willemsenfunc emptyBytesForMessage(m Message) []byte { 111*1c12ee1eSDan Willemsen if m == nil || !m.ProtoReflect().IsValid() { 112*1c12ee1eSDan Willemsen return nil 113*1c12ee1eSDan Willemsen } 114*1c12ee1eSDan Willemsen return emptyBuf[:] 115*1c12ee1eSDan Willemsen} 116*1c12ee1eSDan Willemsen 117*1c12ee1eSDan Willemsen// MarshalAppend appends the wire-format encoding of m to b, 118*1c12ee1eSDan Willemsen// returning the result. 119*1c12ee1eSDan Willemsenfunc (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) { 120*1c12ee1eSDan Willemsen // Treat nil message interface as an empty message; nothing to append. 121*1c12ee1eSDan Willemsen if m == nil { 122*1c12ee1eSDan Willemsen return b, nil 123*1c12ee1eSDan Willemsen } 124*1c12ee1eSDan Willemsen 125*1c12ee1eSDan Willemsen out, err := o.marshal(b, m.ProtoReflect()) 126*1c12ee1eSDan Willemsen return out.Buf, err 127*1c12ee1eSDan Willemsen} 128*1c12ee1eSDan Willemsen 129*1c12ee1eSDan Willemsen// MarshalState returns the wire-format encoding of a message. 130*1c12ee1eSDan Willemsen// 131*1c12ee1eSDan Willemsen// This method permits fine-grained control over the marshaler. 132*1c12ee1eSDan Willemsen// Most users should use Marshal instead. 133*1c12ee1eSDan Willemsenfunc (o MarshalOptions) MarshalState(in protoiface.MarshalInput) (protoiface.MarshalOutput, error) { 134*1c12ee1eSDan Willemsen return o.marshal(in.Buf, in.Message) 135*1c12ee1eSDan Willemsen} 136*1c12ee1eSDan Willemsen 137*1c12ee1eSDan Willemsen// marshal is a centralized function that all marshal operations go through. 138*1c12ee1eSDan Willemsen// For profiling purposes, avoid changing the name of this function or 139*1c12ee1eSDan Willemsen// introducing other code paths for marshal that do not go through this. 140*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshal(b []byte, m protoreflect.Message) (out protoiface.MarshalOutput, err error) { 141*1c12ee1eSDan Willemsen allowPartial := o.AllowPartial 142*1c12ee1eSDan Willemsen o.AllowPartial = true 143*1c12ee1eSDan Willemsen if methods := protoMethods(m); methods != nil && methods.Marshal != nil && 144*1c12ee1eSDan Willemsen !(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) { 145*1c12ee1eSDan Willemsen in := protoiface.MarshalInput{ 146*1c12ee1eSDan Willemsen Message: m, 147*1c12ee1eSDan Willemsen Buf: b, 148*1c12ee1eSDan Willemsen } 149*1c12ee1eSDan Willemsen if o.Deterministic { 150*1c12ee1eSDan Willemsen in.Flags |= protoiface.MarshalDeterministic 151*1c12ee1eSDan Willemsen } 152*1c12ee1eSDan Willemsen if o.UseCachedSize { 153*1c12ee1eSDan Willemsen in.Flags |= protoiface.MarshalUseCachedSize 154*1c12ee1eSDan Willemsen } 155*1c12ee1eSDan Willemsen if methods.Size != nil { 156*1c12ee1eSDan Willemsen sout := methods.Size(protoiface.SizeInput{ 157*1c12ee1eSDan Willemsen Message: m, 158*1c12ee1eSDan Willemsen Flags: in.Flags, 159*1c12ee1eSDan Willemsen }) 160*1c12ee1eSDan Willemsen if cap(b) < len(b)+sout.Size { 161*1c12ee1eSDan Willemsen in.Buf = make([]byte, len(b), growcap(cap(b), len(b)+sout.Size)) 162*1c12ee1eSDan Willemsen copy(in.Buf, b) 163*1c12ee1eSDan Willemsen } 164*1c12ee1eSDan Willemsen in.Flags |= protoiface.MarshalUseCachedSize 165*1c12ee1eSDan Willemsen } 166*1c12ee1eSDan Willemsen out, err = methods.Marshal(in) 167*1c12ee1eSDan Willemsen } else { 168*1c12ee1eSDan Willemsen out.Buf, err = o.marshalMessageSlow(b, m) 169*1c12ee1eSDan Willemsen } 170*1c12ee1eSDan Willemsen if err != nil { 171*1c12ee1eSDan Willemsen return out, err 172*1c12ee1eSDan Willemsen } 173*1c12ee1eSDan Willemsen if allowPartial { 174*1c12ee1eSDan Willemsen return out, nil 175*1c12ee1eSDan Willemsen } 176*1c12ee1eSDan Willemsen return out, checkInitialized(m) 177*1c12ee1eSDan Willemsen} 178*1c12ee1eSDan Willemsen 179*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) { 180*1c12ee1eSDan Willemsen out, err := o.marshal(b, m) 181*1c12ee1eSDan Willemsen return out.Buf, err 182*1c12ee1eSDan Willemsen} 183*1c12ee1eSDan Willemsen 184*1c12ee1eSDan Willemsen// growcap scales up the capacity of a slice. 185*1c12ee1eSDan Willemsen// 186*1c12ee1eSDan Willemsen// Given a slice with a current capacity of oldcap and a desired 187*1c12ee1eSDan Willemsen// capacity of wantcap, growcap returns a new capacity >= wantcap. 188*1c12ee1eSDan Willemsen// 189*1c12ee1eSDan Willemsen// The algorithm is mostly identical to the one used by append as of Go 1.14. 190*1c12ee1eSDan Willemsenfunc growcap(oldcap, wantcap int) (newcap int) { 191*1c12ee1eSDan Willemsen if wantcap > oldcap*2 { 192*1c12ee1eSDan Willemsen newcap = wantcap 193*1c12ee1eSDan Willemsen } else if oldcap < 1024 { 194*1c12ee1eSDan Willemsen // The Go 1.14 runtime takes this case when len(s) < 1024, 195*1c12ee1eSDan Willemsen // not when cap(s) < 1024. The difference doesn't seem 196*1c12ee1eSDan Willemsen // significant here. 197*1c12ee1eSDan Willemsen newcap = oldcap * 2 198*1c12ee1eSDan Willemsen } else { 199*1c12ee1eSDan Willemsen newcap = oldcap 200*1c12ee1eSDan Willemsen for 0 < newcap && newcap < wantcap { 201*1c12ee1eSDan Willemsen newcap += newcap / 4 202*1c12ee1eSDan Willemsen } 203*1c12ee1eSDan Willemsen if newcap <= 0 { 204*1c12ee1eSDan Willemsen newcap = wantcap 205*1c12ee1eSDan Willemsen } 206*1c12ee1eSDan Willemsen } 207*1c12ee1eSDan Willemsen return newcap 208*1c12ee1eSDan Willemsen} 209*1c12ee1eSDan Willemsen 210*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) { 211*1c12ee1eSDan Willemsen if messageset.IsMessageSet(m.Descriptor()) { 212*1c12ee1eSDan Willemsen return o.marshalMessageSet(b, m) 213*1c12ee1eSDan Willemsen } 214*1c12ee1eSDan Willemsen fieldOrder := order.AnyFieldOrder 215*1c12ee1eSDan Willemsen if o.Deterministic { 216*1c12ee1eSDan Willemsen // TODO: This should use a more natural ordering like NumberFieldOrder, 217*1c12ee1eSDan Willemsen // but doing so breaks golden tests that make invalid assumption about 218*1c12ee1eSDan Willemsen // output stability of this implementation. 219*1c12ee1eSDan Willemsen fieldOrder = order.LegacyFieldOrder 220*1c12ee1eSDan Willemsen } 221*1c12ee1eSDan Willemsen var err error 222*1c12ee1eSDan Willemsen order.RangeFields(m, fieldOrder, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool { 223*1c12ee1eSDan Willemsen b, err = o.marshalField(b, fd, v) 224*1c12ee1eSDan Willemsen return err == nil 225*1c12ee1eSDan Willemsen }) 226*1c12ee1eSDan Willemsen if err != nil { 227*1c12ee1eSDan Willemsen return b, err 228*1c12ee1eSDan Willemsen } 229*1c12ee1eSDan Willemsen b = append(b, m.GetUnknown()...) 230*1c12ee1eSDan Willemsen return b, nil 231*1c12ee1eSDan Willemsen} 232*1c12ee1eSDan Willemsen 233*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) { 234*1c12ee1eSDan Willemsen switch { 235*1c12ee1eSDan Willemsen case fd.IsList(): 236*1c12ee1eSDan Willemsen return o.marshalList(b, fd, value.List()) 237*1c12ee1eSDan Willemsen case fd.IsMap(): 238*1c12ee1eSDan Willemsen return o.marshalMap(b, fd, value.Map()) 239*1c12ee1eSDan Willemsen default: 240*1c12ee1eSDan Willemsen b = protowire.AppendTag(b, fd.Number(), wireTypes[fd.Kind()]) 241*1c12ee1eSDan Willemsen return o.marshalSingular(b, fd, value) 242*1c12ee1eSDan Willemsen } 243*1c12ee1eSDan Willemsen} 244*1c12ee1eSDan Willemsen 245*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshalList(b []byte, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) { 246*1c12ee1eSDan Willemsen if fd.IsPacked() && list.Len() > 0 { 247*1c12ee1eSDan Willemsen b = protowire.AppendTag(b, fd.Number(), protowire.BytesType) 248*1c12ee1eSDan Willemsen b, pos := appendSpeculativeLength(b) 249*1c12ee1eSDan Willemsen for i, llen := 0, list.Len(); i < llen; i++ { 250*1c12ee1eSDan Willemsen var err error 251*1c12ee1eSDan Willemsen b, err = o.marshalSingular(b, fd, list.Get(i)) 252*1c12ee1eSDan Willemsen if err != nil { 253*1c12ee1eSDan Willemsen return b, err 254*1c12ee1eSDan Willemsen } 255*1c12ee1eSDan Willemsen } 256*1c12ee1eSDan Willemsen b = finishSpeculativeLength(b, pos) 257*1c12ee1eSDan Willemsen return b, nil 258*1c12ee1eSDan Willemsen } 259*1c12ee1eSDan Willemsen 260*1c12ee1eSDan Willemsen kind := fd.Kind() 261*1c12ee1eSDan Willemsen for i, llen := 0, list.Len(); i < llen; i++ { 262*1c12ee1eSDan Willemsen var err error 263*1c12ee1eSDan Willemsen b = protowire.AppendTag(b, fd.Number(), wireTypes[kind]) 264*1c12ee1eSDan Willemsen b, err = o.marshalSingular(b, fd, list.Get(i)) 265*1c12ee1eSDan Willemsen if err != nil { 266*1c12ee1eSDan Willemsen return b, err 267*1c12ee1eSDan Willemsen } 268*1c12ee1eSDan Willemsen } 269*1c12ee1eSDan Willemsen return b, nil 270*1c12ee1eSDan Willemsen} 271*1c12ee1eSDan Willemsen 272*1c12ee1eSDan Willemsenfunc (o MarshalOptions) marshalMap(b []byte, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) { 273*1c12ee1eSDan Willemsen keyf := fd.MapKey() 274*1c12ee1eSDan Willemsen valf := fd.MapValue() 275*1c12ee1eSDan Willemsen keyOrder := order.AnyKeyOrder 276*1c12ee1eSDan Willemsen if o.Deterministic { 277*1c12ee1eSDan Willemsen keyOrder = order.GenericKeyOrder 278*1c12ee1eSDan Willemsen } 279*1c12ee1eSDan Willemsen var err error 280*1c12ee1eSDan Willemsen order.RangeEntries(mapv, keyOrder, func(key protoreflect.MapKey, value protoreflect.Value) bool { 281*1c12ee1eSDan Willemsen b = protowire.AppendTag(b, fd.Number(), protowire.BytesType) 282*1c12ee1eSDan Willemsen var pos int 283*1c12ee1eSDan Willemsen b, pos = appendSpeculativeLength(b) 284*1c12ee1eSDan Willemsen 285*1c12ee1eSDan Willemsen b, err = o.marshalField(b, keyf, key.Value()) 286*1c12ee1eSDan Willemsen if err != nil { 287*1c12ee1eSDan Willemsen return false 288*1c12ee1eSDan Willemsen } 289*1c12ee1eSDan Willemsen b, err = o.marshalField(b, valf, value) 290*1c12ee1eSDan Willemsen if err != nil { 291*1c12ee1eSDan Willemsen return false 292*1c12ee1eSDan Willemsen } 293*1c12ee1eSDan Willemsen b = finishSpeculativeLength(b, pos) 294*1c12ee1eSDan Willemsen return true 295*1c12ee1eSDan Willemsen }) 296*1c12ee1eSDan Willemsen return b, err 297*1c12ee1eSDan Willemsen} 298*1c12ee1eSDan Willemsen 299*1c12ee1eSDan Willemsen// When encoding length-prefixed fields, we speculatively set aside some number of bytes 300*1c12ee1eSDan Willemsen// for the length, encode the data, and then encode the length (shifting the data if necessary 301*1c12ee1eSDan Willemsen// to make room). 302*1c12ee1eSDan Willemsenconst speculativeLength = 1 303*1c12ee1eSDan Willemsen 304*1c12ee1eSDan Willemsenfunc appendSpeculativeLength(b []byte) ([]byte, int) { 305*1c12ee1eSDan Willemsen pos := len(b) 306*1c12ee1eSDan Willemsen b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...) 307*1c12ee1eSDan Willemsen return b, pos 308*1c12ee1eSDan Willemsen} 309*1c12ee1eSDan Willemsen 310*1c12ee1eSDan Willemsenfunc finishSpeculativeLength(b []byte, pos int) []byte { 311*1c12ee1eSDan Willemsen mlen := len(b) - pos - speculativeLength 312*1c12ee1eSDan Willemsen msiz := protowire.SizeVarint(uint64(mlen)) 313*1c12ee1eSDan Willemsen if msiz != speculativeLength { 314*1c12ee1eSDan Willemsen for i := 0; i < msiz-speculativeLength; i++ { 315*1c12ee1eSDan Willemsen b = append(b, 0) 316*1c12ee1eSDan Willemsen } 317*1c12ee1eSDan Willemsen copy(b[pos+msiz:], b[pos+speculativeLength:]) 318*1c12ee1eSDan Willemsen b = b[:pos+msiz+mlen] 319*1c12ee1eSDan Willemsen } 320*1c12ee1eSDan Willemsen protowire.AppendVarint(b[:pos], uint64(mlen)) 321*1c12ee1eSDan Willemsen return b 322*1c12ee1eSDan Willemsen} 323